Caching Synapse metadata

If your Synapse source instance has a lot of tables, for example over 50 thousand, it may take a long time to execute the queries where we are reading database structure.

For this purpose, we have a special fast metadata read mode relying on the special cache tables you can build yourself if you have write access to your source database. You can execute the following script on the source (and you should re-run it when you want to refresh):

--WARNING
--WARNING - This script will DROP tables [dbo].[omniloader_cache_table_distribution] AND [dbo].[omniloader_cache_table_size]
--WARNING

SET NOCOUNT ON;
RAISERROR ('Creating table [dbo].[omniloader_cache_table_distribution]...', 0, 1) WITH NOWAIT

	IF (OBJECT_ID('dbo.omniloader_cache_table_distribution','U')) IS NOT NULL DROP TABLE dbo.omniloader_cache_table_distribution;
	CREATE TABLE dbo.omniloader_cache_table_distribution
	(
		table_schema			VARCHAR(512)	NOT NULL
	,	table_name				VARCHAR(512)	NOT NULL
	,	distribution_name		VARCHAR(512)	NULL
	,	distribution_ordinal	INT				NULL
	,	column_name				VARCHAR(512)	NULL
	)
	WITH
	(
		DISTRIBUTION = HASH(table_name)
	,	CLUSTERED INDEX (table_schema, table_name, distribution_ordinal)
	);

RAISERROR ('Table created.', 0, 1) WITH NOWAIT
RAISERROR ('Inserting data...', 0, 1) WITH NOWAIT

	INSERT INTO dbo.omniloader_cache_table_distribution
	SELECT
		s.[name]						AS table_schema
	,	t.[name]						AS table_name
	,	tdp.distribution_policy_desc	AS distribution_name
	,	cdp.distribution_ordinal		AS dist_ordinal
	,	c.[name]						AS dist_column_name
	FROM
					sys.schemas								AS s
		INNER JOIN	sys.tables								AS t	ON	t.[schema_id] = s.[schema_id]
		LEFT JOIN	sys.pdw_table_distribution_properties	AS tdp	ON	tdp.[object_id] = t.[object_id]
		LEFT JOIN	sys.pdw_column_distribution_properties	AS cdp	ON	cdp.[object_id] = tdp.[object_id]
																	AND	cdp.distribution_ordinal > 0
		LEFT JOIN	sys.columns								AS c	ON	c.[object_id] = cdp.[object_id]
																	AND	c.[column_id] = cdp.[column_id]
	WHERE 1=1;

RAISERROR ('Data inserted.', 0, 1) WITH NOWAIT
RAISERROR ('', 0, 1) WITH NOWAIT
RAISERROR ('Creating table [dbo].[omniloader_cache_table_size]...', 0, 1) WITH NOWAIT

	IF (OBJECT_ID('dbo.omniloader_cache_table_size','U')) IS NOT NULL DROP TABLE dbo.omniloader_cache_table_size;
	CREATE TABLE dbo.omniloader_cache_table_size
	(
		table_schema		VARCHAR(512)	NOT NULL
	,	table_name			VARCHAR(512)	NOT NULL
	,	partition_number	INT				NULL
	,	record_count		BIGINT			NULL
	,	byte_count			BIGINT			NULL
	)
	WITH
	(
		DISTRIBUTION = HASH(table_name)
	,	CLUSTERED INDEX (table_name, table_schema)
	);

RAISERROR ('Table created.', 0, 1) WITH NOWAIT
RAISERROR ('Inserting data...', 0, 1) WITH NOWAIT

	INSERT INTO dbo.omniloader_cache_table_size
	SELECT
		s.[name]									AS table_schema
	,	t.[name]									AS table_name
	,	nps.partition_number						AS partition_number
	,	SUM(nps.row_count)							AS record_count
	,	SUM(	nps.in_row_data_page_count
			+	nps.row_overflow_used_page_count
			+	nps.lob_used_page_count) * 8 * 1024	AS byte_count
	FROM
					sys.schemas							AS s
		INNER JOIN	sys.tables							AS t	ON	t.[schema_id] = s.[schema_id]
		LEFT JOIN	sys.indexes							AS i	ON	i.[object_id] = t.[object_id]
																AND	i.index_id <= 1
		LEFT JOIN	sys.pdw_table_mappings				AS tm	ON	tm.[object_id] = t.[object_id]
		LEFT JOIN	sys.pdw_nodes_tables				AS nt	ON	nt.[name] = tm.physical_name
		LEFT JOIN	sys.dm_pdw_nodes					AS pn	ON	pn.pdw_node_id = nt.pdw_node_id
																AND pn.[type] = 'COMPUTE'
		LEFT JOIN	sys.dm_pdw_nodes_db_partition_stats	AS nps	ON	nps.[object_id] = nt.[object_id]
																AND	nps.pdw_node_id = nt.pdw_node_id
																AND	nps.distribution_id = nt.distribution_id
	WHERE 1=1
	GROUP BY
		s.[name]
	,	t.[name]
	,	nps.partition_number;

RAISERROR ('Data inserted.', 0, 1) WITH NOWAIT
RAISERROR ('', 0, 1) WITH NOWAIT

Last updated