Caching Synapse metadata
If your Synapse source instance has a lot of tables, for example over 50 thousand, it may take a long time to execute the queries where we are reading database structure.
For this purpose, we have a special fast metadata read mode relying on the special cache tables you can build yourself if you have write access to your source database. You can execute the following script on the source (and you should re-run it when you want to refresh):
--WARNING
--WARNING - This script will DROP tables [dbo].[omniloader_cache_table_distribution] AND [dbo].[omniloader_cache_table_size]
--WARNING
SET NOCOUNT ON;
RAISERROR ('Creating table [dbo].[omniloader_cache_table_distribution]...', 0, 1) WITH NOWAIT
IF (OBJECT_ID('dbo.omniloader_cache_table_distribution','U')) IS NOT NULL DROP TABLE dbo.omniloader_cache_table_distribution;
CREATE TABLE dbo.omniloader_cache_table_distribution
(
table_schema VARCHAR(512) NOT NULL
, table_name VARCHAR(512) NOT NULL
, distribution_name VARCHAR(512) NULL
, distribution_ordinal INT NULL
, column_name VARCHAR(512) NULL
)
WITH
(
DISTRIBUTION = HASH(table_name)
, CLUSTERED INDEX (table_schema, table_name, distribution_ordinal)
);
RAISERROR ('Table created.', 0, 1) WITH NOWAIT
RAISERROR ('Inserting data...', 0, 1) WITH NOWAIT
INSERT INTO dbo.omniloader_cache_table_distribution
SELECT
s.[name] AS table_schema
, t.[name] AS table_name
, tdp.distribution_policy_desc AS distribution_name
, cdp.distribution_ordinal AS dist_ordinal
, c.[name] AS dist_column_name
FROM
sys.schemas AS s
INNER JOIN sys.tables AS t ON t.[schema_id] = s.[schema_id]
LEFT JOIN sys.pdw_table_distribution_properties AS tdp ON tdp.[object_id] = t.[object_id]
LEFT JOIN sys.pdw_column_distribution_properties AS cdp ON cdp.[object_id] = tdp.[object_id]
AND cdp.distribution_ordinal > 0
LEFT JOIN sys.columns AS c ON c.[object_id] = cdp.[object_id]
AND c.[column_id] = cdp.[column_id]
WHERE 1=1;
RAISERROR ('Data inserted.', 0, 1) WITH NOWAIT
RAISERROR ('', 0, 1) WITH NOWAIT
RAISERROR ('Creating table [dbo].[omniloader_cache_table_size]...', 0, 1) WITH NOWAIT
IF (OBJECT_ID('dbo.omniloader_cache_table_size','U')) IS NOT NULL DROP TABLE dbo.omniloader_cache_table_size;
CREATE TABLE dbo.omniloader_cache_table_size
(
table_schema VARCHAR(512) NOT NULL
, table_name VARCHAR(512) NOT NULL
, partition_number INT NULL
, record_count BIGINT NULL
, byte_count BIGINT NULL
)
WITH
(
DISTRIBUTION = HASH(table_name)
, CLUSTERED INDEX (table_name, table_schema)
);
RAISERROR ('Table created.', 0, 1) WITH NOWAIT
RAISERROR ('Inserting data...', 0, 1) WITH NOWAIT
INSERT INTO dbo.omniloader_cache_table_size
SELECT
s.[name] AS table_schema
, t.[name] AS table_name
, nps.partition_number AS partition_number
, SUM(nps.row_count) AS record_count
, SUM( nps.in_row_data_page_count
+ nps.row_overflow_used_page_count
+ nps.lob_used_page_count) * 8 * 1024 AS byte_count
FROM
sys.schemas AS s
INNER JOIN sys.tables AS t ON t.[schema_id] = s.[schema_id]
LEFT JOIN sys.indexes AS i ON i.[object_id] = t.[object_id]
AND i.index_id <= 1
LEFT JOIN sys.pdw_table_mappings AS tm ON tm.[object_id] = t.[object_id]
LEFT JOIN sys.pdw_nodes_tables AS nt ON nt.[name] = tm.physical_name
LEFT JOIN sys.dm_pdw_nodes AS pn ON pn.pdw_node_id = nt.pdw_node_id
AND pn.[type] = 'COMPUTE'
LEFT JOIN sys.dm_pdw_nodes_db_partition_stats AS nps ON nps.[object_id] = nt.[object_id]
AND nps.pdw_node_id = nt.pdw_node_id
AND nps.distribution_id = nt.distribution_id
WHERE 1=1
GROUP BY
s.[name]
, t.[name]
, nps.partition_number;
RAISERROR ('Data inserted.', 0, 1) WITH NOWAIT
RAISERROR ('', 0, 1) WITH NOWAITLast updated