2014-10-30 205 views
1

我正在爲數據倉庫生成 [Dim_Calendar] 表。下面是我寫的查詢,執行並插入 27k 行需要 29 秒。如果可能,我想優化它。我知道 WHILE 循環對性能不利,但不知道該用什麼取代它來得到相同的結果。如何優化這個 INSERT 語句?

我正在使用SQL Server 2012 BI版本。

-- Loads [dbo].[Dim_Calendar] one day at a time, from the day after the latest
-- stored date (or 1940-01-01 when no date is stored) through 31 December of the
-- current year.

-- Drop the primary key constraint if an index with that name exists on
-- Dim_Calendar; it is re-created after the inserts at the end of the script.
IF EXISTS(SELECT * FROM sys.indexes WHERE name='PK_Dim_Calendar_1' AND object_id = OBJECT_ID('Dim_Calendar')) 
BEGIN 
    ALTER TABLE [dbo].[Dim_Calendar] DROP CONSTRAINT [PK_Dim_Calendar_1] 
END 

SET DATEFIRST 1--Sets Monday as 1st day of the week. 
-- @today: current date and time; only its year is used below.
DECLARE @today DATETIME = (SELECT GETDATE()) 
-- @start: the day after the latest date already stored; NULL when MAX(date) is NULL.
DECLARE @start DATETIME = DATEADD(dd, 1, (SELECT Max(date) FROM Dim_Calendar)) 

-- No date stored yet: insert a single 1900-01-01 row labelled 'Unknown'
-- and begin the real calendar at 1940-01-01.
-- NOTE(review): values are positional (no column list); the column order is
-- presumably the same as in the SELECT inside the loop below - confirm against the table definition.
IF @start IS NULL 
BEGIN 
    INSERT INTO [dbo].[Dim_Calendar] 
    VALUES (19000101, '1900-01-01', 'Monday', 1 ,'Unknown', 1, 'January', 1900, 1) 
    SET @start = '1940-01-01' 
END 


-- @end: 31 December of the current year.
DECLARE @end DATETIME = (SELECT DATEFROMPARTS(YEAR(@today), 12, 31)) 


-- Row-by-row load: one single-row INSERT per day, then advance @start by one day.
WHILE @start <= @end 
    BEGIN 

     -- Values inserted for the day in @start, in order:
     --   yyyymmdd as an integer, the date itself, weekday name, week number,
     --   'w/c ' + first day of that week (per DATEFIRST) in style 3 (dd/mm/yy),
     --   month number, month name, year, quarter.
     INSERT INTO [dbo].[Dim_Calendar] 
      SELECT 
       YEAR(@start) * 10000 + MONTH(@start) * 100 + DAY(@start) 
       ,@start 
       ,DATENAME(dw, @start) 
       ,DATEPART(wk, @start) 
       ,'w/c ' + CONVERT(char(8), DATEADD(dd, 1 - DATEPART(dw, @start), @start), 3) 
       ,DATEPART(mm, @start) 
       ,DATENAME(MONTH, @start) 
       ,YEAR(@start) 
       ,DATEPART(QQ, @start) 
     SET @start = DATEADD(dd, 1, @start) 
    END 


-- Re-create the clustered primary key on FullDateID now that the rows are in place.
ALTER TABLE [dbo].[Dim_Calendar] ADD CONSTRAINT [PK_Dim_Calendar_1] PRIMARY KEY CLUSTERED 
(
    [FullDateID] ASC 
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] 
GO 

回答

2

查詢很慢,因爲您使用循環一次創建一個值。

處理值序列時,準備一張「數字」(Numbers)表會很有用,表中存放從 1 到您所需上限的連續整數。通過查詢或聯接該表,您可以生成序列、找出缺口等。Aaron Bertrand 寫過一系列文章(series of articles),介紹如何創建 Numbers 表並用它生成一組日期。

假設你已經有了這樣一個Numbers(n),創建一個日曆表很簡單,只要:

-- Calendar range: 30 years beginning at @start; @end is the last day of that range.
DECLARE @start DATE = '2005-07-01';
DECLARE @end   DATE = DATEADD(DAY, -1, DATEADD(YEAR, 30, @start));

-- Number of days in the range, counting both @start and @end.
DECLARE @days INT = 1 + DATEDIFF(DAY, @start, @end);

-- Turn the first @days numbers into consecutive dates and store them in a new
-- table dbo.Calendar (assumes dbo.Numbers holds n = 1, 2, 3, ...).
SELECT TOP (@days)
    CAST(DATEADD(DAY, n - 1, @start) AS DATE) AS d
INTO dbo.Calendar
FROM dbo.Numbers
ORDER BY n;

就你的情況而言,SELECT 部分會是這樣:

-- Set-based replacement for the WHILE loop: derive one date per number from
-- dbo.Numbers, then compute the calendar attributes for every date in one pass.
-- Expects @start and @days to be declared beforehand.
;WITH Dates (d)
AS (
    SELECT TOP (@days)
        d = CONVERT(DATE, DATEADD(DAY, n - 1, @start))
    FROM dbo.Numbers
    -- Order by n, not by the computed d: the result is the same because d grows
    -- with n, but ordering by the expression forces it to be evaluated for every
    -- row of dbo.Numbers and sorted, instead of reading the first @days rows in
    -- index order.
    ORDER BY n)
SELECT
    YEAR(d) * 10000 + MONTH(d) * 100 + DAY(d)    -- yyyymmdd as an integer
    ,d                                           -- the date itself
    ,DATENAME(dw, d)                             -- weekday name
    ,DATEPART(wk, d)                             -- week number
    -- 'w/c ' + first day of the week (depends on SET DATEFIRST), style 3 = dd/mm/yy
    ,'w/c ' + CONVERT(char(8), DATEADD(dd, 1 - DATEPART(dw, d), d), 3)
    ,DATEPART(mm, d)                             -- month number
    ,DATENAME(MONTH, d)                          -- month name
    ,YEAR(d)                                     -- year
    ,DATEPART(QQ, d)                             -- quarter
FROM Dates;

要生成 Numbers 表,可以使用下面的語句:

-- Create dbo.Numbers with a single INT column n numbered 1, 2, 3, ...
-- The self cross join of sys.all_objects is only a row source; TOP caps the
-- result at 1,000,000 rows (fewer if the cross join yields fewer).
SELECT TOP (1000000)
    CAST(ROW_NUMBER() OVER (ORDER BY a.[object_id]) AS INT) AS n
INTO dbo.Numbers
FROM sys.all_objects AS a
CROSS JOIN sys.all_objects AS b
OPTION (MAXDOP 1);

-- Unique clustered index on n.
CREATE UNIQUE CLUSTERED INDEX n
    ON dbo.Numbers (n);
2

這應該適合你(而且快得多):

-- Recursive CTE that yields one row per day from @start through @end, then
-- computes the calendar attributes for each day.
-- Expects @start and @end to be declared beforehand.
;WITH [dates]
AS
    (SELECT @start AS [date]                      -- anchor: first day
    UNION ALL
    SELECT DATEADD(DAY, 1, [date]) AS [date]      -- next day
    FROM [dates]
    WHERE [date] < @end)                          -- stops the recursion at @end

SELECT
     YEAR([date]) * 10000 + MONTH([date]) * 100 + DAY([date])    -- yyyymmdd as an integer
       ,[date]
       ,DATENAME(dw, [date])                                     -- weekday name
       ,DATEPART(wk, [date])                                     -- week number
       -- 'w/c ' + first day of the week (depends on SET DATEFIRST), style 3 = dd/mm/yy
       ,'w/c ' + CONVERT(char(8), DATEADD(dd, 1 - DATEPART(dw, [date]), [date]), 3)
       ,DATEPART(mm, [date])                                     -- month number
       ,DATENAME(MONTH, [date])                                  -- month name
       ,YEAR([date])                                             -- year
       ,DATEPART(QQ, [date])                                     -- quarter
FROM [dates]
-- 0 = no recursion limit. A fixed cap fails the whole statement as soon as the
-- range is longer than the cap (1940-01-01 to the end of the current year
-- exceeds 32,747 days from 2029 on); the WHERE predicate above already
-- guarantees termination.
OPTION (MAXRECURSION 0);
+0

根據 [Aaron Bertrand 的基準測試](http://sqlperformance.com/2013/01/t-sql-queries/generate-a-set-2),這種做法在數據量小時比 Numbers 表慢 14 倍,數據量大時慢 35 倍。你只是用一個循環替換了另一個循環,仍然是一次生成一個值,而且得不到索引的好處 – 2014-10-30 10:24:23

+0

@PanagiotisKanavos OP 要的是加快插入速度的方法。這可能不是最快的方法,但在我的開發機上只花了 643 毫秒就完成了,比他的 29 秒快得多。 – 2014-10-30 10:34:29

0
Try this 

-- Loads [dbo].[Dim_Calendar] with one row per day, from the day after the latest
-- stored date (or 1940-01-01 when no date is stored) through 31 December of the
-- current year, using a single INSERT fed by a recursive CTE instead of a WHILE loop.

-- Drop the primary key constraint if it exists; it is re-created after the load.
-- The table is schema-qualified everywhere so the check, the read and the
-- inserts all refer to the same object regardless of the caller's default schema.
IF EXISTS (SELECT *
           FROM sys.indexes
           WHERE name = 'PK_Dim_Calendar_1'
             AND object_id = OBJECT_ID(N'[dbo].[Dim_Calendar]'))
    BEGIN
        ALTER TABLE [dbo].[Dim_Calendar]
            DROP CONSTRAINT [PK_Dim_Calendar_1];
    END

SET DATEFIRST 1; --Sets Monday as 1st day of the week.

-- @today: current date and time; only its year is used below.
DECLARE @today DATETIME = GETDATE();
-- @start: the day after the latest stored date; NULL when MAX([date]) is NULL.
DECLARE @start DATETIME = DATEADD(dd, 1, (SELECT MAX([date])
                                          FROM [dbo].[Dim_Calendar]));

-- No date stored yet: insert a single 1900-01-01 row labelled 'Unknown'
-- and begin the real calendar at 1940-01-01.
-- NOTE(review): values are positional; add an explicit column list once the
-- table definition is confirmed.
IF @start IS NULL
    BEGIN
        INSERT INTO [dbo].[Dim_Calendar]
        VALUES (19000101,
                '1900-01-01',
                'Monday',
                1,
                'Unknown',
                1,
                'January',
                1900,
                1);

        SET @start = '1940-01-01';
    END

-- @end: 31 December of the current year.
DECLARE @end DATETIME = DATEFROMPARTS(YEAR(@today), 12, 31);

-- T yields one row per day from @start through @end.
WITH T (start)
    AS (SELECT @start
        UNION ALL
        -- DATEADD rather than "start + 1": integer addition only works on
        -- DATETIME and breaks if the variables are ever changed to DATE.
        SELECT DATEADD(dd, 1, start)
        FROM T
        WHERE T.start < @end)
INSERT INTO [dbo].[Dim_Calendar]
SELECT YEAR(start) * 10000 + MONTH(start) * 100 + DAY(start), -- yyyymmdd as an integer
       start,
       DATENAME(dw, start),                                    -- weekday name
       DATEPART(wk, start),                                    -- week number
       -- 'w/c ' + first day of the week (per DATEFIRST), style 3 = dd/mm/yy
       'w/c '
       + CONVERT(CHAR(8), DATEADD(dd, 1 - DATEPART(dw, start), start), 3),
       DATEPART(mm, start),                                    -- month number
       DATENAME(MONTH, start),                                 -- month name
       YEAR(start),                                            -- year
       DATEPART(QQ, start)                                     -- quarter
FROM T
-- 0 = no recursion limit; the WHERE predicate in T guarantees termination.
OPTION (MAXRECURSION 0);

-- Re-create the clustered primary key on FullDateID now that the rows are in place.
ALTER TABLE [dbo].[Dim_Calendar]
    ADD CONSTRAINT [PK_Dim_Calendar_1] PRIMARY KEY CLUSTERED ([FullDateID] ASC)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]

GO
+0

爲什麼以及這個答案如何幫助OP實現目標?請解釋您的答案,不僅是爲了OP,而且是爲了進一步的迴應。謝謝。欲瞭解更多信息,請閱讀[答] – Pred 2014-10-30 10:49:49

+0

我們的最終目標是消除 while 循環。看實際的代碼可以發現,@start 的值是逐次遞增的,這正是這段代碼的關鍵部分。我們可以藉助 CTE 而不是 while 循環來做到這一點,從而加速插入語句。 – StackUser 2014-10-30 10:58:38

+0

這是史蒂夫福特發佈的同樣的遞歸CTE。 – 2014-10-30 11:08:00