用維度(Dim)填充間隙。我在 SQL Server 2008 中有一個數據表,如下所示:
#data
---------------
Account AccountType
---------------
1 2
2 0
3 5
4 2
5 1
6 5
AccountType 2 是標題(header),5 是總計。這意味着類型 2 的賬戶必須向下查找下一個類型爲 1 或 0 的賬戶,以確定其 Dim 值是 1 還是 0;類型 5 的總計賬戶必須向上查找最接近的 1 或 0 來確定其 Dim 值。類型爲 1 或 0 的賬戶直接以自身的類型作爲 Dim 值。
類型 2 的賬戶會連續出現,形成「島嶼」(island),所以只檢查 RowNumber + 1 是不夠的;類型 5 也是同樣的情況。
我已經使用 CTE 得到了下面這個賬戶島嶼表(T3),但找不到一種快速的方法,從這裏得到所有賬戶的最終結果:Account、AccountType、Dim。
T3
-------------------
StartRow EndRow AccountType Dim
-------------------
1 1 2 0
2 2 0 0
3 3 5 0
4 4 2 1
5 5 0 1
6 6 5 1
下面的代碼是 MS T-SQL,把它全部複製粘貼即可運行。CTE 查詢語句中的最後一次連接非常緩慢,即使只有 500 行也需要 30 秒,而我需要處理 100,000 行。我已經完成了一個基於遊標的解決方案,它可以在 10-20 秒內完成這項工作;還有一個快速的遞歸 CTE 解決方案,可在 5 秒內處理 100,000 行,但它的表現依賴於 #data 表的碎片化程度。我應該補充一點:這是實際問題的簡化版本,實際問題中需要考慮的維度更多;但對這個簡化問題有效的方法,對實際問題同樣有效。
無論如何,有沒有一種使用連接或其他基於集合(set-based)的方案來快速做到這一點的方法?
-- Session setup for the islands demo: silence row-count messages and
-- (re)create the #data work table from scratch.
SET NOCOUNT ON

-- Remove any copy left over from an earlier run in this session so the
-- script can be executed repeatedly.
IF OBJECT_ID('tempdb..#data') IS NOT NULL
BEGIN
    DROP TABLE #data
END

CREATE TABLE #data
(
    Account     INTEGER IDENTITY(1,1), -- auto-numbered from 1 in steps of 1
    AccountType INTEGER                -- 2 = header, 5 = total, 0/1 = account whose type is its Dim
)
BEGIN -- TEST DATA
    -- Generates @MaxDataRows rows in a repeating pattern of ten:
    --   positions 0-2 -> type 2 (header island)
    --   positions 3-7 -> type 0 in the first half of the data, type 1 in the second half
    --   positions 8-9 -> type 5 (total island)
    DECLARE @MaxDataRows INTEGER = 50 -- Change here to check performance
    DECLARE @Counter INTEGER = 0
    DECLARE @Type INTEGER

    WHILE (@Counter < @MaxDataRows)
    BEGIN
        SET @Type =
            CASE
                WHEN @Counter % 10 < 3 THEN 2
                WHEN @Counter % 10 >= 8 THEN 5
                -- Only positions 3-7 reach this point; the half-way split picks the Dim value.
                WHEN @Counter < @MaxDataRows / 2.0 THEN 0
                ELSE 1
            END

        -- Account is an IDENTITY column, so only AccountType is supplied.
        INSERT INTO #data (AccountType)
        VALUES (@Type)

        SET @Counter += 1
    END
END -- TEST DATA END
-- Gaps-and-islands query: collapse consecutive rows of the same AccountType
-- into ranges, derive a Dim value per range, then expand the ranges back to
-- one row per account.
;WITH groupIds_cte AS
(
    -- Within one AccountType, ROW_NUMBER() and Account advance together while
    -- the rows are consecutive, so their difference is constant per island.
    SELECT
        src.Account,
        src.AccountType,
        ROW_NUMBER() OVER (PARTITION BY src.AccountType ORDER BY src.Account) - src.Account AS GroupId
    FROM #data AS src
),
islandRanges_cte AS
(
    -- One row per island: its first and last Account and the shared AccountType.
    SELECT
        MIN(g.Account) AS StartRow,
        MAX(g.Account) AS EndRow,
        g.AccountType
    FROM groupIds_cte AS g
    GROUP BY
        g.AccountType,
        g.GroupId
),
T3 AS
(
    -- Type 2 islands take the AccountType of the island that starts directly after them.
    SELECT
        hdr.StartRow,
        hdr.EndRow,
        hdr.AccountType,
        nxt.AccountType AS Dim
    FROM islandRanges_cte AS hdr
    INNER JOIN islandRanges_cte AS nxt
        ON nxt.StartRow = hdr.EndRow + 1
    WHERE hdr.AccountType = 2

    UNION ALL

    -- Type 5 islands take the AccountType of the island that ends directly before them.
    SELECT
        tot.StartRow,
        tot.EndRow,
        tot.AccountType,
        prv.AccountType AS Dim
    FROM islandRanges_cte AS tot
    INNER JOIN islandRanges_cte AS prv
        ON prv.EndRow = tot.StartRow - 1
    WHERE tot.AccountType = 5

    UNION ALL

    -- Type 0 and type 1 islands carry their own type as Dim.
    SELECT
        own.StartRow,
        own.EndRow,
        own.AccountType,
        own.AccountType AS Dim
    FROM islandRanges_cte AS own
    WHERE own.AccountType IN (0, 1)
),
T4 AS
(
    -- Both end points of every island as individual rows.
    -- Not referenced by the final SELECT; kept for the debugging query below.
    SELECT
        flat.Account,
        flat.Dim
    FROM (
        SELECT
            edge.FlattenRow AS Account,
            isl.StartRow,
            isl.EndRow,
            isl.Dim
        FROM T3 AS isl
        CROSS APPLY (VALUES (isl.StartRow), (isl.EndRow)) AS edge (FlattenRow)
    ) AS flat
)
-- Debugging aids: swap the final SELECT for one of these to inspect the intermediate steps.
--SELECT * FROM T3 ORDER BY StartRow
--SELECT * FROM T4 ORDER BY Account
-- Final correct result but very very slow: every account is range-matched against every island.
SELECT
    acct.Account,
    acct.AccountType,
    isl.Dim
FROM #data AS acct
INNER JOIN T3 AS isl
    ON acct.Account >= isl.StartRow
   AND acct.Account <= isl.EndRow
ORDER BY acct.Account
編輯:附上計時測試
-- Timing harness setup: creates the tables that collect run times across
-- repeated executions of this script in the same session.
SET NOCOUNT ON

-- #times holds one row per timed method per run. It is deliberately NOT
-- dropped between runs so results accumulate. The existence check must look
-- at #times itself: checking another table either skips the CREATE while
-- #times is missing or attempts it while #times already exists.
IF OBJECT_ID('tempdb..#times') IS NULL
    CREATE TABLE #times
    (
        RecId             INTEGER IDENTITY(1,1),
        Batch             INTEGER,       -- run number, taken from #batch
        Method            NVARCHAR(255), -- short name of the timed approach
        MethodDescription NVARCHAR(255),
        RunTime           INTEGER        -- elapsed milliseconds
    )

-- #batch is a run counter: one row is added per execution of the script.
IF OBJECT_ID('tempdb..#batch') IS NULL
    CREATE TABLE #batch
    (
        Batch INTEGER IDENTITY(1,1),
        Bit   BIT
    )

-- Register this run; the timing inserts read MAX(Batch) to tag their rows.
INSERT INTO #batch ([Bit])
VALUES (0)
-- Work tables for the timing comparison. Both are rebuilt on every run so
-- each run measures the same data.

-- #data: the individual account numbers to look up.
IF OBJECT_ID('tempdb..#data') IS NOT NULL
BEGIN
    DROP TABLE #data
END

CREATE TABLE #data
(
    Account INTEGER
)

CREATE NONCLUSTERED INDEX data_account_index
    ON #data (Account)

-- #islands: inclusive account ranges and the Dim value that applies to each.
IF OBJECT_ID('tempdb..#islands') IS NOT NULL
BEGIN
    DROP TABLE #islands
END

CREATE TABLE #islands
(
    AccountFrom INTEGER, -- first account in the range
    AccountTo   INTEGER, -- last account in the range
    Dim         INTEGER
)

-- All three columns are index keys, so the range lookups can be answered from the index alone.
CREATE NONCLUSTERED INDEX islands_from_index
    ON #islands (AccountFrom, AccountTo, Dim)
BEGIN -- TEST DATA
    -- Number up to 100000 rows. master..spt_values cross-joined with itself is
    -- presumably just a convenient row source; its contents are not used
    -- beyond ordering the row numbers.
    -- NOTE(review): TOP has no ORDER BY, so which row numbers are kept is not
    -- formally guaranteed - confirm the result is 1..100000 on your server.
    INSERT INTO #data (Account)
    SELECT TOP (100000)
        ROW_NUMBER() OVER (ORDER BY v1.number) AS N
    FROM master..spt_values AS v1
    CROSS JOIN master..spt_values AS v2

    -- NTILE(10) splits the accounts into ten contiguous buckets; each bucket
    -- becomes one island whose Dim is the bucket number.
    INSERT INTO #islands (AccountFrom, AccountTo, Dim)
    SELECT
        MIN(tiled.Account) AS Start,
        MAX(tiled.Account),
        tiled.Grp
    FROM (
        SELECT
            d.Account,
            NTILE(10) OVER (ORDER BY d.Account) AS Grp
        FROM #data AS d
    ) AS tiled
    GROUP BY tiled.Grp
    ORDER BY Start
END -- TEST DATA END
--SELECT * FROM #data
--SELECT * FROM #islands
--PRINT CONVERT(varchar(20),DATEDIFF(MS,@RunDate,GETDATE()))+' ms Sub Query'
-- Timed comparison of three ways to attach a Dim to every account.
-- Each section restarts the clock, runs one query, then logs the elapsed
-- milliseconds to #times under the current batch number.
DECLARE @RunDate datetime

-- Method 1: correlated scalar subquery in the SELECT list.
SET @RunDate = GETDATE()

SELECT
    d.Account,
    (
        SELECT isl.Dim
        FROM #islands AS isl
        WHERE d.Account BETWEEN isl.AccountFrom AND isl.AccountTo
    ) AS Dim
FROM #data AS d

INSERT INTO #times (Batch, Method, MethodDescription, RunTime)
SELECT MAX(b.Batch), 'subquery', '', DATEDIFF(MS, @RunDate, GETDATE())
FROM #batch AS b

-- Method 2: CROSS APPLY against the island ranges.
SET @RunDate = GETDATE()

SELECT
    d.Account,
    hit.Dim
FROM #data AS d
CROSS APPLY
(
    SELECT isl.Dim
    FROM #islands AS isl
    WHERE d.Account BETWEEN isl.AccountFrom AND isl.AccountTo
) AS hit

INSERT INTO #times (Batch, Method, MethodDescription, RunTime)
SELECT MAX(b.Batch), 'crossapply', '', DATEDIFF(MS, @RunDate, GETDATE())
FROM #batch AS b

-- Method 3: plain inner join on the range predicate.
SET @RunDate = GETDATE()

SELECT
    d.Account,
    isl.Dim
FROM #data AS d
INNER JOIN #islands AS isl
    ON d.Account BETWEEN isl.AccountFrom AND isl.AccountTo

INSERT INTO #times (Batch, Method, MethodDescription, RunTime)
SELECT MAX(b.Batch), 'join', '', DATEDIFF(MS, @RunDate, GETDATE())
FROM #batch AS b
-- Method 4: expand every island into its individual accounts with a
-- recursive CTE, without reading #data at all.
SET @RunDate = GETDATE()

;WITH cte AS
(
    -- Anchor: both end points of every island, each with Counting = 1.
    -- NOTE(review): an island where AccountFrom = AccountTo yields two
    -- identical anchor rows - confirm single-row islands cannot occur.
    SELECT
        edge.Account,
        isl.AccountFrom,
        isl.AccountTo,
        isl.Dim,
        1 AS Counting
    FROM #islands AS isl
    CROSS APPLY (VALUES (isl.AccountFrom), (isl.AccountTo)) AS edge (Account)

    UNION ALL

    -- Step: move one account forward while staying strictly between the end
    -- points, so only the AccountFrom anchor row actually recurses.
    SELECT
        walk.Account + 1,
        walk.AccountFrom,
        walk.AccountTo,
        walk.Dim,
        walk.Counting + 1
    FROM cte AS walk
    WHERE walk.Account + 1 > walk.AccountFrom
      AND walk.Account + 1 < walk.AccountTo
)
SELECT
    Account,
    Dim,
    Counting
FROM cte
OPTION (MAXRECURSION 32767) -- raises the recursion limit above the default

INSERT INTO #times (Batch, Method, MethodDescription, RunTime)
SELECT MAX(b.Batch), 'recursivecte', '', DATEDIFF(MS, @RunDate, GETDATE())
FROM #batch AS b
可以從 #times 表中查詢,查看各個方法的運行時間 :)
如果您有可運行的代碼,請將其發佈在您的問題中。 –
我會試着發佈一個最小可運行示例!謝謝 – CodeMonkey
我編輯了這個問題,以更好地反映我在找什麼,幷包含示例代碼。謝謝! – CodeMonkey