2009-11-06 111 views
10

我想做一些快速插入,但要避免在表中產生重複記錄。爲了便於討論,我們把這張表稱爲MarketPrices。我一直在嘗試下面兩種做法,但不知道該如何進行基準測試來判斷哪一種更快。(標題:SQL插入,但避免重複)

-- Insert the incoming quote unless a row with the same SecurityCode,
-- BuyPrice and SellPrice already exists. EXCEPT compares whole rows, so each
-- existing row is paired with both flag values (0 and 1); that way the value
-- of @IsMarketOpen cannot make an otherwise identical row look new.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
SELECT @SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen
EXCEPT
SELECT mp.SecurityCode, mp.BuyPrice, mp.SellPrice, flag.bool AS IsActive
FROM MarketPrices AS mp
CROSS JOIN (
    SELECT 0 AS bool
    UNION
    SELECT 1 AS bool
) AS flag

OR

-- Look up an existing row with the same security code and prices.
-- @MktId stays NULL when no such row exists.
DECLARE @MktId int
SET @MktId = (SELECT SecurityId FROM MarketPrices
       where SecurityCode = @SecurityCode
       and BuyPrice = @BuyPrice
       and SellPrice = @SellPrice)

-- No matching row was found: record the new price pair.
-- NOTE(review): the lookup and the insert are two separate statements, so
-- concurrent callers can both pass the check; wrap in a transaction if
-- concurrency matters.
IF (@MktId is NULL)
BEGIN
    INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
    VALUES
    (@SecurityCode,@BuyPrice, @SellPrice, @IsMarketOpen)
END

假設@whatever是在存儲過程中的輸入參數。

我希望能夠在BuyPrice或SellPrice或兩者都不同於以前的每一次發生時爲每個SecurityCode插入新記錄。我不關心IsMarketOpen。

有沒有什麼明顯愚蠢的關於上述任何一種方法?一個比另一個快嗎?

+3

記住,第二種方法應該被包含在一個事務中,否則你可能會遇到併發問題。 – 2009-11-06 16:37:57

+1

難道你不能直接創建一個唯一索引嗎?我沒有使用MS SQL的經驗,但我認爲它應該有這樣的功能 – 2009-11-06 16:41:19

+3

@valya:有趣的是,人們竟然會懷疑SQL Server連最簡單的事情都做不到。我甚至不確定,在*不*支持唯一索引的情況下是否有可能實現一個關係數據庫引擎。 – Tomalak 2009-11-06 16:50:31

回答

11

EDIT:爲了防止併發環境中的race conditions,請在相關子查詢或被EXCEPT的SELECT中使用WITH (UPDLOCK)。我在下面寫的測試腳本不需要它,因爲它使用的是只對當前連接可見的臨時表;但是在真實環境中對用戶表進行操作時,這是非常必要的。

MERGE不需要UPDLOCK


受mcl的回答(使用唯一索引並讓數據庫拋出錯誤)啓發,我決定對conditional inserts和try/catch做一次基準測試。

結果似乎表明條件插入優於try/catch,但具體情況可能因環境而異。這是一個非常簡單的場景(只有一列、表很小等),並且只在一臺機器上執行,等等。

下面是結果(SQL Server 2008中,構建10.0.1600.2):

duplicates (short table)  
    try/catch:    14440 milliseconds/100000 inserts 
    conditional insert:  2983 milliseconds/100000 inserts 
    except:     2966 milliseconds/100000 inserts 
    merge:      2983 milliseconds/100000 inserts 

uniques 
    try/catch:     3920 milliseconds/100000 inserts 
    conditional insert:  3860 milliseconds/100000 inserts 
    except:     3873 milliseconds/100000 inserts 
    merge:      3890 milliseconds/100000 inserts 

    straight insert:   3173 milliseconds/100000 inserts 

duplicates (tall table) 
    try/catch:    14436 milliseconds/100000 inserts 
    conditional insert:  3063 milliseconds/100000 inserts 
    except:     3063 milliseconds/100000 inserts 
    merge:      3030 milliseconds/100000 inserts 

注意,即使全部是不重複的插入,try/catch的開銷也比條件插入略高一些。我想知道這是否因版本、CPU、內核數量等而異。

我沒有對IF形式的條件插入做基準測試,只測了WHERE形式。我認爲IF變種會顯示出更多的開銷,因爲a)你會有兩個語句,b)你需要將這兩個語句包裝在一個事務中,並將隔離級別設置爲可序列化(!)。如果有人想要測試這一點,需要將臨時表改爲常規用戶表(可序列化不適用於本地臨時表)。

下面是腳本:

-- Tested on SQL Server 2008.
-- To run on SQL Server 2005, comment out the batches that use MERGE.
SET NOCOUNT ON;

-- Start from a clean slate: drop any scratch table left over in this session.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
BEGIN
    DROP TABLE #temp;
END;

-- The primary key on col1 is what makes a duplicate insert fail.
CREATE TABLE #temp (
    col1 INT PRIMARY KEY
);
GO

------------------------------------------------------- 

-- duplicate insert test against a table w/ 1 record 

------------------------------------------------------- 

-- Seed the table with the single key that the duplicate tests collide with.
INSERT INTO #temp (col1)
VALUES (1);
GO

-- Benchmark: unconditional insert of an already-present key, with the
-- resulting error caught and ignored on every iteration.
DECLARE @value INT, @attempts INT, @started DATETIME, @elapsed_ms INT;
SET @value = 1;
SET @attempts = 0;
SET @started = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;
    BEGIN TRY
        INSERT INTO #temp (col1)
        SELECT @value;
    END TRY
    BEGIN CATCH
        -- Deliberately empty: the failed insert is simply discarded.
    END CATCH;
END;

SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE());
RAISERROR('duplicates (short table), try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

-- Benchmark: insert guarded by WHERE NOT EXISTS, always hitting an existing key.
DECLARE @value INT, @attempts INT, @started DATETIME, @elapsed_ms INT;
SET @value = 1;
SET @attempts = 0;
SET @started = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;

    INSERT INTO #temp (col1)
    SELECT @value
    WHERE NOT EXISTS (
        SELECT *
        FROM #temp
        WHERE col1 = @value
    );
END;

SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE());
RAISERROR('duplicates (short table), conditional insert: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

-- Benchmark: insert filtered through EXCEPT against the table's existing keys,
-- always hitting an existing key.
DECLARE @value INT, @attempts INT, @started DATETIME, @elapsed_ms INT;
SET @value = 1;
SET @attempts = 0;
SET @started = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;

    INSERT INTO #temp (col1)
    SELECT @value
    EXCEPT
    SELECT col1 FROM #temp;
END;

SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE());
RAISERROR('duplicates (short table), except: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

-- Comment this batch out for SQL 2005 (it uses MERGE).
-- Benchmark: MERGE that inserts only when the key is not matched in the
-- target, always hitting an existing key.
DECLARE @value INT, @attempts INT, @started DATETIME, @elapsed_ms INT;
SET @value = 1;
SET @attempts = 0;
SET @started = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;

    MERGE #temp AS tgt
    USING (SELECT @value) AS src (col1)
        ON tgt.col1 = src.col1
    WHEN NOT MATCHED BY TARGET THEN
        INSERT (col1) VALUES (src.col1);
END;

SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE());
RAISERROR('duplicates (short table), merge: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

------------------------------------------------------- 

-- unique insert test against an initially empty table 

------------------------------------------------------- 

-- Baseline: unguarded insert of 100000 distinct keys into an empty table.
TRUNCATE TABLE #temp;

DECLARE @next_key INT, @started DATETIME, @elapsed_ms INT;
SET @next_key = 0;
SET @started = GETDATE();

WHILE @next_key < 100000
BEGIN
    SET @next_key = @next_key + 1;

    INSERT INTO #temp (col1)
    SELECT @next_key;
END;

SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE());
RAISERROR('uniques, straight insert: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @next_key) WITH NOWAIT;
GO

-- Benchmark: insert of 100000 distinct keys into an empty table, each insert
-- wrapped in TRY/CATCH with an empty handler.
TRUNCATE TABLE #temp;

DECLARE @next_key INT, @started DATETIME, @elapsed_ms INT;
SET @next_key = 0;
SET @started = GETDATE();

WHILE @next_key < 100000
BEGIN
    SET @next_key = @next_key + 1;
    BEGIN TRY
        INSERT INTO #temp (col1)
        SELECT @next_key;
    END TRY
    BEGIN CATCH
        -- Deliberately empty: any error raised by the insert is discarded.
    END CATCH;
END;

SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE());
RAISERROR('uniques, try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @next_key) WITH NOWAIT;
GO

-- Benchmark: WHERE NOT EXISTS guarded insert of 100000 distinct keys into an
-- empty table.
TRUNCATE TABLE #temp;

DECLARE @next_key INT, @started DATETIME, @elapsed_ms INT;
SET @next_key = 0;
SET @started = GETDATE();

WHILE @next_key < 100000
BEGIN
    SET @next_key = @next_key + 1;

    INSERT INTO #temp (col1)
    SELECT @next_key
    WHERE NOT EXISTS (
        SELECT *
        FROM #temp
        WHERE col1 = @next_key
    );
END;

SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE());
RAISERROR('uniques, conditional insert: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @next_key) WITH NOWAIT;
GO

-- Benchmark: EXCEPT-filtered insert of 100000 distinct keys into an empty table.
TRUNCATE TABLE #temp;

DECLARE @next_key INT, @started DATETIME, @elapsed_ms INT;
SET @next_key = 0;
SET @started = GETDATE();

WHILE @next_key < 100000
BEGIN
    SET @next_key = @next_key + 1;

    INSERT INTO #temp (col1)
    SELECT @next_key
    EXCEPT
    SELECT col1 FROM #temp;
END;

SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE());
RAISERROR('uniques, except: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @next_key) WITH NOWAIT;
GO

-- comment this batch out for SQL 2005
-- Benchmark: MERGE-based insert of 100000 distinct keys into an empty table.
truncate table #temp
declare @x int, @now datetime, @duration int
-- Start at 0, like the other "uniques" batches, so that keys 1..100000 are
-- inserted. Starting at 1 inserted only 99999 rows while the message still
-- reported 100000, and it left key 1 out of the table, so the first insert
-- of the following "tall table" duplicate tests was not a duplicate.
select @x = 0, @now = getdate()
while @x < 100000 begin
    set @x = @x+1
    merge #temp t using (select @x) s (col1) on t.col1 = s.col1 when not matched by target then insert values (col1);
end
set @duration = datediff(ms,@now,getdate())
raiserror('uniques, merge: %i milliseconds/%i inserts',-1,-1,@duration, @x) with nowait
go

------------------------------------------------------- 

-- duplicate insert test against a table w/ 100000 records 

------------------------------------------------------- 

-- Benchmark: unconditional insert of key 1 into the table left populated by
-- the preceding "uniques" batch, with any error caught and ignored.
DECLARE @value INT, @attempts INT, @started DATETIME, @elapsed_ms INT;
SET @value = 1;
SET @attempts = 0;
SET @started = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;
    BEGIN TRY
        INSERT INTO #temp (col1)
        SELECT @value;
    END TRY
    BEGIN CATCH
        -- Deliberately empty: the failed insert is simply discarded.
    END CATCH;
END;

SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE());
RAISERROR('duplicates (tall table), try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

-- Benchmark: WHERE NOT EXISTS guarded insert of key 1 against the populated
-- ("tall") table.
DECLARE @value INT, @attempts INT, @started DATETIME, @elapsed_ms INT;
SET @value = 1;
SET @attempts = 0;
SET @started = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;

    INSERT INTO #temp (col1)
    SELECT @value
    WHERE NOT EXISTS (
        SELECT *
        FROM #temp
        WHERE col1 = @value
    );
END;

SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE());
RAISERROR('duplicates (tall table), conditional insert: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

-- Benchmark: EXCEPT-filtered insert of key 1 against the populated ("tall") table.
DECLARE @value INT, @attempts INT, @started DATETIME, @elapsed_ms INT;
SET @value = 1;
SET @attempts = 0;
SET @started = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;

    INSERT INTO #temp (col1)
    SELECT @value
    EXCEPT
    SELECT col1 FROM #temp;
END;

SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE());
RAISERROR('duplicates (tall table), except: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

-- Comment this batch out for SQL 2005 (it uses MERGE).
-- Benchmark: MERGE-based insert of key 1 against the populated ("tall") table.
DECLARE @value INT, @attempts INT, @started DATETIME, @elapsed_ms INT;
SET @value = 1;
SET @attempts = 0;
SET @started = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;

    MERGE #temp AS tgt
    USING (SELECT @value) AS src (col1)
        ON tgt.col1 = src.col1
    WHEN NOT MATCHED BY TARGET THEN
        INSERT (col1) VALUES (src.col1);
END;

SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE());
RAISERROR('duplicates (tall table), merge: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO
+1

在這裏使用唯一索引的主要原因是爲了保證數據的完整性。我懷疑try/catch塊中失敗的插入不會成爲大多數應用程序的瓶頸,尤其是在嘗試插入重複記錄的情況並不多的時候(因爲您的基準測試顯示,在這種情況下各方法的性能相近)。但是我懷疑,一個沒有強制約束的數據模型遲早會出問題。另外,在SQL Server 2008上,我建議把MERGE也納入考慮,與其他策略一起比較。 – mlibby 2009-11-06 17:47:42

+1

@mcl 關於唯一索引,我完全同意:爲了數據完整性他應該建一個索引,而且如果想要合理的性能,他也需要一個索引。關於MERGE,我剛剛測試了它,它在所有場景中的表現都*非常*接近條件插入。 – 2009-11-06 17:51:51

+0

謝謝你們,我希望我能接受你們的答案。爲了數據完整性,我將在其上放置一個唯一的索引,然後使用條件插入,因爲它在性能和可讀性方面似乎是最好的。 – Ravi 2009-11-06 23:34:48

6

EDIT:爲防止race conditions在併發環境中,請在相關子查詢中使用WITH (UPDLOCK)


我認爲這將是標準的方法:

-- Insert the price pair only when no identical (code, buy, sell) row exists.
-- UPDLOCK alone takes no lock when the probe finds no row, so two concurrent
-- callers could both pass the NOT EXISTS check and both insert. HOLDLOCK
-- (serializable semantics) also locks the probed key range, so the second
-- caller waits until the first one has finished.
-- NOTE: "=" never matches NULL; nullable columns need explicit NULL handling.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
SELECT @SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen
WHERE NOT EXISTS (
    SELECT * FROM MarketPrices WITH (UPDLOCK, HOLDLOCK)
    WHERE SecurityCode = @SecurityCode
    AND BuyPrice = @BuyPrice
    AND SellPrice = @SellPrice
)

如果您的字段可以爲NULL,就必須在條件中加入對NULL的處理。

你的第一種方法很有趣,但EXCEPT的要求讓你不得不繞很多彎路。這個方法本質上是一樣的,但它可以讓你繞開列匹配的問題。

或者:

-- Insert the price pair only when EXCEPT finds no existing row with the same
-- (code, buy, sell) values; EXCEPT treats two NULLs as equal, so nullable
-- columns need no extra predicates. @IsMarketOpen is added outside the
-- comparison so it does not take part in duplicate detection.
-- UPDLOCK alone takes no lock when no matching row exists, so concurrent
-- callers could both insert; HOLDLOCK additionally locks the range that was
-- read until the statement's transaction ends.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
SELECT SecurityCode, BuyPrice, SellPrice, @IsMarketOpen
FROM (
    SELECT @SecurityCode, @BuyPrice, @SellPrice
    EXCEPT
    SELECT SecurityCode, BuyPrice, SellPrice FROM MarketPrices WITH (UPDLOCK, HOLDLOCK)
) a (SecurityCode, BuyPrice, SellPrice)

在這種情況下使用EXCEPT的好處是,它可以處理NULL值,而無需您進行任何額外的編碼。爲了在第一個例子中達到同樣的效果,你需要對每一對列逐一寫出NULL判斷和相等判斷。

你的第二種方法是可以的,但你不需要變量。看到Tomalak的解決方案,他很好地清理了它。此外,如果這是一個問題,您需要明確處理併發插入的可能性。

3

我任何時候都會選擇語義清晰的解決方案。你的兩個方案在我看來都相當晦澀(雖然後者比前者好)。

-- Insert the price pair only if no row with the same security code, buy
-- price and sell price is present yet.
IF NOT EXISTS (
    SELECT 1
    FROM MarketPrices AS mp
    WHERE mp.SecurityCode = @SecurityCode
      AND mp.BuyPrice = @BuyPrice
      AND mp.SellPrice = @SellPrice
)
BEGIN
    INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
    VALUES (@SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen)
END

只要在SecurityCode, BuyPrice, SellPrice上建有複合索引,EXISTS查詢應該相當快。

基準測試是一個計時WHILE循環的問題,我會說。測試一下,看看你自己。

2

另一種選擇:在相關字段(SecurityCode,BuyPrice,SellPrice)上創建一個唯一索引,發出一個簡單的插入,並讓數據庫確定記錄是否重複。插入嘗試插入重複時會失敗。

使用代碼(無論是外部語言還是SQL處理程序)來保證唯一性不夠嚴格,最終會導致您希望防止的重複。

+0

我在想你可能是對的,特別是當涉及到併發插入時 – Ravi 2009-11-06 16:57:17

+0

我很想看到這個基準。假設一個唯一的索引,它有更多的開銷:條件插入的WHERE子句,或TRY/CATCH塊的異常處理?如果您希望99%的插入*爲*不重複,我想TRY/CATCH塊可能更有效。 – 2009-11-06 16:59:03

+0

我打算做到這一點,當我回家 - 將在這裏發佈結果 – Ravi 2009-11-06 17:04:46

0

如果你不需要捕獲重複記錄,你總是可以創建一個「忽略重複」選項(IGNORE_DUP_KEY)設置爲true的唯一索引。SQL Server將爲您處理此問題。

1

下面我把「Only inserting a row if it's not already there」這個問題中得票最高的幾個答案的做法,加入到了Peter Radocchia的出色回答(的基準測試)之中。

結論是:在沒有實際衝突時(即你預期衝突非常罕見,這就是uniques場景),race safe with try/catch技術比race safe with updlock, holdlock技術略快(〜1%);而在總是發生衝突時(這就是duplicates場景),它要稍慢一些(〜20%)。這裏並沒有把鎖升級等複雜問題考慮在內。

下面是結果(SQL Server 2014,build 12.0.2000.8):

duplicates (short table)  
    try/catch:      15546 milliseconds/100000 inserts 
    conditional insert:    1460 milliseconds/100000 inserts 
    except:       1490 milliseconds/100000 inserts 
    merge:       1420 milliseconds/100000 inserts 
    race safe with try/catch:   1650 milliseconds/100000 inserts 
    race safe with updlock, holdlock: 1330 milliseconds/100000 inserts 

uniques 
    try/catch:      2266 milliseconds/100000 inserts 
    conditional insert:    2156 milliseconds/100000 inserts 
    except:       2273 milliseconds/100000 inserts 
    merge:       2136 milliseconds/100000 inserts 
    race safe with try/catch:   2400 milliseconds/100000 inserts 
    race safe with updlock, holdlock: 2430 milliseconds/100000 inserts 

    straight insert:     1686 milliseconds/100000 inserts 

duplicates (tall table) 
    try/catch:      15826 milliseconds/100000 inserts 
    conditional insert:    1530 milliseconds/100000 inserts 
    except:       1506 milliseconds/100000 inserts 
    merge:       1443 milliseconds/100000 inserts 
    race safe with try/catch:   1636 milliseconds/100000 inserts 
    race safe with updlock, holdlock: 1426 milliseconds/100000 inserts 

重複(短表)部分:

-- Benchmark: NOT EXISTS guarded insert wrapped in TRY/CATCH, always hitting
-- an existing key. Error 2627 (primary key / unique constraint violation) is
-- swallowed; any other error is re-thrown.
DECLARE @value INT, @attempts INT, @started DATETIME, @elapsed_ms INT;
SET @value = 1;
SET @attempts = 0;
SET @started = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;
    BEGIN TRY
        INSERT INTO #temp (col1)
        SELECT @value
        WHERE NOT EXISTS (
            SELECT *
            FROM #temp
            WHERE col1 = @value
        );
    END TRY
    BEGIN CATCH
        IF ERROR_NUMBER() <> 2627
            THROW;
    END CATCH;
END;

SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE());
RAISERROR('duplicates (short table), race safe with try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

-- Benchmark: NOT EXISTS guarded insert whose existence probe takes
-- UPDLOCK + HOLDLOCK, always hitting an existing key.
DECLARE @value INT, @attempts INT, @started DATETIME, @elapsed_ms INT;
SET @value = 1;
SET @attempts = 0;
SET @started = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;

    INSERT INTO #temp (col1)
    SELECT @value
    WHERE NOT EXISTS (
        SELECT *
        FROM #temp WITH (UPDLOCK, HOLDLOCK)
        WHERE col1 = @value
    );
END;

SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE());
RAISERROR('duplicates (short table), race safe with updlock, holdlock: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

不重複部分

-- Benchmark: NOT EXISTS guarded insert of 100000 distinct keys into an empty
-- table, wrapped in TRY/CATCH. Error 2627 (primary key / unique constraint
-- violation) is swallowed; any other error is re-thrown.
TRUNCATE TABLE #temp;

DECLARE @next_key INT, @started DATETIME, @elapsed_ms INT;
SET @next_key = 0;
SET @started = GETDATE();

WHILE @next_key < 100000
BEGIN
    SET @next_key = @next_key + 1;
    BEGIN TRY
        INSERT INTO #temp (col1)
        SELECT @next_key
        WHERE NOT EXISTS (
            SELECT *
            FROM #temp
            WHERE col1 = @next_key
        );
    END TRY
    BEGIN CATCH
        IF ERROR_NUMBER() <> 2627
            THROW;
    END CATCH;
END;

SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE());
RAISERROR('uniques, race safe with try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @next_key) WITH NOWAIT;
GO

-- Benchmark: NOT EXISTS guarded insert of 100000 distinct keys into an empty
-- table, with the existence probe taking UPDLOCK + HOLDLOCK.
TRUNCATE TABLE #temp;

DECLARE @next_key INT, @started DATETIME, @elapsed_ms INT;
SET @next_key = 0;
SET @started = GETDATE();

WHILE @next_key < 100000
BEGIN
    SET @next_key = @next_key + 1;

    INSERT INTO #temp (col1)
    SELECT @next_key
    WHERE NOT EXISTS (
        SELECT *
        FROM #temp WITH (UPDLOCK, HOLDLOCK)
        WHERE col1 = @next_key
    );
END;

SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE());
RAISERROR('uniques, race safe with updlock, holdlock: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @next_key) WITH NOWAIT;
GO

重複(高表)部分

-- Benchmark: NOT EXISTS guarded insert of key 1 against the populated
-- ("tall") table, wrapped in TRY/CATCH. Error 2627 (primary key / unique
-- constraint violation) is swallowed; any other error is re-thrown.
DECLARE @value INT, @attempts INT, @started DATETIME, @elapsed_ms INT;
SET @value = 1;
SET @attempts = 0;
SET @started = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;
    BEGIN TRY
        INSERT INTO #temp (col1)
        SELECT @value
        WHERE NOT EXISTS (
            SELECT *
            FROM #temp
            WHERE col1 = @value
        );
    END TRY
    BEGIN CATCH
        IF ERROR_NUMBER() <> 2627
            THROW;
    END CATCH;
END;

SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE());
RAISERROR('duplicates (tall table), race safe with try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

-- Benchmark: NOT EXISTS guarded insert of key 1 against the populated
-- ("tall") table, with the existence probe taking UPDLOCK + HOLDLOCK.
DECLARE @value INT, @attempts INT, @started DATETIME, @elapsed_ms INT;
SET @value = 1;
SET @attempts = 0;
SET @started = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;

    INSERT INTO #temp (col1)
    SELECT @value
    WHERE NOT EXISTS (
        SELECT *
        FROM #temp WITH (UPDLOCK, HOLDLOCK)
        WHERE col1 = @value
    );
END;

SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE());
RAISERROR('duplicates (tall table), race safe with updlock, holdlock: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO