2
我是 SQL Server 的新手,正在處理一個記錄日誌的項目。問題:在 SQL Server 中,如果值不存在就插入,並將插入後產生的 ID 寫入另一個表。
該表的 URL 列(varchar(max))含有重複值。我另外創建了一個表,只存儲不重複的 URL,並把對應的 ID 存儲在主表中。
這是我用來完成這項工作的存儲過程:
-- Staging table for the raw log file: one row per log line.
-- The raw text of the line lands in [value] and is parsed by later statements.
CREATE TABLE #TestData (
    logdate DATETIME,
    id CHAR(15),
    value VARCHAR(max)
);

-- BULK INSERT does not accept a variable as the file name, so the statement
-- is assembled as dynamic SQL. @pfile is supplied from outside this section.
-- Single quotes inside the path are doubled so that a path containing an
-- apostrophe neither breaks the statement nor lets extra SQL be injected.
-- firstrow = 2 skips the first line of the file; fields are tab-separated
-- and rows end with a line feed.
DECLARE @sql VARCHAR(max);
SET @sql = 'BULK INSERT [dbo].[#TestData] FROM ''' + REPLACE(@pfile, '''', '''''') + ''' WITH (
firstrow = 2,
fieldterminator = ''\t'',
rowterminator = ''\n''
)';
EXEC (@sql);
-- Collect the distinct, non-empty url="..." values found in the staged lines.
-- Lines without a url key (or with an empty url) are left out, so #testurl
-- never holds a NULL row.
CREATE TABLE #testurl (fld VARCHAR(max));

INSERT INTO #testurl (fld)
SELECT DISTINCT parsed.url_text
FROM (
    SELECT
        CASE
            -- Only slice when the key is present; PATINDEX returns 0 otherwise.
            WHEN PATINDEX('%url="%', td.value) > 0
                THEN NULLIF(
                    SUBSTRING(
                        td.value,
                        PATINDEX('%url="%', td.value) + 5,
                        -- A quote is appended to the searched text so a line
                        -- with no closing quote yields the rest of the line
                        -- instead of a negative length (a runtime error).
                        CHARINDEX('"', td.value + '"', PATINDEX('%url="%', td.value) + 5)
                            - (PATINDEX('%url="%', td.value) + 5)
                    ),
                    ''
                )
        END AS url_text
    FROM #TestData AS td
) AS parsed
WHERE parsed.url_text IS NOT NULL;
-- Register every staged URL that the url table does not contain yet.
-- NULL candidates are skipped explicitly: "u.urlvalue = NULL" is never true,
-- so without this filter NOT EXISTS would let a NULL row through on each run.
-- NOTE(review): this check-then-insert is not protected against two loads
-- running at the same time -- confirm whether concurrent loads can happen.
INSERT INTO url (urlvalue)
SELECT tu.fld
FROM #testurl AS tu
WHERE tu.fld IS NOT NULL
  AND NOT EXISTS (
        SELECT 1
        FROM url AS u
        WHERE u.urlvalue = tu.fld
      );
-- Parse every raw log line in #TestData into the columns of logmst.
-- Each key=value / key="value" token is located once with PATINDEX in the
-- CROSS APPLY at the bottom; the SELECT list slices the text after the key.
-- A column is NULL when its key is missing or its value is empty.
--
-- Differences from the previous version of this statement:
--   * the searched text gets a terminator appended (a quote or a space), so a
--     token that runs to the end of the line no longer produces a negative
--     SUBSTRING length, which would abort the whole INSERT;
--   * recv_bytes / sent_bytes keep only the digits (the old length included
--     the first non-digit character, and the result was NULL when the digits
--     ran to the end of the line);
--   * status is parsed only when the status=" key is present.
--
-- NOTE(review): the urlid lookup compares against url.urlvalue. If that column
-- is VARCHAR(max) it cannot be an index key column, so the lookup probably has
-- to scan url; an indexed hash or prefix column on url would be the usual
-- remedy. TODO confirm against the url table definition.
INSERT INTO [Cyberoam].[dbo].[logmst] (
    [DATETIME],
    c1c2,
    c3c4,
    c5c6,
    c7,
    c8to12,
    [STATUS],
    username,
    usergrp,
    application,
    category,
    categorytype,
    urlid,
    recvbytes,
    sentbytes,
    fw_rule_id,
    srcip,
    dstip,
    contenttype
)
SELECT
    td.logdate AS [DATETIME],
    -- log_id is sliced by fixed offsets into 2 + 2 + 2 + 1 + 5 characters.
    SUBSTRING(td.value, p.log_id_pos + 7, 2) AS c1c2,
    SUBSTRING(td.value, p.log_id_pos + 9, 2) AS c3c4,
    SUBSTRING(td.value, p.log_id_pos + 11, 2) AS c5c6,
    SUBSTRING(td.value, p.log_id_pos + 13, 1) AS c7,
    SUBSTRING(td.value, p.log_id_pos + 14, 5) AS c8to12,
    -- status: 'Allow' -> '1', 'Deny' -> '0', anything else -> NULL.
    CASE
        WHEN p.status_pos > 0
            THEN CASE SUBSTRING(td.value, p.status_pos + 8,
                                CHARINDEX('"', p.value_with_quote, p.status_pos + 8) - (p.status_pos + 8))
                     WHEN 'Allow' THEN '1'
                     WHEN 'Deny' THEN '0'
                     ELSE NULL
                 END
        ELSE NULL
    END AS [STATUS],
    CASE
        WHEN p.user_name_pos > 0
            THEN NULLIF(SUBSTRING(td.value, p.user_name_pos + 11,
                                  CHARINDEX('"', p.value_with_quote, p.user_name_pos + 11) - (p.user_name_pos + 11)), '')
        ELSE NULL
    END AS username,
    CASE
        WHEN p.user_gp_pos > 0
            THEN NULLIF(SUBSTRING(td.value, p.user_gp_pos + 9,
                                  CHARINDEX('"', p.value_with_quote, p.user_gp_pos + 9) - (p.user_gp_pos + 9)), '')
        ELSE NULL
    END AS usergrp,
    -- application="..." takes precedence; application_name="..." is used only
    -- when the application key is absent.
    CASE
        WHEN p.application_pos > 0
            THEN NULLIF(SUBSTRING(td.value, p.application_pos + 13,
                                  CHARINDEX('"', p.value_with_quote, p.application_pos + 13) - (p.application_pos + 13)), '')
        WHEN p.application_name_pos > 0
            THEN NULLIF(SUBSTRING(td.value, p.application_name_pos + 18,
                                  CHARINDEX('"', p.value_with_quote, p.application_name_pos + 18) - (p.application_name_pos + 18)), '')
        ELSE NULL
    END AS application,
    CASE
        WHEN p.category_pos > 0
            THEN NULLIF(SUBSTRING(td.value, p.category_pos + 10,
                                  CHARINDEX('"', p.value_with_quote, p.category_pos + 10) - (p.category_pos + 10)), '')
        ELSE NULL
    END AS category,
    CASE
        WHEN p.category_type_pos > 0
            THEN NULLIF(SUBSTRING(td.value, p.category_type_pos + 15,
                                  CHARINDEX('"', p.value_with_quote, p.category_type_pos + 15) - (p.category_type_pos + 15)), '')
        ELSE NULL
    END AS categorytype,
    -- Translate the url text into its key in the url table (NULL if the line
    -- has no url or the url is not registered).
    (
        SELECT u.urlid
        FROM url AS u
        WHERE u.urlvalue = (
            CASE
                WHEN p.url_pos > 0
                    THEN NULLIF(SUBSTRING(td.value, p.url_pos + 5,
                                          CHARINDEX('"', p.value_with_quote, p.url_pos + 5) - (p.url_pos + 5)), '')
                ELSE NULL
            END
        )
    ) AS urlid,
    -- Byte counters: take the run of digits after the key. Only the first 20
    -- characters are inspected; the appended space guarantees PATINDEX finds a
    -- non-digit, and "- 1" drops that non-digit from the result.
    CASE
        WHEN p.recv_bytes_pos > 0
            THEN NULLIF(SUBSTRING(td.value, p.recv_bytes_pos + 11,
                                  PATINDEX('%[^0-9]%', SUBSTRING(td.value, p.recv_bytes_pos + 11, 20) + ' ') - 1), '')
        ELSE NULL
    END AS recvbytes,
    CASE
        WHEN p.sent_bytes_pos > 0
            THEN NULLIF(SUBSTRING(td.value, p.sent_bytes_pos + 11,
                                  PATINDEX('%[^0-9]%', SUBSTRING(td.value, p.sent_bytes_pos + 11, 20) + ' ') - 1), '')
        ELSE NULL
    END AS sentbytes,
    -- Unquoted tokens end at the next space (or at the end of the line).
    CASE
        WHEN p.fw_rule_id_pos > 0
            THEN NULLIF(SUBSTRING(td.value, p.fw_rule_id_pos + 11,
                                  CHARINDEX(' ', p.value_with_space, p.fw_rule_id_pos + 11) - (p.fw_rule_id_pos + 11)), '')
        ELSE NULL
    END AS fw_rule_id,
    CASE
        WHEN p.src_ip_pos > 0
            THEN NULLIF(SUBSTRING(td.value, p.src_ip_pos + 7,
                                  CHARINDEX(' ', p.value_with_space, p.src_ip_pos + 7) - (p.src_ip_pos + 7)), '')
        ELSE NULL
    END AS srcip,
    CASE
        WHEN p.dst_ip_pos > 0
            THEN NULLIF(SUBSTRING(td.value, p.dst_ip_pos + 7,
                                  CHARINDEX(' ', p.value_with_space, p.dst_ip_pos + 7) - (p.dst_ip_pos + 7)), '')
        ELSE NULL
    END AS dstip,
    CASE
        WHEN p.contenttype_pos > 0
            THEN NULLIF(SUBSTRING(td.value, p.contenttype_pos + 13,
                                  CHARINDEX('"', p.value_with_quote, p.contenttype_pos + 13) - (p.contenttype_pos + 13)), '')
        ELSE NULL
    END AS contenttype
FROM #TestData AS td
CROSS APPLY (
    -- Per-line key positions (0 = key not present) and the two
    -- terminator-padded copies of the line used for end-of-token searches.
    SELECT
        td.value + '"' AS value_with_quote,
        td.value + ' ' AS value_with_space,
        PATINDEX('%log_id=%', td.value) AS log_id_pos,
        PATINDEX('%status="%', td.value) AS status_pos,
        PATINDEX('%user_name="%', td.value) AS user_name_pos,
        PATINDEX('%user_gp="%', td.value) AS user_gp_pos,
        PATINDEX('%application="%', td.value) AS application_pos,
        PATINDEX('%application_name="%', td.value) AS application_name_pos,
        PATINDEX('%category="%', td.value) AS category_pos,
        PATINDEX('%category_type="%', td.value) AS category_type_pos,
        PATINDEX('%url="%', td.value) AS url_pos,
        PATINDEX('%recv_bytes=%', td.value) AS recv_bytes_pos,
        PATINDEX('%sent_bytes=%', td.value) AS sent_bytes_pos,
        PATINDEX('%fw_rule_id=%', td.value) AS fw_rule_id_pos,
        PATINDEX('%src_ip=%', td.value) AS src_ip_pos,
        PATINDEX('%dst_ip=%', td.value) AS dst_ip_pos,
        PATINDEX('%contenttype="%', td.value) AS contenttype_pos
) AS p;
此代碼工作正常,但問題是:隨著 URL 表不斷增大,對一個約 5k 條記錄的文件執行批量插入所花費的時間會逐漸增加(目前已增加到約 20 分鐘)。之後還會插入許多此類文件。
我需要您的建議:怎樣才能提高性能?或者我是否有做錯的地方?
非常感謝您的幫助。謝謝!
注意:如果 URL 列放在同一個表中,則只需要大約 4-7 秒。URL 列放在同一個表中還是分開存放,會對性能造成差異嗎?
如果您只運行BULK INSERT而沒有後續的表格構建邏輯,需要多長時間? – toddsonofodin
如果把 URL 字段保留在同一個表中,只需要 4-7 秒;但我認爲這會影響我的 SELECT 語句的性能,所以才把它分開。 – Kai
value varchar(max) 長於 7971 個字符的記錄所占的百分比是多少?這個字段的平均長度是多少? – Stoleg