2014-03-27 56 views
2

我是 SQL Server 的新手,正在處理一個記錄日誌的項目。問題:在 SQL Server 中,如果記錄不存在則插入,並將插入後得到的 ID 寫入另一個表中。

該表的 URL 列(varchar(max))有大量重複值。因此我創建了另一個表,只存儲不重複的 URL,而在主表中只存儲對應的 URL ID。

下面是我用來實現這一點的存儲過程:

-- Staging table for the raw log file; it is filled by the BULK INSERT that
-- follows and read by the parsing statements further down.
CREATE TABLE #TestData
(
    logdate DATETIME,
    id      CHAR(15),
    value   VARCHAR(MAX)  -- raw key=value payload that is sliced apart later
)

-- Load the log file named by @pfile into #TestData.
-- BULK INSERT only accepts a literal file name, so the statement is built as
-- dynamic SQL and run with EXEC.
DECLARE @sql VARCHAR(max) 

-- @pfile is spliced into a quoted literal: double every single quote in it so
-- that a path containing ' cannot terminate the literal early (which would
-- either break the statement or allow SQL injection).
-- firstrow = 2 skips the first line of the file (presumably a header row).
SET @sql = 'BULK INSERT [dbo].[#TestData] FROM ''' + REPLACE(@pfile, '''', '''''') + ''' WITH (
    firstrow = 2, 
    fieldterminator = ''\t'', 
    rowterminator = ''\n'' 
    )' 

EXEC (@sql) 

-- Scratch table holding the distinct url="..." values found in this file.
CREATE TABLE #testurl (fld VARCHAR(max))

-- For each staged line, take the text between url=" and the next double quote.
-- Lines without a url=" marker, and empty URLs, yield NULL.
INSERT INTO #testurl (fld)
SELECT DISTINCT
    CASE
        WHEN marker.url_pos > 0
            THEN NULLIF(
                     SUBSTRING(
                         td.value,
                         marker.url_pos + 5,
                         CHARINDEX('"', td.value, marker.url_pos + 5) - (marker.url_pos + 5)
                     ),
                     ''
                 )
    END
FROM #TestData AS td
-- Locate the marker once per row instead of repeating PATINDEX four times.
CROSS APPLY (SELECT PATINDEX('%url="%', td.value) AS url_pos) AS marker

    -- Add to the url lookup table every staged URL that is not already there.
    -- #testurl contains a NULL row whenever some log line had no url="...".
    -- "u.urlvalue = NULL" never matches, so without the IS NOT NULL filter the
    -- NOT EXISTS test passes and a useless NULL row is added on every run.
    INSERT INTO url (urlvalue)
    SELECT tu.fld
    FROM #testurl AS tu
    WHERE tu.fld IS NOT NULL
        AND NOT EXISTS (
            SELECT 1
            FROM url AS u
            WHERE u.urlvalue = tu.fld
        )

    -- Parse every staged log line in #TestData and insert one row per line
    -- into logmst. Each field is located by searching `value` for its
    -- `key=` / `key="` marker with PATINDEX and cut out with SUBSTRING; the
    -- constant added to the PATINDEX result is the offset from the start of
    -- the marker to the first character of the field's value.
    -- The ISNULL(PATINDEX(...), 0) <> 0 guards treat a NULL `value` the same
    -- as "marker not found".
    INSERT INTO [Cyberoam].[dbo].[logmst] (
     DATETIME, 
     c1c2, 
     c3c4, 
     c5c6, 
     c7, 
     c8to12, 
     STATUS, 
     username, 
     usergrp, 
     application, 
     category, 
     categorytype, 
     urlid, 
     recvbytes, 
     sentbytes, 
     fw_rule_id, 
     srcip, 
     dstip, 
     contenttype 
     ) 
    SELECT logdate, 
     -- log_id is split into five fixed-width pieces (2, 2, 2, 1 and 5 chars).
     -- NOTE(review): there is no "marker found" guard here; if log_id= is
     -- absent PATINDEX returns 0 and the slices come from the start of value.
     SUBSTRING(value, (PATINDEX('%log_id=%', value) + 7), 2), 
     SUBSTRING(value, (PATINDEX('%log_id=%', value) + 9), 2), 
     SUBSTRING(value, (PATINDEX('%log_id=%', value) + 11), 2), 
     SUBSTRING(value, (PATINDEX('%log_id=%', value) + 13), 1), 
     SUBSTRING(value, (PATINDEX('%log_id=%', value) + 14), 5), 
     -- status="Allow" -> '1', status="Deny" -> '0', anything else -> NULL.
     CASE 
      WHEN (SUBSTRING(value, (PATINDEX('%status="%', value) + 8), (CHARINDEX('"', value, (PATINDEX('%status="%', value) + 8)) - (PATINDEX('%status="%', value) + 8)))) = 'Allow' 
       THEN '1' 
      WHEN (SUBSTRING(value, (PATINDEX('%status="%', value) + 8), (CHARINDEX('"', value, (PATINDEX('%status="%', value) + 8)) - (PATINDEX('%status="%', value) + 8)))) = 'Deny' 
       THEN '0' 
      ELSE NULL 
      END, 
     -- Quoted fields: text between the opening quote of the marker and the
     -- next double quote; an empty string is stored as NULL.
     CASE 
      WHEN (ISNULL(PATINDEX('%user_name="%', value), 0) <> 0) 
       THEN (nullif(SUBSTRING(value, (PATINDEX('%user_name="%', value) + 11),(CHARINDEX('"', value, (PATINDEX('%user_name="%', value) + 11)) - (PATINDEX('%user_name="%', value) + 11))), '')) 
      ELSE NULL 
      END, 
     CASE 
      WHEN (isnull(PATINDEX('%user_gp="%', value), 0) <> 0) 
       THEN (nullif(SUBSTRING(value, (PATINDEX('%user_gp="%', value) + 9), (CHARINDEX('"', value, (PATINDEX('%user_gp="%', value) + 9)) - (PATINDEX('%user_gp="%', value) + 9))), '')) 
      ELSE NULL 
      END, 
     -- application: prefer application="...", otherwise fall back to
     -- application_name="...".
     CASE 
      WHEN (isnull(PATINDEX('%application="%', value), 0) <> 0) 
       THEN (nullif(SUBSTRING(value, (PATINDEX('%application="%', value) + 13), (CHARINDEX('"', value, (PATINDEX('%application="%', value) + 13)) - (PATINDEX('%application="%', value) + 13))), '')) 
      WHEN (isnull(PATINDEX('%application_name="%', value), 0) <> 0) 
       THEN (nullif(SUBSTRING(value, (PATINDEX('%application_name="%', value) + 18), (CHARINDEX('"', value, (PATINDEX('%application_name="%', value) + 18)) - (PATINDEX('%application_name="%', value) + 18))), '')) 
      ELSE NULL 
      END, 
     CASE 
      WHEN (isnull(PATINDEX('%category="%', value), 0) <> 0) 
       THEN (nullif(SUBSTRING(value, (PATINDEX('%category="%', value) + 10), (CHARINDEX('"', value, (PATINDEX('%category="%', value) + 10)) - (PATINDEX('%category="%', value) + 10))), '')) 
      ELSE NULL 
      END, 
     CASE 
      WHEN (isnull(PATINDEX('%category_type="%', value), 0) <> 0) 
       THEN (nullif(SUBSTRING(value, (PATINDEX('%category_type="%', value) + 15), (CHARINDEX('"', value, (PATINDEX('%category_type="%', value) + 15)) - (PATINDEX('%category_type="%', value) + 15))), '')) 
      ELSE NULL 
      END, 
      -- urlid: look up the id of this line's URL in the url table. Lines with
      -- no URL (or an empty one) compare against NULL and get a NULL urlid.
      -- NOTE(review): this is a correlated scalar subquery evaluated per row
      -- and it compares VARCHAR(max) text; it is the likely reason run time
      -- grows with the url table - confirm with the execution plan.
      (
      SELECT urlid 
      FROM url 
      WHERE urlvalue = (
        CASE 
         WHEN (isnull(PATINDEX('%url="%', value), 0) <> 0) 
          THEN (nullif(SUBSTRING(value, (PATINDEX('%url="%', value) + 5), (CHARINDEX('"', value, (PATINDEX('%url="%', value) + 5)) - (PATINDEX('%url="%', value) + 5))), '')) 
         ELSE NULL 
         END 
        ) 
      ), 
     -- recv_bytes / sent_bytes are unquoted numbers. The slice length is the
     -- position of the first non-digit within the next 20 characters.
     -- NOTE(review): that length includes the non-digit delimiter itself, and
     -- when no non-digit exists in those 20 characters PATINDEX returns 0 and
     -- the result becomes NULL - confirm this is acceptable for the column.
     CASE 
      WHEN (isnull(PATINDEX('%recv_bytes=%', value), 0) <> 0) 
       THEN (nullif(SUBSTRING(value, (PATINDEX('%recv_bytes=%', value) + 11), (PATINDEX('%[^0-9]%', (nullif(SUBSTRING(value, (PATINDEX('%recv_bytes=%', value) + 11), 20), ''))))), '')) 
      ELSE NULL 
      END, 
     CASE 
      WHEN (isnull(PATINDEX('%sent_bytes=%', value), 0) <> 0) 
       THEN (nullif(SUBSTRING(value, (PATINDEX('%sent_bytes=%', value) + 11), (PATINDEX('%[^0-9]%', (nullif(SUBSTRING(value, (PATINDEX('%sent_bytes=%', value) + 11), 20), ''))))), '')) 
      ELSE NULL 
      END, 
     -- fw_rule_id / src_ip / dst_ip are unquoted and end at the next space.
     -- NOTE(review): if no space follows the field, CHARINDEX returns 0, the
     -- computed length is negative and SUBSTRING raises an error.
     CASE 
      WHEN (isnull(PATINDEX('%fw_rule_id=%', value), 0) <> 0) 
       THEN (nullif(SUBSTRING(value, (PATINDEX('%fw_rule_id=%', value) + 11), (CHARINDEX(' ', value, (PATINDEX('%fw_rule_id=%', value) + 11)) - (PATINDEX('%fw_rule_id=%', value) + 11))), '')) 
      ELSE NULL 
      END, 
     CASE 
      WHEN (isnull(PATINDEX('%src_ip=%', value), 0) <> 0) 
       THEN (nullif(SUBSTRING(value, (PATINDEX('%src_ip=%', value) + 7), (CHARINDEX(' ', value, (PATINDEX('%src_ip=%', value) + 7)) - (PATINDEX('%src_ip=%', value) + 7))), '')) 
      ELSE NULL 
      END, 
     CASE 
      WHEN (isnull(PATINDEX('%dst_ip=%', value), 0) <> 0) 
       THEN (nullif(SUBSTRING(value, (PATINDEX('%dst_ip=%', value) + 7), (CHARINDEX(' ', value, (PATINDEX('%dst_ip=%', value) + 7)) - (PATINDEX('%dst_ip=%', value) + 7))), '')) 
      ELSE NULL 
      END, 
     CASE 
      WHEN (isnull(PATINDEX('%contenttype="%', value), 0) <> 0) 
       THEN (nullif(SUBSTRING(value, (PATINDEX('%contenttype="%', value) + 13), (CHARINDEX('"', value, (PATINDEX('%contenttype="%', value) + 13)) - (PATINDEX('%contenttype="%', value) + 13))), '')) 
      ELSE NULL 
      END 
    FROM #TestData 

此代碼工作正常,但問題是:對一個約 5K 條記錄的文件運行批量插入所需的時間會逐漸增加(隨著 URL 表的增長,處理一個約 5k 條記錄的文件已經需要 20 分鐘)。之後還會插入許多此類文件。

我需要您的建議:怎樣才能提高性能?或者我是否有做錯的地方?

非常感謝您的幫助。謝謝!

注意:如果URL列在同一個表中,則需要大約4-7秒。 如果它在同一個表中或分開,它會使性能有所不同嗎?

+0

如果您只運行BULK INSERT而沒有後續的表格構建邏輯,需要多長時間? – toddsonofodin

+0

如果我把 URL 字段保留在同一個表中,只需要 4-7 秒;但我認爲這會影響我的 SELECT 語句的性能,所以才把它拆分到單獨的表中。 – Kai

+0

Value varchar(max) 列中長度超過 7971 個字符的記錄佔多大比例?這個字段的平均長度是多少? – Stoleg

回答

2

找到了解決方案。如果有人需要,解決方案如下。 由於 URL 列是 varchar(max),查詢花費的時間很長。 我刪除了聚集主鍵索引,爲 URL 的校驗和添加了一個新列,並在該列上創建了聚集索引。 然後把查詢從:

-- Lookup as originally written: match on the full URL text only.
SELECT u.urlid
FROM url AS u
WHERE u.urlvalue = @value

到:

-- Lookup using the checksum column: the integer comparison on
-- checksum_urlvalue narrows the candidates first. CHECKSUM values can
-- collide, so the full urlvalue comparison is still required.
SELECT urlid 
     FROM url 
     WHERE checksum_urlvalue = checksum(@value) AND urlvalue = (@value) 

這樣執行時間減少到了 7-8 秒。 謝謝大家的回覆。 快樂編碼:)

相關問題