我已經編寫了以下兩個字符串(以逗號分隔)的函數,將它們分成兩個不同的臨時表,然後使用這些臨時表來查找這兩個臨時表中匹配的單詞的百分比。問題是,當我在每行約200k行的數據集上使用它時,查詢超時! 是否有任何可以完成的優化?Sql UDF優化
ALTER FUNCTION [GetWordSimilarity](@String varchar(8000),
@String2 varchar(8000),@Delimiter char(1))
returns decimal(16,2)
as
begin
declare @result as decimal (16,2)
declare @temptable table (items varchar(8000))
declare @temptable2 table (items varchar(8000))
declare @numberOfCommonWords decimal(16,2)
declare @countTable1 decimal(16,2)
declare @countTable2 decimal(16,2)
declare @denominator decimal(16,2)
set @result = 0.0 --dummy value
declare @idx int
declare @slice varchar(8000)
select @idx = 1
if len(@String)<1 or @String is null or len(@String2) = 0 or @String2 is null return 0.0
--populating @temptable
while @idx!= 0
begin
set @idx = charindex(@Delimiter,@String)
if @idx!=0
set @slice = left(@String,@idx - 1)
else
set @slice = @String
if(len(@slice)>0)
insert into @temptable(Items) values(ltrim(rtrim(@slice)))
set @String = right(@String,len(@String) - @idx)
if len(@String) = 0 break
end
select @idx = 1
----populating @temptable2
while @idx!= 0
begin
set @idx = charindex(@Delimiter,@String2)
if @idx!=0
set @slice = left(@String2,@idx - 1)
else
set @slice = @String2
if(len(@slice)>0)
insert into @temptable2(Items) values(ltrim(rtrim(@slice)))
set @String2 = right(@String2,len(@String2) - @idx)
if len(@String2) = 0 break
end
--calculating percentage of words match
if (((select COUNT(*) from @temptable) = 0) or ((select COUNT(*) from @temptable2) = 0))
return 0.0
select @numberOfCommonWords = COUNT(*) from
(
select distinct items from @temptable
intersect
select distinct items from @temptable2
) a
select @countTable1 = COUNT (*) from @temptable
select @countTable2 = COUNT (*) from @temptable2
if(@countTable1 > @countTable2) set @denominator = @countTable1
else set @denominator = @countTable2
set @result = @numberOfCommonWords/@denominator
return @result
end
非常感謝!
您可以使用數字表優化分割。在sqlcentral上有一個例子,並解釋它如何工作。我會看看我能否找到它。 – Dreamwalker
謝謝!請讓我知道你是否可以找到它。再次感謝! –
我找到了鏈接http://www.sqlservercentral.com/articles/T-SQL/62867/ Essentialy你用數字製作一個表格並使用它來執行循環。文章是在這裏渴望的方式。 – Dreamwalker