2016-01-19 28 views
0

我一直在琢磨如何實現這個算法在處理SQL個月的實施,https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance在處理SQL的哈羅溫克勒距離算法

如何能不能做到?

+0

這不是真的應該在應用程序的數據層中做的事情的類型;這是一種商業/邏輯層的東西。 –

+0

@ roryap爲什麼假定這是針對應用程序的?我受到啓發,因爲需要比較SQL查詢中的字符串,與開發中的任何類型的應用程序無關:) – Maritim

+0

只是指出了遇到此問題的人,可能會誤導人們認爲這是一個好主意這種東西在具有數據訪問層的應用程序中。你沒有提到這件事,所以我認爲這很重要。 –

回答

3

今天我終於偶然發現了this堆棧溢出 - leebickmtu顯示了C#中的一個實現,最初是從Java移植過來的。我冒昧地將其移植到Transact SQL函數中,盡情享受吧!

IF OBJECT_ID (N'dbo.InlineMax', N'FN') IS NOT NULL 
    DROP FUNCTION dbo.InlineMax; 
GO 

CREATE FUNCTION dbo.InlineMax(@valueOne int, @valueTwo int) 
    RETURNS FLOAT 
AS 
BEGIN 
    IF @valueOne > @valueTwo 
    BEGIN 
     RETURN @valueOne 
    END 

    RETURN ISNULL(@valueTwo, @valueOne) 
END; 
GO 

IF OBJECT_ID (N'dbo.InlineMin', N'FN') IS NOT NULL 
    DROP FUNCTION dbo.InlineMin; 
GO 

CREATE FUNCTION dbo.InlineMin(@valueOne int, @valueTwo int) 
    RETURNS FLOAT 
AS 
BEGIN 
    IF @valueOne < @valueTwo 
     RETURN @valueOne 

    RETURN @valueTwo 
END; 
GO 

IF OBJECT_ID (N'dbo.JaroWinklerDistance', N'FN') IS NOT NULL 
    DROP FUNCTION dbo.JaroWinklerDistance; 
GO 

CREATE FUNCTION dbo.JaroWinklerDistance(@stringOne varchar(MAX), @stringTwo varchar(MAX)) 
RETURNS FLOAT 
WITH EXECUTE AS CALLER 
AS 
BEGIN 
    DECLARE @mWeightThreshold FLOAT; SET @mWeightThreshold = 0.7; 
    DECLARE @mNuMChars INT; SET @mNumChars = 4; 
    DECLARE @lLen1 int; SET @lLen1 = LEN(@stringOne) 
    DECLARE @lLen2 int; SET @lLen2 = LEN(@stringTwo) 

    IF @lLen1 = 0 
     RETURN CASE WHEN @lLen2 = 0 THEN 1 ELSE 0 END 

    DECLARE @lSearchRange int; SET @lSearchRange = dbo.InlineMax(0,dbo.InlineMax(@lLen1, @lLen2)/2 - 1); 

    DECLARE @lMatched1 TABLE (position int not null, [status] bit not null) 
    DECLARE @lMatched2 TABLE (position int not null, [status] bit not null) 

    DECLARE @lNumCommon int; SET @lNumCommon = 0 
    DECLARE @i int; SET @i = 1; WHILE(@i <= @lLen1) 
    BEGIN 
     DECLARE @lStart int; SET @lStart = dbo.InlineMax(1, @i - @lSearchRange) 
     DECLARE @lEnd int; SET @lEnd = dbo.InlineMin(@i + @lSearchRange + 1, @lLen2) 

     DECLARE @j int; SET @j = @lStart; WHILE(@j <= @lEnd) 
     BEGIN 
      IF((SELECT [status] FROM @lMatched2 WHERE position = @j) = 1) 
      BEGIN 
       SET @j = @j + 1 
       CONTINUE 
      END 

      IF (SELECT SUBSTRING(@stringOne, @i, 1)) <> (SELECT SUBSTRING(@stringTwo, @j, 1)) 
      BEGIN 
       SET @j = @j + 1 
       CONTINUE 
      END 

      INSERT INTO @lMatched1 (position, [status]) VALUES(@i, 1) 
      INSERT INTO @lMatched2 (position, [status]) VALUES(@j, 1) 

      SET @lNumCommon = @lNumCommon + 1 
      BREAK 
     END 

     SET @i = @i + 1 
    END 

    IF @lNumCommon = 0 
    BEGIN 
     RETURN 0.0; 
    END 

    DECLARE @lNumHalfTransposed int; SET @lNumHalfTransposed = 0 
    DECLARE @k INT; SET @k = 1; 
    DECLARE @stopLoop bit; SET @stopLoop = 0; 
    SET @i = 1; WHILE(@i <= @lLen1) 
    BEGIN 
     IF ((SELECT [status] FROM @lMatched1 WHERE position = @i) = 1) 
     BEGIN 
      SET @i = @i + 1 
      CONTINUE; 
     END 

     WHILE(@stopLoop = 0) 
     BEGIN 
      IF((SELECT [status] FROM @lMatched2 WHERE position = @k) = 0) 
       SET @k = @k + 1 
      ELSE 
       BREAK 

      IF((SELECT SUBSTRING(@stringOne, @i, 1)) <> (SELECT SUBSTRING(@stringTwo, @k, 1))) 
       SET @lNumHalfTransposed = @lNumHalfTransposed + 1 

      SET @k = @k + 1 
     END 

     SET @i = @i + 1 
    END 

    DECLARE @lNumTransposed INT; SET @lNumTransposed = @lNumHalfTransposed/2; 

    DECLARE @lNumCommonD FLOAT; SET @lNumCommonD = @lNumCommon 
    DECLARE @lWeight FLOAT; SET @lWeight = (@lNumCommonD/@lLen1 + @lNumCommonD/@lLen2 + (@lNumCommon - @lNumTransposed)/@lNumCommonD)/3.0; 

    IF(@lWeight <= @mWeightThreshold) 
     RETURN @lWeight 
    DECLARE @lMax INT; SET @lMax = dbo.InlineMin(@mNumChars, dbo.InlineMin(@lLen1, @lLen2)) 
    DECLARE @lPos INT; SET @lPos = 0 
    WHILE(@lPos < @lMax AND (SELECT SUBSTRING(@stringOne, @lPos, 1)) = (SELECT SUBSTRING(@stringTwo, @lPos, 1))) 
    BEGIN 
     SET @lPos = @lPos + 1 
    END 

    IF @lPos = 0 
     RETURN @lWeight 

    RETURN @lWeight + 0.1 * @lPos * (1.0 - @lWeight) 
END; 
GO 
+1

現在在更多的行上試試這個,並等待.............. mots DBMS允許在C/Java /等中創建UDF是有原因的。 :-) – dnoeth