Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/algorithm/12.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Transact-SQL中Jaro-Winkler距离算法的实现_Sql_Algorithm_Tsql_Jaro Winkler - Fatal编程技术网

Transact-SQL中Jaro-Winkler距离算法的实现

Transact-SQL中Jaro-Winkler距离算法的实现,sql,algorithm,tsql,jaro-winkler,Sql,Algorithm,Tsql,Jaro Winkler,几个月来,我一直在想如何在Transact-SQL中实现此算法 如何做到这一点?今天,我终于偶然发现了leebickmtu给出的堆栈溢出答案,它展示了一个C#实现,最初是从Java移植而来的。我冒昧地将它移植到了Transact-SQL函数中,祝您愉快 IF OBJECT_ID (N'dbo.InlineMax', N'FN') IS NOT NULL DROP FUNCTION dbo.InlineMax; GO CREATE FUNCTION dbo.InlineMax(@valu

几个月来,我一直在想如何在Transact-SQL中实现此算法


如何做到这一点?

今天,我终于偶然发现了 leebickmtu 在 Stack Overflow 上给出的一个答案,它展示了一个 C# 实现,最初是从 Java 移植而来的。我冒昧地将它移植成了 Transact-SQL 函数,祝您使用愉快。

-- Recreate the helper from scratch so the script can be re-run safely.
IF OBJECT_ID (N'dbo.InlineMax', N'FN') IS NOT NULL
BEGIN
    DROP FUNCTION dbo.InlineMax;
END
GO

-- dbo.InlineMax: returns the larger of two integers (as FLOAT).
--   @valueOne, @valueTwo : the values to compare.
-- NULL handling: a NULL operand makes the ">" test UNKNOWN, so the fallback
-- branch runs and yields whichever operand is not NULL (NULL if both are).
CREATE FUNCTION dbo.InlineMax(@valueOne int, @valueTwo int)
    RETURNS FLOAT
AS
BEGIN
    RETURN CASE
               WHEN @valueOne > @valueTwo THEN @valueOne
               ELSE COALESCE(@valueTwo, @valueOne)
           END;
END;
GO

-- Recreate the helper from scratch so the script can be re-run safely.
IF OBJECT_ID (N'dbo.InlineMin', N'FN') IS NOT NULL
BEGIN
    DROP FUNCTION dbo.InlineMin;
END
GO

-- dbo.InlineMin: returns the smaller of two integers (as FLOAT).
--   @valueOne, @valueTwo : the values to compare.
-- NULL handling: a NULL operand makes the "<" test UNKNOWN, so @valueTwo is
-- returned as-is. InlineMin(NULL, x) is therefore x, while InlineMin(x, NULL)
-- is NULL.
CREATE FUNCTION dbo.InlineMin(@valueOne int, @valueTwo int)
    RETURNS FLOAT
AS
BEGIN
    RETURN CASE
               WHEN @valueOne < @valueTwo THEN @valueOne
               ELSE @valueTwo
           END;
END;
GO

IF OBJECT_ID (N'dbo.JaroWinklerDistance', N'FN') IS NOT NULL
    DROP FUNCTION dbo.JaroWinklerDistance;
GO

-- dbo.JaroWinklerDistance: Jaro-Winkler similarity of two strings.
--   @stringOne, @stringTwo : the strings to compare.
--   Returns a FLOAT in [0, 1]: 1 for identical strings (including two empty
--   strings), 0 when no characters match. A NULL argument yields 0.
--
-- Notes:
--   * Lengths come from LEN(), so trailing spaces are not counted.
--   * Characters are compared with "=" / "<>", so case and accent sensitivity
--     follow the collation in effect for the comparison.
--   * T-SQL strings are 1-based; every index below is 1-based.
CREATE FUNCTION dbo.JaroWinklerDistance(@stringOne varchar(MAX), @stringTwo varchar(MAX))
RETURNS FLOAT
WITH EXECUTE AS CALLER
AS
BEGIN
    -- Winkler parameters: the prefix bonus is applied only above this Jaro
    -- score, and considers at most this many leading characters.
    DECLARE @mWeightThreshold FLOAT; SET @mWeightThreshold = 0.7;
    DECLARE @mNumChars INT; SET @mNumChars = 4;

    IF @stringOne IS NULL OR @stringTwo IS NULL
        RETURN 0.0;

    DECLARE @lLen1 INT; SET @lLen1 = LEN(@stringOne);
    DECLARE @lLen2 INT; SET @lLen2 = LEN(@stringTwo);

    IF @lLen1 = 0
        RETURN CASE WHEN @lLen2 = 0 THEN 1.0 ELSE 0.0 END;

    -- Matching window half-width: max(len1, len2) / 2 - 1 (integer division),
    -- never negative.
    DECLARE @lSearchRange INT;
    SET @lSearchRange = (CASE WHEN @lLen1 > @lLen2 THEN @lLen1 ELSE @lLen2 END) / 2 - 1;
    IF @lSearchRange < 0
        SET @lSearchRange = 0;

    -- Positions in each string that have been paired with a character of the
    -- other string. Only matched positions are stored.
    DECLARE @lMatched1 TABLE (position INT NOT NULL PRIMARY KEY);
    DECLARE @lMatched2 TABLE (position INT NOT NULL PRIMARY KEY);

    DECLARE @lNumCommon INT; SET @lNumCommon = 0;
    DECLARE @i INT; SET @i = 1;
    DECLARE @j INT;
    DECLARE @lEnd INT;

    WHILE @i <= @lLen1
    BEGIN
        -- Inclusive window [@i - range, @i + range], clamped to [1, @lLen2].
        SET @j = CASE WHEN @i - @lSearchRange > 1 THEN @i - @lSearchRange ELSE 1 END;
        SET @lEnd = CASE WHEN @i + @lSearchRange < @lLen2 THEN @i + @lSearchRange ELSE @lLen2 END;

        WHILE @j <= @lEnd
        BEGIN
            -- Pair @i with the first not-yet-matched equal character in the window.
            IF SUBSTRING(@stringOne, @i, 1) = SUBSTRING(@stringTwo, @j, 1)
               AND NOT EXISTS (SELECT 1 FROM @lMatched2 WHERE position = @j)
            BEGIN
                INSERT INTO @lMatched1 (position) VALUES (@i);
                INSERT INTO @lMatched2 (position) VALUES (@j);
                SET @lNumCommon = @lNumCommon + 1;
                BREAK;
            END

            SET @j = @j + 1;
        END

        SET @i = @i + 1;
    END

    IF @lNumCommon = 0
        RETURN 0.0;

    -- Half-transpositions: walk the matched characters of both strings in
    -- positional order and count the pairs whose characters differ.
    DECLARE @lNumHalfTransposed INT;
    SELECT @lNumHalfTransposed = COUNT(*)
    FROM (
        SELECT position, ROW_NUMBER() OVER (ORDER BY position) AS seq
        FROM @lMatched1
    ) AS m1
    INNER JOIN (
        SELECT position, ROW_NUMBER() OVER (ORDER BY position) AS seq
        FROM @lMatched2
    ) AS m2
        ON m2.seq = m1.seq
    WHERE SUBSTRING(@stringOne, m1.position, 1) <> SUBSTRING(@stringTwo, m2.position, 1);

    DECLARE @lNumTransposed INT; SET @lNumTransposed = @lNumHalfTransposed / 2;

    -- Jaro score; the FLOAT copy of the match count forces floating-point division.
    DECLARE @lNumCommonD FLOAT; SET @lNumCommonD = @lNumCommon;
    DECLARE @lWeight FLOAT;
    SET @lWeight = (@lNumCommonD / @lLen1
                    + @lNumCommonD / @lLen2
                    + (@lNumCommon - @lNumTransposed) / @lNumCommonD) / 3.0;

    IF @lWeight <= @mWeightThreshold
        RETURN @lWeight;

    -- Winkler bonus: length of the common prefix, capped at @mNumChars and at
    -- the shorter string's length.
    DECLARE @lMax INT;
    SET @lMax = CASE WHEN @lLen1 < @lLen2 THEN @lLen1 ELSE @lLen2 END;
    IF @lMax > @mNumChars
        SET @lMax = @mNumChars;

    -- @lPos counts characters already known to be equal, so the next character
    -- to compare sits at 1-based position @lPos + 1.
    DECLARE @lPos INT; SET @lPos = 0;
    WHILE @lPos < @lMax
          AND SUBSTRING(@stringOne, @lPos + 1, 1) = SUBSTRING(@stringTwo, @lPos + 1, 1)
    BEGIN
        SET @lPos = @lPos + 1;
    END

    IF @lPos = 0
        RETURN @lWeight;

    RETURN @lWeight + 0.1 * @lPos * (1.0 - @lWeight);
END;
GO
如果对象ID(N'dbo.InlineMax',N'FN')不为空
DROP函数dbo.InlineMax;
去
创建函数dbo.InlineMax(@valueOne int、@valueTwo int)
返回浮动
作为
开始
如果@valueOne>@valueTwo
开始
返回@valueOne
结束
返回值为空(@valueTwo,@valueOne)
结束;
去
如果对象ID(N'dbo.InlineMin',N'FN')不为空
DROP函数dbo.InlineMin;
去
创建函数dbo.InlineMin(@valueOne int,@valueTwo int)
返回浮动
作为
开始
如果@valueOne<@valueTwo
返回@valueOne
返回@valueTwo
结束;
去
如果对象ID(N'dbo.JaroWinklerDistance',N'FN')不为空
DROP函数dbo.JaroWinklerDistance;
去
创建函数dbo.JaroWinklerDistance(@stringOne-varchar(最大值),@stringTwo-varchar(最大值))
返回浮动
以EXECUTE作为调用方
作为
开始
宣布@mWeightThreshold浮动;设置@mWeightThreshold=0.7;
声明@mNuMChars INT;设置@mNumChars=4;
声明@lLen1 int;设置@lLen1=LEN(@stringOne)
声明@lLen2 int;设置@lLen2=LEN(@stringTwo)
如果@lLen1=0
当@lLen2=0时返回案例,然后1否则0结束
声明@lSearchRange int;设置@lSearchRange=dbo.InlineMax(0,dbo.InlineMax(@lLen1,@lLen2)/2-1);
声明@lMatched1表(位置int不为null,[状态]位不为null)
声明@lMatched2表(位置int不为null,[状态]位不为null)
声明@lNumCommon int;设置@lNumCommon=0

声明@i int;设置@i=1;而(@i

今天,我终于偶然发现了 leebickmtu 在 Stack Overflow 上给出的一个答案,它展示了一个 C# 实现,最初是从 Java 移植而来的。我冒昧地将它移植成了 Transact-SQL 函数,请尽情使用。

-- Recreate the helper from scratch so the script can be re-run safely.
IF OBJECT_ID (N'dbo.InlineMax', N'FN') IS NOT NULL
BEGIN
    DROP FUNCTION dbo.InlineMax;
END
GO

-- dbo.InlineMax: returns the larger of two integers (as FLOAT).
--   @valueOne, @valueTwo : the values to compare.
-- NULL handling: a NULL operand makes the ">" test UNKNOWN, so the fallback
-- branch runs and yields whichever operand is not NULL (NULL if both are).
CREATE FUNCTION dbo.InlineMax(@valueOne int, @valueTwo int)
    RETURNS FLOAT
AS
BEGIN
    RETURN CASE
               WHEN @valueOne > @valueTwo THEN @valueOne
               ELSE COALESCE(@valueTwo, @valueOne)
           END;
END;
GO

-- Recreate the helper from scratch so the script can be re-run safely.
IF OBJECT_ID (N'dbo.InlineMin', N'FN') IS NOT NULL
BEGIN
    DROP FUNCTION dbo.InlineMin;
END
GO

-- dbo.InlineMin: returns the smaller of two integers (as FLOAT).
--   @valueOne, @valueTwo : the values to compare.
-- NULL handling: a NULL operand makes the "<" test UNKNOWN, so @valueTwo is
-- returned as-is. InlineMin(NULL, x) is therefore x, while InlineMin(x, NULL)
-- is NULL.
CREATE FUNCTION dbo.InlineMin(@valueOne int, @valueTwo int)
    RETURNS FLOAT
AS
BEGIN
    RETURN CASE
               WHEN @valueOne < @valueTwo THEN @valueOne
               ELSE @valueTwo
           END;
END;
GO

IF OBJECT_ID (N'dbo.JaroWinklerDistance', N'FN') IS NOT NULL
    DROP FUNCTION dbo.JaroWinklerDistance;
GO

-- dbo.JaroWinklerDistance: Jaro-Winkler similarity of two strings.
--   @stringOne, @stringTwo : the strings to compare.
--   Returns a FLOAT in [0, 1]: 1 for identical strings (including two empty
--   strings), 0 when no characters match. A NULL argument yields 0.
--
-- Notes:
--   * Lengths come from LEN(), so trailing spaces are not counted.
--   * Characters are compared with "=" / "<>", so case and accent sensitivity
--     follow the collation in effect for the comparison.
--   * T-SQL strings are 1-based; every index below is 1-based.
CREATE FUNCTION dbo.JaroWinklerDistance(@stringOne varchar(MAX), @stringTwo varchar(MAX))
RETURNS FLOAT
WITH EXECUTE AS CALLER
AS
BEGIN
    -- Winkler parameters: the prefix bonus is applied only above this Jaro
    -- score, and considers at most this many leading characters.
    DECLARE @mWeightThreshold FLOAT; SET @mWeightThreshold = 0.7;
    DECLARE @mNumChars INT; SET @mNumChars = 4;

    IF @stringOne IS NULL OR @stringTwo IS NULL
        RETURN 0.0;

    DECLARE @lLen1 INT; SET @lLen1 = LEN(@stringOne);
    DECLARE @lLen2 INT; SET @lLen2 = LEN(@stringTwo);

    IF @lLen1 = 0
        RETURN CASE WHEN @lLen2 = 0 THEN 1.0 ELSE 0.0 END;

    -- Matching window half-width: max(len1, len2) / 2 - 1 (integer division),
    -- never negative.
    DECLARE @lSearchRange INT;
    SET @lSearchRange = (CASE WHEN @lLen1 > @lLen2 THEN @lLen1 ELSE @lLen2 END) / 2 - 1;
    IF @lSearchRange < 0
        SET @lSearchRange = 0;

    -- Positions in each string that have been paired with a character of the
    -- other string. Only matched positions are stored.
    DECLARE @lMatched1 TABLE (position INT NOT NULL PRIMARY KEY);
    DECLARE @lMatched2 TABLE (position INT NOT NULL PRIMARY KEY);

    DECLARE @lNumCommon INT; SET @lNumCommon = 0;
    DECLARE @i INT; SET @i = 1;
    DECLARE @j INT;
    DECLARE @lEnd INT;

    WHILE @i <= @lLen1
    BEGIN
        -- Inclusive window [@i - range, @i + range], clamped to [1, @lLen2].
        SET @j = CASE WHEN @i - @lSearchRange > 1 THEN @i - @lSearchRange ELSE 1 END;
        SET @lEnd = CASE WHEN @i + @lSearchRange < @lLen2 THEN @i + @lSearchRange ELSE @lLen2 END;

        WHILE @j <= @lEnd
        BEGIN
            -- Pair @i with the first not-yet-matched equal character in the window.
            IF SUBSTRING(@stringOne, @i, 1) = SUBSTRING(@stringTwo, @j, 1)
               AND NOT EXISTS (SELECT 1 FROM @lMatched2 WHERE position = @j)
            BEGIN
                INSERT INTO @lMatched1 (position) VALUES (@i);
                INSERT INTO @lMatched2 (position) VALUES (@j);
                SET @lNumCommon = @lNumCommon + 1;
                BREAK;
            END

            SET @j = @j + 1;
        END

        SET @i = @i + 1;
    END

    IF @lNumCommon = 0
        RETURN 0.0;

    -- Half-transpositions: walk the matched characters of both strings in
    -- positional order and count the pairs whose characters differ.
    DECLARE @lNumHalfTransposed INT;
    SELECT @lNumHalfTransposed = COUNT(*)
    FROM (
        SELECT position, ROW_NUMBER() OVER (ORDER BY position) AS seq
        FROM @lMatched1
    ) AS m1
    INNER JOIN (
        SELECT position, ROW_NUMBER() OVER (ORDER BY position) AS seq
        FROM @lMatched2
    ) AS m2
        ON m2.seq = m1.seq
    WHERE SUBSTRING(@stringOne, m1.position, 1) <> SUBSTRING(@stringTwo, m2.position, 1);

    DECLARE @lNumTransposed INT; SET @lNumTransposed = @lNumHalfTransposed / 2;

    -- Jaro score; the FLOAT copy of the match count forces floating-point division.
    DECLARE @lNumCommonD FLOAT; SET @lNumCommonD = @lNumCommon;
    DECLARE @lWeight FLOAT;
    SET @lWeight = (@lNumCommonD / @lLen1
                    + @lNumCommonD / @lLen2
                    + (@lNumCommon - @lNumTransposed) / @lNumCommonD) / 3.0;

    IF @lWeight <= @mWeightThreshold
        RETURN @lWeight;

    -- Winkler bonus: length of the common prefix, capped at @mNumChars and at
    -- the shorter string's length.
    DECLARE @lMax INT;
    SET @lMax = CASE WHEN @lLen1 < @lLen2 THEN @lLen1 ELSE @lLen2 END;
    IF @lMax > @mNumChars
        SET @lMax = @mNumChars;

    -- @lPos counts characters already known to be equal, so the next character
    -- to compare sits at 1-based position @lPos + 1.
    DECLARE @lPos INT; SET @lPos = 0;
    WHILE @lPos < @lMax
          AND SUBSTRING(@stringOne, @lPos + 1, 1) = SUBSTRING(@stringTwo, @lPos + 1, 1)
    BEGIN
        SET @lPos = @lPos + 1;
    END

    IF @lPos = 0
        RETURN @lWeight;

    RETURN @lWeight + 0.1 * @lPos * (1.0 - @lWeight);
END;
GO
如果对象ID(N'dbo.InlineMax',N'FN')不为空
DROP函数dbo.InlineMax;
去
创建函数dbo.InlineMax(@valueOne int、@valueTwo int)
返回浮动
作为
开始
如果@valueOne>@valueTwo
开始
返回@valueOne
结束
返回值为空(@valueTwo,@valueOne)
结束;
去
如果对象ID(N'dbo.InlineMin',N'FN')不为空
DROP函数dbo.InlineMin;
去
创建函数dbo.InlineMin(@valueOne int,@valueTwo int)
返回浮动
作为
开始
如果@valueOne<@valueTwo
返回@valueOne
返回@valueTwo
结束;
去
如果对象ID(N'dbo.JaroWinklerDistance',N'FN')不为空
DROP函数dbo.JaroWinklerDistance;
去
创建函数dbo.JaroWinklerDistance(@stringOne-varchar(最大值),@stringTwo-varchar(最大值))
返回浮动
以EXECUTE作为调用方
作为
开始
声明@mWeightThreshold FLOAT;设置@mWeightThreshold=0.7;
声明@mNuMChars INT;设置@mNuMChars=4;
声明@lLen1 int;设置@lLen1=LEN(@stringOne)
声明@lLen2 int;设置@lLen2=LEN(@stringTwo)
如果@lLen1=0
当@lLen2=0时返回案例,然后1否则0结束
声明@lSearchRange int;设置@lSearchRange=dbo.InlineMax(0,dbo.InlineMax(@lLen1,@lLen2)/2-1);
声明@lMatched1表(位置int不为null,[状态]位不为null)
声明@lMatched2表(位置int不为null,[状态]位不为null)
声明@lNumCommon int;设置@lNumCommon=0

声明@i int;设置@i=1;而(@i

这并不是你应该在应用程序的数据层做的事情;这属于业务/逻辑层的范畴。

@roryap 为什么假设这是针对应用程序的?我之所以这样做,是因为需要在SQL查询中比较字符串,而不是与开发中的任何应用程序相关 :)

只是为那些遇到这种情况的人指出这一点,他们可能会被误导,认为在具有数据访问层的应用程序中执行此类操作是一个好主意。您没有对此做任何说明,因此我认为这一点很重要。

这并不是您应该在应用程序的数据层中执行的操作;我认为这属于业务/逻辑层的范畴。

@roryap 为什么假设这是针对应用程序的?我之所以这样做,是因为需要比较SQL查询中的字符串,而与开发中的任何类型的应用程序无关 :)

只是为那些遇到这种情况的人指出这一点,他们可能会被误导,认为在具有数据访问层的应用程序中执行此类操作是一个好主意。您没有对此做任何说明,所以我认为这一点很重要。

现在在更多行上尝试这一点,然后等待……这就是为什么大多数DBMS都允许用C/Java等语言创建UDF的原因 :-)

现在在更多行上尝试此操作,然后等待……大多数DBMS允许用C/Java等语言创建UDF是有原因的 :-)