Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/string/5.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
String 雅罗–;C语言中的Winkler距离算法#_String_Comparison_Distance_Jaro Winkler - Fatal编程技术网

String 雅罗–;C语言中的Winkler距离算法#

String 雅罗–;C语言中的Winkler距离算法#,string,comparison,distance,jaro-winkler,String,Comparison,Distance,Jaro Winkler,如何在C#中实现Jaro–Winkler距离字符串比较算法?公共静态类JaroWinkler距离 { /*除非 *匹配百分比等于或高于mWeightThreshold百分比 *没有修改。 *温克勒的论文使用了默认值0.7 */ 专用静态只读双mWeightThreshold=0.7; /*要由Winkler修改协调的前缀的大小。 *温克勒的论文使用了默认值4 */ 私有静态只读int mNumChars=4; /// ///返回指定对象之间的Jaro-Winkler距离 ///字符串。该距离是

如何在C#中实现Jaro–Winkler距离字符串比较算法?

公共静态类JaroWinkler距离
{
/*除非
*匹配百分比等于或高于mWeightThreshold百分比
*没有修改。
*温克勒的论文使用了默认值0.7
*/
专用静态只读双mWeightThreshold=0.7;
/*要由Winkler修改协调的前缀的大小。
*温克勒的论文使用了默认值4
*/
私有静态只读int mNumChars=4;
/// 
///返回指定对象之间的Jaro-Winkler距离
///字符串。该距离是对称的,并将在
///范围0(完全匹配)到1(不匹配)。
/// 
///第一串
///二线
/// 
公共静态双距离(字符串收敛1、字符串收敛2){
返回1.0-接近度(收敛1,收敛2);
}
/// 
///返回指定对象之间的Jaro-Winkler距离
///字符串。该距离是对称的,并将在
///范围0(不匹配)到1(完全匹配)。
/// 
///第一串
///二线
/// 
公共静态双接近(字符串收敛1、字符串收敛2)
{
int lLen1=收敛1.长度;
int lLen2=收敛2.长度;
如果(lLen1==0)
返回lLen2==0?1.0:0.0;
int lSearchRange=Math.Max(0,Math.Max(lLen1,lLen2)/2-1);
//默认值初始化为false
bool[]lMatched1=新bool[lLen1];
bool[]lMatched2=新bool[lLen2];
int lNumCommon=0;
对于(int i=0;i如果(lWeight您可以在Lucene.Net上查看

它实现了Jaro–Winkler距离算法

它的分数和leebickmtu发布的不同

你可以把它作为参考

网址如下:


在课堂下使用jaro winkler。 我已经定制了jaro和jaro winkler算法

访问以获取DLL

使用系统;
使用System.Linq;
名称空间搜索
{
公共静态类编辑距离
{
私有结构计量学
{
公共整数匹配;
公共换位;
}
私有静态EditDistance.jarMetrics匹配(字符串s1、字符串s2)
{
字符串文本;
字符串text2;
如果(s1.长度>s2.长度)
{
text=s1;
text2=s2;
}
其他的
{
text=s2;
text2=s1;
}
int num=Math.Max(text.Length/2-1,0);
int[]数组=新的int[text2.Length];
int i;
对于(i=0;it!=ms2[mi]).Count();
EditDistance.JaroMetrics结果;
结果:匹配项=num2;
结果:换位=num5/2;
返回结果;
}
公共静态浮点JaroWinkler(此字符串s1、字符串s2、浮点前缀刻度、浮点boostThreshold)
{
prefixScale=((prefixScale>0.25f)?0.25f:prefixScale);
prefixScale=((prefixScale<0f)?0f:prefixScale);
float num=s1.Jaro(s2);
int num2=0;
using System;
using System.Linq;

namespace Search
{
    public static class EditDistance
    {
        private struct JaroMetrics
        {
            public int Matches;

            public int Transpositions;
        }

        private static EditDistance.JaroMetrics Matches(string s1, string s2)
        {
            string text;
            string text2;
            if (s1.Length > s2.Length)
            {
                text = s1;
                text2 = s2;
            }
            else
            {
                text = s2;
                text2 = s1;
            }
            int num = Math.Max(text.Length / 2 - 1, 0);
            int[] array = new int[text2.Length];
            int i;
            for (i = 0; i < array.Length; i++)
            {
                array[i] = -1;
            }
            bool[] array2 = new bool[text.Length];
            int num2 = 0;
            for (int j = 0; j < text2.Length; j++)
            {
                char c = text2[j];
                int k = Math.Max(j - num, 0);
                int num3 = Math.Min(j + num + 1, text.Length);
                while (k < num3)
                {
                    if (!array2[k] && c == text[k])
                    {
                        array[j] = k;
                        array2[k] = true;
                        num2++;
                        break;
                    }
                    k++;
                }
            }
            char[] array3 = new char[num2];
            char[] ms2 = new char[num2];
            i = 0;
            int num4 = 0;
            while (i < text2.Length)
            {
                if (array[i] != -1)
                {
                    array3[num4] = text2[i];
                    num4++;
                }
                i++;
            }
            i = 0;
            num4 = 0;
            while (i < text.Length)
            {
                if (array2[i])
                {
                    ms2[num4] = text[i];
                    num4++;
                }
                i++;
            }
            int num5 = array3.Where((char t, int mi) => t != ms2[mi]).Count<char>();
            EditDistance.JaroMetrics result;
            result.Matches = num2;
            result.Transpositions = num5 / 2;
            return result;
        }



        public static float JaroWinkler(this string s1, string s2, float prefixScale, float boostThreshold)
        {
            prefixScale = ((prefixScale > 0.25f) ? 0.25f : prefixScale);
            prefixScale = ((prefixScale < 0f) ? 0f : prefixScale);
            float num = s1.Jaro(s2);
            int num2 = 0;
            for (int i = 0; i < Math.Min(s1.Length, s2.Length); i++)
            {
                if (s1[i] != s2[i])
                {
                    break;
                }
                num2++;
            }
            return (num < boostThreshold) ? num : (num + prefixScale * (float)num2 * (1f - num));
        }

        public static float JaroWinkler(this string s1, string s2, float prefixScale)
        {
            return s1.JaroWinkler(s2, prefixScale, 0.7f);
        }

        public static float JaroWinkler(this string s1, string s2)
        {
            return s1.JaroWinkler(s2, 0.1f, 0.7f);
        }

        public static float Jaro(this string s1, string s2)
        {
            EditDistance.JaroMetrics jaroMetrics = EditDistance.Matches(s1, s2);
            float num = (float)jaroMetrics.Matches;
            int transpositions = jaroMetrics.Transpositions;
            float result;
            if (num == 0f)
            {
                result = 0f;
            }
            else
            {
                float num2 = (num / (float)s1.Length + num / (float)s2.Length + (num - (float)transpositions) / num) / 3f;
                result = num2;
            }
            return result;
        }

        public static int LevenshteinDistance(this string source, string target)
        {
            int result;
            if (string.IsNullOrEmpty(source))
            {
                if (string.IsNullOrEmpty(target))
                {
                    result = 0;
                }
                else
                {
                    result = target.Length;
                }
            }
            else if (string.IsNullOrEmpty(target))
            {
                result = source.Length;
            }
            else
            {
                if (source.Length > target.Length)
                {
                    string text = target;
                    target = source;
                    source = text;
                }
                int length = target.Length;
                int length2 = source.Length;
                int[,] array = new int[2, length + 1];
                for (int i = 1; i <= length; i++)
                {
                    array[0, i] = i;
                }
                int num = 0;
                for (int j = 1; j <= length2; j++)
                {
                    num = (j & 1);
                    array[num, 0] = j;
                    int num2 = num ^ 1;
                    for (int i = 1; i <= length; i++)
                    {
                        int num3 = (target[i - 1] == source[j - 1]) ? 0 : 1;
                        array[num, i] = Math.Min(Math.Min(array[num2, i] + 1, array[num, i - 1] + 1), array[num2, i - 1] + num3);
                    }
                }
                result = array[num, length];
            }
            return result;
        }
    }
}
    public class JaroWinkler
{
    private const double defaultMismatchScore = 0.0;
    private const double defaultMatchScore = 1.0;

    /// <summary>
    /// Gets the similarity between two strings by using the Jaro-Winkler algorithm.
    /// A value of 1 means perfect match. A value of zero represents an absolute no match
    /// </summary>
    /// <param name="_firstWord"></param>
    /// <param name="_secondWord"></param>
    /// <returns>a value between 0-1 of the similarity</returns>
    /// 
    public static double RateSimilarity(string _firstWord, string _secondWord)
    {
        // Converting to lower case is not part of the original Jaro-Winkler implementation
        // But we don't really care about case sensitivity in DIAMOND and wouldn't decrease security names similarity rate just because
        // of Case sensitivity
        _firstWord = _firstWord.ToLower();
        _secondWord = _secondWord.ToLower();

        if ((_firstWord != null) && (_secondWord != null))
        {
            if (_firstWord == _secondWord)
                //return (SqlDouble)defaultMatchScore;
                return defaultMatchScore;
            else
            {
                // Get half the length of the string rounded up - (this is the distance used for acceptable transpositions)
                int halfLength = Math.Min(_firstWord.Length, _secondWord.Length) / 2 + 1;

                // Get common characters
                StringBuilder common1 = GetCommonCharacters(_firstWord, _secondWord, halfLength);
                int commonMatches = common1.Length;

                // Check for zero in common
                if (commonMatches == 0)
                    //return (SqlDouble)defaultMismatchScore;
                    return defaultMismatchScore;

                StringBuilder common2 = GetCommonCharacters(_secondWord, _firstWord, halfLength);

                // Check for same length common strings returning 0 if is not the same
                if (commonMatches != common2.Length)
                    //return (SqlDouble)defaultMismatchScore;
                    return defaultMismatchScore;

                // Get the number of transpositions
                int transpositions = 0;
                for (int i = 0; i < commonMatches; i++)
                {
                    if (common1[i] != common2[i])
                        transpositions++;
                }

                int j = 0;
                j += 1;

                // Calculate Jaro metric
                transpositions /= 2;
                double jaroMetric = commonMatches / (3.0 * _firstWord.Length) + commonMatches / (3.0 * _secondWord.Length) + (commonMatches - transpositions) / (3.0 * commonMatches);
                //return (SqlDouble)jaroMetric;
                return jaroMetric;
            }
        }

        //return (SqlDouble)defaultMismatchScore;
        return defaultMismatchScore;
    }

    /// <summary>
    /// Returns a string buffer of characters from string1 within string2 if they are of a given
    /// distance seperation from the position in string1.
    /// </summary>
    /// <param name="firstWord">string one</param>
    /// <param name="secondWord">string two</param>
    /// <param name="separationDistance">separation distance</param>
    /// <returns>A string buffer of characters from string1 within string2 if they are of a given
    /// distance seperation from the position in string1</returns>
    private static StringBuilder GetCommonCharacters(string firstWord, string secondWord, int separationDistance)
    {
        if ((firstWord != null) && (secondWord != null))
        {
            StringBuilder returnCommons = new StringBuilder(20);
            StringBuilder copy = new StringBuilder(secondWord);
            int firstWordLength = firstWord.Length;
            int secondWordLength = secondWord.Length;

            for (int i = 0; i < firstWordLength; i++)
            {
                char character = firstWord[i];
                bool found = false;

                for (int j = Math.Max(0, i - separationDistance); !found && j < Math.Min(i + separationDistance, secondWordLength); j++)
                {
                    if (copy[j] == character)
                    {
                        found = true;
                        returnCommons.Append(character);
                        copy[j] = '#';
                    }
                }
            }
            return returnCommons;
        }
        return null;
    }
}