String C#中的Jaro–Winkler距离算法
如何在C#中实现Jaro–Winkler距离字符串比较算法?String 雅罗–;C语言中的Winkler距离算法#,string,comparison,distance,jaro-winkler,String,Comparison,Distance,Jaro Winkler,如何在C#中实现Jaro–Winkler距离字符串比较算法?公共静态类JaroWinkler距离 { /*除非 *匹配百分比等于或高于mWeightThreshold百分比 *没有修改。 *温克勒的论文使用了默认值0.7 */ 专用静态只读双mWeightThreshold=0.7; /*要由Winkler修改协调的前缀的大小。 *温克勒的论文使用了默认值4 */ 私有静态只读int mNumChars=4; /// ///返回指定对象之间的Jaro-Winkler距离 ///字符串。该距离是
公共静态类JaroWinkler距离
{
/*除非
*匹配百分比等于或高于mWeightThreshold百分比
*没有修改。
*温克勒的论文使用了默认值0.7
*/
专用静态只读双mWeightThreshold=0.7;
/*要由Winkler修改协调的前缀的大小。
*温克勒的论文使用了默认值4
*/
私有静态只读int mNumChars=4;
///
///返回指定对象之间的Jaro-Winkler距离
///字符串。该距离是对称的,并将在
///范围0(完全匹配)到1(不匹配)。
///
///第一串
///二线
///
公共静态双距离(字符串收敛1、字符串收敛2){
返回1.0-接近度(收敛1,收敛2);
}
///
///返回指定对象之间的Jaro-Winkler距离
///字符串。该距离是对称的,并将在
///范围0(不匹配)到1(完全匹配)。
///
///第一串
///二线
///
公共静态双接近(字符串收敛1、字符串收敛2)
{
int lLen1=收敛1.长度;
int lLen2=收敛2.长度;
如果(lLen1==0)
返回lLen2==0?1.0:0.0;
int lSearchRange=Math.Max(0,Math.Max(lLen1,lLen2)/2-1);
//默认值初始化为false
bool[]lMatched1=新bool[lLen1];
bool[]lMatched2=新bool[lLen2];
int lNumCommon=0;
对于(int i=0;i 如果(lWeight您可以在Lucene.Net上查看
它实现了Jaro–Winkler距离算法
它的分数和leebickmtu发布的不同
你可以把它作为参考
网址如下:
使用下面的类来实现Jaro–Winkler。
我已经定制了jaro和jaro winkler算法
访问以获取DLL
使用系统;
使用System.Linq;
名称空间搜索
{
公共静态类编辑距离
{
私有结构计量学
{
公共整数匹配;
公共换位;
}
私有静态EditDistance.jarMetrics匹配(字符串s1、字符串s2)
{
字符串文本;
字符串text2;
如果(s1.长度>s2.长度)
{
text=s1;
text2=s2;
}
其他的
{
text=s2;
text2=s1;
}
int num=Math.Max(text.Length/2-1,0);
int[]数组=新的int[text2.Length];
int i;
对于(i=0;it!=ms2[mi]).Count();
EditDistance.JaroMetrics结果;
结果:匹配项=num2;
结果:换位=num5/2;
返回结果;
}
公共静态浮点JaroWinkler(此字符串s1、字符串s2、浮点前缀刻度、浮点boostThreshold)
{
prefixScale=((prefixScale>0.25f)?0.25f:prefixScale);
prefixScale=((prefixScale<0f)?0f:prefixScale);
float num=s1.Jaro(s2);
int num2=0;
using System;
using System.Linq;
namespace Search
{
/// <summary>
/// String similarity helpers exposed as extension methods on <see cref="string"/>:
/// Jaro similarity, Jaro-Winkler similarity and Levenshtein edit distance.
/// </summary>
public static class EditDistance
{
    /// <summary>
    /// Maximum number of leading characters that may earn the Winkler prefix bonus.
    /// Together with the 0.25 ceiling on the prefix scale this keeps the score at or below 1.
    /// </summary>
    private const int MaxPrefixLength = 4;

    /// <summary>
    /// Raw counts produced by the Jaro matching pass.
    /// </summary>
    private struct JaroMetrics
    {
        /// <summary>Number of characters matched between the two strings.</summary>
        public int Matches;

        /// <summary>Half the number of matched characters that appear in a different order.</summary>
        public int Transpositions;
    }

    /// <summary>
    /// Finds the characters the two strings have in common within the Jaro search window
    /// and counts how many of them are out of order.
    /// </summary>
    /// <param name="s1">First string (must not be null).</param>
    /// <param name="s2">Second string (must not be null).</param>
    /// <returns>The match count and the halved count of out-of-order matches.</returns>
    private static EditDistance.JaroMetrics Matches(string s1, string s2)
    {
        // Scan from the shorter string into the longer one.
        string longer;
        string shorter;
        if (s1.Length > s2.Length)
        {
            longer = s1;
            shorter = s2;
        }
        else
        {
            longer = s2;
            shorter = s1;
        }

        // A character only counts as a match when it is at most this far from its own position.
        int window = Math.Max(longer.Length / 2 - 1, 0);

        // matchedAt[i] is the index in 'longer' matched by shorter[i], or -1 when unmatched.
        int[] matchedAt = new int[shorter.Length];
        for (int i = 0; i < matchedAt.Length; i++)
        {
            matchedAt[i] = -1;
        }

        // taken[k] is true once longer[k] has been paired, so it cannot be matched twice.
        bool[] taken = new bool[longer.Length];
        int matchCount = 0;

        for (int i = 0; i < shorter.Length; i++)
        {
            char current = shorter[i];
            int end = Math.Min(i + window + 1, longer.Length);
            for (int k = Math.Max(i - window, 0); k < end; k++)
            {
                if (!taken[k] && current == longer[k])
                {
                    matchedAt[i] = k;
                    taken[k] = true;
                    matchCount++;
                    break;
                }
            }
        }

        // Collect the matched characters of each string in their own original order.
        char[] fromShorter = new char[matchCount];
        char[] fromLonger = new char[matchCount];

        int write = 0;
        for (int i = 0; i < shorter.Length; i++)
        {
            if (matchedAt[i] != -1)
            {
                fromShorter[write] = shorter[i];
                write++;
            }
        }

        write = 0;
        for (int i = 0; i < longer.Length; i++)
        {
            if (taken[i])
            {
                fromLonger[write] = longer[i];
                write++;
            }
        }

        // Positions where the two matched sequences disagree; each transposition is seen twice.
        int outOfOrder = 0;
        for (int i = 0; i < matchCount; i++)
        {
            if (fromShorter[i] != fromLonger[i])
            {
                outOfOrder++;
            }
        }

        EditDistance.JaroMetrics result;
        result.Matches = matchCount;
        result.Transpositions = outOfOrder / 2;
        return result;
    }

    /// <summary>
    /// Computes the Jaro-Winkler similarity of two strings.
    /// </summary>
    /// <param name="s1">First string.</param>
    /// <param name="s2">Second string.</param>
    /// <param name="prefixScale">Weight given to each shared leading character; clamped to [0, 0.25].</param>
    /// <param name="boostThreshold">Jaro scores below this value are returned without a prefix bonus.</param>
    /// <returns>The Jaro score, raised by the prefix bonus when it reaches <paramref name="boostThreshold"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s1"/> or <paramref name="s2"/> is null.</exception>
    public static float JaroWinkler(this string s1, string s2, float prefixScale, float boostThreshold)
    {
        if (s1 == null)
        {
            throw new ArgumentNullException("s1");
        }
        if (s2 == null)
        {
            throw new ArgumentNullException("s2");
        }

        if (prefixScale > 0.25f)
        {
            prefixScale = 0.25f;
        }
        if (prefixScale < 0f)
        {
            prefixScale = 0f;
        }

        float jaro = s1.Jaro(s2);

        // Count the shared prefix, but never beyond MaxPrefixLength: an uncapped prefix
        // lets prefixScale * prefixLength exceed 1 and pushes the score above 1.
        int limit = Math.Min(MaxPrefixLength, Math.Min(s1.Length, s2.Length));
        int prefixLength = 0;
        while (prefixLength < limit && s1[prefixLength] == s2[prefixLength])
        {
            prefixLength++;
        }

        if (jaro < boostThreshold)
        {
            return jaro;
        }
        return jaro + prefixScale * (float)prefixLength * (1f - jaro);
    }

    /// <summary>
    /// Computes the Jaro-Winkler similarity with a boost threshold of 0.7.
    /// </summary>
    /// <param name="s1">First string.</param>
    /// <param name="s2">Second string.</param>
    /// <param name="prefixScale">Weight given to each shared leading character; clamped to [0, 0.25].</param>
    /// <returns>The Jaro-Winkler similarity.</returns>
    public static float JaroWinkler(this string s1, string s2, float prefixScale)
    {
        return s1.JaroWinkler(s2, prefixScale, 0.7f);
    }

    /// <summary>
    /// Computes the Jaro-Winkler similarity with a prefix scale of 0.1 and a boost threshold of 0.7.
    /// </summary>
    /// <param name="s1">First string.</param>
    /// <param name="s2">Second string.</param>
    /// <returns>The Jaro-Winkler similarity.</returns>
    public static float JaroWinkler(this string s1, string s2)
    {
        return s1.JaroWinkler(s2, 0.1f, 0.7f);
    }

    /// <summary>
    /// Computes the Jaro similarity of two strings.
    /// </summary>
    /// <param name="s1">First string.</param>
    /// <param name="s2">Second string.</param>
    /// <returns>
    /// 0 when the strings share no matching characters (including when either is empty);
    /// otherwise the mean of matches/|s1|, matches/|s2| and (matches - transpositions)/matches.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="s1"/> or <paramref name="s2"/> is null.</exception>
    public static float Jaro(this string s1, string s2)
    {
        if (s1 == null)
        {
            throw new ArgumentNullException("s1");
        }
        if (s2 == null)
        {
            throw new ArgumentNullException("s2");
        }

        EditDistance.JaroMetrics metrics = EditDistance.Matches(s1, s2);
        float matches = (float)metrics.Matches;

        // No matches also covers empty input, so the divisions below are never by zero.
        if (matches == 0f)
        {
            return 0f;
        }

        int transpositions = metrics.Transpositions;
        return (matches / (float)s1.Length + matches / (float)s2.Length + (matches - (float)transpositions) / matches) / 3f;
    }

    /// <summary>
    /// Computes the Levenshtein edit distance between two strings.
    /// A null string is treated the same as an empty one.
    /// </summary>
    /// <param name="source">First string.</param>
    /// <param name="target">Second string.</param>
    /// <returns>The number of single-character insertions, deletions and substitutions needed.</returns>
    public static int LevenshteinDistance(this string source, string target)
    {
        if (string.IsNullOrEmpty(source))
        {
            return string.IsNullOrEmpty(target) ? 0 : target.Length;
        }
        if (string.IsNullOrEmpty(target))
        {
            return source.Length;
        }

        // Keep the longer string along the columns.
        if (source.Length > target.Length)
        {
            string swap = target;
            target = source;
            source = swap;
        }

        int columns = target.Length;
        int rows = source.Length;

        // Only two rows are kept; they alternate between "current" and "previous".
        int[,] cost = new int[2, columns + 1];
        for (int i = 1; i <= columns; i++)
        {
            cost[0, i] = i;
        }

        int current = 0;
        for (int j = 1; j <= rows; j++)
        {
            current = j & 1;
            int previous = current ^ 1;
            cost[current, 0] = j;

            for (int i = 1; i <= columns; i++)
            {
                int substitution = (target[i - 1] == source[j - 1]) ? 0 : 1;
                int deletion = cost[previous, i] + 1;
                int insertion = cost[current, i - 1] + 1;
                int replace = cost[previous, i - 1] + substitution;
                cost[current, i] = Math.Min(Math.Min(deletion, insertion), replace);
            }
        }

        return cost[current, columns];
    }
}
}
/// <summary>
/// Case-insensitive string similarity scoring based on the Jaro metric.
/// </summary>
public class JaroWinkler
{
    private const double defaultMismatchScore = 0.0;
    private const double defaultMatchScore = 1.0;

    /// <summary>
    /// Rates the similarity of two strings, ignoring case.
    /// A value of 1 means a perfect match; a value of 0 means no match at all.
    /// Note that the value returned is the plain Jaro metric: no Winkler prefix bonus is applied.
    /// </summary>
    /// <param name="_firstWord">First string; null yields a score of 0.</param>
    /// <param name="_secondWord">Second string; null yields a score of 0.</param>
    /// <returns>A similarity value between 0 and 1.</returns>
    public static double RateSimilarity(string _firstWord, string _secondWord)
    {
        // Null must be checked before lower-casing, otherwise the call itself throws.
        if (_firstWord == null || _secondWord == null)
        {
            return defaultMismatchScore;
        }

        // Lower-casing is not part of the original algorithm; it is done here so that
        // differences in case do not reduce the score. The invariant culture keeps the
        // result independent of the machine's regional settings.
        _firstWord = _firstWord.ToLowerInvariant();
        _secondWord = _secondWord.ToLowerInvariant();

        if (_firstWord == _secondWord)
        {
            return defaultMatchScore;
        }

        // Maximum separation allowed between two characters for them to count as common.
        int halfLength = Math.Min(_firstWord.Length, _secondWord.Length) / 2 + 1;

        StringBuilder common1 = GetCommonCharacters(_firstWord, _secondWord, halfLength);
        int commonMatches = common1.Length;
        if (commonMatches == 0)
        {
            return defaultMismatchScore;
        }

        // Both directions must agree on the number of common characters.
        StringBuilder common2 = GetCommonCharacters(_secondWord, _firstWord, halfLength);
        if (commonMatches != common2.Length)
        {
            return defaultMismatchScore;
        }

        // Count positions where the two common sequences differ; each transposition is seen twice.
        int transpositions = 0;
        for (int i = 0; i < commonMatches; i++)
        {
            if (common1[i] != common2[i])
            {
                transpositions++;
            }
        }
        transpositions /= 2;

        return commonMatches / (3.0 * _firstWord.Length)
            + commonMatches / (3.0 * _secondWord.Length)
            + (commonMatches - transpositions) / (3.0 * commonMatches);
    }

    /// <summary>
    /// Collects, in order, the characters of <paramref name="firstWord"/> that also occur in
    /// <paramref name="secondWord"/> close enough to their own position. Each character of
    /// <paramref name="secondWord"/> can be used for at most one match.
    /// </summary>
    /// <param name="firstWord">String whose characters are looked up.</param>
    /// <param name="secondWord">String that is searched.</param>
    /// <param name="separationDistance">
    /// Search reach around position i: indices from i - distance (inclusive) up to
    /// i + distance (exclusive) are examined.
    /// </param>
    /// <returns>The common characters, or null when either string is null.</returns>
    private static StringBuilder GetCommonCharacters(string firstWord, string secondWord, int separationDistance)
    {
        if (firstWord == null || secondWord == null)
        {
            return null;
        }

        StringBuilder commons = new StringBuilder(20);
        int secondWordLength = secondWord.Length;

        // Track consumed positions separately instead of overwriting them with a sentinel
        // character, so that no character occurring in the input can match a consumed slot.
        bool[] consumed = new bool[secondWordLength];

        for (int i = 0; i < firstWord.Length; i++)
        {
            char character = firstWord[i];
            int end = Math.Min(i + separationDistance, secondWordLength);
            for (int j = Math.Max(0, i - separationDistance); j < end; j++)
            {
                if (!consumed[j] && secondWord[j] == character)
                {
                    consumed[j] = true;
                    commons.Append(character);
                    break;
                }
            }
        }

        return commons;
    }
}