C# Soundex算法实现的输出在以下情况下是错误的-“;“提姆扎克”;及;“普菲斯特”;
当我根据测试算法C# Soundex算法实现的输出在以下情况下是错误的-“;“提姆扎克”;及;“普菲斯特”;,c#,soundex,C#,Soundex,当我根据测试算法Soundex时,我发现Tymczak返回的是T520,而不是T522,Pfister返回的是P123,而不是P236 我不知道为什么输出不正确 我的代码: using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.Threading.Tasks; namespac
Soundex
时,我发现Tymczak返回的是T520,而不是T522,Pfister返回的是P123,而不是P236
我不知道为什么输出不正确
我的代码:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace ConsoleApplication4
{
class Program
{
static void Main(string[] args)
{
string s = "Tymczak";
string result = SoundexByWord(s);
Console.WriteLine(result);
}
private static string Soundex(string data)
{
string first = "pv";
string second = "xz";
string third = "dt";
string forth = "mn";
string fifth = "bf";
string sixth = "cgj";
string seventh = "kqs";
//ana ast5dtmt string builder 34an 3aml zy al list fy apend
// 34an a apend mn al a5r al string
StringBuilder result = new StringBuilder();
if (data != null && data.Length > 0)
{
string previousCode = "", currentCode = "", currentLetter = "";
// append on the string from the last
// get the first characheter of the string data
// append it on the result
//according to algorithm first charachter stays the same
result.Append(data.Substring(0, 1));
RemoveUnwantedChar(data);
for (int i = 1; i < data.Length; i++)
{
// nb2d al algorithm first take the second characheter in data
//n7wlo la lower
currentLetter = data.Substring(i, 1).ToLower();
currentCode = "";
// No string for zero because we will remove it
if (first.IndexOf(currentLetter) > -1)
//search for bfpv in the current letter return number
// -1 is out of string index
currentCode = "1";
else if (fifth.IndexOf(currentLetter) > -1)
//search for bfpv in the current letter return number
// -1 is out of string index
currentCode = "1";
else if (sixth.IndexOf(currentLetter) > -1)
//search for bfpv in the current letter return number
// -1 is out of string index
currentCode = "2";
else if (seventh.IndexOf(currentLetter) > -1)
//search for bfpv in the current letter return number
// -1 is out of string index
currentCode = "2";
else if (second.IndexOf(currentLetter) > -1)
currentCode = "2";
else if (third.IndexOf(currentLetter) > -1)
currentCode = "3";
else if (currentLetter == "l")
currentCode = "4";
else if (forth.IndexOf(currentLetter) > -1)
currentCode = "5";
else if (currentLetter == "r")
currentCode = "6";
if (currentCode != previousCode)
result.Append(currentCode);
if (result.Length == 4) break;
if (currentCode != "")
previousCode = currentCode;
}
}
if (result.Length < 4)
result.Append(new String('0', 4 - result.Length));
return result.ToString().ToUpper();
}
public static string RemoveUnwantedChar(string input)
{
return Regex.Replace(input, "aeiouyhw", "");
}
private static string SoundexByWord(string data)
{
var soundexes = new List<string>();
foreach (var str in data.Split(' '))
{
soundexes.Add(Soundex(str));
}
#if Net35OrLower
// string.Join in .Net 3.5 and
//before require the second parameter to be an array.
return string.Join(" ", soundexes.ToArray());
#endif
// string.Join in .Net 4 has an overload
//that takes IEnumerable<string>
return string.Join(" ", soundexes);
}
}
}
使用系统;
使用System.Collections.Generic;
使用System.Linq;
使用系统文本;
使用System.Text.RegularExpressions;
使用System.Threading.Tasks;
命名空间控制台应用程序4
{
班级计划
{
静态void Main(字符串[]参数)
{
字符串s=“Tymczak”;
字符串结果=SoundexByWord;
控制台写入线(结果);
}
专用静态字符串Soundex(字符串数据)
{
string first=“pv”;
字符串second=“xz”;
字符串third=“dt”;
string forth=“mn”;
字符串fifth=“bf”;
string sixth=“cgj”;
string seventh=“kqs”;
//ana ast5dtmt字符串生成器34A 3aml zy al列表fy apend
//34A a端锰铝合金a5r铝串
StringBuilder结果=新建StringBuilder();
if(data!=null&&data.Length>0)
{
字符串previousCode=“”,currentCode=“”,currentLetter=“”;
//追加到最后一个
//获取字符串数据的第一个字符
//将其附加到结果上
//根据算法,第一个字符保持不变
追加(data.Substring(0,1));
移除UnwantedChar(数据);
for(int i=1;i-1)
//在当前信函返回编号中搜索bfpv
//-1是字符串外索引
currentCode=“1”;
else if(第五个索引of(currentLetter)>-1)
//在当前信函返回编号中搜索bfpv
//-1是字符串外索引
currentCode=“1”;
else if(第六个索引of(currentLetter)>-1)
//在当前信函返回编号中搜索bfpv
//-1是字符串外索引
currentCode=“2”;
else if(第七个索引of(currentLetter)>-1)
//在当前信函返回编号中搜索bfpv
//-1是字符串外索引
currentCode=“2”;
else if(第二个索引of(currentLetter)>-1)
currentCode=“2”;
else if(第三个索引of(currentLetter)>-1)
currentCode=“3”;
else if(currentLetter==“l”)
currentCode=“4”;
否则如果(第四个索引of(currentLetter)>-1)
currentCode=“5”;
else if(currentLetter==“r”)
currentCode=“6”;
如果(当前代码!=以前的代码)
result.Append(当前代码);
如果(result.Length==4)中断;
如果(当前代码!=“”)
previousCode=当前代码;
}
}
如果(结果长度<4)
Append(新字符串('0',4-result.Length));
返回结果.ToString().ToUpper();
}
公共静态字符串RemoveUnwantedChar(字符串输入)
{
返回Regex.Replace(输入“aeiouyhw”和“”);
}
专用静态字符串SoundexByWord(字符串数据)
{
var soundexes=新列表();
foreach(data.Split(“”)中的var str)
{
添加(Soundex(str));
}
#如果Net35OrLower
//加入.NET3.5和
//之前,需要将第二个参数设置为数组。
返回字符串.Join(“,soundexes.ToArray());
#恩迪夫
//Net 4中的string.Join具有重载
//那需要无数的时间
返回字符串。Join(“,soundexes);
}
}
}
这不会告诉您代码出错的地方,甚至可能不是最快的解决方案,但它似乎正确地给出了示例,而且只需要几行代码
它实现了测试的六个步骤
stringsoundex(字符串输入)
{
//字符组:第一个是要删除的元音
//其他组是要由组索引替换的字符
列表组=新列表()
{“aeiouy”、“bfpv”、“cgjkqsxz”、“dt”、“l”、“mn”、“r”};
//保存第一个字符(1)
string first=input.Substring(0,1);
字符串s=input.ToLower();
//无条件删除(1)
s=s.Replace(“h”),Replace(“w”);
//替换所有替换组中的字符(2)
对于(int g=1;g 如果((s[0]>='0')&&(s[0]您是否尝试实现第二个算法?@TaW我不知道如何将代码更新为第二个算法我认为它们与我在代码中所做的步骤相同可能我只是缺少它,但保存第一个字母的位置在哪里。删除除第一个字母外的所有出现的“h”和“w”。部分?@TaW在此处保存第一个字母结果。追加(数据。子字符串(0)
string Soundex(string input)
{
// character groups: the 1st one are vowels to remove
// the other groups are characters to replace by the group index
List<string> groups = new List<string>()
{ "aeiouy", "bfpv", "cgjkqsxz", "dt", "l", "mn", "r" };
// save the 1st character (1)
string first = input.Substring(0, 1);
string s = input.ToLower();
// remove unconditionally (1)
s = s.Replace("h", "").Replace("w", "");
// replace characters in all replacement groups (2)
for (int g = 1; g < groups.Count; g++)
for (int i = 0; i < groups[g].Length; i++)
s = s.Replace(groups[g][i], ((char)(g + (byte)'0')));
// replace repeating digits (3)
// NOTE: this step actually should be repeated until the length no longer changes!!!
for (int i = 1; i < 10; i++) s = s.Replace(i + "" + i, i + "");
// now remove characters from group 0: (4)
for (int i = 0; i < groups[0].Length; i++) s = s.Replace(groups[0][i].ToString(), "");
// remove the first if it is a digit (5)
if ( (s[0] >= '0') && (s[0] <= '9') ) s = s.Substring(1);
// add saved first to max 3 digits and pad if needed (6)
return (first + s.Substring(0, Math.Min(3, s.Length))).PadRight(4, '0');
}