C# 如何将字符串中的土耳其字符转换为英语字符?
标签:c#, encoding

string strTurkish = "ÜST";
如何将 strTurkish 的值转换为 "UST"?

回答:我不是这方面的专家,但我认为可以先对字符串做规范化分解(NormalizationForm.FormD),然后直接删除所有非 ASCII 字符来实现这一点:
using System;
using System.Linq;
using System.Text;
/// <summary>
/// Demo program: strips the diacritic from "ÜST" by decomposing the string
/// and keeping only the ASCII characters that remain.
/// </summary>
class Test
{
    /// <summary>
    /// Decomposes the sample text (Form D), discards every character outside
    /// the ASCII range, and writes the result to the console.
    /// </summary>
    static void Main()
    {
        const string sample = "\u00DCST";

        // After canonical decomposition an accented letter becomes a base
        // letter followed by combining marks; the marks are all >= 128.
        StringBuilder asciiChars = new StringBuilder();
        foreach (char current in sample.Normalize(NormalizationForm.FormD))
        {
            if (current <= '\u007F')
            {
                asciiChars.Append(current);
            }
        }

        Console.WriteLine(asciiChars.ToString());
    }
}
使用系统;
使用System.Linq;
使用系统文本;
课堂测试
{
静态void Main()
{
字符串text=“\u00DCST”;
字符串规格化=text.Normalize(NormalizationForm.FormD);
string ascionly=新字符串(标准化的.Where(c=>c<128.ToArray());
控制台写入线(ASCIONLY);
}
}
但在某些情况下,这种做法完全有可能造成严重的后果。

另一个回答:您可以使用下面的方法来解决这个问题。其他方法无法正确转换土耳其语小写无点 ı(Turkish Lowercase I,\u0131)。
// Decompose the text (Form D) so accents become separate combining marks,
// then keep every character that is not a non-spacing mark.
var text = "ÜST";
var unaccentedText = new string(
    text.Normalize(NormalizationForm.FormD)
        .Where(ch => char.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark)
        .ToArray());
/// <summary>
/// Removes diacritical marks from <paramref name="text"/>, e.g. "ÜST" becomes "UST".
/// </summary>
/// <remarks>
/// The text is canonically decomposed (Form D), which turns each accented letter into a
/// base letter followed by combining marks; the combining marks are then dropped.
/// Turkish dotless ı (U+0131) has no canonical decomposition, so it is mapped to 'i'
/// explicitly. Other characters without a decomposition are returned unchanged.
/// No code-page round trip is used: code page 1252 is not available by default on
/// .NET Core / .NET 5+, and converting through it can lose characters it cannot represent.
/// </remarks>
/// <param name="text">The text to process; must not be null.</param>
/// <returns>The decomposed text with all non-spacing marks removed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
public static string RemoveDiacritics(string text)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    // Dotless i is a letter in its own right, not "i + mark", so normalization leaves it alone.
    const char DotlessI = '\u0131';

    string normalizedString = text.Normalize(NormalizationForm.FormD);
    StringBuilder result = new StringBuilder(normalizedString.Length);
    foreach (char c in normalizedString)
    {
        if (c == DotlessI)
        {
            result.Append('i');
        }
        else if (CharUnicodeInfo.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark)
        {
            result.Append(c);
        }
    }
    return result.ToString();
}
publicstaticstringremovediacritics(字符串文本)
{
Encoding srcEncoding=Encoding.UTF8;
Encoding destEncoding=Encoding.GetEncoding(1252);//拉丁字母
text=destcodencing.GetString(Encoding.Convert(srccodencing,destcodencing,srccodencing.GetBytes(text));
字符串normalizedString=text.Normalize(NormalizationForm.FormD);
StringBuilder结果=新建StringBuilder();
for(int i=0;i
这不是一个需要通用解决方案的问题。众所周知,土耳其语字母表中只有 12 个特殊字符需要规范化:ı、İ、ö、Ö、ç、Ç、ü、Ü、ğ、Ğ、ş、Ş。您可以编写 12 条规则,把它们分别替换为 i、I、o、O、c、C、u、U、g、G、s、S。`public string TurkishCharacterToEnglish(string text)`
''' <summary>
''' Replaces each Turkish-specific letter in the input with its plain Latin counterpart.
''' </summary>
''' <param name="_String">The text to convert.</param>
''' <returns>The text with ı, ğ, ü, ş, ö, ç (and their capitals) replaced.</returns>
Public Function Ceng(ByVal _String As String) As String
    ' The two arrays are parallel: the letter at a given index in the first
    ' is replaced by the letter at the same index in the second.
    Dim turkishLetters As Char() = "ığüşöçĞÜŞİÖÇ".ToCharArray()
    Dim latinLetters As Char() = "igusocGUSIOC".ToCharArray()
    Dim position As Integer = 0
    For Each turkishLetter As Char In turkishLetters
        _String = _String.Replace(turkishLetter, latinLetters(position))
        position += 1
    Next
    Return _String
End Function
{
char[]turkishChars={'ı'、Ğ'、Ğ'、Ğ'、Ğ'、ş'、Ş'、ö'、Ö'、Ü'};
char[]englishChars={'i','g','i','g','c','s','s','o','o','u','u'};
//火柴
for(int i=0;iı
。还有其他解决方案吗?
var text = "ÜST"; var unaccentedText = String.Join("", text.Normalize(NormalizationForm.FormD).Where(c => char.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark)).Replace("ı", "i");
/// <summary>
/// Replaces the twelve Turkish-specific letters in <paramref name="text"/>
/// with their plain English (ASCII) counterparts, preserving case.
/// </summary>
/// <param name="text">The text to convert.</param>
/// <returns>The text with every Turkish-specific letter replaced.</returns>
public string TurkishCharacterToEnglish(string text)
{
    // Each Turkish letter sits at the same index as its ASCII replacement.
    const string turkishLetters = "ığİĞçÇşŞöÖüÜ";
    const string englishLetters = "igIGcCsSoOuU";

    string converted = text;
    int position = 0;
    while (position < turkishLetters.Length)
    {
        converted = converted.Replace(turkishLetters[position], englishLetters[position]);
        position++;
    }

    return converted;
}