C# 如何将字符串中的土耳其字符转换为英语字符?

C# 如何将字符串中的土耳其字符转换为英语字符?,c#,encoding,C#,Encoding,字符串strurkish=“ÜST” 如何将strurkish的值设置为“UST”?我不是这方面的专家,但我认为您可以通过分解值,然后有效地删除非ASCII字符来实现这一点: using System; using System.Linq; using System.Text; class Test { static void Main() { string text = "\u00DCST"; string normalized = text.

string strTurkish = "ÜST";


如何将 strTurkish 的值设置为 "UST"?

我不是这方面的专家,但我认为您可以通过分解值,然后有效地删除非ASCII字符来实现这一点:

using System;
using System.Linq;
using System.Text;

/// <summary>
/// Demo: strips accents from a sample string by canonically decomposing it
/// and keeping only the characters whose code point is below 128.
/// </summary>
class Test
{
    /// <summary>
    /// Decomposes the sample text (U+00DC followed by "ST"), drops every
    /// character outside the 7-bit ASCII range and prints what is left.
    /// </summary>
    static void Main()
    {
        const string sample = "\u00DCST";

        // FormD splits a precomposed letter into its base letter plus
        // combining marks, so the base letter survives the ASCII filter.
        string decomposed = sample.Normalize(NormalizationForm.FormD);

        StringBuilder kept = new StringBuilder();
        foreach (char ch in decomposed)
        {
            if (ch < 128)
            {
                kept.Append(ch);
            }
        }

        Console.WriteLine(kept.ToString());
    }
}
using System;
using System.Linq;
using System.Text;

class Test
{
    static void Main()
    {
        string text = "\u00DCST";
        string normalized = text.Normalize(NormalizationForm.FormD);
        string asciiOnly = new string(normalized.Where(c => c < 128).ToArray());
        Console.WriteLine(asciiOnly);
    }
}

但在某些情况下,这完全有可能造成可怕的后果。

您可以使用以下方法来解决问题。其他方法无法正确转换“Turkish Lowercase I(\u0131)”

// Removes diacritics from `text`: decompose to FormD, drop the combining
// (non-spacing) marks, then join the remaining characters back together.
var text = "ÜST";
var unaccentedText = String.Join("", text.Normalize(NormalizationForm.FormD)
        .Where(c => char.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark))
        // Dotless ı (U+0131) has no canonical decomposition, so the filter
        // above leaves it untouched; it has to be mapped explicitly.
        .Replace('ı', 'i');
/// <summary>
/// Returns <paramref name="text"/> with diacritical marks removed, e.g.
/// "ÜST" becomes "UST". The Turkish dotless ı (U+0131) is mapped to "i".
/// </summary>
/// <param name="text">The string to process.</param>
/// <returns>The text without combining marks, recomposed to FormC.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="text"/> is null.
/// </exception>
public static string RemoveDiacritics(string text)
{
    if (text == null)
    {
        throw new System.ArgumentNullException("text");
    }

    // No round trip through code page 1252: Encoding.GetEncoding(1252) is not
    // available on .NET Core / .NET 5+ unless a code-page provider has been
    // registered, and the conversion turns every character the code page
    // cannot represent into '?'.
    string normalizedString = text.Normalize(NormalizationForm.FormD);
    StringBuilder result = new StringBuilder(normalizedString.Length);

    foreach (char c in normalizedString)
    {
        if (CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.NonSpacingMark)
        {
            continue;
        }

        // U+0131 has no canonical decomposition, so FormD leaves it as is;
        // map it to a plain 'i' explicitly.
        result.Append(c == '\u0131' ? 'i' : c);
    }

    // Recompose so characters that were decomposed but carried no stripped
    // mark are returned in their usual precomposed form.
    return result.ToString().Normalize(NormalizationForm.FormC);
}
publicstaticstringremovediacritics(字符串文本)
{
Encoding srcEncoding=Encoding.UTF8;
Encoding destEncoding=Encoding.GetEncoding(1252);//拉丁字母
text=destcodencing.GetString(Encoding.Convert(srccodencing,destcodencing,srccodencing.GetBytes(text));
字符串normalizedString=text.Normalize(NormalizationForm.FormD);
StringBuilder结果=新建StringBuilder();
for(int i=0;i
这不是一个需要通用解决方案的问题。众所周知,土耳其语字母表中只有 12 个特殊字符需要规范化:ı、İ、ö、Ö、ç、Ç、ü、Ü、ğ、Ğ、ş、Ş。您可以编写 12 条规则,将它们分别替换为:i、I、o、O、c、C、u、U、g、G、s、S。public string TurkishCharacterToEnglish(string text)
''' <summary>
''' Replaces each of the twelve Turkish-specific letters in the input with
''' its plain Latin counterpart and returns the converted string.
''' </summary>
''' <param name="_String">The text to convert.</param>
''' <returns>The text with the Turkish letters replaced.</returns>
Public Function Ceng(ByVal _String As String) As String
    ' The two constants are parallel: the letter at a given position in the
    ' first is replaced by the letter at the same position in the second.
    Const TurkishLetters As String = "ığüşöçĞÜŞİÖÇ"
    Const LatinLetters As String = "igusocGUSIOC"

    Dim converted As String = _String
    Dim position As Integer = 0
    While position < TurkishLetters.Length
        converted = converted.Replace(TurkishLetters.Chars(position), LatinLetters.Chars(position))
        position += 1
    End While

    Return converted
End Function
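{ char[] turkishChars = {'ı', 'ğ', 'İ', 'Ğ', 'ç', 'Ç', 'ş', 'Ş', 'ö', 'Ö', 'ü', 'Ü'}; char[] englishChars = {'i', 'g', 'I', 'G', 'c', 'C', 's', 'S', 'o', 'O', 'u', 'U'}; // 匹配字符 for (int i = 0; i < turkishChars.Length; i++) text = text.Replace(turkishChars[i], englishChars[i]); return text; }
这是某种 UTF 到 ASCII 的映射吗?所有土耳其语字符是否都只映射到 a-z、A-Z 范围内的一个字符?
这不会规范化 ı。还有其他解决方案吗?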
var text = "ÜST"; var unaccentedText = String.Join("", text.Normalize(NormalizationForm.FormD).Where(c => char.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark)).Replace("ı", "i"); // swh
/// <summary>
/// Replaces the twelve Turkish-specific letters in <paramref name="text"/>
/// with their plain Latin counterparts; every other character is kept.
/// </summary>
/// <param name="text">The text to convert.</param>
/// <returns>The converted text.</returns>
public string TurkishCharacterToEnglish(string text)
{
    char[] letters = text.ToCharArray();

    // Single pass: each Turkish letter is swapped for its Latin equivalent.
    for (int pos = 0; pos < letters.Length; pos++)
    {
        switch (letters[pos])
        {
            case 'ı': letters[pos] = 'i'; break;
            case 'ğ': letters[pos] = 'g'; break;
            case 'İ': letters[pos] = 'I'; break;
            case 'Ğ': letters[pos] = 'G'; break;
            case 'ç': letters[pos] = 'c'; break;
            case 'Ç': letters[pos] = 'C'; break;
            case 'ş': letters[pos] = 's'; break;
            case 'Ş': letters[pos] = 'S'; break;
            case 'ö': letters[pos] = 'o'; break;
            case 'Ö': letters[pos] = 'O'; break;
            case 'ü': letters[pos] = 'u'; break;
            case 'Ü': letters[pos] = 'U'; break;
        }
    }

    return new string(letters);
}