C# 使用 CurrentCultureIgnoreCase 实现 FirstUnmatchedIndex（首个不匹配字符的索引）_C#_String

C# 使用 CurrentCultureIgnoreCase 实现 FirstUnmatchedIndex（首个不匹配字符的索引）

c# string

C# 使用CurrentCultureInogoreCase首次取消匹配索引,c#,string,C#,String,我需要支持可以在输入文本中使用非ascii字母的语言，因此我需要为FirstUnmatchedIndex实现StringComparison.CurrentCultureInogoreCase。忽略大小写并不坏，但我不知道如何将组合符号转换为标准表示，然后进行比较。所以这里有几个例子，函数应该返回-1，但是返回其他的东西 encyclopædia = encyclopaedia Archæology = Archaeology ARCHÆOLOGY = archaeology Archæolog

我需要支持输入文本中可能包含非 ASCII 字母的语言，因此需要让 FirstUnmatchedIndex 支持 StringComparison.CurrentCultureIgnoreCase。忽略大小写并不难，但我不知道如何把组合字符（例如连字 æ）转换为标准的展开形式后再进行比较。下面是几个例子：函数本应返回 -1，实际却返回了其他值

encyclopædia = encyclopaedia
Archæology = Archaeology
ARCHÆOLOGY = archaeology
Archæology = archaeology
Weißbier = WEISSBIER

如何知道一个字符是否需要扩展，并在需要时将每个字符转换为扩展形式

/// <summary>
/// Gets the index of the first character at which two strings differ.
/// </summary>
/// <param name="a">First string. <c>null</c> is treated as an empty string.</param>
/// <param name="b">Second string. <c>null</c> is treated as an empty string.</param>
/// <param name="compareSmallest">
/// If true, only the overlapping part is compared: returns the first difference found, or -1 if the end of
/// the shorter string is reached without finding a difference (i.e. the shorter string is a prefix of the other).
/// If false, returns -1 only if both strings are really the same; when the shorter string is a prefix of the
/// longer one, returns the position where the shorter string ends.
/// </param>
/// <param name="comparisonType">
/// How characters are compared. The ignore-case options fold case before a char-by-char comparison.
/// NOTE: the culture-sensitive options are only approximated; linguistic equivalences that change the
/// number of chars (for example "æ" vs "ae" or "ß" vs "ss") are NOT recognized and are reported as differences.
/// </param>
/// <returns>
/// Returns first difference index or -1 if no difference is found
/// </returns>
public static int FirstUnmatchedIndex(this string a, string b, bool compareSmallest = false, StringComparison comparisonType = StringComparison.CurrentCulture)
{
    //Treat null as empty, for BOTH arguments; a null b must not crash on b.Length / b.ToLower below.
    a = a ?? String.Empty;
    b = b ?? String.Empty;

    if (object.ReferenceEquals(a, b)) {
        //Same reference (this also covers null/null, which both became String.Empty).
        return -1;
    }

    //Convert strings before compare.
    switch (comparisonType) {
        case StringComparison.CurrentCulture:
            //FIXME culture-aware expansions are not handled, chars are compared as-is.
            break;
        case StringComparison.CurrentCultureIgnoreCase:
            //FIXME only case is folded, culture-aware expansions are not handled.
            var currentCulture = System.Globalization.CultureInfo.CurrentCulture;
            a = a.ToLower(currentCulture);
            b = b.ToLower(currentCulture);
            break;
        case StringComparison.InvariantCulture:
            //FIXME culture-aware expansions are not handled, chars are compared as-is.
            break;
        case StringComparison.InvariantCultureIgnoreCase:
            //FIXME only case is folded, culture-aware expansions are not handled.
            a = a.ToLowerInvariant();
            b = b.ToLowerInvariant();
            break;
        case StringComparison.OrdinalIgnoreCase:
            //Ordinal ignore-case must not depend on the current culture (e.g. the Turkish dotted/dotless i),
            //so fold with the invariant upper-casing instead of the culture-sensitive ToLower().
            a = a.ToUpperInvariant();
            b = b.ToUpperInvariant();
            break;
        case StringComparison.Ordinal:
            //Ordinal (binary) compare, nothing special to do.
        default:
            break;
    }

    string longStr = a.Length > b.Length ? a : b;
    string shortStr = a.Length > b.Length ? b : a;

    //Only the overlapping part can be compared char by char.
    int count = shortStr.Length;
    for (int idx = 0; idx < count; idx++) {
        //FIXME Check if char needs to be expanded ?
        if (shortStr[idx] != longStr[idx]) {
            return idx;
        }
    }

    //No difference in the overlap: equal, or the shorter string is a prefix of the longer one.
    return compareSmallest || longStr.Length == count ? -1 : count;
}

//
///获取第一个不同的字符出现索引
/// 
///第一串
///二线
/// 
///如果为true，则返回找到的第一个差异；如果到达字符串结尾时未找到差异，则返回-1。
///即，如果最小字符串包含在另一个字符串中，则返回-1。
///否则，仅当两个字符串实际上相同时才返回-1，如果没有发现差异，则返回最小字符串结束的位置。
/// 
/// 
///返回第一个差异索引，如果未找到差异，则返回-1
/// 
public static int firstunmatchdindex（此字符串a、字符串b、bool comparesmalest=false、stringcomparationcomparisontype=StringComparison.CurrentCulture）
{
//将null视为空
if（String.IsNullOrEmpty（a））{
if（String.IsNullOrEmpty（b））{
//相等，都是空的。
返回-1；
}否则{
//如果compareSmallest，则总是在最长路径中找到空。
//否则，位置0处的差异。
返回compareSmallest？-1:0；
}
}
if（object.ReferenceEquals（a，b））{
//相同的参考号。
返回-1；
}
//在比较之前转换字符串。
开关（比较类型）{
case StringComparison.CurrentCulture：
//修理工
打破
case StringComparison.CurrentCultureInoRecase：
//修理工
var currentCulture=System.Globalization.CultureInfo.currentCulture；
a=a.ToLower（当前文化）；
b=b.ToLower（当前文化）；
打破
case StringComparison.InvariantCulture：
//修理工
打破
case StringComparison.InvariantCultureInogoreCase：
//修理工
a=a.ToLowerInvariant（）；
b=b.ToLowerInvariant（）；
打破
案例StringComparison.OrdinalIgnoreCase：
a=a.ToLower（）；
b=b.ToLower（）；
打破
案例比较。序数：
//序数（二进制）压缩，没有什么特别的事情要做。
违约：
打破
}
字符串长度TR=a.长度>b.长度？a:b；
字符串短STR=a.长度>b.长度？b:a；
int count=短str.Length；
for（int idx=0；idx

我不确定是否正确理解了您的问题，但您可以使用“字典+正则表达式”组合。这个想法是用您想要扩展的字符创建字典，并在regex的帮助下查找它们。下面的代码显示了如何执行此操作的示例

正则表达式的解释：

```
(?i)
```
-这将启用不区分大小写的搜索（与
```
RegexOptions.IgnoreCase
```
相同，但以内联方式写在模式中）
```
[^\p{IsBasicLatin}]+
```
- 这将匹配所有不属于基本拉丁字符集的字符（从
```
\u0000
```
到
```
\u007F
```
）

代码使用

ToLower

方法，避免向字典中添加大写非拉丁字符。当然，如果您希望显式（即，将所有小写和大写字符都添加到字典中，并删除

ToLower

），您可以不这样做

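var dic = new Dictionary<string, string>
{
    ["æ"] = "ae",
    ["ß"] = "ss"
};
var words = new[] { "encyclopædia", "Archæology", "ARCHÆOLOGY", "Archæology", "Weißbier" };
var pattern = @"(?i)[^\p{IsBasicLatin}]+";
int x = -1;
foreach (var word in words)
{
    // 每个匹配项（m.Value）都会传入字典进行查找
    words[++x] = Regex.Replace(word, pattern, m => dic[m.Value.ToLower()]);
}
words.ToList().ForEach(WriteLine);
/*
输出：
encyclopaedia
Archaeology
ARCHaeOLOGY
Archaeology
Weissbier
*/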

由于我已经在逐字符遍历字符串，我认为再用正则表达式把输入解析两遍并不是最好的选择（而且需要展开的情况并不常见）。我想我必须手动检查每个字符是否大于 \u007F，如果大于，再检查该字符是否有对应的展开形式。遗憾的是，虽然 MS .NET 内部已经能处理这些情况，但目前并没有公开这类字符的列表……

// Maps each special character (in lower case) to its plain-letter expansion.
var dic = new Dictionary<string, string>
{
    ["æ"] = "ae",
    ["ß"] = "ss"
};

var words = new[] { "encyclopædia", "Archæology", "ARCHÆOLOGY", "Archæology", "Weißbier" };
// Matches every run of characters that are outside the Basic Latin block.
var pattern = @"(?i)[^\p{IsBasicLatin}]+";

// Expands a matched run one character at a time. A run may hold several characters ("ææ") or
// characters with no known expansion ("é"); indexing the dictionary with the whole run would throw
// KeyNotFoundException, so unknown characters are kept unchanged instead.
MatchEvaluator expandMatch = m => string.Concat(m.Value.Select(c =>
{
    string expansion;
    // Invariant lower-casing keeps the lookup independent of the current culture and
    // avoids having to add upper-case keys to the dictionary.
    return dic.TryGetValue(char.ToLowerInvariant(c).ToString(), out expansion)
        ? expansion
        : c.ToString();
}));

for (int i = 0; i < words.Length; i++)
{
    words[i] = Regex.Replace(words[i], pattern, expandMatch);
}
words.ToList().ForEach(WriteLine);

/*
    Output:
        encyclopaedia
        Archaeology
        ARCHaeOLOGY
        Archaeology
        Weissbier
*/