Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/csharp/331.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C# 以最快的方式替换字符串中的多个字符?_C#_Regex_String_Performance_Replace - Fatal编程技术网

C# 以最快的方式替换字符串中的多个字符?

C# 以最快的方式替换字符串中的多个字符?,c#,regex,string,performance,replace,C#,Regex,String,Performance,Replace,我正在将一些带有多个string字段的记录从旧数据库导入新数据库。它似乎非常慢,我怀疑这是因为我这样做: foreach (var oldObj in oldDB) { NewObject newObj = new NewObject(); newObj.Name = oldObj.Name.Trim().Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š') .Replace(']', 'Ć').Replace

我正在将一些带有多个
string
字段的记录从旧数据库导入新数据库。它似乎非常慢,我怀疑这是因为我这样做:

// Copies every record from oldDB into a freshly created NewObject.
// Each of the four text fields is trimmed and then passed through the same
// chain of nine single-character replacements, which map ASCII placeholder
// characters to Latin letters with diacritics.
foreach (var oldObj in oldDB)
{
    NewObject newObj = new NewObject();
    // The identical replacement chain is repeated for Name, Surname, Address and Note.
    newObj.Name = oldObj.Name.Trim().Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š')
        .Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć')
        .Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ');
    newObj.Surname = oldObj.Surname.Trim().Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š')
        .Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć')
        .Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ');
    newObj.Address = oldObj.Address.Trim().Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š')
        .Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć')
        .Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ');
    newObj.Note = oldObj.Note.Trim().Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š')
        .Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć')
        .Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ');
    /*
    ... some processing ...
    */
}
现在,我通过网络阅读了一些帖子和文章,在那里我看到了许多不同的想法。有人说如果我使用
MatchEvaluator
来处理正则表达式会更好,有人说最好保持原样

虽然我可能更容易为自己做一个基准案例,但我决定在这里问一个问题,以防其他人对相同的问题感到疑惑,或者有人提前知道

那么在C#中,最快的方法是什么呢

编辑

我已经发布了基准测试。乍一看,Richard 的方法可能是最快的。然而,由于正则表达式模式有误,他的方法和 Marc 的方法实际上都不会替换任何内容。原先错误的模式是

@"\^@\[\]`\}~\{\\" 

在更正该模式之后,看来使用链式 .Replace() 调用的旧方法才是最快的

尝试以下方法:

// Maps each character that must be replaced to its replacement character.
Dictionary<char, char> replacements = new Dictionary<char, char>();
// populate replacements

string str = "mystring";
// Work on a mutable copy so that every substitution happens in one pass
// without allocating an intermediate string per replacement.
char[] charArray = str.ToCharArray();

for (int i = 0; i < charArray.Length; i++)
{
    char newChar;
    // Characters that have no entry in the map are left untouched.
    if (replacements.TryGetValue(charArray[i], out newChar))
    {
        charArray[i] = newChar;
    }
}

string newStr = new string(charArray);
字典替换=新字典();
//填充替换项
string str=“mystring”;
char[]charArray=str.ToCharArray();
for(int i=0;i
一种可能的解决方案是为此使用
StringBuilder

您可以首先将代码重构为单个方法

/// <summary>
/// Replaces the nine ASCII placeholder characters (^ @ [ ] ` } ~ { \) in
/// <paramref name="input"/> with their accented Latin counterparts, using a
/// single <see cref="StringBuilder"/> so that no intermediate strings are
/// created between the individual replacements.
/// </summary>
/// <param name="input">
/// The text to convert. It is not trimmed here. A null value yields an empty
/// string, because <c>new StringBuilder(null)</c> starts out empty.
/// </param>
/// <returns>The converted text.</returns>
public string DoGeneralReplace(string input)
{
    var sb = new StringBuilder(input);
    sb.Replace('^', 'Č')
      .Replace('@', 'Ž')
      .Replace('[', 'Š')
      .Replace(']', 'Ć')
      .Replace('`', 'ž')
      .Replace('}', 'ć')
      .Replace('~', 'č')
      .Replace('{', 'š')
      .Replace('\\', 'Đ');

    // The builder is only a scratch buffer; callers expect a string back.
    return sb.ToString();
}


//usage
foreach (var oldObj in oldDB)
{
    NewObject newObj = new NewObject();
    newObj.Name = DoGeneralReplace(oldObj.Name);
    ...
}
嗯,我会尝试做一些类似的事情:

    static readonly Dictionary<char, string> replacements =
       new Dictionary<char, string>
    {
        {']',"Ć"}, {'~', "č"} // etc
    };
    static readonly Regex replaceRegex;
    // Builds, once per process, a compiled regex whose single character class
    // contains every key of the replacements dictionary.
    static YourUtilityType() // static initializer
    {
        StringBuilder pattern = new StringBuilder().Append('[');
        foreach (var key in replacements.Keys)
        {
            // Regex.Escape leaves ']' and '-' untouched, so a key such as ']'
            // would close the character class early (or '-' would form a
            // range) and the pattern would silently match the wrong text.
            // Emitting every key as a \uXXXX escape is safe for any character.
            pattern.Append("\\u")
                   .Append(((int)key).ToString("X4", System.Globalization.CultureInfo.InvariantCulture));
        }
        pattern.Append(']');
        replaceRegex = new Regex(pattern.ToString(), RegexOptions.Compiled);
    }
    /// <summary>
    /// Returns <paramref name="input"/> with every character matched by
    /// replaceRegex swapped for the string stored under that character in
    /// the replacements dictionary.
    /// </summary>
    public static string Sanitize(string input)
    {
        // Each match is a single character, so its first char is the dictionary key.
        MatchEvaluator lookUp = hit => replacements[hit.Value[0]];
        return replaceRegex.Replace(input, lookUp);
    }
// Trims the old name and substitutes each placeholder character in a single
// regex pass; the evaluator chooses the replacement for every match.
newObj.Name = Regex.Replace(
    oldObj.Name.Trim(),
    @"[@^\[\]`}~{\\]",
    hit =>
    {
        // The character class matches exactly one character at a time.
        switch (hit.Value[0])
        {
            case '^': return "Č";
            case '@': return "Ž";
            case '[': return "Š";
            case ']': return "Ć";
            case '`': return "ž";
            case '}': return "ć";
            case '~': return "č";
            case '{': return "š";
            case '\\': return "Đ";
            // Only reachable if the pattern and the cases above drift apart.
            default: throw new Exception("Unexpected match!");
        }
    });
静态只读字典替换=
新词典
{
{']'、“Ć”}、{'~'、“č”}//等
};
静态只读正则表达式replaceRegex;
静态YourUtilityType()//静态初始值设定项
{
StringBuilder模式=新建StringBuilder();
foreach(替换中的var键。键)
Append(Regex.Escape(key.ToString());
pattern.Append(']');
replaceRegex=new Regex(pattern.ToString(),RegexOptions.Compiled);
}
公共静态字符串清理(字符串输入)
{
返回replaceRegex.Replace(输入,匹配=>
{
返回替换项[match.Value[0]];
});
}

它只有一个地方需要维护(在顶部),并构建一个预编译的
Regex
来处理替换。所有的开销都是一次完成的(因此
静态
)。

您可以使用lambda表达式在字符映射上使用聚合:

  //Method for replacing chars with a mapping: each entry of replacementMap
  //is applied in turn to the whole string via String.Replace(char, char).
  //The entries are applied one after another, so a replacement character
  //that is itself a key of the map may be replaced again by a later entry.
  static string Replace(string input, IDictionary<char, char> replacementMap) {
      //Fold over the key/value pairs directly instead of enumerating Keys
      //and looking every key up in the map a second time.
      return replacementMap
          .Aggregate(input, (current, pair)
              => current.Replace(pair.Key, pair.Value));
  }
//用映射替换字符的方法
静态字符串替换(字符串输入,IDictionary replacementMap){
返回replacementMap.Keys
.聚合(输入,(当前,oldChar)
=>当前.Replace(oldChar,replacementMap[oldChar]);
}
您可以按如下方式运行此操作:

  //Demonstrates Replace with a small three-entry character map
  private static void Main(string[] args) {
      //Your input string
      string myString = "asgjk--@dfsg||jshd--f@jgsld-kj|rhgunfh-@-nsdflngs";

      //Char to char map using <oldChar, newChar>
      var charMap = new Dictionary<char, char> {
          { '-', 'D' },
          { '|', 'P' },
          { '@', 'A' }
      };

      //Your own replacement method
      myString = Replace(myString, charMap);

      //out: myString = "asgjkDDAdfsgPPjshdDDfAjgsldDkjPrhgunfhDADnsdflngs"
  }
*** Pass 1 Old (Chained String.Replace()) way completed in 814 ms logicnp (ToCharArray) way completed in 916 ms oleksii (StringBuilder) way completed in 943 ms André Christoffer Andersen (Lambda w/ Aggregate) way completed in 2551 ms Richard (Regex w/ MatchEvaluator) way completed in 215 ms Marc Gravell (Static Regex) way completed in 1008 ms *** Pass 2 Old (Chained String.Replace()) way completed in 786 ms logicnp (ToCharArray) way completed in 920 ms oleksii (StringBuilder) way completed in 905 ms André Christoffer Andersen (Lambda w/ Aggregate) way completed in 2515 ms Richard (Regex w/ MatchEvaluator) way completed in 217 ms Marc Gravell (Static Regex) way completed in 1025 ms *** Pass 3 Old (Chained String.Replace()) way completed in 775 ms logicnp (ToCharArray) way completed in 903 ms oleksii (StringBuilder) way completed in 931 ms André Christoffer Andersen (Lambda w/ Aggregate) way completed in 2529 ms Richard (Regex w/ MatchEvaluator) way completed in 214 ms Marc Gravell (Static Regex) way completed in 1022 ms *** Pass 4 Old (Chained String.Replace()) way completed in 799 ms logicnp (ToCharArray) way completed in 908 ms oleksii (StringBuilder) way completed in 938 ms André Christoffer Andersen (Lambda w/ Aggregate) way completed in 2592 ms Richard (Regex w/ MatchEvaluator) way completed in 225 ms Marc Gravell (Static Regex) way completed in 1050 ms
private static void Main(字符串[]args){
//使用
var charMap=新字典();
charMap.Add('-','D');charMap.Add('|','P');charMap.Add('@','A');
//您的输入字符串
string myString=“asgjk--@dfsg | | jshd--f@jgsld-kj| rhgunfh-@-nsdflngs”;
//你自己的替换方法
myString=Replace(myString,charMap);
//out:myString=“asgjkddadfsgppjshdddfajgsldkjprhgunfhdnsdflngs”
}
最快的方法

唯一的方法是自己比较性能:分别尝试问题中的写法、使用
StringBuilder
的写法,以及使用
Regex.Replace
的写法

但是微基准不考虑整个系统的范围。如果此方法只是整个系统的一小部分,那么其性能可能与整个应用程序的性能无关

一些注意事项:

  • 如上所述(我假设)使用
    String
    将创建大量中间字符串:GC需要做更多的工作。但这很简单
  • 使用
    StringBuilder
    可以在每次替换时修改相同的基础数据。这会减少垃圾的产生。它几乎和使用
    String
    一样简单
  • 使用正则表达式是最复杂的(因为您需要编写代码来确定每个匹配的替换内容),但只需要一个表达式。我预计这种方式会更慢,除非替换列表非常大,并且输入字符串中需要替换的字符很少(即大多数 Replace 调用并不替换任何内容,只是白白搜索一遍字符串)
  • 我预计在重复使用(数千次)的情况下,#2 会稍微快一点,因为 GC 负载更少

    对于regex方法,您需要类似以下内容:

        static readonly Dictionary<char, string> replacements =
           new Dictionary<char, string>
        {
            {']',"Ć"}, {'~', "č"} // etc
        };
        static readonly Regex replaceRegex;
        // Builds, once per process, a compiled regex whose single character
        // class contains every key of the replacements dictionary.
        static YourUtilityType() // static initializer
        {
            StringBuilder pattern = new StringBuilder().Append('[');
            foreach (var key in replacements.Keys)
            {
                // Regex.Escape leaves ']' and '-' untouched, so a key such as
                // ']' would close the character class early (or '-' would form
                // a range) and the pattern would silently match the wrong text.
                // Emitting every key as a \uXXXX escape is safe for any character.
                pattern.Append("\\u")
                       .Append(((int)key).ToString("X4", System.Globalization.CultureInfo.InvariantCulture));
            }
            pattern.Append(']');
            replaceRegex = new Regex(pattern.ToString(), RegexOptions.Compiled);
        }
        public static string Sanitize(string input)
        {
            return replaceRegex.Replace(input, match =>
            {
                return replacements[match.Value[0]];
            });
        }
    
    newObj.Name = Regex.Replace(oldObj.Name.Trim(), @"[@^\[\]`}~{\\]", match => {
      switch (match.Value) {
        case "^": return "Č";
        case "@": return "Ž";
        case "[": return "Š";
        case "]": return "Ć";
        case "`": return "ž";
        case "}": return "ć";
        case "~": return "č";
        case "{": return "š";
        case "\\": return "Đ";
        default: throw new Exception("Unexpected match!");
      }
    });
    

    这可以通过一种可重复使用的方式来完成,方法是使用一个参数来保存替换件并可重复使用。

    感谢大家的投入。 我编写了一个快速而肮脏的基准测试来测试您的输入。我已经测试了用500.000次迭代解析4个字符串,并完成了4次传递。结果如下:

      private static void Main(string[] args) {
          //Char to char map using <oldChar, newChar>
          var charMap = new Dictionary<char, char>();
          charMap.Add('-', 'D'); charMap.Add('|', 'P'); charMap.Add('@', 'A');
    
          //Your input string
          string myString = "asgjk--@dfsg||jshd--f@jgsld-kj|rhgunfh-@-nsdflngs";
    
          //Your own replacement method
          myString = Replace(myString, charMap);
    
          //out: myString = "asgjkDDAdfsgPPjshdDDfAjgsldDkjPrhgunfhDADnsdflngs"
      }
    
    *** Pass 1 Old (Chained String.Replace()) way completed in 814 ms logicnp (ToCharArray) way completed in 916 ms oleksii (StringBuilder) way completed in 943 ms André Christoffer Andersen (Lambda w/ Aggregate) way completed in 2551 ms Richard (Regex w/ MatchEvaluator) way completed in 215 ms Marc Gravell (Static Regex) way completed in 1008 ms *** Pass 2 Old (Chained String.Replace()) way completed in 786 ms logicnp (ToCharArray) way completed in 920 ms oleksii (StringBuilder) way completed in 905 ms André Christoffer Andersen (Lambda w/ Aggregate) way completed in 2515 ms Richard (Regex w/ MatchEvaluator) way completed in 217 ms Marc Gravell (Static Regex) way completed in 1025 ms *** Pass 3 Old (Chained String.Replace()) way completed in 775 ms logicnp (ToCharArray) way completed in 903 ms oleksii (StringBuilder) way completed in 931 ms André Christoffer Andersen (Lambda w/ Aggregate) way completed in 2529 ms Richard (Regex w/ MatchEvaluator) way completed in 214 ms Marc Gravell (Static Regex) way completed in 1022 ms *** Pass 4 Old (Chained String.Replace()) way completed in 799 ms logicnp (ToCharArray) way completed in 908 ms oleksii (StringBuilder) way completed in 938 ms André Christoffer Andersen (Lambda w/ Aggregate) way completed in 2592 ms Richard (Regex w/ MatchEvaluator) way completed in 225 ms Marc Gravell (Static Regex) way completed in 1050 ms ***通过1 旧(链接字符串.Replace())方式在814毫秒内完成 logicnp(ToCharArray)方式在916毫秒内完成 oleksii(StringBuilder)通道在943毫秒内完成 安德烈·克里斯托弗·安徒生(Lambda w/骨料)道路在2551毫秒内完成 Richard(带MatchEvaluator的正则表达式)在215毫秒内完成 Marc Gravell(静态正则表达式)方式在1008毫秒内完成 ***通过2 旧(链式字符串.Replace())方式在786毫秒内完成 logicnp(ToCharArray)方式在920毫秒内完成 oleksii(StringBuilder)通道在905毫秒内完成 安德烈·克里斯托弗·安徒生(Lambda w/Aggregate)路2515毫秒完成 Richard(带MatchEvaluator的正则表达式)在217毫秒内完成 Marc Gravell(静态正则表达式)方式在1025毫秒内完成 ***通过3 在775毫秒内完成旧(链接字符串.Replace())方式 logicnp(ToCharArray)方式在903毫秒内完成 oleksii(StringBuilder)通道在931毫秒内完成 安德烈·克里斯托弗·安徒生(Lambda w/Aggregate)道路于2529毫秒内完工 
Richard(带MatchEvaluator的正则表达式)在214毫秒内完成 Marc Gravell(静态正则表达式)方式在1022毫秒内完成 ***通过4 旧的(链接字符串.Replace())