String 如何以最快的方式在大字符串中进行许多小更改。visualc&x2B+;
我花了一个多月的时间试图解决这个问题。我需要在一个大字符串(字符串^)中进行多次替换(超过1000万次)。我也需要快点。我的方法是正确的,但程序运行了30分钟以上 问题: 我有一个变更表要做:String 如何以最快的方式在大字符串中进行许多小更改。visualc&x2B+;,string,visual-c++,c++-cli,String,Visual C++,C++ Cli,我花了一个多月的时间试图解决这个问题。我需要在一个大字符串(字符串^)中进行多次替换(超过1000万次)。我也需要快点。我的方法是正确的,但程序运行了30分钟以上 问题: 我有一个变更表要做:[strWas1,strWillBe1,strWas2,strWillBe2,…,strWas10^7,strWillBe10^7]。我还有一个大字符串,它可以包含一些strWasN,但也可以包含something-elsestrWas1,我不想更改它,因为“something-elsestrWas1”不是
[strWas1,strWillBe1,strWas2,strWillBe2,…,strWas10^7,strWillBe10^7]
。我还有一个大字符串,它可以包含一些strWasN
,但也可以包含something-elsestrWas1
,我不想更改它,因为“something-elsestrWas1
”不是“strWas1
”
例如,字符串是:
“我有两只狗,三只诺特狗,还有狗四公,五只狗,狗,狗,
狗,狗,33DoGs00“
现在我需要将所有孤立的“狗”从字母(“狗”是strWas1)改为“猫”(“猫”是strWillBe1)。结果应该是:
“我有两只猫,三只野狗,还有狗四公,五只猫,猫,猫,
猫,猫,33cats00“
我最后一次尝试是:
array<String^>^ strArray = gcnew array<String^>(9999999);
strArray[0] = gcnew String("dogs");
strArray[1] = gcnew String("cats");
//...
strArray[9999998] = gcnew String("whatReplace");
strArray[9999999] = gcnew String("newText");
bool found = false;
int index;
bool doThis = true;
String ^ notAllowed = u8"aąbcćdeęfghijklłmnńoópqrsśtuvwxyzźżAĄBCĆDEĘFGHIJKLŁMNŃOÓPQRSŚTUVWXYZŹŻёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ";
String ^ text = u8"I have two dogs, three notdogs, also dogsikong, 5dogs, -dogs. DOGS, Dogs, DoGs, 33DoGs00";
for (int i = 0; i < 9999999; i+=2) {
while (found = text->Contains(strArray[i])) {
index = text->IndexOf(strArray[i]);
MessageBox::Show(index.ToString());
doThis = true;
if (index == 0) {
for (int j = 0; j < notAllowed->Length; j++) {
if (text->Substring(strArray[i]->Length, 1) == notAllowed->Substring(j, 1)) doThis = false;
}
}
else if (text->Length - index - strArray[i]->Length) {
for (int j = 0; j < notAllowed->Length; j++) {
if (text->Substring(index-1, 1) == notAllowed->Substring(j, 1)) doThis = false;
}
}
else {
for (int j = 0; j < notAllowed->Length; j++) {
if ((text->Substring(index - 1, 1) == notAllowed->Substring(j, 1)) || (text->Substring(index+strArray[i]->Length,1)== notAllowed->Substring(j, 1))) doThis = false;
}
}
if (doThis) {
text = text->Substring(0, index) + strArray[i + 1] + text->Substring(index + strArray[i]->Length, text->Length - index - strArray[i]->Length);
}
}
}
array^strArray=gcnewarray(999999);
strArray[0]=gcnew字符串(“狗”);
strArray[1]=gcnew字符串(“猫”);
//...
strArray[999998]=gcnew字符串(“whatReplace”);
strArray[999999]=gcnew字符串(“newText”);
bool-found=false;
整数指数;
bool doThis=正确;
“一个不被允许的字符串、未被允许的未被允许的字符串(未被允许的准准准准准留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留留非非非非非非非巴巴西西西西西亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚亚巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴亚亚亚亚亚亚亚巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴巴ЖЖЯЧСМЮБЮ”;
String^text=u8“我有两只狗,三只notdogs,还有dogsikong,5只狗,-狗。狗,狗,狗,33只狗00”;
对于(int i=0;i<999999;i+=2){
while(found=text->Contains(strArray[i])){
index=text->IndexOf(strArray[i]);
MessageBox::Show(index.ToString());
这是真的;
如果(索引==0){
对于(int j=0;jLength;j++){
如果(text->Substring(strArray[i]->Length,1)==notAllowed->Substring(j,1)),则该值为false;
}
}
else if(text->Length-index-strArray[i]->Length){
对于(int j=0;jLength;j++){
如果(text->Substring(index-1,1)==notAllowed->Substring(j,1))doThis=false;
}
}
否则{
对于(int j=0;jLength;j++){
如果((text->Substring(index-1,1)==notAllowed->Substring(j,1))| |(text->Substring(index+strArray[i]->Length,1)==notAllowed->Substring(j,1)),则该值为false;
}
}
如果(做这件事){
text=text->Substring(0,index)+strArray[i+1]+text->Substring(index+strArray[i]->Length,text->Length-index-strArray[i]->Length);
}
}
}
但这是无休止的工作
新版本(感谢弗拉德·范斯坦):
array<String^>^ strArray = gcnew array<String^>(10);
strArray[0] = gcnew String("dogs");
strArray[1] = gcnew String("cats");
strArray[2] = gcnew String("dogs");
strArray[3] = gcnew String("cats");
strArray[4] = gcnew String("dogs");
strArray[5] = gcnew String("cats");
strArray[6] = gcnew String("dogs");
strArray[7] = gcnew String("cats");
strArray[8] = gcnew String("dogs");
strArray[9] = gcnew String("cats");
bool found = false;
int index;
bool doThis = true;
String ^ text = u8"I have two dogs, three notdogs, also dogsikong, 5dogs, -dogs. DOGS, Dogs, DoGs, 33DoGs00";
for (int i = 0; i < 10; i += 2)
{
int index = 0;
while ((index = text->ToLower()->IndexOf(strArray[i]->ToLower(), index)) != -1)
{
doThis = true;
// is there one more char?
if (index + strArray[i]->Length < text->Length)
{
if (Char::IsLetter(text[index+strArray[i]->Length]))
doThis = false;
}
// is there previous char?
if (index > 0)
{
if (Char::IsLetter(text[index - 1]))
doThis = false;
}
if (doThis)
text = text->Substring(0, index) + strArray[i + 1] +
text->Substring(index + strArray[i]->Length);
Debug::WriteLine(text);
index++;
}
}
array^strArray=gcnewarray(10);
strArray[0]=gcnew字符串(“狗”);
strArray[1]=gcnew字符串(“猫”);
strArray[2]=gcnew字符串(“狗”);
strArray[3]=gcnew字符串(“猫”);
strArray[4]=gcnew字符串(“狗”);
strArray[5]=gcnew字符串(“猫”);
strArray[6]=gcnew字符串(“狗”);
strArray[7]=gcnew字符串(“猫”);
strArray[8]=gcnew字符串(“狗”);
strArray[9]=gcnew字符串(“猫”);
bool-found=false;
整数指数;
bool doThis=正确;
String^text=u8“我有两只狗,三只notdogs,还有dogsikong,5只狗,-狗。狗,狗,狗,33只狗00”;
对于(int i=0;i<10;i+=2)
{
int指数=0;
而((index=text->ToLower()->IndexOf(strArray[i]->ToLower(),index))!=-1)
{
这是真的;
//还有一个字符吗?
if(索引+字符串[i]->长度<文本->长度)
{
if(Char::isleter(text[index+strArray[i]->Length]))
doThis=假;
}
//有以前的字符吗?
如果(索引>0)
{
if(Char::isleter(text[index-1]))
doThis=假;
}
如果(做这件事)
text=text->Substring(0,索引)+strArray[i+1]+
文本->子字符串(索引+字符串[i]->长度);
调试::WriteLine(文本);
索引++;
}
}
当然它仍然不是那么快的版本。快速版编写了David Yaw。您的代码中有许多问题可能会导致问题,但主要的逻辑错误是:
while(found=text->Contains(strArray[i]))
应该是
while(found==text->Contains(strArray[i]))
因为
=
是比较运算符,而=
是赋值运算符。因此,您总是在分配,因此您的while循环处于无限循环中。代码中存在许多问题,这些问题可能会导致问题,但主要的逻辑错误是:
while(found=text->Contains(strArray[i]))
应该是
while(found==text->Contains(strArray[i]))
因为=
是比较运算符,而=
是赋值运算符。所以你总是在赋值,所以你的while循环在一个无限循环中。Hm。。。没有
while (found == text->Contains(strArray[i]))
这是为了比较。但是我以前没有计算找到的。所以我计算了在while中找到的,并检查它是否为真。这是允许的
while (found = text->Contains(strArray[i]))
这正是:
found = text->Contains(strArray[i])
while (found==true)
至少在正常C++中是有效的。在这里,我也没有这个问题。
嗯。。。没有
while (found == text->Contains(strArray[i]))
这是为了比较。但是我以前没有计算找到的。所以我计算了在while中找到的,并检查它是否为真。这是允许的
while (found = text->Contains(strArray[i]))
这正是:
found = text->Contains(strArray[i])
while (found==true)
至少在正常C++中是有效的。这里我也没有这个问题。
有一个更好的方法来做这件事,而不是盲目地检查一百万个替换字符串中的每一个。让.Net散列字符串,并让它以这种方式进行检查
如果我们将find&replace字符串作为字典接收,我们可以使用.Net的哈希查找来查找str
array<String^>^ SplitIntoWords(String^ input)
{
List<String^> result;
StringBuilder currentWord;
bool currentIsWord = false;
for each (System::Char c in input)
{
// Words are made up of letters. Word separators are made up of
// everything else (numbers, whitespace, punctuation, etc.)
bool nextCharIsWord = Char::IsLetter(c);
if(nextCharIsWord != currentIsWord)
{
if(currentWord.Length > 0)
{
result.Add(currentWord.ToString());
currentWord.Clear();
}
currentIsWord = nextCharIsWord;
}
currentWord.Append(c);
}
if(currentWord.Length > 0)
{
result.Add(currentWord.ToString());
currentWord.Clear();
}
return result.ToArray();
}
String^ BigFindReplaceWords(
String^ originalString,
Dictionary<String^, String^>^ replacementPairs)
{
StringBuilder result;
// First, separate the input string into an array of words & non-words.
array<String^>^ asWords = SplitIntoWords(originalString);
// Go through each word & non-word that came out of the split. If a word or
// non-word is in the replacement list, add the replacement to the output.
// Otherwise, add the word/nonword to the output.
for each (String^ word in asWords)
{
String^ replaceWith;
if (replacementPairs->TryGetValue(word, replaceWith))
{
result.Append(replaceWith);
}
else
{
result.Append(word);
}
}
return result.ToString();
}
int main(array<System::String ^> ^args)
{
String^ text = "I have two dogs, three notdogs, also dogsikong, 5dogs, -dogs. DOGS, Dogs, DoGs, 33DoGs00";
array<String^>^ words = SplitIntoWords(text);
for (int i = 0; i < words->Length; i++)
{
Debug::WriteLine("words[{0}] = '{1}'", i, words[i]);
}
Dictionary<String^, String^>^ replacementPairs =
gcnew Dictionary<String^, String^>(StringComparer::CurrentCultureIgnoreCase);
replacementPairs->Add("dogs", "cats");
replacementPairs->Add("pigs", "cats");
replacementPairs->Add("mice", "cats");
replacementPairs->Add("rats", "cats");
replacementPairs->Add("horses", "cats");
String^ outText = BigFindReplaceWords(text, replacementPairs);
Debug::WriteLine(outText);
String^ text2 = "I have two dogs, three notpigs, also miceikong, 5rats, -dogs. RATS, Horses, DoGs, 33DoGs00";
String^ outText2 = BigFindReplaceWords(text2, replacementPairs);
Debug::WriteLine(outText2);
return 0;
}
words[0] = 'I'
words[1] = ' '
words[2] = 'have'
words[3] = ' '
words[4] = 'two'
words[5] = ' '
words[6] = 'dogs'
words[7] = ', '
words[8] = 'three'
words[9] = ' '
words[10] = 'notdogs'
words[11] = ', '
words[12] = 'also'
words[13] = ' '
words[14] = 'dogsikong'
words[15] = ', 5'
words[16] = 'dogs'
words[17] = ', -'
words[18] = 'dogs'
words[19] = '. '
words[20] = 'DOGS'
words[21] = ', '
words[22] = 'Dogs'
words[23] = ', '
words[24] = 'DoGs'
words[25] = ', 33'
words[26] = 'DoGs'
words[27] = '00'
I have two cats, three notdogs, also dogsikong, 5cats, -cats. cats, cats, cats, 33cats00
I have two cats, three notpigs, also miceikong, 5cats, -cats. cats, cats, cats, 33cats00
for (int i = 0; i < 9999999; i += 2)
{
int index = 0;
while ((index = text->IndexOf(strArray[i], index)) != -1)
{
doThis = true;
// is there one more char?
if (index + strArray[i]->Length < text->Length)
{
if(Char.IsLetter(text->Char[strArray[i]->Length]))
doThis = false;
}
// is there previous char?
if (index > 0)
{
if (Char.IsLetter(text->Char[index - 1]))
doThis = false;
}
if (doThis)
text = text->Substring(0, index) + strArray[i + 1] +
text->Substring(index + strArray[i]->Length);
}
}