查找字符串是否是C中的迭代子字符串算法？问题描述_C_String_Algorithm_Function_Sorting

查找字符串是否是C中的迭代子字符串算法？问题描述

c string algorithm function sorting

查找字符串是否是C中的迭代子字符串算法？问题描述,c,string,algorithm,function,sorting,C,String,Algorithm,Function,Sorting,我有一个字符串S。如何查找该字符串是否跟在S=nT后面示例：如果 1) S = "abab" 2) S = "abcdabcd" 3) S = "abcabcabc" 4) S = "zzxzzxzzx" 但是如果S=“abcb”返回false 我想也许我们可以在S的子串上反复调用KMP，然后决定 i、 e: 对于“abab”：在“a”上访问国民党。它返回2（两个实例）。现在2*len（“a”

我有一个字符串 S。如何判断该字符串是否满足 S = nT（即 S 由某个子串 T 重复 n 次拼接而成，n ≥ 2）？

示例：如果

1) S = "abab"  
2) S = "abcdabcd"  
3) S = "abcabcabc"  
4) S = "zzxzzxzzx"

但是如果

S=“abcb”

返回false

我想也许我们可以在S的子串上反复调用KMP，然后决定

i、 e:

对于“abab”：在“a”上调用 KMP。它返回 2（两个实例）。现在 2*len("a") != len(s)，所以“a”不是答案。

在“ab”上调用 KMP。它返回 2。现在 2*len("ab") == len(s)，所以返回 true。

你能推荐更好的算法吗？

我能想到一种启发式方法，只有当Len（原始字符串）/Len of（子字符串）是正整数时，才对子字符串调用KMP

此外，子字符串的最大长度不能超过 N/2（N 为原字符串的长度）。

编辑使用这些启发式方法，我编写了下面的python代码，因为我的C目前已经生锈了

oldstr = 'ABCDABCD'

# Try every prefix whose length is at most half of the string, shortest first.
# A prefix is the repeating unit exactly when repeating it
# len(oldstr) // len(prefix) times rebuilds the whole string; lengths that do
# not divide len(oldstr) produce a shorter string and therefore never match.
for i in range(0, len(oldstr) // 2):
    newslice = oldstr[0:i + 1]
    if newslice * (len(oldstr) // len(newslice)) == oldstr:
        # Single-argument print() emits the same text under Python 2 and 3.
        print('pattern found ' + newslice)
        break

我想您可以尝试以下算法：

设 L 为可能生成原始单词的子字符串长度。对于 L 从 1 到 strlen(s)/2，检查对于 i 从 1 到 strlen(s)/L 的所有 L*i 位置上是否都出现了第一个字符。如果是，那么它可能是一个可行的解，您应该使用 memcmp 进一步验证；如果不是，请尝试下一个 L。当然，您可以跳过那些不能整除 strlen(s) 的 L 值。

    // Fragment of a function body: decides whether s is made of one shorter
    // block repeated end-to-end. It returns TRUE at the first block length
    // that works and FALSE if none does.
    // NOTE(review): the enclosing function header is not shown, and BOOL/TRUE/
    // FALSE are not defined in this snippet (presumably the Windows-style
    // typedef/macros) - confirm before compiling.
    char s[] = "abcabcabcabc";
int nStringLength = strlen (s);
// A repeating block can be at most half of the string (two copies).
int nMaxCheckLength = nStringLength / 2;
int nThisOffset;
int nNumberOfSubStrings;
char cMustMatch;
char cCompare;
BOOL bThisSubStringLengthRepeats;
// Check all sub string lengths up to half the total length
for (int nSubStringLength = 1;  nSubStringLength <= nMaxCheckLength;  nSubStringLength++)
{
    // How many substrings will there be?
    nNumberOfSubStrings = nStringLength / nSubStringLength;

    // Only check substrings that fit exactly
    // (integer division truncates, so the product equals the length only
    // when nSubStringLength divides nStringLength)
    if (nSubStringLength * nNumberOfSubStrings == nStringLength)
    {
        // Assume it's going to be ok
        bThisSubStringLengthRepeats = TRUE;

        // check each character in substring
        for (nThisOffset = 0;  nThisOffset < nSubStringLength;  nThisOffset++)
        {
            // What must it be?
            // (the character at this offset inside the first block)
            cMustMatch = s [nThisOffset];

            // check each substring's char in that position
            // (starts at block 1 because block 0 is the reference itself)
            for (int nSubString = 1;  nSubString < nNumberOfSubStrings;  nSubString++)
            {
                cCompare = s [(nSubString * nSubStringLength) + nThisOffset];
                // Don't bother checking more if this doesn't match
                if (cCompare != cMustMatch)
                {
                    bThisSubStringLengthRepeats = FALSE;
                    break;
                }
            }

            // Stop checking this substring
            // (the inner break only left the innermost loop)
            if (!bThisSubStringLengthRepeats)
            {
                break;
            }
        }

        // We have found a match!
        if (bThisSubStringLengthRepeats)
        {
            return TRUE;
        }
    }
}

// We went through the whole lot, but no matches found
return FALSE;

char s[] = "abcabc";
int nStringLength = strlen (s);
int nMaxCheckLength = nStringLength / 2;
int nThisOffset;
int nNumberOfSubStrings;
char cMustMatch;
char cCompare;
BOOL bThisSubStringLengthRepeats;
// Check all sub string lengths up to half the total length
for (int nSubStringLength = 1;  nSubStringLength …（此处代码在抓取时被截断）

我看不出 KMP 算法在这种情况下有什么帮助。这里的问题并不在于决定从哪里开始下一次匹配。减少搜索时间的一种方法似乎是从最长的可能长度（总长度的一半）开始向下尝试。唯一需要测试的长度是那些能整除总长度的长度。这是 Ruby 中的一个例子。我应该补充一点：我知道这个问题被标记为 C，但这只是展示我所考虑的算法的一种简单方式（同时也让我能够测试它是否有效）。
这是Java代码，但您应该了解：
        // Single left-to-right pass that tracks a candidate repeating-block
        // length (repLen) and the position inside that block the next
        // character is compared against (repPos).
        // NOTE(review): on a mismatch repLen grows to i+1 but repPos is not
        // reset, so later comparisons can be misaligned; tracing "abacabac"
        // by hand ends with repLen == 5 although the shortest block has
        // length 4. Nothing checks that the string ends on a block boundary
        // either - confirm before relying on the result.
        String str = "ababcababc";
    int repPos = 0;
    int repLen = 0;
    for( int i = 0; i < str.length(); i++ ) {
        if( repLen == 0 ) {
            // First character: start with a block of length 1.
            repLen = 1;
        } else {
            char c = str.charAt( i );
            if( c == str.charAt( repPos ) ) {
                // Character agrees with the candidate block: advance inside
                // the block, wrapping to 0 at its end.
                repPos = ++repPos % repLen;
            } else {
                // Disagreement: the candidate becomes the whole prefix read
                // so far, including the current character.
                repLen = i+1;
            }
        }
    }

String str = "ababcbc";
int repPos = 0;
int repLen = 0;
for( int i = 0; i …（此处代码在抓取时被截断）

这将返回最短重复块的长度，或者如果没有重复，则返回字符串的长度。
您可以构建字符串的后缀数组，并对其排序。

现在寻找一系列不断增加的后缀，当你达到一个等于整个字符串大小的后缀时，系列中的第一个后缀会给你T
例如：
abcd <-- T
abcdabcd <-- S
bcd
bcdabcd
cd
cdabcd
d
dabcd

x
xzzx
xzzxzzx
zx
zxzzx
zxzzxzzx
zzx <-- T
zzxzzx
zzxzzxzzx <-- S

a
apa
apapa
apapapa
pa <-- T
papa
papapa <-- Another T, not detected by this algo
papapapa <-- S

abcd

实际上，您只需要测试这样的子字符串长度：完整字符串长度除以某个素数所得的长度。原因是：如果 S 包含 T 的 n 个副本，而 n 不是素数，则 n = ab（a、b 均大于 1），因此 S 实际上也包含 a 个 bT 的副本（其中“bT”表示“T 重复 b 次”）。这是对上面回答思路的扩展。
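int primes[] = { 2, 3, 5, 7, 11, 13, 17 };  /* There are one or two more... ;) */
int nPrimes = sizeof primes / sizeof primes[0];
/* Passing in the string length instead of assuming ASCIIZ strings means we
 * don't have to modify the string in-place or allocate memory for new copies
 * to handle recursion. */
int is_iterative(char *s, int len) {
    int i, j;
    for (i = 0; i …（此处代码在抓取时被截断）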

请注意，当递归查找更短的重复子字符串时，我们不需要再次检查整个字符串，只需检查第一个较大的重复，因为我们已经确定剩余的大重复是第一个的重复。：）
 蛮力方法是选择所有可能的子字符串，看看它们是否可以构成整个字符串
我们可以利用以下观察结果做得更好：有效的候选子字符串必须满足 len(str) % len(substr) == 0。这可以从问题陈述中推断出来。
以下是完整的代码：
/// @brief Tells whether @p str is some shorter block repeated two or more times.
///
/// Every proper divisor of the length is tried as a block length, largest
/// first. The function needs no external factor helper or counter.
///
/// @param str String to examine.
/// @return true if a block strictly shorter than @p str tiles it exactly;
///         false otherwise, including for empty and one-character strings.
bool isRational(const std::string &str){
    const std::size_t len = str.length();

    // len / 2 is the largest possible proper divisor. For len < 2 the loop
    // body never runs, so "" and "a" are reported as not repeating.
    for (std::size_t block = len / 2; block >= 1; --block) {
        if (len % block != 0) {
            continue;  // block length must tile the string exactly
        }
        // str has period `block` exactly when it equals itself shifted by
        // `block`: str[0, len-block) == str[block, len).
        if (str.compare(0, len - block, str, block, len - block) == 0) {
            return true;
        }
    }
    return false;
}

bool isRational(const string &str){
    int len = str.length();
    const auto &factors = getFactors(len); // this would include 1 but exclude len
    // sort(factors.begin(), factors.end()); To get out of the loop faster. Why? See https://stackoverflow.com/a/4698155/1043773
    for(auto iter = factors.rbegin(); iter != factors.rend(); ++iter){
        auto factor = *iter;
        bool result = true;
        for(int i = 0; i …（此处代码在抓取时被截断）

int primes[] = { 2, 3, 5, 7, 11, 13, 17 };  /* There are one or two more... ;) */
int nPrimes = sizeof primes / sizeof primes[0];

/* Returns nonzero when the first `len` bytes of `s` repeat with period
 * `sublen`, i.e. every length-`sublen` block equals the first one.
 * Expects 0 < sublen <= len with sublen dividing len. */
static int has_period(const char *s, int len, int sublen) {
    /* s has period sublen exactly when it equals itself shifted by sublen;
     * memcmp only reads, so the overlapping ranges are fine. */
    return memcmp(s, s + sublen, (size_t)(len - sublen)) == 0;
}

/* Returns the length of the shortest block T such that the first `len` bytes
 * of `s` are T repeated one or more times. Returns `len` itself when no
 * shorter block exists (this includes len <= 1).
 *
 * Passing in the string length instead of assuming ASCIIZ strings means we
 * don't have to modify the string in-place or allocate memory for new copies
 * to handle recursion.
 *
 * Only block lengths of the form len / p, with p a prime factor of len, need
 * testing: if s is n copies of T and n = a*b, then s is also a copies of
 * (b copies of T), so a repetition by some prime count always exists. The
 * prime factors are found by trial division, so every length is handled,
 * not only lengths whose prime factors are small. */
int is_iterative(char *s, int len) {
    int rest = len;     /* part of len whose prime factors are still unknown */
    int p;

    /* p <= rest / p is p*p <= rest written so it cannot overflow. */
    for (p = 2; p <= rest / p; ++p) {
        if (rest % p != 0) {
            continue;
        }
        /* p is a prime factor of len; strip it so it is visited once. */
        while (rest % p == 0) {
            rest /= p;
        }
        if (has_period(s, len, len / p)) {
            /* All length-(len/p) blocks are equal, so only the first block
             * needs checking for an even shorter repeated substring. */
            return is_iterative(s, len / p);
        }
    }

    /* Anything left above 1 is the one prime factor larger than sqrt(len). */
    if (rest > 1 && has_period(s, len, len / rest)) {
        return is_iterative(s, len / rest);
    }

    return len;     /* Could not be broken into shorter, repeated substrings */
}

/// @brief Tells whether @p str is some shorter block repeated two or more times.
///
/// Every proper divisor of the length is tried as a block length, largest
/// first. The function needs no external factor helper or counter.
///
/// @param str String to examine.
/// @return true if a block strictly shorter than @p str tiles it exactly;
///         false otherwise, including for empty and one-character strings.
bool isRational(const std::string &str){
    const std::size_t len = str.length();

    // len / 2 is the largest possible proper divisor. For len < 2 the loop
    // body never runs, so "" and "a" are reported as not repeating.
    for (std::size_t block = len / 2; block >= 1; --block) {
        if (len % block != 0) {
            continue;  // block length must tile the string exactly
        }
        // str has period `block` exactly when it equals itself shifted by
        // `block`: str[0, len-block) == str[block, len).
        if (str.compare(0, len - block, str, block, len - block) == 0) {
            return true;
        }
    }
    return false;
}

#include <bits/stdc++.h>
using namespace std;
// Prints the shortest string that tiles both hard-coded inputs exactly, or
// "0" when they share no such block. The exit status is 0 in both cases.
int main()
{
    bool check(std::string, std::string);
    std::string str = "abcabcabc";
    std::string str2 = "abcabcabcabcabc";
    // Make str the shorter one: a common block cannot be longer than it.
    if (str2.size() < str.size()) std::swap(str, str2);

    // Candidates are the prefixes of the shorter string, shortest first.
    // The whole of str is a valid candidate too (e.g. "ab" for "ab"/"abab"),
    // so the bound is size(), not size()/2.
    for (std::size_t i = 1; i <= str.size(); i++)
    {
        const std::string candidate = str.substr(0, i);
        if (check(candidate, str) && check(candidate, str2))
        {
            std::cout << candidate;
            return 0;  // success must not be reported as a failing exit code
        }
    }
    std::cout << 0;
    return 0;
}

/// @brief Tells whether @p str is @p substring repeated back-to-back.
///
/// @param substring Candidate block.
/// @param str       String that should consist of whole copies of the block.
/// @return true if @p str is zero or more exact copies of @p substring.
///         An empty @p substring only matches an empty @p str.
bool check(std::string substring, std::string str)
{
    const std::size_t block = substring.size();

    // An empty block never advances the scan, so handle it up front.
    if (block == 0) return str.empty();

    // A trailing partial block can never match.
    if (str.size() % block != 0) return false;

    for (std::size_t pos = 0; pos < str.size(); pos += block)
    {
        // Compare in place rather than building a temporary with substr().
        if (str.compare(pos, block, substring) != 0)
            return false;
    }
    return true;
}