Knuth-Morris-Pratt在纯C中的实现_C_Algorithm_Implementation_Knuth Morris Pratt

Knuth-Morris-Pratt在纯C中的实现

c algorithm

Knuth-Morris-Pratt在纯C中的实现,c,algorithm,implementation,knuth-morris-pratt,C,Algorithm,Implementation,Knuth Morris Pratt,我有下一个KMP实施： #include <stdio.h> #include <stdlib.h> #include <string.h> int kmp(char substr[], char str[]) { int i, j, N, M; N = strlen(str); M = strlen(substr); int *d = (int*)malloc(M * sizeof(int)); d[0] = 0;

我有下一个KMP实施：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int kmp(char substr[], char str[])
{
   int i, j, N, M;

   N = strlen(str);
   M = strlen(substr);

   int *d = (int*)malloc(M * sizeof(int));
   d[0] = 0;

   for(i = 0, j = 0; i < M; i++)
   {
      while(j > 0 && substr[j] != substr[i])
      {
         j = d[j - 1];
      }

      if(substr[j] == substr[i])
      {
         j++;
         d[i] = j;
      }
   }

   for(i = 0, j = 0; i < N; i++)
   {
      while(j > 0 && substr[j] != str[i])
      {
         j = d[j - 1];
      }

      if(substr[j] == str[i])
      {
         j++;
      }

      if(j == M)
      {
         free(d);
         return i - j + 1;
      }
   }

   free(d);

   return -1;
}

int main(void)
{
   char substr[] = "World",
      str[] = "Hello World!";

   int pos = kmp(substr, str);

   printf("position starts at: %i\r\n", pos);

   return 0;
}

#包括
#包括
#包括
int kmp（字符子字符串[]，字符字符串[]）
{
int i，j，N，M；
N=strlen（str）；
M=strlen（substr）；
int*d=（int*）malloc（M*sizeof（int））；
d[0]=0；
对于（i=0，j=0；i0&&substr[j]！=substr[i]）
{
j=d[j-1]；
}
if（substr[j]==substr[i]）
{
j++；
d[i]=j；
}
}
对于（i=0，j=0；i0&&substr[j]！=str[i]）
{
j=d[j-1]；
}
if（substr[j]==str[i]）
{
j++；
}
如果（j==M）
{
免费（d）；
返回i-j+1；
}
}
免费（d）；
返回-1；
}
内部主（空）
{
char substr[]=“世界”，
str[]=“你好，世界！”；
int pos=kmp（substr，str）；
printf（“位置开始于：%i\r\n”，位置）；
返回0；
}

您可以在此处进行测试：

它在小字符串上工作得很好，我用一个大循环对它进行了测试，这样一切都很好

但如果我将要搜索的子字符串和完整字符串更改为：

char substr[] = "%end%",
str[] = "<h1>The result is: <%lua% oleg = { x = 0xa }
         table.insert(oleg, y) oleg.y = 5 print(oleg.y) %end%></h1>";

char substr[]=%end%”，
str[]=“结果是：”；

只有在第一次尝试后，此实现才会失败

请你帮我修复KMP的实现，使算法能够处理这些字符串中的数据…

在一个地方，您偏离了源代码，源代码

while(j>0 && p[j]!=p[i]) j = d[j-1];
    if(p[j]==p[i])
        j++;
        d[i]=j;

当你有

while(j > 0 && substr[j] != substr[i])
{
    j = d[j - 1];
}
if(substr[j] == substr[i])
{
    j++;
    d[i] = j;
}

被来源的缩进所欺骗。在源代码中，

if（）

分支周围没有大括号，因此只有增量

j++由if
控制<代码>d[i]=j是无条件的
然后，源有一个错误，可能是由于索引的不寻常使用。设置阵列的正确方法是
int *d = (int*)malloc(M * sizeof(int));
d[0] = 0;

for(i = 1, j = 0; i < M; i++)
{
    while(j > 0 && substr[j-1] != substr[i-1])
    {
        j = d[j - 1];
    }

    if(substr[j] == substr[i])
        j++;
    d[i] = j;
}

int*d=（int*）malloc（M*sizeof（int））；
d[0]=0；
对于（i=1，j=0；i0&&substr[j-1]！=substr[i-1]）
{
j=d[j-1]；
}
if（substr[j]==substr[i]）
j++；
d[i]=j；
}

但是这很混乱，因为这里的设置使用索引i-1
和j-1
以及i
和j
来确定d[i]
。通常的实施方式是不同的；它的实施方式。因为这是你在大多数资料中都能找到的形式，所以你更容易说服自己这是正确的。
它到底是如何失败的？你试过使用调试器吗？实际上，算法是不正确的，至少d
的某些元素没有初始化，但是简单地将它们初始化为零并不能解决问题，它仍然是不正确的。首先尝试调试d
数组的填充，您应该了解该数组的含义并使其工作。下面是一个简单的输入substr='ababc'，str='xyzababc'字符串查找算法的唯一真正测试是在substr
中包含重复字符，否则一切都很简单。@unkulunkulu此算法在某些示例中确实有效，如果您尝试：substr[]=“World”，str[]=“Hello World！”，它将从链接（在输出时）返回pos:6，同样是在这个长字符串中，模式为%lua%-在substr[]=%end%时失败。。。这个实现是从Wikibooks获得的，在substr“World”中，你没有重复，用substr='ababc'，str='xyzababc'尝试我的输入，它进入无限循环。