基于python的KMP算法
我在读clrs的书,偶然发现了用于字符串匹配的“kmp算法”。我使用python实现了它(原样)。然而,出于某种原因,它似乎不起作用。我的错在哪里 代码如下:基于python的KMP算法,python,algorithm,Python,Algorithm,我在读clrs的书,偶然发现了用于字符串匹配的“kmp算法”。我使用python实现了它(原样)。然而,出于某种原因,它似乎不起作用。我的错在哪里 代码如下: def kmp_matcher(t,p): n=len(t) m=len(p) # pi=[0]*n; pi = compute_prefix_function(p) q=-1 for i in range(n): while(q>0 and p[q]!=t[i]):
def kmp_matcher(t,p):
n=len(t)
m=len(p)
# pi=[0]*n;
pi = compute_prefix_function(p)
q=-1
for i in range(n):
while(q>0 and p[q]!=t[i]):
q=pi[q]
if(p[q]==t[i]):
q=q+1
if(q==m):
print "pattern occurs with shift "+str(i-m)
q=pi[q]
def compute_prefix_function(p):
m=len(p)
pi =range(m)
pi[1]=0
k=0
for q in range(2,m):
while(k>0 and p[k]!=p[q]):
k=pi[k]
if(p[k]==p[q]):
k=k+1
pi[q]=k
return pi
t = 'brownfoxlazydog'
p = 'lazy'
kmp_matcher(t,p)
试试这个:
def kmp_matcher(t, d):
n=len(t)
m=len(d)
pi = compute_prefix_function(d)
q = 0
i = 0
while i < n:
if d[q]==t[i]:
q=q+1
i = i + 1
else:
if q != 0:
q = pi[q-1]
else:
i = i + 1
if q == m:
print "pattern occurs with shift "+str(i-q)
q = pi[q-1]
def compute_prefix_function(p):
m=len(p)
pi =range(m)
k=1
l = 0
while k < m:
if p[k] <= p[l]:
l = l + 1
pi[k] = l
k = k + 1
else:
if l != 0:
l = pi[l-1]
else:
pi[k] = 0
k = k + 1
return pi
t = 'brownfoxlazydog'
p = 'lazy'
kmp_matcher(t, p)
def kmp_匹配器(t,d):
n=长度(t)
m=len(d)
pi=计算前缀函数(d)
q=0
i=0
而i 如果p[k]这是我基于CLRs KMP算法编写的一个类,它包含了您想要的内容。请注意,此处仅接受DNA“字符”
class KmpMatcher(object):
def __init__(self, pattern, string, stringName):
self.motif = pattern.upper()
self.seq = string.upper()
self.header = stringName
self.prefix = []
self.validBases = ['A', 'T', 'G', 'C', 'N']
#Matches the motif pattern against itself.
def computePrefix(self):
#Initialize prefix array
self.fillPrefixList()
k = 0
for pos in range(1, len(self.motif)):
#Check valid nt
if(self.motif[pos] not in self.validBases):
self.invalidMotif()
#Unique base in motif
while(k > 0 and self.motif[k] != self.motif[pos]):
k = self.prefix[k]
#repeat in motif
if(self.motif[k] == self.motif[pos]):
k += 1
self.prefix[pos] = k
#Initialize the prefix list and set first element to 0
def fillPrefixList(self):
self.prefix = [None] * len(self.motif)
self.prefix[0] = 0
#An implementation of the Knuth-Morris-Pratt algorithm for linear time string matching
def kmpSearch(self):
#Compute prefix array
self.computePrefix()
#Number of characters matched
match = 0
found = False
for pos in range(0, len(self.seq)):
#Check valid nt
if(self.seq[pos] not in self.validBases):
self.invalidSequence()
#Next character is not a match
while(match > 0 and self.motif[match] != self.seq[pos]):
match = self.prefix[match-1]
#A character match has been found
if(self.motif[match] == self.seq[pos]):
match += 1
#Motif found
if(match == len(self.motif)):
print(self.header)
print("Match found at position: " + str(pos-match+2) + ':' + str(pos+1))
found = True
match = self.prefix[match-1]
if(found == False):
print("Sorry '" + self.motif + "'" + " was not found in " + str(self.header))
#An invalid character in the motif message to the user
def invalidMotif(self):
print("Error: motif contains invalid DNA nucleotides")
exit()
#An invalid character in the sequence message to the user
def invalidSequence(self):
print("Error: " + str(self.header) + "sequence contains invalid DNA nucleotides")
exit()
您可能想试用我的代码:
def recursive_find_match(i, j, pattern, pattern_track):
if pattern[i] == pattern[j]:
pattern_track.append(i+1)
return {"append":pattern_track, "i": i+1, "j": j+1}
elif pattern[i] != pattern[j] and i == 0:
pattern_track.append(i)
return {"append":pattern_track, "i": i, "j": j+1}
else:
i = pattern_track[i-1]
return recursive_find_match(i, j, pattern, pattern_track)
def kmp(str_, pattern):
len_str = len(str_)
len_pattern = len(pattern)
pattern_track = []
if len_pattern == 0:
return
elif len_pattern == 1:
pattern_track = [0]
else:
pattern_track = [0]
i = 0
j = 1
while j < len_pattern:
data = recursive_find_match(i, j, pattern, pattern_track)
i = data["i"]
j = data["j"]
pattern_track = data["append"]
index_str = 0
index_pattern = 0
match_from = -1
while index_str < len_str:
if index_pattern == len_pattern:
break
if str_[index_str] == pattern[index_pattern]:
if index_pattern == 0:
match_from = index_str
index_pattern += 1
index_str += 1
else:
if index_pattern == 0:
index_str += 1
else:
index_pattern = pattern_track[index_pattern-1]
match_from = index_str - index_pattern
def recursive_find_match(i,j,pattern,pattern_track):
如果模式[i]==模式[j]:
pattern_track.append(i+1)
返回{“append”:pattern_track,“i”:i+1,“j”:j+1}
elif模式[i]!=模式[j]和i==0:
pattern_track.append(i)
返回{“append”:pattern_track,“i”:i,“j”:j+1}
其他:
i=模式_轨道[i-1]
返回递归查找匹配(i、j、模式、模式跟踪)
def kmp(str_,模式):
len_str=len(str_)
len_模式=len(模式)
模式_轨道=[]
如果len_pattern==0:
返回
elif len_模式==1:
模式\轨道=[0]
其他:
模式\轨道=[0]
i=0
j=1
而j
您正在尝试阅读p[-1]
。这似乎不是有意的。我也尝试了p[0]。但是运气不好,请修复压痕。你试过了吗?与Rohanil的答案相比,它似乎不起作用,至少对我来说不起作用。我已经提供了我的全部课程,这将有助于你解决这个问题。在最初的文章中有一些问题,主要是移位和索引。这有一个错误,在t='aaabc',p='aab'上失败,谢谢你指出。我已经修好了。现在,对于t='aaabc',p='aab',它输出的模式是移位1
。这对我来说不起作用。pi=范围(m)?为什么?这会导致pi[k]处出现错误l@RajeshMappu您的输入值是什么?与上面的答案相同的文本和模式。