C++ 查找字符串中具有最大长度的所有有序序列
我有以下问题需要解决: 有两个长度任意、内容任意的字符串。我需要找到最大长度的所有有序序列,它出现在两个字符串中 例1: 输入:“a1b2c3”“1a2b3c” 输出:“123”“12c”“1b3”“1bc”“a23”“a2c”“ab3”“abc” 例2: 输入:“cadb”“abcd” 输出:“ab”“ad”“cd” 我用两个循环直接编写它,递归,然后删除重复项和结果,它们是更大结果的一部分(例如,“abc”序列包含“ab”、“ac”和“bc”序列,所以我正在过滤它们)C++ 查找字符串中具有最大长度的所有有序序列,c++,string,algorithm,c++11,pattern-matching,C++,String,Algorithm,C++11,Pattern Matching,我有以下问题需要解决: 有两个长度任意、内容任意的字符串。我需要找到最大长度的所有有序序列,它出现在两个字符串中 例1: 输入:“a1b2c3”“1a2b3c” 输出:“123”“12c”“1b3”“1bc”“a23”“a2c”“ab3”“abc” 例2: 输入:“cadb”“abcd” 输出:“ab”“ad”“cd” 我用两个循环直接编写它,递归,然后删除重复项和结果,它们是更大结果的一部分(例如,“abc”序列包含“ab”、“ac”和“bc”序列,所以我正在过滤它们) /“match”参数在
// “match”参数在此用作临时缓冲区
void match_recursive(set<string>& matches, string& match, const string& a_str1, const string& a_str2, size_t a_pos1, size_t a_pos2)
{
bool added = false;
for (size_t i = a_pos1; i
此函数解决了问题,但复杂性是不可接受的。例如,“0q0e0t0c0a0d0a0d0i0e0o0p0z0”“0w0r0y0d0s0a0b0w0k0f0.0k0x0”的解决方案在我的机器上需要28秒(调试目标,但无论如何速度非常慢)。我认为应该有一些简单的算法来解决这个问题,但不知怎么的,我在网上找不到任何算法
你们能给我指一下正确的方向吗? 听起来你是想找出两个字符串之间的相似之处?多年前,我在网上的某个地方发现了这段代码,并对其做了一些修改(很抱歉,我已经无法注明出处了),此后经常使用它。它运行得非常快(至少对字符串而言)。你可能需要根据自己的用途对它做些改动。抱歉,这是 VB 代码。
' Cumulative score of the comparison in progress: Similar resets it to 0
' and FindCommon adds the length of each common substring it finds.
' NOTE(review): this is shared mutable state, so overlapping calls to
' Similar would presumably interfere with each other - confirm callers
' are single-threaded.
Private Shared piScore As Integer
''' <summary>
''' Compares two strings regardless of case, ignoring every character
''' that is not a letter A-Z or a digit 0-9.
''' Returns a numeric indication of their similarity
''' (0 = not at all similar, 100 = identical)
''' </summary>
''' <param name="psStr1">String to compare</param>
''' <param name="psStr2">String to compare</param>
''' <returns>
''' 0 when either argument is Nothing or has no letters/digits left after
''' normalization, 100 when the normalized strings are equal, otherwise the
''' cumulative score computed by FindCommon expressed as a percentage of
''' the average normalized length.
''' </returns>
''' <remarks>
''' The cumulative score is kept in the shared piScore field, which this
''' function resets before calling FindCommon.
''' </remarks>
Public Shared Function Similar(ByVal psStr1 As String, ByVal psStr2 As String) As Integer
    If psStr1 Is Nothing OrElse psStr2 Is Nothing Then Return 0

    ' Convert each string to simplest form (letters and digits only, all
    ' upper case). The invariant culture is used on purpose: the
    ' culture-sensitive ToUpper can produce non-ASCII letters (for example
    ' U+0130 for "i" under Turkish cultures), which would make the result
    ' depend on the machine's regional settings.
    psStr1 = ReplaceSpecial(psStr1.ToUpperInvariant())
    psStr2 = ReplaceSpecial(psStr2.ToUpperInvariant())

    If psStr1.Trim = "" OrElse psStr2.Trim = "" Then
        ' One or both of the strings is now empty
        Return 0
    End If

    If psStr1 = psStr2 Then
        ' Strings are identical
        Return 100
    End If

    ' Initialize cumulative score (this will be the total length of all
    ' the common substrings), then let FindCommon accumulate into it.
    piScore = 0
    FindCommon(psStr1, psStr2)

    ' Return the cumulative score as a percent of the maximum score, the
    ' maximum being the average length of the two strings. "/" is a
    ' floating-point division; CInt makes the narrowing explicit (same
    ' rounding as the implicit conversion) so this also compiles with
    ' Option Strict On.
    Return CInt(piScore * 200 / (Len(psStr1) + Len(psStr2)))
End Function
''' <summary>USED BY SIMILAR FUNCTION</summary>
''' <remarks>
''' Finds the longest common substring (other than single characters) of
''' psS1 and psS2, adds its length to the shared piScore field, then
''' recursively does the same for the portions to the left and to the
''' right of that substring.
''' </remarks>
''' <param name="psS1">String that is searched for the candidates.</param>
''' <param name="psS2">String the candidate substrings are taken from.</param>
Private Shared Sub FindCommon(ByVal psS1 As String, ByVal psS2 As String)
    Dim iLongest As Integer = 0, iStartPos1 As Integer = 0, iStartPos2 As Integer = 0, iJ As Integer = 0
    Dim sHoldStr As String = psS2
    Dim sTestStr As String = ""

    ' Walk over every suffix of psS2; suffixes no longer than the best
    ' match so far cannot improve on it, so the loop stops there.
    Do While Len(sHoldStr) > iLongest
        sTestStr = sHoldStr
        ' Shrink the candidate from the right until it occurs in psS1 or
        ' only a single character is left.
        Do While Len(sTestStr) > 1
            iJ = InStr(psS1, sTestStr)
            If iJ > 0 Then
                ' Test string is sub-set of the other string
                If Len(sTestStr) > iLongest Then
                    ' Test string is longer than previous longest.
                    ' Store its length and (1-based) positions.
                    iLongest = Len(sTestStr)
                    iStartPos1 = iJ
                    iStartPos2 = InStr(psS2, sTestStr)
                End If
                ' No point in going further with this string
                Exit Do
            End If
            ' Not found: discard final character and try again.
            sTestStr = Left(sTestStr, Len(sTestStr) - 1)
        Loop
        ' Now discard first char of the hold string and repeat.
        sHoldStr = Right(sHoldStr, Len(sHoldStr) - 1)
    Loop

    ' Update the cumulative score with the length of the common sub-string.
    piScore = piScore + iLongest

    ' Nothing in common: both start positions are still 0, so there is
    ' no left or right portion to examine.
    If iLongest = 0 Then Return

    ' Left-hand portions. A match needs at least two characters, so both
    ' portions must be at least two characters long, i.e. the match must
    ' start at position 3 or later. (The previous "> 3" test skipped
    ' two-character left portions and missed matches such as "AB" in
    ' "ABCDEF" / "ABXXCDEF".)
    If iStartPos1 > 2 AndAlso iStartPos2 > 2 Then
        Dim sLeftStr1 As String = Left(psS1, iStartPos1 - 1)
        Dim sLeftStr2 As String = Left(psS2, iStartPos2 - 1)
        If sLeftStr1.Trim <> "" AndAlso sLeftStr2.Trim <> "" Then
            ' Get longest common substring from left strings
            FindCommon(sLeftStr1, sLeftStr2)
        End If
    End If

    ' Right-hand portions: everything after the match in each string.
    Dim sRightStr1 As String = Mid(psS1, iStartPos1 + iLongest)
    Dim sRightStr2 As String = Mid(psS2, iStartPos2 + iLongest)
    If sRightStr1.Trim <> "" AndAlso sRightStr2.Trim <> "" Then
        ' Get longest common substring from right strings
        FindCommon(sRightStr1, sRightStr2)
    End If
End Sub
''' <summary>USED BY SIMILAR FUNCTION</summary>
''' <remarks>
''' Builds a copy of sString that keeps only the characters whose Asc code
''' lies in 48-57 or 65-90 (the ASCII digits and upper-case letters A-Z);
''' every other character is dropped.
''' </remarks>
Private Shared Function ReplaceSpecial(ByVal sString As String) As String
    Dim sKept As String = ""
    For iIndex As Integer = 0 To sString.Length - 1
        Dim iCode As Integer = Asc(sString.Chars(iIndex))
        Select Case iCode
            Case 48 To 57, 65 To 90
                ' Digit or upper-case letter: keep it.
                sKept &= Chr(iCode)
        End Select
    Next
    Return sKept
End Function
作为整数的私有共享piScore
'''
''比较两个非空字符串,不考虑大小写。
''返回其相似性的数字指示
''(0=完全不相似,100=完全相同)
'''
要比较的“”字符串
要比较的“”字符串
''0-100(0=完全不相似,100=完全相同)
'''
公共共享函数类似于整数(ByVal psStr1作为字符串,ByVal psStr2作为字符串)
如果psStr1为Nothing或psStr2为Nothing,则返回0
'将每个字符串转换为最简单的形式(字母
'仅限数字,全部大写)
psStr1=replaceSpective(psStr1.ToUpper)
psStr2=replaceSpective(psStr2.ToUpper)
如果psStr1.Trim=“”或psStr2.Trim=“”,则
'一个或两个字符串现在为空
返回0
如果结束
如果psStr1=psStr2,则
“字符串是相同的
返回100
如果结束
'初始化累积分数(这将是
'所有公共子字符串的总长度)
piScore=0
'查找所有公共子字符串
FindCommon(psStr1、psStr2)
“我们现在有了累积分数。还这个
'作为最高分数的百分比。最大值
'分数是两个字符串的平均长度。
返回piScore*200/(Len(psStr1)+Len(psStr2))
端函数
类似函数使用的“”
私有共享子FindCommon(ByVal psS1作为字符串,ByVal psS2作为字符串)
'查找最长的公用子字符串(而不是单个
'字符),然后递归地
'在左侧查找最长的公共子字符串
'部分和右侧部分。更新
“累积分数。
Dim iLongest为整数=0,iStartPos1为整数=0,iStartPos2为整数=0,iJ为整数=0
Dim sHoldStr As String=“”、sTestStr As String=“”、sLeftStr1 As String=“”、sLeftStr2 As String=“”
Dim sRightStr1为字符串=”,sRightStr2为字符串=”“
sHoldStr=psS2
当Len(sHoldStr)>我在看的时候做
sTestStr=sHoldStr
当Len(sTestStr)>1时执行
iJ=仪表(psS1、sTestStr)
如果iJ>0,则
'测试字符串是另一个字符串的子集
如果Len(sTestStr)>iLongest那么
'测试字符串比上一个字符串长
“最长的。存储其长度和位置。
iLongest=Len(sTestStr)
iStartPos1=iJ
iStartPos2=仪表(psS2、sTestStr)
如果结束
'进一步使用此字符串没有意义
退出Do
其他的
'测试字符串不是另一个的子集
“绳子。丢弃测试的最终字符
'字符串,然后重试。
sTestStr=左(sTestStr,Len(sTestStr)-1)
如果结束
环
'现在放弃测试字符串的第一个字符并
重复这个过程。
sHoldStr=Right(sHoldStr,Len(sHoldStr)-1)
环
'将累积分数更新为
'公共子字符串。
piScore=piScore+iLongest
'我们现在拥有最长的公共子字符串,因此
'可以将子字符串隔离到左侧和右侧
”“是的。
如果iStartPos1>3且iStartPos2>3,则
SLEFSTR1=左侧(psS1,iStartPos1-1)
SLEFSTR2=左侧(psS2,iStartPos2-1)
如果sLeftStr1.Trim“”和sLeftStr2.Trim“”,则
'从左字符串中获取最长的公共子字符串
FindCommon(sLeftStr1、sLeftStr2)
如果结束
其他的
sLeftStr1=“”
sLeftStr2=“”
如果结束
如果iLongest>0,则
sRightStr1=Mid(psS1、iStartPos1+iLongest)
sRightStr2=Mid(psS2,iStartPos2+iLongest)
如果sRightStr1.Trim“”和sRightStr2.Trim“”,则
'从右字符串获取最长的公共子字符串
FindCommon(右侧)
' Cumulative score of the comparison in progress: Similar resets it to 0
' and FindCommon adds the length of each common substring it finds.
' NOTE(review): this is shared mutable state, so overlapping calls to
' Similar would presumably interfere with each other - confirm callers
' are single-threaded.
Private Shared piScore As Integer
''' <summary>
''' Compares two strings regardless of case, ignoring every character
''' that is not a letter A-Z or a digit 0-9.
''' Returns a numeric indication of their similarity
''' (0 = not at all similar, 100 = identical)
''' </summary>
''' <param name="psStr1">String to compare</param>
''' <param name="psStr2">String to compare</param>
''' <returns>
''' 0 when either argument is Nothing or has no letters/digits left after
''' normalization, 100 when the normalized strings are equal, otherwise the
''' cumulative score computed by FindCommon expressed as a percentage of
''' the average normalized length.
''' </returns>
''' <remarks>
''' The cumulative score is kept in the shared piScore field, which this
''' function resets before calling FindCommon.
''' </remarks>
Public Shared Function Similar(ByVal psStr1 As String, ByVal psStr2 As String) As Integer
    If psStr1 Is Nothing OrElse psStr2 Is Nothing Then Return 0

    ' Convert each string to simplest form (letters and digits only, all
    ' upper case). The invariant culture is used on purpose: the
    ' culture-sensitive ToUpper can produce non-ASCII letters (for example
    ' U+0130 for "i" under Turkish cultures), which would make the result
    ' depend on the machine's regional settings.
    psStr1 = ReplaceSpecial(psStr1.ToUpperInvariant())
    psStr2 = ReplaceSpecial(psStr2.ToUpperInvariant())

    If psStr1.Trim = "" OrElse psStr2.Trim = "" Then
        ' One or both of the strings is now empty
        Return 0
    End If

    If psStr1 = psStr2 Then
        ' Strings are identical
        Return 100
    End If

    ' Initialize cumulative score (this will be the total length of all
    ' the common substrings), then let FindCommon accumulate into it.
    piScore = 0
    FindCommon(psStr1, psStr2)

    ' Return the cumulative score as a percent of the maximum score, the
    ' maximum being the average length of the two strings. "/" is a
    ' floating-point division; CInt makes the narrowing explicit (same
    ' rounding as the implicit conversion) so this also compiles with
    ' Option Strict On.
    Return CInt(piScore * 200 / (Len(psStr1) + Len(psStr2)))
End Function
''' <summary>USED BY SIMILAR FUNCTION</summary>
''' <remarks>
''' Finds the longest common substring (other than single characters) of
''' psS1 and psS2, adds its length to the shared piScore field, then
''' recursively does the same for the portions to the left and to the
''' right of that substring.
''' </remarks>
''' <param name="psS1">String that is searched for the candidates.</param>
''' <param name="psS2">String the candidate substrings are taken from.</param>
Private Shared Sub FindCommon(ByVal psS1 As String, ByVal psS2 As String)
    Dim iLongest As Integer = 0, iStartPos1 As Integer = 0, iStartPos2 As Integer = 0, iJ As Integer = 0
    Dim sHoldStr As String = psS2
    Dim sTestStr As String = ""

    ' Walk over every suffix of psS2; suffixes no longer than the best
    ' match so far cannot improve on it, so the loop stops there.
    Do While Len(sHoldStr) > iLongest
        sTestStr = sHoldStr
        ' Shrink the candidate from the right until it occurs in psS1 or
        ' only a single character is left.
        Do While Len(sTestStr) > 1
            iJ = InStr(psS1, sTestStr)
            If iJ > 0 Then
                ' Test string is sub-set of the other string
                If Len(sTestStr) > iLongest Then
                    ' Test string is longer than previous longest.
                    ' Store its length and (1-based) positions.
                    iLongest = Len(sTestStr)
                    iStartPos1 = iJ
                    iStartPos2 = InStr(psS2, sTestStr)
                End If
                ' No point in going further with this string
                Exit Do
            End If
            ' Not found: discard final character and try again.
            sTestStr = Left(sTestStr, Len(sTestStr) - 1)
        Loop
        ' Now discard first char of the hold string and repeat.
        sHoldStr = Right(sHoldStr, Len(sHoldStr) - 1)
    Loop

    ' Update the cumulative score with the length of the common sub-string.
    piScore = piScore + iLongest

    ' Nothing in common: both start positions are still 0, so there is
    ' no left or right portion to examine.
    If iLongest = 0 Then Return

    ' Left-hand portions. A match needs at least two characters, so both
    ' portions must be at least two characters long, i.e. the match must
    ' start at position 3 or later. (The previous "> 3" test skipped
    ' two-character left portions and missed matches such as "AB" in
    ' "ABCDEF" / "ABXXCDEF".)
    If iStartPos1 > 2 AndAlso iStartPos2 > 2 Then
        Dim sLeftStr1 As String = Left(psS1, iStartPos1 - 1)
        Dim sLeftStr2 As String = Left(psS2, iStartPos2 - 1)
        If sLeftStr1.Trim <> "" AndAlso sLeftStr2.Trim <> "" Then
            ' Get longest common substring from left strings
            FindCommon(sLeftStr1, sLeftStr2)
        End If
    End If

    ' Right-hand portions: everything after the match in each string.
    Dim sRightStr1 As String = Mid(psS1, iStartPos1 + iLongest)
    Dim sRightStr2 As String = Mid(psS2, iStartPos2 + iLongest)
    If sRightStr1.Trim <> "" AndAlso sRightStr2.Trim <> "" Then
        ' Get longest common substring from right strings
        FindCommon(sRightStr1, sRightStr2)
    End If
End Sub
''' <summary>USED BY SIMILAR FUNCTION</summary>
''' <remarks>
''' Builds a copy of sString that keeps only the characters whose Asc code
''' lies in 48-57 or 65-90 (the ASCII digits and upper-case letters A-Z);
''' every other character is dropped.
''' </remarks>
Private Shared Function ReplaceSpecial(ByVal sString As String) As String
    Dim sKept As String = ""
    For iIndex As Integer = 0 To sString.Length - 1
        Dim iCode As Integer = Asc(sString.Chars(iIndex))
        Select Case iCode
            Case 48 To 57, 65 To 90
                ' Digit or upper-case letter: keep it.
                sKept &= Chr(iCode)
        End Select
    Next
    Return sKept
End Function
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <set>
#include <string>
#include <vector>
using namespace std;
#define MAX_LENGTH 100
/**
 * Computes every distinct longest common subsequence of two C strings.
 *
 * The sequences are printed to stdout, each followed by a tab, and the
 * line is terminated with a newline. If either input is empty only the
 * newline is printed.
 *
 * The tables are sized from the inputs, so strings of any length are
 * accepted. Memory use is one set of strings per (prefix of a, prefix of b)
 * pair, and the number of distinct sequences can itself grow very quickly
 * with the input size.
 *
 * @param a first NUL-terminated string
 * @param b second NUL-terminated string
 * @return length of the longest common subsequence
 */
int lcs(const char* a, const char* b)
{
    typedef std::set<std::string> lcs_set;

    const std::size_t rows = std::strlen(a) + 1;
    const std::size_t cols = std::strlen(b) + 1;

    // length[i][j]: LCS length of the first i chars of a and first j of b.
    // seqs[i][j]:   every distinct LCS string of those two prefixes.
    std::vector<std::vector<int> > length(rows, std::vector<int>(cols, 0));
    std::vector<std::vector<lcs_set> > seqs(rows, std::vector<lcs_set>(cols));

    // An empty prefix has exactly one common subsequence: the empty string.
    for (std::size_t j = 0; j < cols; ++j)
        seqs[0][j].insert("");
    for (std::size_t i = 0; i < rows; ++i)
        seqs[i][0].insert("");

    // Bottom-up calculation.
    for (std::size_t i = 1; i < rows; ++i)
    {
        for (std::size_t j = 1; j < cols; ++j)
        {
            lcs_set& cell = seqs[i][j];
            if (a[i - 1] == b[j - 1])
            {
                // Matching character: extend every LCS of the shorter prefixes.
                length[i][j] = length[i - 1][j - 1] + 1;
                const lcs_set& diag = seqs[i - 1][j - 1];
                for (lcs_set::const_iterator it = diag.begin(); it != diag.end(); ++it)
                    cell.insert(*it + a[i - 1]);
            }
            else
            {
                // No match: inherit from whichever neighbour is longer,
                // or from both when they tie.
                const int left = length[i][j - 1];
                const int up = length[i - 1][j];
                length[i][j] = left > up ? left : up;
                if (left >= up)
                    cell.insert(seqs[i][j - 1].begin(), seqs[i][j - 1].end());
                if (up >= left)
                    cell.insert(seqs[i - 1][j].begin(), seqs[i - 1][j].end());
            }
        }
    }

    const int lcs_length = length[rows - 1][cols - 1];

    // The last cell already holds every LCS of the full strings: any cell
    // with the same length only contains sequences of prefixes, which are
    // also in the last cell. With an empty input there is nothing to report.
    lcs_set result_set;
    if (rows > 1 && cols > 1)
        result_set = seqs[rows - 1][cols - 1];

    for (lcs_set::const_iterator it = result_set.begin(); it != result_set.end(); ++it)
        std::printf("%s\t", it->c_str());
    std::printf("\n");

    return lcs_length;
}
/**
 * Reads pairs of whitespace-separated words from standard input and, for
 * each complete pair, calls lcs() (which prints the common subsequences)
 * and then prints the subsequence length. Stops at end of input or as
 * soon as a full pair cannot be read.
 */
int main()
{
    char buf1[MAX_LENGTH], buf2[MAX_LENGTH];

    // Build "%<N>s %<N>s" with N = MAX_LENGTH - 1 so that scanf can never
    // write past the end of either buffer (one byte is left for the NUL).
    char format[32];
    sprintf(format, "%%%ds %%%ds", MAX_LENGTH - 1, MAX_LENGTH - 1);

    // Require both words: a lone trailing word would otherwise be compared
    // against whatever the second buffer happened to contain.
    while(scanf(format, buf1, buf2) == 2)
    {
        printf("length is: %d\n", lcs(buf1, buf2) );
    }
    return 0;
}