C++ 查找字符串中具有最大长度的所有有序序列

C++ 查找字符串中具有最大长度的所有有序序列,c++,string,algorithm,c++11,pattern-matching,C++,String,Algorithm,C++11,Pattern Matching,我有以下问题需要解决: 有两个长度任意、内容任意的字符串。我需要找到最大长度的所有有序序列,它出现在两个字符串中 例1: 输入:“a1b2c3”“1a2b3c” 输出:“123”“12c”“1b3”“1bc”“a23”“a2c”“ab3”“abc” 例2: 输入:“cadb”“abcd” 输出:“ab”“ad”“cd” 我用两个循环直接编写它,递归,然后删除重复项和结果,它们是更大结果的一部分(例如,“abc”序列包含“ab”、“ac”和“bc”序列,所以我正在过滤它们) /“match”参数在

我有以下问题需要解决: 有两个长度任意、内容任意的字符串。我需要找出同时出现在这两个字符串中的、长度最大的所有有序序列(即所有最长公共子序列)。

例1: 输入:“a1b2c3”“1a2b3c” 输出:“123”“12c”“1b3”“1bc”“a23”“a2c”“ab3”“abc”

例2: 输入:“cadb”“abcd” 输出:“ab”“ad”“cd”

我直接用两层循环加递归实现了它,然后删除重复项,并删除那些只是更长结果的一部分的结果(例如,“abc”序列包含“ab”、“ac”和“bc”序列,所以我会把它们过滤掉)。

// “match” 参数在此用作临时缓冲区
void match_recursive(set<string>& matches, string& match, const string& a_str1, const string& a_str2, size_t a_pos1, size_t a_pos2)
{
bool added = false;
for (size_t i = a_pos1; i …(原文代码在此处被截断)
此函数能解决问题,但其复杂度令人无法接受。例如,“0q0e0t0c0a0d0a0d0i0e0o0p0z0”和“0w0r0y0d0s0a0b0w0k0f0.0k0x0”的求解在我的机器上需要 28 秒(虽然是 Debug 构建,但无论如何都非常慢)。我认为应该有某种简单的算法可以解决这个问题,但不知为何我在网上找不到。


你们能给我指一下正确的方向吗?

听起来你是想找出两个字符串之间的相似之处?多年前,我在网上某处找到了这段代码,并对其做了一些修改(很抱歉,我已经无法注明原始出处了),此后经常使用它。它运行得非常快(至少对字符串而言)。你可能需要根据自己的用途对它进行修改。抱歉,这是 VB 代码:

Private Shared piScore As Integer
''' <summary>
''' Scores how alike two strings are, ignoring case and every character
''' that ReplaceSpecial removes.
''' </summary>
''' <param name="psStr1">First string to compare.</param>
''' <param name="psStr2">Second string to compare.</param>
''' <returns>
''' 0 when either argument is Nothing or is empty after normalisation,
''' 100 when the normalised strings are equal, otherwise the accumulated
''' common-substring length expressed as a percentage of the average
''' length of the two normalised strings.
''' </returns>
''' <remarks>
''' The running total lives in the shared field piScore: it is reset here
''' and increased by FindCommon. Overlapping calls from several threads
''' would therefore share that one accumulator.
''' </remarks>
Public Shared Function Similar(ByVal psStr1 As String, ByVal psStr2 As String) As Integer
    If psStr1 Is Nothing OrElse psStr2 Is Nothing Then
        Return 0
    End If

    ' Normalise both inputs: upper-case first, then filter.
    psStr1 = ReplaceSpecial(psStr1.ToUpper)
    psStr2 = ReplaceSpecial(psStr2.ToUpper)

    ' Nothing left to compare in at least one of the strings.
    Dim bFirstBlank As Boolean = (psStr1.Trim = "")
    Dim bSecondBlank As Boolean = (psStr2.Trim = "")
    If bFirstBlank OrElse bSecondBlank Then
        Return 0
    End If

    ' Identical after normalisation: no need to search for substrings.
    If psStr1 = psStr2 Then
        Return 100
    End If

    ' Start a fresh total; FindCommon adds the length of every common
    ' substring it finds.
    piScore = 0
    FindCommon(psStr1, psStr2)

    ' Maximum possible score is the average of the two lengths, so
    ' score / ((len1 + len2) / 2) * 100 == score * 200 / (len1 + len2).
    Dim iCombinedLength As Integer = Len(psStr1) + Len(psStr2)
    Return CInt(piScore * 200 / iCombinedLength)
End Function

''' <summary>USED BY SIMILAR FUNCTION.
''' Finds the longest substring of two or more characters that occurs in
''' both psS1 and psS2, adds its length to piScore, then repeats the
''' search on the text to the left and to the right of that match.</summary>
''' <param name="psS1">String that is searched for each candidate.</param>
''' <param name="psS2">String whose substrings are used as candidates.</param>
Private Shared Sub FindCommon(ByVal psS1 As String, ByVal psS2 As String)
    Dim iLongest As Integer = 0     ' length of the best match so far
    Dim iStartPos1 As Integer = 0   ' 1-based start of the best match in psS1
    Dim iStartPos2 As Integer = 0   ' 1-based start of the best match in psS2
    Dim iJ As Integer = 0
    Dim sTestStr As String = ""

    ' sHoldStr walks through every suffix of psS2. A suffix that is not
    ' longer than the best match cannot contain a better one.
    Dim sHoldStr As String = psS2
    Do While Len(sHoldStr) > iLongest

        ' Try the prefixes of the current suffix from longest to shortest.
        ' Single characters never count as a match, and a prefix that is
        ' no longer than the current best cannot improve on it, so stop
        ' shrinking at that point.
        sTestStr = sHoldStr
        Do While Len(sTestStr) > 1 AndAlso Len(sTestStr) > iLongest
            iJ = InStr(psS1, sTestStr)
            If iJ > 0 Then
                ' New best match: remember its length and both positions.
                iLongest = Len(sTestStr)
                iStartPos1 = iJ
                iStartPos2 = InStr(psS2, sTestStr)
                Exit Do
            End If

            ' Not found: drop the final character and try again.
            sTestStr = Left(sTestStr, Len(sTestStr) - 1)
        Loop

        ' Drop the first character to move on to the next suffix.
        sHoldStr = Right(sHoldStr, Len(sHoldStr) - 1)
    Loop

    ' Add the length of the match to the cumulative score.
    piScore = piScore + iLongest

    ' Without a match there is nothing to split around.
    If iLongest = 0 Then Return

    ' Left-hand portions. A match needs at least two characters, so the
    ' recursion is only worthwhile when both portions have two or more,
    ' i.e. the match starts at position 3 or later in both strings.
    If iStartPos1 > 2 AndAlso iStartPos2 > 2 Then
        Dim sLeftStr1 As String = Left(psS1, iStartPos1 - 1)
        Dim sLeftStr2 As String = Left(psS2, iStartPos2 - 1)

        If sLeftStr1.Trim <> "" AndAlso sLeftStr2.Trim <> "" Then
            FindCommon(sLeftStr1, sLeftStr2)
        End If
    End If

    ' Right-hand portions: everything after the match in each string.
    Dim sRightStr1 As String = Mid(psS1, iStartPos1 + iLongest)
    Dim sRightStr2 As String = Mid(psS2, iStartPos2 + iLongest)

    If sRightStr1.Trim <> "" AndAlso sRightStr2.Trim <> "" Then
        FindCommon(sRightStr1, sRightStr2)
    End If
End Sub

''' <summary>USED BY SIMILAR FUNCTION.
''' Returns sString with every character removed except the digits 0-9
''' and the upper-case letters A-Z.</summary>
''' <param name="sString">Text to filter; Nothing is treated as empty.</param>
''' <returns>The kept characters in their original order (never Nothing).</returns>
''' <remarks>Lower-case letters are removed too, so a caller that wants
''' to keep them has to upper-case the text before calling.</remarks>
Private Shared Function ReplaceSpecial(ByVal sString As String) As String
    If sString Is Nothing Then Return ""

    ' A StringBuilder avoids re-copying the result on every kept character.
    Dim sbReturn As New System.Text.StringBuilder(sString.Length)
    Dim iAsc As Integer
    For Each cChar As Char In sString
        iAsc = Asc(cChar)
        ' 48-57 are the codes of "0".."9", 65-90 those of "A".."Z".
        If (iAsc >= 48 AndAlso iAsc <= 57) OrElse (iAsc >= 65 AndAlso iAsc <= 90) Then
            sbReturn.Append(Chr(iAsc))
        End If
    Next
    Return sbReturn.ToString()
End Function
作为整数的私有共享piScore
''' 
''比较两个非空字符串,不考虑大小写。
''返回其相似性的数字指示
''(0=完全不相似,100=完全相同)
''' 
要比较的“”字符串
要比较的“”字符串
''0-100(0=完全不相似,100=完全相同)
''' 
公共共享函数类似于整数(ByVal psStr1作为字符串,ByVal psStr2作为字符串)
如果psStr1为Nothing或psStr2为Nothing,则返回0
'将每个字符串转换为最简单的形式(字母
'仅限数字,全部大写)
psStr1=replaceSpective(psStr1.ToUpper)
psStr2=replaceSpective(psStr2.ToUpper)
如果psStr1.Trim=“”或psStr2.Trim=“”,则
'一个或两个字符串现在为空
返回0
如果结束
如果psStr1=psStr2,则
“字符串是相同的
返回100
如果结束
'初始化累积分数(这将是
'所有公共子字符串的总长度)
piScore=0
'查找所有公共子字符串
FindCommon(psStr1、psStr2)
“我们现在有了累积分数。还这个
'作为最高分数的百分比。最大值
'分数是两个字符串的平均长度。
返回piScore*200/(Len(psStr1)+Len(psStr2))
端函数
类似函数使用的“”
私有共享子FindCommon(ByVal psS1作为字符串,ByVal psS2作为字符串)
'查找最长的公用子字符串(而不是单个
'字符),然后递归地
'在左侧查找最长的公共子字符串
'部分和右侧部分。更新
“累积分数。
Dim iLongest为整数=0,iStartPos1为整数=0,iStartPos2为整数=0,iJ为整数=0
Dim sHoldStr As String=“”、sTestStr As String=“”、sLeftStr1 As String=“”、sLeftStr2 As String=“”
Dim sRightStr1为字符串=”,sRightStr2为字符串=”“
sHoldStr=psS2
当Len(sHoldStr)>我在看的时候做
sTestStr=sHoldStr
当Len(sTestStr)>1时执行
iJ=仪表(psS1、sTestStr)
如果iJ>0,则
'测试字符串是另一个字符串的子集
如果Len(sTestStr)>iLongest那么
'测试字符串比上一个字符串长
“最长的。存储其长度和位置。
iLongest=Len(sTestStr)
iStartPos1=iJ
iStartPos2=仪表(psS2、sTestStr)
如果结束
'进一步使用此字符串没有意义
退出Do
其他的
'测试字符串不是另一个的子集
“绳子。丢弃测试的最终字符
'字符串,然后重试。
sTestStr=左(sTestStr,Len(sTestStr)-1)
如果结束
环
'现在放弃测试字符串的第一个字符并
重复这个过程。
sHoldStr=Right(sHoldStr,Len(sHoldStr)-1)
环
'将累积分数更新为
'公共子字符串。
piScore=piScore+iLongest
'我们现在拥有最长的公共子字符串,因此
'可以将子字符串隔离到左侧和右侧
”“是的。
如果iStartPos1>3且iStartPos2>3,则
SLEFSTR1=左侧(psS1,iStartPos1-1)
SLEFSTR2=左侧(psS2,iStartPos2-1)
如果sLeftStr1.Trim“”和sLeftStr2.Trim“”,则
'从左字符串中获取最长的公共子字符串
FindCommon(sLeftStr1、sLeftStr2)
如果结束
其他的
sLeftStr1=“”
sLeftStr2=“”
如果结束
如果iLongest>0,则
sRightStr1=Mid(psS1、iStartPos1+iLongest)
sRightStr2=Mid(psS2,iStartPos2+iLongest)
如果sRightStr1.Trim“”和sRightStr2.Trim“”,则
'从右字符串获取最长的公共子字符串
FindCommon(右侧)
Private Shared piScore As Integer
''' <summary>
''' Scores how alike two strings are, ignoring case and every character
''' that ReplaceSpecial removes.
''' </summary>
''' <param name="psStr1">First string to compare.</param>
''' <param name="psStr2">Second string to compare.</param>
''' <returns>
''' 0 when either argument is Nothing or is empty after normalisation,
''' 100 when the normalised strings are equal, otherwise the accumulated
''' common-substring length expressed as a percentage of the average
''' length of the two normalised strings.
''' </returns>
''' <remarks>
''' The running total lives in the shared field piScore: it is reset here
''' and increased by FindCommon. Overlapping calls from several threads
''' would therefore share that one accumulator.
''' </remarks>
Public Shared Function Similar(ByVal psStr1 As String, ByVal psStr2 As String) As Integer
    If psStr1 Is Nothing OrElse psStr2 Is Nothing Then
        Return 0
    End If

    ' Normalise both inputs: upper-case first, then filter.
    psStr1 = ReplaceSpecial(psStr1.ToUpper)
    psStr2 = ReplaceSpecial(psStr2.ToUpper)

    ' Nothing left to compare in at least one of the strings.
    Dim bFirstBlank As Boolean = (psStr1.Trim = "")
    Dim bSecondBlank As Boolean = (psStr2.Trim = "")
    If bFirstBlank OrElse bSecondBlank Then
        Return 0
    End If

    ' Identical after normalisation: no need to search for substrings.
    If psStr1 = psStr2 Then
        Return 100
    End If

    ' Start a fresh total; FindCommon adds the length of every common
    ' substring it finds.
    piScore = 0
    FindCommon(psStr1, psStr2)

    ' Maximum possible score is the average of the two lengths, so
    ' score / ((len1 + len2) / 2) * 100 == score * 200 / (len1 + len2).
    Dim iCombinedLength As Integer = Len(psStr1) + Len(psStr2)
    Return CInt(piScore * 200 / iCombinedLength)
End Function

''' <summary>USED BY SIMILAR FUNCTION.
''' Finds the longest substring of two or more characters that occurs in
''' both psS1 and psS2, adds its length to piScore, then repeats the
''' search on the text to the left and to the right of that match.</summary>
''' <param name="psS1">String that is searched for each candidate.</param>
''' <param name="psS2">String whose substrings are used as candidates.</param>
Private Shared Sub FindCommon(ByVal psS1 As String, ByVal psS2 As String)
    Dim iLongest As Integer = 0     ' length of the best match so far
    Dim iStartPos1 As Integer = 0   ' 1-based start of the best match in psS1
    Dim iStartPos2 As Integer = 0   ' 1-based start of the best match in psS2
    Dim iJ As Integer = 0
    Dim sTestStr As String = ""

    ' sHoldStr walks through every suffix of psS2. A suffix that is not
    ' longer than the best match cannot contain a better one.
    Dim sHoldStr As String = psS2
    Do While Len(sHoldStr) > iLongest

        ' Try the prefixes of the current suffix from longest to shortest.
        ' Single characters never count as a match, and a prefix that is
        ' no longer than the current best cannot improve on it, so stop
        ' shrinking at that point.
        sTestStr = sHoldStr
        Do While Len(sTestStr) > 1 AndAlso Len(sTestStr) > iLongest
            iJ = InStr(psS1, sTestStr)
            If iJ > 0 Then
                ' New best match: remember its length and both positions.
                iLongest = Len(sTestStr)
                iStartPos1 = iJ
                iStartPos2 = InStr(psS2, sTestStr)
                Exit Do
            End If

            ' Not found: drop the final character and try again.
            sTestStr = Left(sTestStr, Len(sTestStr) - 1)
        Loop

        ' Drop the first character to move on to the next suffix.
        sHoldStr = Right(sHoldStr, Len(sHoldStr) - 1)
    Loop

    ' Add the length of the match to the cumulative score.
    piScore = piScore + iLongest

    ' Without a match there is nothing to split around.
    If iLongest = 0 Then Return

    ' Left-hand portions. A match needs at least two characters, so the
    ' recursion is only worthwhile when both portions have two or more,
    ' i.e. the match starts at position 3 or later in both strings.
    If iStartPos1 > 2 AndAlso iStartPos2 > 2 Then
        Dim sLeftStr1 As String = Left(psS1, iStartPos1 - 1)
        Dim sLeftStr2 As String = Left(psS2, iStartPos2 - 1)

        If sLeftStr1.Trim <> "" AndAlso sLeftStr2.Trim <> "" Then
            FindCommon(sLeftStr1, sLeftStr2)
        End If
    End If

    ' Right-hand portions: everything after the match in each string.
    Dim sRightStr1 As String = Mid(psS1, iStartPos1 + iLongest)
    Dim sRightStr2 As String = Mid(psS2, iStartPos2 + iLongest)

    If sRightStr1.Trim <> "" AndAlso sRightStr2.Trim <> "" Then
        FindCommon(sRightStr1, sRightStr2)
    End If
End Sub

''' <summary>USED BY SIMILAR FUNCTION.
''' Returns sString with every character removed except the digits 0-9
''' and the upper-case letters A-Z.</summary>
''' <param name="sString">Text to filter; Nothing is treated as empty.</param>
''' <returns>The kept characters in their original order (never Nothing).</returns>
''' <remarks>Lower-case letters are removed too, so a caller that wants
''' to keep them has to upper-case the text before calling.</remarks>
Private Shared Function ReplaceSpecial(ByVal sString As String) As String
    If sString Is Nothing Then Return ""

    ' A StringBuilder avoids re-copying the result on every kept character.
    Dim sbReturn As New System.Text.StringBuilder(sString.Length)
    Dim iAsc As Integer
    For Each cChar As Char In sString
        iAsc = Asc(cChar)
        ' 48-57 are the codes of "0".."9", 65-90 those of "A".."Z".
        If (iAsc >= 48 AndAlso iAsc <= 57) OrElse (iAsc >= 65 AndAlso iAsc <= 90) Then
            sbReturn.Append(Chr(iAsc))
        End If
    Next
    Return sbReturn.ToString()
End Function
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <set>
#include <string>
#include <vector>

using namespace std;

#define MAX_LENGTH 100

/**
 * Finds every distinct longest common subsequence (LCS) of two C strings.
 *
 * The distinct subsequences are written to stdout in sorted order, each
 * followed by a tab, and the line is terminated with a newline. When one
 * of the inputs is empty only the newline is written.
 *
 * The tables are sized from the inputs, so strings of any length are
 * accepted. Only two rows of the dynamic-programming table are kept
 * alive at a time. The number of distinct subsequences can still grow
 * very quickly for inputs with many repeated characters.
 *
 * @param a  first NUL-terminated string (must not be NULL)
 * @param b  second NUL-terminated string (must not be NULL)
 * @return   length of the longest common subsequence
 */
int lcs(const char* a, const char* b)
{
    typedef std::set<std::string> lcs_set;
    typedef std::vector<lcs_set> lcs_row;

    const std::size_t rows = std::strlen(a) + 1;
    const std::size_t cols = std::strlen(b) + 1;

    // prev_* is row i-1 of the table, cur_* is row i.
    // *_len[j] is the LCS length of a[0..i) and b[0..j);
    // *_set[j] holds every distinct subsequence of that length.
    std::vector<int> prev_len(cols, 0), cur_len(cols, 0);
    lcs_row prev_set(cols), cur_set(cols);

    // Row 0: against an empty prefix of a, the only subsequence is "".
    for (std::size_t j = 0; j < cols; ++j)
        prev_set[j].insert("");

    for (std::size_t i = 1; i < rows; ++i)
    {
        // Column 0: against an empty prefix of b, again only "".
        cur_len[0] = 0;
        cur_set[0].clear();
        cur_set[0].insert("");

        for (std::size_t j = 1; j < cols; ++j)
        {
            lcs_set& cell = cur_set[j];
            cell.clear();   // still holds the contents of row i-2

            if (a[i - 1] == b[j - 1])
            {
                // Matching characters extend every subsequence of the
                // diagonal neighbour by one character.
                cur_len[j] = prev_len[j - 1] + 1;
                const lcs_set& diag = prev_set[j - 1];
                for (lcs_set::const_iterator it = diag.begin(); it != diag.end(); ++it)
                    cell.insert(*it + a[i - 1]);
            }
            else
            {
                // Otherwise inherit from whichever neighbour is longer,
                // or from both when they tie.
                const int left = cur_len[j - 1];
                const int up = prev_len[j];
                cur_len[j] = (left > up) ? left : up;
                if (left >= up)
                    cell.insert(cur_set[j - 1].begin(), cur_set[j - 1].end());
                if (up >= left)
                    cell.insert(prev_set[j].begin(), prev_set[j].end());
            }
        }

        prev_len.swap(cur_len);
        prev_set.swap(cur_set);
    }

    // After the final swap the last computed row is in prev_*; its last
    // cell describes the complete strings.
    const int lcs_length = prev_len[cols - 1];

    // comment it
    if (rows > 1 && cols > 1)
    {
        const lcs_set& result_set = prev_set[cols - 1];
        for (lcs_set::const_iterator it = result_set.begin(); it != result_set.end(); ++it)
            std::printf("%s\t", it->c_str());
    }
    std::printf("\n");

    return lcs_length;
}

/**
 * Reads pairs of whitespace-separated tokens from stdin until no further
 * complete pair is available, and for each pair prints the common
 * subsequences (via lcs) followed by the line "length is: N".
 *
 * Each token is limited to MAX_LENGTH - 1 characters so that it always
 * fits its buffer; the remainder of a longer token is read as the next
 * token.
 */
int main()
{
    char buf1[MAX_LENGTH], buf2[MAX_LENGTH];

    // Build "%<w>s %<w>s" with w = MAX_LENGTH - 1 so scanf can never write
    // past the end of either buffer (the terminating NUL takes one byte).
    char format[32];
    std::sprintf(format, "%%%ds %%%ds", MAX_LENGTH - 1, MAX_LENGTH - 1);

    // Require both tokens: a partial read would leave buf2 holding stale
    // or uninitialised data.
    while(std::scanf(format, buf1, buf2) == 2)
    {
        std::printf("length is: %d\n", lcs(buf1, buf2) );
    }
    return 0;
}