C++ 使用Rabin-Karp进行模式搜索

C++ 使用Rabin-Karp进行模式搜索,c++,string,algorithm,stl,rabin-karp,C++,String,Algorithm,Stl,Rabin Karp,我正在使用递推公式研究Rabin-Karp算法。下面是代码。 在代码中,我检查以正常方式和递归公式计算的哈希值。两个值不匹配。我花了足够的时间调试了将近3个小时,不确定是什么问题。请求您帮助查找错误 #include <iostream> #include <fstream> #include <streambuf> #include <cstdint> #include <string> #include <vector>

我正在使用递推公式研究Rabin-Karp算法。下面是代码。 在代码中,我检查以正常方式和递归公式计算的哈希值。两个值不匹配。我花了足够的时间调试了将近3个小时,不确定是什么问题。请求您帮助查找错误

#include <iostream>
#include <fstream>
#include <streambuf>
#include <cstdint>
#include <string>
#include <vector>

const std::uint64_t uiLargePrime    = 1000000007;
const unsigned int  uiXValue        = 263; 
const unsigned int  uiHashTableSize = 79;



struct sCalcHash {

    std::uint64_t operator() (const std::string& strText) {
        // user horners method.
        unsigned int uiStrLength =  strText.length();
        std::uint64_t uiResult = 0;

        // calculate hash value
        for(int uiIdx = (uiStrLength - 1); uiIdx >= 0; uiIdx--) {
            uiResult = (((uiResult * uiXValue) % uiLargePrime) + strText[uiIdx]) % uiLargePrime ;
        }
        // return uiResult % uiHashTableSize;
        return uiResult;
    }
};

// calculate x ^ uiPatternLength % uiLargePrime.
unsigned int expValueOfX(unsigned int uiXVal, unsigned int uiPower) {
    // get X value in range of prime;
    uiXVal = uiXVal % uiLargePrime; 
    unsigned int uiResult = 1;
    while (uiPower > 0 ) {

        // check if power is odd
        if (uiPower & 1) {
            uiResult = ((uiResult % uiLargePrime) * (uiXVal % uiLargePrime) ) % uiLargePrime;
        }

        // now uiPower is even
        uiPower = uiPower >> 1;
        uiXVal = ((uiXVal % uiLargePrime) * (uiXVal % uiLargePrime)) % uiLargePrime;
    }
    return uiResult;
}

// Rabin Karp Algorithm

void RabinKarpAlgo(std::string& Text, std::string& pattern) {

    std::vector<unsigned int> vecPostions;

    //calculate hash value of pattern.
    sCalcHash hash;
    std::uint64_t hashValPattern = hash(pattern);
    std::cout << "Hash Value of pattern: " <<  hashValPattern  << std::endl;

    unsigned int uiPatternLength = pattern.length();
    // calculate x ^ uiPatternLength % uiLargePrime.
    unsigned int uiXExpVal = expValueOfX(uiXValue, uiPatternLength);
    //std::cout << "Exponential value  " <<  uiXExpVal  << std::endl;
    // calculate hash value 
    unsigned int uiStrLength = Text.length();
    // calculate hash value of last part of string of pattern length.
    unsigned int uiLastIdx = uiStrLength - uiPatternLength;
    std::uint64_t hashValLastIdx = hash(Text.substr(uiLastIdx));
    std::cout << "Hash Value of last indx of text: " <<  hashValLastIdx  << std::endl;

    // if hash value is same then compare string
    if (hashValLastIdx == hashValPattern) {
        if(pattern == Text.substr(uiLastIdx)) {
            std::cout << "Pushing index: " << uiLastIdx << std::endl;
            vecPostions.push_back(uiLastIdx);
        }
    }
    for(int uiIdx = uiLastIdx - 1; uiIdx >= 0; uiIdx--) {
        // calculate hash value of string
        std::int64_t iHashValRecur = ( (Text[uiIdx] % uiLargePrime) + 
                                       ((hashValLastIdx % uiLargePrime) * (uiXValue % uiLargePrime)) % uiLargePrime - 
                                       ((Text[uiIdx + uiPatternLength] % uiLargePrime) * (uiXExpVal % uiLargePrime) ) % uiLargePrime
                                     ) % uiLargePrime;
        unsigned int iHashVal = hash(Text.substr(uiIdx, uiPatternLength));

        std::cout << "Hash Value of with recurr " << uiIdx << " is " << iHashValRecur <<  " and with hash func: " << iHashVal << std::endl;



        if(iHashValRecur == hashValPattern) {
            // compare string
            if(pattern == Text.substr(uiIdx, uiPatternLength) ) {
                std::cout << "Pushing index: " << uiIdx << std::endl;
                vecPostions.push_back(uiIdx);
            }
        }
        hashValLastIdx = iHashValRecur;
    }

    // print vectors
    for( int uiIdx = vecPostions.size() - 1; uiIdx >= 0; uiIdx--) {
        std::cout << vecPostions[uiIdx] << " ";
    }
    std::cout << std::endl;

    return ;


}




int main() {

    std::ifstream inputFile("rabinkarp.in");
    std::streambuf *pCinbuf = std::cin.rdbuf();
    std::cin.set_rdbuf(inputFile.rdbuf());

    std::string strText;
    std::string strPattern;

    std::cin >> strPattern;
    std::cin >> strText;


    std::cout << "Text: " << strText << std::endl;
    std::cout << "Pattern: " << strPattern << std::endl;

    RabinKarpAlgo(strText, strPattern); 


    return 0;
}

Text: baaaaaaa
Pattern: aaaaa
Hash Value of pattern: 853306522
Hash Value of last indx of text: 853306522
Pushing index: 3
Hash Value of with recurr 2 is 435650523 and with hash func: 853306522
Hash Value of with recurr 1 is 9779548 and with hash func: 853306522
Hash Value of with recurr 0 is 5713908 and with hash func: 853306523
3
Press any key to continue . . .
#包括
#包括
#包括
#包括
#包括
#包括
常数std::uint64\u t uiLargePrime=100000007;
常量无符号整数uiXValue=263;
常量unsigned int uiHashTableSize=79;
结构尺度{
std::uint64_t运算符()(常量std::string和strText){
//用户霍纳斯方法。
unsigned int uiStrLength=strText.length();
标准::uint64_t uiResult=0;
//计算散列值
对于(int-uiIdx=(uiStrLength-1);uiIdx>=0;uiIdx--){
uiResult=((uiResult*uiXValue)%uiLargePrime)+strText[uiIdx])%uiLargePrime;
}
//返回uiResult%uiHashTableSize;
返回结果;
}
};
//计算x^uiPatternLength%uiLargePrime。
unsigned int expValueOfX(unsigned int uiXVal,unsigned int uiPower){
//得到素数范围内的X值;
uiXVal=uiXVal%uiLargePrime;
无符号整数uiResult=1;
而(uiPower>0){
//检查电源是否为奇数
如果(uiPower&1){
uiResult=((uiResult%uiLargePrime)*(uiXVal%uiLargePrime))%uiLargePrime;
}
//现在,权力是平等的
uiPower=uiPower>>1;
uiXVal=((uiXVal%uiLargePrime)*(uiXVal%uiLargePrime))%uiLargePrime;
}
返回结果;
}
//拉宾-卡普算法
void RabinKarpAlgo(标准::字符串和文本,标准::字符串和模式){
std::矢量位置;
//计算模式的哈希值。
sCalcHash散列;
std::uint64\u t hashValPattern=散列(模式);

问题是我用无符号std64整数计算mod,所以mod变为正,这取决于实现

参考:C++03第5.6段第4条:

二进制/运算符产生商,二进制%运算符产生第一个表达式除以第二个表达式的余数。如果第二个操作数/或%为零,则行为未定义;否则(a/b)*b+a%b等于a。如果两个操作数都是非负的,则余数是非负的;如果不是,则余数的符号由实现定义

因此,为了避免这个问题,我在下面尝试将大素数数据类型更改为int64,它在下面工作并完成

std::int64_t iHashValRecur =  ( (Text[uiIdx] % uiLargePrime) + 
                                     ((uiXValue  % uiLargePrime * hashValLastIdx  % uiLargePrime)  % uiLargePrime) -
                                     ((Text[uiIdx + uiPatternLength]  % uiLargePrime * (uiXExpVal  % uiLargePrime))  % uiLargePrime)
                                     );

        std::int64_t iHashVal = hash(Text.substr(uiIdx, uiPatternLength));
        iHashValRecur = iHashValRecur  % uiLargePrime;
        while (iHashValRecur < 0) { 
            iHashValRecur += uiLargePrime; 
        } 
        iHashValRecur = iHashValRecur % uiLargePrime; 
std::int64\t iHashValRecur=((Text[uiIdx]%uiLargePrime)+
((uiXValue%uiLargePrime*hashValLastIdx%uiLargePrime)%uiLargePrime)-
((文本[uiIdx+uiPatternLength]%uiLargePrime*(uiXExpVal%uiLargePrime))%uiLargePrime)
);
std::int64_t iHashVal=hash(Text.substr(uiIdx,uiPatternLength));
IHashValRecurr=IHashValRecurr%uiLargePrime;
而(ihashvalrecurr<0){
IHashValRecurr+=uiLargePrime;
} 
IHashValRecurr=IHashValRecurr%uiLargePrime;