C++11:用 C++ 实现用于文本分类的朴素贝叶斯

C++11:用 C++ 实现用于文本分类的朴素贝叶斯,c++11,naivebayes,C++11,Naivebayes,我正在编写一段代码,用于实现文本分类的朴素贝叶斯分类器。我用一个很小的例子试过,它似乎可以正常工作。但我想知道实现是否正确,它是否适用于其他训练集和测试集?我不是要实现一个商业级的朴素贝叶斯,这只是一个用来学习 C++ 的小作业。我还想知道代码写得怎么样?我编写代码的方式是否符合良好的 C++ 实践? 我知道还有很多可以改进的地方,比如现在我只测试一个测试文件,所以将来我想支持测试多个文件;另外目前我只做二分类,将来可能会做多类分类。但在代码方面还有其他可以改进的地方吗? 以下是代码,NB

我正在编写一个代码,用于实现用于文本分类的朴素贝叶斯分类器。我曾经做过一个很小的例子,它似乎在起作用

  • 但我想知道实现是否正确,它是否适用于其他训练集和测试集?我不是要实现一个商业级的朴素贝叶斯,这只是一个用来学习 C++ 的小作业。
  • 我还想知道代码写得怎么样?我编写代码的方式是否符合良好的 C++ 实践?
  • 我知道有很多改进可以做,比如现在我只测试一个测试文件,所以一种测试多个文件的方法是我想在将来做的事情,而且目前我只做2类分类,将来可能是多类分类。但在代码方面还有其他改进吗
  • 以下是代码,NB头文件:

    #pragma once
    
    #include<iostream>
    #include<fstream>
    #include<string>
    #include<vector>
    #include<map>
    using namespace std;
    
    // Two-class Naive Bayes text classifier working on whitespace-separated
    // words read from files. One NB object represents either a single class
    // (built from one or more files), a single document to classify, or the
    // combined corpus of two classes. Intermediate results are also written
    // to text files whose names are derived from the class name.
    class NB
    {
    public:
        // Combined-corpus constructor: gathers the words of cl1 and cl2,
        // writes them to "combineAll.txt", sums both classes' _tnClass and
        // builds the bag of words with isTotal = true.
        NB(NB& cl1, NB& cl2, string className);
        // Class constructor: prompts on stdin for filenames until "q" is
        // entered, then combines the files and builds the bag of words.
        NB(string className);
        // Single-document constructor: prompts on stdin for one filename,
        // stores classType, then combines and builds the bag of words.
        NB(string className, int classType);
        // Reads every file registered in _category, writes all words to
        // "<className>.txt" (one per line) and returns them.
        vector <string> combineClassText();
        // Reads the words of classCombine, stores per-word counts in _bow and
        // writes them to "<className>_bow.txt". With isTotal == true the
        // number of distinct words is added to _voc; otherwise the sum of all
        // counts is added to _totalWordsinC.
        void bagOfWords(string classCombine, bool isTotal = false);
        // Fills _probCalc for the words of total._bow using
        // (count + 1) / (_totalWordsinC + total._voc) and writes the result
        // to "<className>_prob.txt".
        void calcProb(NB& total);
        // Scores this object's words against the probabilities of `prob`,
        // multiplies by prob._nOfClass / total._tnClass, prints and returns
        // the result.
        float totalProb(NB& prob, NB& total);
        // Set by the (className, classType) constructor; not read by any
        // member function visible in this file.
        int classType;
    
    private:
        int _len = 0;               // scratch length used by the combining constructor
        float _prob = 1.0f;         // last score computed by totalProb
        int _voc = 0;               // distinct-word count, updated by bagOfWords(isTotal = true)
        int _nOfClass = 0;          // number of filenames entered for this class
        int _tnClass = 0;           // file count; summed over both classes in the combining constructor
        int _totalWordsinC = 0;     // sum of word counts, updated by bagOfWords(isTotal = false)
        int _wordCounter = 0;       // scratch counter used by totalProb
        bool _isDone = false;       // loop flag for the filename prompt
        ifstream _in;               // input stream reused by the member functions
        ofstream _out;              // output stream reused by the member functions
        //string _classCombine;
        string _className;          // class name; also the prefix of generated file names
        string _fileName;           // last filename read from stdin
        vector <string> _combined;  // words of both classes (combining constructor)
        map<string, string> _category;  // filename -> class name
        map<string, int> _bow;          // word -> occurrence count
        map<string, float> _probCalc;   // word -> smoothed probability
    };
    
    #include "NB.h"
    #include<cmath>
    
    // Combined-corpus constructor.
    //
    // Collects the words of both classes into _combined, writes them to
    // "combineAll.txt" (one word per line), stores the total number of
    // training files in _tnClass and builds the vocabulary bag of words.
    //
    // cl1, cl2   the two classes to merge (their class files are rewritten
    //            as a side effect of combineClassText()).
    // className  name of the combined corpus; prefix of the generated files.
    NB::NB(NB& cl1, NB& cl2, string className)
    {
        _className = className;
        _out.open("combineAll.txt");
        if (_out.fail()) {
            perror("cannot write to combineAll.txt");
        }

        // combineClassText() re-reads every input file and rewrites the class
        // file on each call, so it must be called once per class rather than
        // once per word.
        const vector<string> firstWords = cl1.combineClassText();
        _combined.insert(_combined.end(), firstWords.begin(), firstWords.end());

        const vector<string> secondWords = cl2.combineClassText();
        _combined.insert(_combined.end(), secondWords.begin(), secondWords.end());

        _len = static_cast<int>(_combined.size());
        for (const string& word : _combined) {
            _out << word << '\n';
        }
        // Closing flushes the file before bagOfWords() reads it back.
        _out.close();
        _tnClass = cl1._tnClass + cl2._tnClass;
        bagOfWords("combineAll.txt", true);
    }
    
    // Single-document constructor: asks on stdin for exactly one filename,
    // registers it under this class name, then combines the text and builds
    // the bag of words from "<className>.txt".
    NB::NB(string className, int classType)
        : classType(classType), _className(className)
    {
        const string prompt = "Enter a filename for " + _className;
        cout << prompt << endl;
        cin >> _fileName;

        _category[_fileName] = _className;

        combineClassText();
        const string combinedFile = _className + ".txt";
        bagOfWords(combinedFile);
    }
    
    // Class constructor: repeatedly asks on stdin for a training filename
    // until the user enters "q" (or stdin ends), registering each file under
    // this class name and counting it. Afterwards the files are combined and
    // the bag of words is built from "<className>.txt".
    NB::NB(string className)
    {
        _className = className;
        while (!_isDone) {
            cout << "Enter a filename for " + _className << endl;
            // A failed read (EOF, closed or broken stdin) never yields "q", so
            // it has to end the loop explicitly; otherwise the loop would spin
            // forever and keep incrementing the file counters.
            if (!(cin >> _fileName) || _fileName == "q") {
                _isDone = true;
            } else {
                _category[_fileName] = _className;
                _nOfClass++;
                _tnClass++;
            }
        }
        combineClassText();
        bagOfWords(_className + ".txt");
    }
    
    // Concatenates the words of every file registered in _category.
    // Each word is written on its own line to "<className>.txt" and also
    // collected into the returned vector. Open failures are reported with
    // perror() and processing continues.
    vector<string> NB::combineClassText() {
        const string outputName = _className + ".txt";
        vector<string> words;

        _out.open(outputName);
        if (_out.fail()) {
            perror("cannot write to");
        }

        for (const auto& entry : _category) {
            // entry.first is the filename; the mapped class name is not needed here.
            _in.open(entry.first);
            if (_in.fail()) {
                perror("cannot read from");
            }

            string word;
            while (_in >> word) {
                words.push_back(word);
                _out << word << endl;
            }
            _in.close();
        }

        _out.close();
        return words;
    }
    
    // Builds the bag of words for the file `classCombine`.
    //
    // Every whitespace-separated token is stripped of '.' and ',' and counted.
    // The counts are stored in _bow (overwriting any earlier count of the same
    // word) and the whole of _bow is written to "<className>_bow.txt" as
    // "word: count" lines.
    //
    // classCombine  file to read the words from.
    // isTotal       true  -> add the number of distinct words to _voc;
    //               false -> add the sum of all counts to _totalWordsinC.
    void NB::bagOfWords(string classCombine, bool isTotal) {

        _in.open(classCombine);
        if (_in.fail()) {
            perror("cannot read from");
        }

        _out.open(_className + "_bow.txt");
        if (_out.fail()) {
            perror("cannot write to");
        }

        // Count in one pass with a map instead of comparing every pair of words.
        map<string, int> counts;
        string token;
        while (_in >> token) {
            // Remove the punctuation instead of replacing it with a space:
            // "word," must count as "word", not as the distinct key "word ".
            string word;
            for (char ch : token) {
                if (ch != '.' && ch != ',') {
                    word += ch;
                }
            }
            // A token made only of punctuation is not a word.
            if (!word.empty()) {
                ++counts[word];
            }
        }

        // Assign rather than accumulate, so a repeated call replaces the count
        // of a word instead of adding to it.
        for (const auto& entry : counts) {
            _bow[entry.first] = entry.second;
        }

        for (const auto& entry : _bow) {
            _out << entry.first << ": " << entry.second << '\n';
        }

        if (isTotal) {
            _voc += static_cast<int>(_bow.size());
        } else {
            for (const auto& entry : _bow) {
                _totalWordsinC += entry.second;
            }
        }
        _in.close();
        _out.close();
    }
    
    // Computes the add-one smoothed probability of every vocabulary word for
    // this class:
    //     P(word | class) = (count in class + 1) / (_totalWordsinC + total._voc)
    // The results are stored in _probCalc and written to
    // "<className>_prob.txt" as "word: probability" lines.
    //
    // total  the combined corpus; supplies the vocabulary (total._bow) and its
    //        size (total._voc).
    void NB::calcProb(NB& total) {

        _out.open(_className + "_prob.txt");
        if (_out.fail()) {
            perror("cannot write to");
        }

        const float denominator = static_cast<float>(_totalWordsinC + total._voc);
        for (const auto& entry : total._bow) {
            // Direct lookup instead of scanning _bow for every vocabulary word.
            // A word this class never saw has count 0, which also covers a
            // class whose _bow is empty.
            const auto found = _bow.find(entry.first);
            const int count = (found != _bow.end()) ? found->second : 0;
            _probCalc[entry.first] = static_cast<float>(count + 1) / denominator;
        }

        for (const auto& entry : _probCalc) {
            _out << entry.first << ": " << entry.second << '\n';
        }
        _out.close();
    }
    
    // Scores this document against the class `prob`.
    //
    // The score is the product, over every word of this document, of that
    // word's probability in `prob` raised to the word's count here, multiplied
    // by the class prior prob._nOfClass / total._tnClass. The score is printed
    // to stdout, written to "<className>_<prob className>_prob.txt", stored in
    // _prob and returned.
    //
    // prob   the class whose word probabilities (_probCalc) are used.
    // total  the combined corpus; supplies the vocabulary size and file total.
    //
    // NOTE(review): the product of many small probabilities can underflow a
    // float for longer documents. Summing logarithms would avoid that but
    // would change the returned value, so it is not done here.
    float NB::totalProb(NB& prob, NB& total) {

        _out.open(_className + "_" + prob._className + "_prob.txt");
        if (_out.fail()) {
            perror("cannot write to");
        }

        // Smoothed probability of a word that `prob` has no entry for.
        const float unseenProb = (float)1 / (prob._totalWordsinC + total._voc);

        _prob = 1.0f;
        for (const auto& entry : _bow) {
            // Direct lookup per word. The previous shared miss counter was not
            // reset after an unseen word, so a second consecutive unseen word
            // received no smoothing factor at all.
            const auto found = prob._probCalc.find(entry.first);
            const float wordProb = (found != prob._probCalc.end()) ? found->second : unseenProb;
            // Each occurrence contributes one factor, for seen and unseen words alike.
            _prob = (_prob * pow(wordProb, entry.second));
        }
        _prob = (_prob * ((float)(prob._nOfClass) / total._tnClass));
        cout << _prob << endl;
        _out << "The probalility of the " << _className << " beloning to " << prob._className << " is: " << _prob << endl;
        _out.close();
        return _prob;
    }
    
    #pragma一次
    #包括
    #包括
    #包括
    #包括
    #包括
    使用名称空间std;
    NB类
    {
    公众:
    NB(NB&cl1,NB&cl2,字符串类名称);
    NB(字符串类名称);
    NB(字符串类名称,int类类型);
    向量组合ClassText();
    void bagOfWords(字符串classCombine,bool isTotal=false);
    无效calcProb(NB和总计);
    浮动总概率(NB&prob,NB&total);
    int类类型;
    私人:
    int _len=0;
    浮动概率=1.0f;
    int _voc=0;
    int _nOfClass=0;
    int_tnClass=0;
    int _totalWordsinC=0;
    int _wordCounter=0;
    bool_isDone=假;
    ifstream_in;
    流出的液体;
    //字符串组合;
    字符串_类名称;
    字符串_文件名;
    向量_组合;
    地图类;
    地图弓;
    地图(probCalc),;
    };
    
    NB.cpp文件:

    #pragma once
    
    #include<iostream>
    #include<fstream>
    #include<string>
    #include<vector>
    #include<map>
    using namespace std;
    
    // Two-class Naive Bayes text classifier working on whitespace-separated
    // words read from files. One NB object represents either a single class
    // (built from one or more files), a single document to classify, or the
    // combined corpus of two classes. Intermediate results are also written
    // to text files whose names are derived from the class name.
    class NB
    {
    public:
        // Combined-corpus constructor: gathers the words of cl1 and cl2,
        // writes them to "combineAll.txt", sums both classes' _tnClass and
        // builds the bag of words with isTotal = true.
        NB(NB& cl1, NB& cl2, string className);
        // Class constructor: prompts on stdin for filenames until "q" is
        // entered, then combines the files and builds the bag of words.
        NB(string className);
        // Single-document constructor: prompts on stdin for one filename,
        // stores classType, then combines and builds the bag of words.
        NB(string className, int classType);
        // Reads every file registered in _category, writes all words to
        // "<className>.txt" (one per line) and returns them.
        vector <string> combineClassText();
        // Reads the words of classCombine, stores per-word counts in _bow and
        // writes them to "<className>_bow.txt". With isTotal == true the
        // number of distinct words is added to _voc; otherwise the sum of all
        // counts is added to _totalWordsinC.
        void bagOfWords(string classCombine, bool isTotal = false);
        // Fills _probCalc for the words of total._bow using
        // (count + 1) / (_totalWordsinC + total._voc) and writes the result
        // to "<className>_prob.txt".
        void calcProb(NB& total);
        // Scores this object's words against the probabilities of `prob`,
        // multiplies by prob._nOfClass / total._tnClass, prints and returns
        // the result.
        float totalProb(NB& prob, NB& total);
        // Set by the (className, classType) constructor; not read by any
        // member function visible in this file.
        int classType;
    
    private:
        int _len = 0;               // scratch length used by the combining constructor
        float _prob = 1.0f;         // last score computed by totalProb
        int _voc = 0;               // distinct-word count, updated by bagOfWords(isTotal = true)
        int _nOfClass = 0;          // number of filenames entered for this class
        int _tnClass = 0;           // file count; summed over both classes in the combining constructor
        int _totalWordsinC = 0;     // sum of word counts, updated by bagOfWords(isTotal = false)
        int _wordCounter = 0;       // scratch counter used by totalProb
        bool _isDone = false;       // loop flag for the filename prompt
        ifstream _in;               // input stream reused by the member functions
        ofstream _out;              // output stream reused by the member functions
        //string _classCombine;
        string _className;          // class name; also the prefix of generated file names
        string _fileName;           // last filename read from stdin
        vector <string> _combined;  // words of both classes (combining constructor)
        map<string, string> _category;  // filename -> class name
        map<string, int> _bow;          // word -> occurrence count
        map<string, float> _probCalc;   // word -> smoothed probability
    };
    
    #include "NB.h"
    #include<cmath>
    
    // Combined-corpus constructor.
    //
    // Collects the words of both classes into _combined, writes them to
    // "combineAll.txt" (one word per line), stores the total number of
    // training files in _tnClass and builds the vocabulary bag of words.
    //
    // cl1, cl2   the two classes to merge (their class files are rewritten
    //            as a side effect of combineClassText()).
    // className  name of the combined corpus; prefix of the generated files.
    NB::NB(NB& cl1, NB& cl2, string className)
    {
        _className = className;
        _out.open("combineAll.txt");
        if (_out.fail()) {
            perror("cannot write to combineAll.txt");
        }

        // combineClassText() re-reads every input file and rewrites the class
        // file on each call, so it must be called once per class rather than
        // once per word.
        const vector<string> firstWords = cl1.combineClassText();
        _combined.insert(_combined.end(), firstWords.begin(), firstWords.end());

        const vector<string> secondWords = cl2.combineClassText();
        _combined.insert(_combined.end(), secondWords.begin(), secondWords.end());

        _len = static_cast<int>(_combined.size());
        for (const string& word : _combined) {
            _out << word << '\n';
        }
        // Closing flushes the file before bagOfWords() reads it back.
        _out.close();
        _tnClass = cl1._tnClass + cl2._tnClass;
        bagOfWords("combineAll.txt", true);
    }
    
    // Single-document constructor: asks on stdin for exactly one filename,
    // registers it under this class name, then combines the text and builds
    // the bag of words from "<className>.txt".
    NB::NB(string className, int classType)
        : classType(classType), _className(className)
    {
        const string prompt = "Enter a filename for " + _className;
        cout << prompt << endl;
        cin >> _fileName;

        _category[_fileName] = _className;

        combineClassText();
        const string combinedFile = _className + ".txt";
        bagOfWords(combinedFile);
    }
    
    // Class constructor: repeatedly asks on stdin for a training filename
    // until the user enters "q" (or stdin ends), registering each file under
    // this class name and counting it. Afterwards the files are combined and
    // the bag of words is built from "<className>.txt".
    NB::NB(string className)
    {
        _className = className;
        while (!_isDone) {
            cout << "Enter a filename for " + _className << endl;
            // A failed read (EOF, closed or broken stdin) never yields "q", so
            // it has to end the loop explicitly; otherwise the loop would spin
            // forever and keep incrementing the file counters.
            if (!(cin >> _fileName) || _fileName == "q") {
                _isDone = true;
            } else {
                _category[_fileName] = _className;
                _nOfClass++;
                _tnClass++;
            }
        }
        combineClassText();
        bagOfWords(_className + ".txt");
    }
    
    // Concatenates the words of every file registered in _category.
    // Each word is written on its own line to "<className>.txt" and also
    // collected into the returned vector. Open failures are reported with
    // perror() and processing continues.
    vector<string> NB::combineClassText() {
        const string outputName = _className + ".txt";
        vector<string> words;

        _out.open(outputName);
        if (_out.fail()) {
            perror("cannot write to");
        }

        for (const auto& entry : _category) {
            // entry.first is the filename; the mapped class name is not needed here.
            _in.open(entry.first);
            if (_in.fail()) {
                perror("cannot read from");
            }

            string word;
            while (_in >> word) {
                words.push_back(word);
                _out << word << endl;
            }
            _in.close();
        }

        _out.close();
        return words;
    }
    
    // Builds the bag of words for the file `classCombine`.
    //
    // Every whitespace-separated token is stripped of '.' and ',' and counted.
    // The counts are stored in _bow (overwriting any earlier count of the same
    // word) and the whole of _bow is written to "<className>_bow.txt" as
    // "word: count" lines.
    //
    // classCombine  file to read the words from.
    // isTotal       true  -> add the number of distinct words to _voc;
    //               false -> add the sum of all counts to _totalWordsinC.
    void NB::bagOfWords(string classCombine, bool isTotal) {

        _in.open(classCombine);
        if (_in.fail()) {
            perror("cannot read from");
        }

        _out.open(_className + "_bow.txt");
        if (_out.fail()) {
            perror("cannot write to");
        }

        // Count in one pass with a map instead of comparing every pair of words.
        map<string, int> counts;
        string token;
        while (_in >> token) {
            // Remove the punctuation instead of replacing it with a space:
            // "word," must count as "word", not as the distinct key "word ".
            string word;
            for (char ch : token) {
                if (ch != '.' && ch != ',') {
                    word += ch;
                }
            }
            // A token made only of punctuation is not a word.
            if (!word.empty()) {
                ++counts[word];
            }
        }

        // Assign rather than accumulate, so a repeated call replaces the count
        // of a word instead of adding to it.
        for (const auto& entry : counts) {
            _bow[entry.first] = entry.second;
        }

        for (const auto& entry : _bow) {
            _out << entry.first << ": " << entry.second << '\n';
        }

        if (isTotal) {
            _voc += static_cast<int>(_bow.size());
        } else {
            for (const auto& entry : _bow) {
                _totalWordsinC += entry.second;
            }
        }
        _in.close();
        _out.close();
    }
    
    // Computes the add-one smoothed probability of every vocabulary word for
    // this class:
    //     P(word | class) = (count in class + 1) / (_totalWordsinC + total._voc)
    // The results are stored in _probCalc and written to
    // "<className>_prob.txt" as "word: probability" lines.
    //
    // total  the combined corpus; supplies the vocabulary (total._bow) and its
    //        size (total._voc).
    void NB::calcProb(NB& total) {

        _out.open(_className + "_prob.txt");
        if (_out.fail()) {
            perror("cannot write to");
        }

        const float denominator = static_cast<float>(_totalWordsinC + total._voc);
        for (const auto& entry : total._bow) {
            // Direct lookup instead of scanning _bow for every vocabulary word.
            // A word this class never saw has count 0, which also covers a
            // class whose _bow is empty.
            const auto found = _bow.find(entry.first);
            const int count = (found != _bow.end()) ? found->second : 0;
            _probCalc[entry.first] = static_cast<float>(count + 1) / denominator;
        }

        for (const auto& entry : _probCalc) {
            _out << entry.first << ": " << entry.second << '\n';
        }
        _out.close();
    }
    
    // Scores this document against the class `prob`.
    //
    // The score is the product, over every word of this document, of that
    // word's probability in `prob` raised to the word's count here, multiplied
    // by the class prior prob._nOfClass / total._tnClass. The score is printed
    // to stdout, written to "<className>_<prob className>_prob.txt", stored in
    // _prob and returned.
    //
    // prob   the class whose word probabilities (_probCalc) are used.
    // total  the combined corpus; supplies the vocabulary size and file total.
    //
    // NOTE(review): the product of many small probabilities can underflow a
    // float for longer documents. Summing logarithms would avoid that but
    // would change the returned value, so it is not done here.
    float NB::totalProb(NB& prob, NB& total) {

        _out.open(_className + "_" + prob._className + "_prob.txt");
        if (_out.fail()) {
            perror("cannot write to");
        }

        // Smoothed probability of a word that `prob` has no entry for.
        const float unseenProb = (float)1 / (prob._totalWordsinC + total._voc);

        _prob = 1.0f;
        for (const auto& entry : _bow) {
            // Direct lookup per word. The previous shared miss counter was not
            // reset after an unseen word, so a second consecutive unseen word
            // received no smoothing factor at all.
            const auto found = prob._probCalc.find(entry.first);
            const float wordProb = (found != prob._probCalc.end()) ? found->second : unseenProb;
            // Each occurrence contributes one factor, for seen and unseen words alike.
            _prob = (_prob * pow(wordProb, entry.second));
        }
        _prob = (_prob * ((float)(prob._nOfClass) / total._tnClass));
        cout << _prob << endl;
        _out << "The probalility of the " << _className << " beloning to " << prob._className << " is: " << _prob << endl;
        _out.close();
        return _prob;
    }
    
    #包括“NB.h”
    #包括
    NB::NB(NB&cl1,NB&cl2,字符串类名称)
    {
    _className=className;
    _out.open(“combineAll.txt”);
    如果(_out.fail()){
    perror(“无法写入combineAll.txt”);
    }
    _len=cl1.combineClassText().size();
    对于(int i=0;i<\u len;i++){
    _combined.push_back(cl1.combineClassText()[i]);
    }
    _len=cl2.combineClassText().size();
    对于(int i=0;i<\u len;i++){
    _combined.push_back(cl2.combineClassText()[i]);
    }
    _len=_组合的.size();
    对于(int i=0;i<\u len;i++){
    
    你应该在 Code Review 上提问,而不是在 Stack Overflow 上…… <code>我想知道代码是怎么写的?就像我编写代码的方式一样,它是一个很好的C++实践</code>。