C++ 如何计算文件中的字数?

C++ 如何计算文件中的字数?,c++,C++,我正在创建一个计算输入文件中有多少单词的程序。我似乎不知道如何让它用空格、句点、逗号或行首或行尾来定义单词 输入文件的内容: 你好,世界一切都很好。你好,世界一切都很好。你好,世界一切都很好 输出应该是15个字,而我的输出是14个字 我尝试过添加包含句点、逗号等的or,但它也只计算空格顶部的or #include <iostream> #include <string> #include <fstream> using namespace std; //F

我正在创建一个计算输入文件中有多少单词的程序。我似乎不知道如何让它用空格、句点、逗号或行首或行尾来定义单词

输入文件的内容:

你好,世界一切都很好。你好,世界一切都很好。你好,世界一切都很好

输出应该是15个单词,而我的输出是14个。

我尝试过在条件中用“或”(||)加入句点、逗号等字符,但这样只是在空格计数之外又把这些字符也计了进去。

#include <iostream> 
#include <string>
#include <fstream>
using namespace std;

//Function Declarations
void findFrequency(int A[], string &x);
void findWords(int A[], string &x);

//Function Definitions
/**
 * Tallies the ASCII letters in x without regard to case, then prints them.
 *
 * A[1]..A[26] are incremented for 'a'..'z' respectively; the upper- and
 * lower-case form of a letter share one slot. A[0] is not touched. The
 * tallies are added to whatever A already holds. After the scan, every
 * letter whose slot is greater than zero is printed as "<count> : <letter>",
 * one per line, in alphabetical order.
 *
 * @param A  array of at least 27 ints that accumulates the tallies
 * @param x  text to scan; the scan stops at the first '\0' character
 */
void findFrequency(int A[], std::string &x)
{
    // Walk the NUL-terminated character data of the string.
    for (const char *p = x.c_str(); *p != '\0'; ++p)
    {
        const char c = *p;
        if ('A' <= c && c <= 'Z')
            ++A[toascii(c) - 64];   // 'A' is 65 in ASCII, so it lands in slot 1
        else if ('a' <= c && c <= 'z')
            ++A[toascii(c) - 96];   // 'a' is 97 in ASCII, so it lands in slot 1
    }

    // Report only the letters that occurred at least once.
    char letter = 'a';
    int slot = 1;
    while (slot < 27)
    {
        if (A[slot] > 0)
            std::cout << A[slot] << " : " << letter << std::endl;
        ++letter;
        ++slot;
    }
}


/**
 * Counts the space characters in x and reports that count as the number of
 * words.
 *
 * Only ' ' is recognised as a separator, so the figure is the number of
 * spaces seen rather than the number of words. A[0] is overwritten each time
 * a space is found; when x contains no space, A[0] keeps its previous value.
 * The value of A[0] is then printed followed by " Words ".
 *
 * @param A  array whose element 0 receives the count
 * @param x  text to scan; the scan stops at the first '\0' character
 */
void findWords(int A[], std::string &x)
{
    int spacesSeen = 0;

    for (const char *p = x.c_str(); *p != '\0'; ++p)
    {
        if (*p != ' ')
            continue;

        A[0] = ++spacesSeen;
    }

    std::cout << A[0] << " Words " << std::endl;
}



/**
 * Reads the first line of "mytext.dat", then prints the word count reported
 * by findWords followed by the letter tallies reported by findFrequency.
 */
int main()
{
    int A[27] = { 0 };                 // every element starts at zero
    std::string x;

    std::ifstream in("mytext.dat");    // input file stream for the text to analyse
    if (!in)
        std::cout << "Input file did not open correctly" << std::endl;

    // Only the first line of the file is analysed.
    std::getline(in, x);
    findWords(A, x);
    findFrequency(A, x);

    in.close();

    system("pause");
    return 0;
}


输出应该是15,而我得到的结果是14。

也许这就是您需要的

/**
 * Counts the words that can be read from an input stream.
 *
 * The stream is consumed one whitespace-separated chunk at a time. A chunk
 * counts as a word when it contains at least one alphabetic character, so
 * chunks made only of digits or punctuation ("123", "--") are not counted,
 * while "world." and "a1" are.
 *
 * @param is  stream to read from; it is read until extraction fails
 * @return    number of chunks that contain an alphabetic character
 */
size_t count_words(std::istream& is) {
    size_t co = 0;
    std::string word;
    while(is >> word) {             // read a whitespace separated chunk
        for(const char ch : word) { // step through its characters
            // The <cctype> classifiers take an int that must be representable
            // as unsigned char; passing a negative char (any non-ASCII byte
            // where char is signed) is undefined behaviour, hence the cast.
            if(std::isalpha(static_cast<unsigned char>(ch))) {
                // one alphabetic character is enough: count the chunk once
                // and move on to the next one
                ++co;
                break;
            }
        }
    }
    return co;
}

这里是一种方法,也有一些测试用例

#include <iostream> 
#include <string>
#include <fstream>
using namespace std;

//Function Declarations
void findFrequency(int A[], string &x);
void findWords(int A[], string &x);

//Function Definitions
/**
 * Tallies the ASCII letters in x without regard to case, then prints them.
 *
 * A[1]..A[26] are incremented for 'a'..'z' respectively; the upper- and
 * lower-case form of a letter share one slot. A[0] is not touched. The
 * tallies are added to whatever A already holds. After the scan, every
 * letter whose slot is greater than zero is printed as "<count> : <letter>",
 * one per line, in alphabetical order.
 *
 * @param A  array of at least 27 ints that accumulates the tallies
 * @param x  text to scan; every character of the string is examined
 */
void findFrequency(int A[], std::string &x)
{
    // Iterate over the whole string rather than stopping at the first '\0',
    // so text containing an embedded NUL is not silently truncated.
    for (const char c : x)
    {
        // Slots are derived from character literals instead of the raw
        // codes 64/96 and the non-standard toascii().
        if (c >= 'A' && c <= 'Z')
            ++A[c - 'A' + 1];
        else if (c >= 'a' && c <= 'z')
            ++A[c - 'a' + 1];
    }

    // Report only the letters that occurred at least once.
    for (int slot = 1; slot < 27; ++slot)
    {
        if (A[slot] > 0)
            std::cout << A[slot] << " : " << static_cast<char>('a' + slot - 1) << std::endl;
    }
}


/**
 * Counts the words in x, stores the count in A[0] and prints it.
 *
 * A word is a maximal run of characters that are neither whitespace nor one
 * of the punctuation marks . , ; : ! ? — so the start and the end of the
 * line delimit words as well, and consecutive separators ("well.  Hello")
 * do not produce empty words. Counting separators instead would miss the
 * last word of a line and over-count repeated spaces.
 *
 * @param A  array whose element 0 receives the word count (always written,
 *           0 for a line without words)
 * @param x  line of text to examine
 */
void findWords(int A[], std::string &x)
{
    static const char kDelimiters[] = " \t\r\n\f\v.,;:!?";

    int wordcount = 0;

    // Hop from the start of one word to the start of the next.
    std::string::size_type pos = x.find_first_not_of(kDelimiters);
    while (pos != std::string::npos)
    {
        ++wordcount;

        // Skip to the separator that ends this word, then past the whole
        // run of separators that follows it.
        pos = x.find_first_of(kDelimiters, pos);
        if (pos != std::string::npos)
            pos = x.find_first_not_of(kDelimiters, pos);
    }

    A[0] = wordcount;
    std::cout << A[0] << " Words " << std::endl;
}



/**
 * Reads the first line of "mytext.dat", then prints its word count followed
 * by its letter tallies.
 *
 * @return 0 on success, 1 when the file cannot be opened or no line can be
 *         read from it
 */
int main()
{
    int A[27] = { 0 };                 // every element starts at zero
    int status = 0;
    std::string x;
    std::ifstream in("mytext.dat");    // closed automatically when it goes out of scope

    if (!in)
    {
        std::cout << "Input file did not open correctly" << std::endl;
        status = 1;
    }
    else if (!std::getline(in, x))
    {
        // Never analyse a line that was not actually read.
        std::cout << "Input file is empty or could not be read" << std::endl;
        status = 1;
    }
    else
    {
        findWords(A, x);
        findFrequency(A, x);
    }

    system("pause");
    return status;
}

测试用例是一系列具有特定字符串的字符数组,用于测试RetVal结构/类的findNextWord方法

char line1[] = "this is1    a  line. \t of text  \n ";  // words separated by runs of spaces, a tab and a newline, with trailing whitespace
char line2[] = "another   line";    // no trailing newline: the last word ends at the NUL terminator
char line3[] = "\n";                // whitespace only (a single newline), so it contains no words
char line4[] = "";                  // empty string, so it contains no words
这是实际的源代码

#include <cctype>
#include <cstring>
#include <iostream>

/**
 * Cursor that walks a NUL-terminated character buffer one
 * whitespace-delimited word at a time.
 *
 * After findNextWord() returns true, [pFirst, pLast) delimits the word that
 * was found. The buffer is only read, never modified, and no copy of it is
 * made, so it has to stay alive for as long as the cursor is used.
 */
struct RetVal {
    /// Starts from an explicit range; the next scan resumes at p2.
    RetVal(char *p1, char *p2) : pFirst(p1), pLast(p2) {}
    /// Starts scanning at p2; a null p2 yields no words.
    RetVal(char *p2 = nullptr) : pFirst(nullptr), pLast(p2) {}
    char *pFirst;   ///< first character of the current word, or nullptr when there is none
    char *pLast;    ///< one past the current word; the next scan starts here

    /**
     * Advances to the next word in the buffer.
     *
     * @return true when a word was found (pFirst/pLast delimit it), false
     *         when the rest of the buffer holds no further word
     */
    bool  findNextWord()
    {
        if (pLast && *pLast) {
            pFirst = pLast;
            // Skip leading whitespace (space, tab, newline, carriage return,
            // ...). The <cctype> classifiers need a value representable as
            // unsigned char; a negative char would be undefined behaviour.
            while (*pFirst && std::isspace(static_cast<unsigned char>(*pFirst))) pFirst++;

            if (*pFirst) {
                // A word starts here; it runs up to the next whitespace
                // character or the end of the string.
                pLast = pFirst;
                while (*pLast && ! std::isspace(static_cast<unsigned char>(*pLast))) pLast++;
            }
            else {
                // Only whitespace was left: this string is finished.
                pFirst = pLast = nullptr;
            }
        }
        else {
            pFirst = nullptr;
        }

        // A non-null pFirst means a word was found; null means we are done.
        return pFirst != nullptr;
    }
};

/**
 * Prints every word found by x, numbered from 1, followed by the total.
 *
 * Each word is written as `word <n> is "<text>"` on its own line, and the
 * last line reads `total word count is <n>`.
 *
 * @param x  cursor to drain; it is advanced until it reports no more words
 */
void printWords(RetVal &x)
{
    int    iCount = 0;

    while (x.findNextWord()) {
        iCount++;

        // Write the [pFirst, pLast) slice directly. Copying it into a
        // fixed-size scratch buffer would overflow on words that are longer
        // than the buffer.
        std::cout << "word " << iCount << " is \"";
        std::cout.write(x.pFirst, x.pLast - x.pFirst);
        std::cout << "\"" << std::endl;
    }

    std::cout << "total word count is " << iCount << std::endl;
}

int main()
{
    char line1[] = "this is1    a  line. \t of text  \n ";
    char line2[] = "another   line";
    char line3[] = "\n";
    char line4[] = "";

    std::cout << "Process line1[] \"" << line1 << "\""  << std::endl;
    RetVal x (line1);
    printWords(x);

    std::cout << std::endl << "Process line2[] \"" << line2 << "\"" << std::endl;
    RetVal x2 (line2);
    printWords(x2);

    std::cout << std::endl << "Process line3[] \"" << line3 << "\"" << std::endl;
    RetVal x3 (line3);
    printWords(x3);

    std::cout << std::endl << "Process line4[] \"" << line4 << "\"" << std::endl;
    RetVal x4(line4);
    printWords(x4);

    return 0;
}
如果需要将标点符号像空白一样视为可忽略的内容,则可以修改findNextWord方法,在循环的字符判断中加入ispunct测试,如下所示:

/**
 * Advances to the next word, treating punctuation like whitespace.
 *
 * A word is a maximal run of characters that are neither whitespace nor
 * punctuation, so "line." yields "line" and "Hello, world" yields two words.
 *
 * @return true when a word was found (pFirst/pLast delimit it), false when
 *         the rest of the buffer holds no further word
 */
bool  findNextWord()
{
    // The <cctype> classifiers need a value representable as unsigned char;
    // passing a negative char would be undefined behaviour, hence the cast.
    const auto isSeparator = [](char c) {
        const unsigned char uc = static_cast<unsigned char>(c);
        return std::isspace(uc) || std::ispunct(uc);
    };

    if (pLast && *pLast) {
        pFirst = pLast;
        // Skip the leading run of separators (whitespace and punctuation).
        while (*pFirst && isSeparator(*pFirst)) pFirst++;

        if (*pFirst) {
            // A word starts here; it runs up to the next separator or the
            // end of the string.
            pLast = pFirst;
            while (*pLast && ! isSeparator(*pLast)) pLast++;
        }
        else {
            // Only separators were left: this string is finished.
            pFirst = pLast = nullptr;
        }
    }
    else {
        pFirst = nullptr;
    }

    // A non-null pFirst means a word was found; null means we are done.
    return pFirst != nullptr;
}

一般来说,如果需要优化单词开头和结尾的过滤器,可以使用其他函数修改这两个位置,该函数查看字符并将其分类为单词的有效字符或无效字符。

@Renat 更好的选择是 std::set:OP 不需要键值对,只需要键。64 和 96 这两个魔法数字是什么?也许你应该改用字符字面量。或者更好的是,请参阅 std::toupper 和 std::tolower。std::isalpha 可以告诉您一个字符是否是字母,而无需硬编码那些在某些字符编码下会失效的数值。您可能还想用 std::ispunct 检测标点符号来作为补充。任何时候调用 getline 或其他输入函数而不检查其返回值,您的程序就是有缺陷的。您不是在计算单词,而是在计算空格:如果一个单词后面有两个空格,就会被算成两个单词;如果行尾没有空格,最后一个单词就不会被计入。