C++ 如何计算文件中的字数?
我正在创建一个计算输入文件中有多少单词的程序。我似乎不知道如何让它用空格、句点、逗号或行首或行尾来定义单词 输入文件的内容: 你好,世界一切都很好。你好,世界一切都很好。你好,世界一切都很好 输出应该是15个字,而我的输出是14个字 我尝试过添加包含句点、逗号等的or,但它也只计算空格顶部的orC++ 如何计算文件中的字数?,c++,C++,我正在创建一个计算输入文件中有多少单词的程序。我似乎不知道如何让它用空格、句点、逗号或行首或行尾来定义单词 输入文件的内容: 你好,世界一切都很好。你好,世界一切都很好。你好,世界一切都很好 输出应该是15个字,而我的输出是14个字 我尝试过添加包含句点、逗号等的or,但它也只计算空格顶部的or #include <iostream> #include <string> #include <fstream> using namespace std; //F
#include <iostream>
#include <string>
#include <fstream>
using namespace std;
//Function Declarations
void findFrequency(int A[], string &x);
void findWords(int A[], string &x);
//Function Definitions
void findFrequency(int A[], string &x)
{
//Counts the number of occurences in the string
for (int i = 0; x[i] != '\0'; i++)
{
if (x[i] >= 'A' && x[i] <= 'Z')
A[toascii(x[i]) - 64]++;
else if (x[i] >= 'a' && x[i] <= 'z')
A[toascii(x[i]) - 96]++;
}
//Displaying the results
char ch = 'a';
for (int count = 1; count < 27; count++)
{
if (A[count] > 0)
{
cout << A[count] << " : " << ch << endl;
}
ch++;
}
}
void findWords(int A[], string &x)
{
int wordcount = 0;
for (int count = 0; x[count] != '\0'; count++)
{
if (x[count] == ' ')
{
wordcount++;
A[0] = wordcount;
}
}
cout << A[0] << " Words " << endl;
}
int main()
{
string x;
int A[27] = { 0 }; //Array assigned all elements to zero
ifstream in; //declaring an input file stream
in.open("mytext.dat");
if (in.fail())
{
cout << "Input file did not open correctly" << endl;
}
getline(in,x);
findWords(A, x);
findFrequency(A, x);
in.close();
system("pause");
return 0;
}
当我得到的结果是14时,输出应该是15。也许这就是您需要的
size_t count_words(std::istream& is) {
size_t co = 0;
std::string word;
while(is >> word) { // read a whitespace separated chunk
for(char ch : word) { // step through its characters
if(std::isalpha(ch)) {
// it contains at least one alphabetic character so
// count it as a word and move on
++co;
break;
}
}
}
return co;
}
这里是一种方法,也有一些测试用例
#include <iostream>
#include <string>
#include <fstream>
using namespace std;
//Function Declarations
void findFrequency(int A[], string &x);
void findWords(int A[], string &x);
//Function Definitions
void findFrequency(int A[], string &x)
{
//Counts the number of occurences in the string
for (int i = 0; x[i] != '\0'; i++)
{
if (x[i] >= 'A' && x[i] <= 'Z')
A[toascii(x[i]) - 64]++;
else if (x[i] >= 'a' && x[i] <= 'z')
A[toascii(x[i]) - 96]++;
}
//Displaying the results
char ch = 'a';
for (int count = 1; count < 27; count++)
{
if (A[count] > 0)
{
cout << A[count] << " : " << ch << endl;
}
ch++;
}
}
void findWords(int A[], string &x)
{
int wordcount = 0;
for (int count = 0; x[count] != '\0'; count++)
{
if (x[count] == ' ')
{
wordcount++;
A[0] = wordcount;
}
}
cout << A[0] << " Words " << endl;
}
int main()
{
string x;
int A[27] = { 0 }; //Array assigned all elements to zero
ifstream in; //declaring an input file stream
in.open("mytext.dat");
if (in.fail())
{
cout << "Input file did not open correctly" << endl;
}
getline(in,x);
findWords(A, x);
findFrequency(A, x);
in.close();
system("pause");
return 0;
}
测试用例是一系列具有特定字符串的字符数组,用于测试RetVal结构/类的findNextWord方法
char line1[] = "this is1 a line. \t of text \n "; // multiple white spaces
char line2[] = "another line"; // string that ends with zero terminator, no newline
char line3[] = "\n"; // line with newline only
char line4[] = ""; // empty string with no text
这是实际的源代码
#include <iostream>
#include <cstring>
#include <cstring>
struct RetVal {
RetVal(char *p1, char *p2) : pFirst(p1), pLast(p2) {}
RetVal(char *p2 = nullptr) : pFirst(nullptr), pLast(p2) {}
char *pFirst;
char *pLast;
bool findNextWord()
{
if (pLast && *pLast) {
pFirst = pLast;
// scan the input line looking for the first non-space character.
// the isspace() function indicates true for any of the following
// characters: space, newline, tab, carriage return, etc.
while (*pFirst && isspace(*pFirst)) pFirst++;
if (pFirst && *pFirst) {
// we have found a non-space character so now we look
// for a space character or the end of string.
pLast = pFirst;
while (*pLast && ! isspace(*pLast)) pLast++;
}
else {
// indicate we are done with this string.
pFirst = pLast = nullptr;
}
}
else {
pFirst = nullptr;
}
// return value indicates if we are still processing, true, or if we are done, false.
return pFirst != nullptr;
}
};
void printWords(RetVal &x)
{
int iCount = 0;
while (x.findNextWord()) {
char xWord[128] = { 0 };
strncpy(xWord, x.pFirst, x.pLast - x.pFirst);
iCount++;
std::cout << "word " << iCount << " is \"" << xWord << "\"" << std::endl;
}
std::cout << "total word count is " << iCount << std::endl;
}
int main()
{
char line1[] = "this is1 a line. \t of text \n ";
char line2[] = "another line";
char line3[] = "\n";
char line4[] = "";
std::cout << "Process line1[] \"" << line1 << "\"" << std::endl;
RetVal x (line1);
printWords(x);
std::cout << std::endl << "Process line2[] \"" << line2 << "\"" << std::endl;
RetVal x2 (line2);
printWords(x2);
std::cout << std::endl << "Process line3[] \"" << line3 << "\"" << std::endl;
RetVal x3 (line3);
printWords(x3);
std::cout << std::endl << "Process line4[] \"" << line4 << "\"" << std::endl;
RetVal x4(line4);
printWords(x4);
return 0;
}
如果需要将类似于空白的标点符号视为可忽略的内容,则可以修改FindTextWord方法,以包括循环中字符的ispunt测试,如中所示:
bool findNextWord()
{
if (pLast && *pLast) {
pFirst = pLast;
// scan the input line looking for the first non-space character.
// the isspace() function indicates true for any of the following
// characters: space, newline, tab, carriage return, etc.
while (*pFirst && (isspace(*pFirst) || ispunct(*pFirst))) pFirst++;
if (pFirst && *pFirst) {
// we have found a non-space character so now we look
// for a space character or the end of string.
pLast = pFirst;
while (*pLast && ! (isspace(*pLast) || ispunct (*pLast))) pLast++;
}
else {
// indicate we are done with this string.
pFirst = pLast = nullptr;
}
}
else {
pFirst = nullptr;
}
// return value indicates if we are still processing, true, or if we are done, false.
return pFirst != nullptr;
}
一般来说,如果需要优化单词开头和结尾的过滤器,可以使用其他函数修改这两个位置,该函数查看字符并将其分类为单词的有效字符或无效字符。@Renat更好,std::set。OP不需要键值,只需要键值。64和96这两个神奇的数字是什么?也许你应该改为使用字符文字。或者更好的是,请参阅std::toupper和std::tolower。std::isalpha将告诉您字符中的值是否编码字母,而无需硬编码某些字符编码不起作用的值。您可能想用std::ispunct来检测标点符号来补充这一点。任何时候调用getline或任何其他输入函数而不检查其返回值时,您的程序都是错误的。您不是在计算单词,而是在计算空格。如果一个单词后面有两个空格,你就称它为两个单词。如果你的行尾没有空格,你就不会数到最后一个字。