C 如何计算文件中的特定单词？_C

C 如何计算文件中的特定单词？

C 如何计算文件中的特定单词？,c,C,我试图创建一个c程序来读取文件并计算特定的单词我尝试了此代码，但没有得到任何结果： #include<stdio.h> #include<stdlib.h> void main { File *fp = fopen("file.txt","r+"); int count =0; char ch[10]; while((fgetc(fp)!=NULL) { while((fgetc(fp)!=NULL) {

我试图创建一个c程序来读取文件并计算特定的单词

我尝试了此代码，但没有得到任何结果：

#include<stdio.h>
#include<stdlib.h>
/*
 * Asker's original attempt, kept verbatim because the answers below refer
 * to it. It is meant to count occurrences of "the" and "and" in file.txt
 * and print the total. It does not compile as written; the review notes
 * below mark the problems.
 */
/* NOTE(review): the parameter list is missing; this must be `int main(void)`. */
void main
{
  /* NOTE(review): the stdio stream type is spelled `FILE`, not `File`.
     The result of fopen is not checked before use. */
  File *fp = fopen("file.txt","r+");
  int count =0;
  char ch[10]; 

  /* NOTE(review): fgetc returns EOF (not NULL) at end of file or on error.
     Every fgetc call in these conditions also consumes one character that
     is then discarded. The parentheses are unbalanced. */
  while((fgetc(fp)!=NULL)
   {
     while((fgetc(fp)!=NULL)
      {
        /* NOTE(review): fgets(ch,3,fp) stores at most 2 characters plus the
           terminating '\0'. `==` compares the returned pointer with the
           address of the literal, not the string contents. The second
           fgets reads fresh input instead of re-testing the same text. */
        if((fgets(ch,3,fp))=="the" || (fgets(ch,3,fp))=="and")
         count++;
      }
   }
   /* Prints the final counter value. */
   printf("%d",count);
}

当您每次以 3 个字符为一块读取数据时，您实际上假设了 the 和 and 这两个单词恰好在 3 个字符的边界上对齐。一般来说，情况并非如此。

您还需要使用strncmp来比较字符串

作为初步方案，我会逐行读取文件，并在每一行中搜索你想要的单词。

我也不确定您使用两个嵌套while循环的意图。

您不能用相等运算符（==）比较字符串指针来判断字符串内容是否相同，必须使用 strcmp 函数。

您的代码还存在其他问题。这一次，fgetc调用不会在出现错误或问题时返回NULL，而是返回EOF。否则，它将返回从文件中读取的字符

此外，这种情况下的两个fgets将导致读取两行，尽管您读取的每行仅为文件中的两个字符。

fgets(ch, 3, fp) 只会读取 2 个字符再加上空终止符；如果您要读取 3 个字符，则需要 fgets(ch, 4, fp)。此外，还需要使用 strcmp 来比较字符串。

还有，所有这些while循环都用于什么？

if((fgets(ch,3,fp))=="the" || (fgets(ch,3,fp))=="and")

上面这条语句完全不起作用。fgets(ch, 3, fp) 会把文件中的字符读入 ch[10]，但是你不能用 == 来比较字符串。

我的做法是使用 strcmp，并在 fgets 中把大小设为 4（永远不要忘记结尾的 \0）。

您必须使用 strcmp 来比较两个字符串，而不是关系运算符。

下面是我随手想到的写法，可能不是最优的方式，但应该很容易阅读和理解：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* True when c is one of the separators that delimit words in the file. */
#define WHITE_SPACE(c) ((c)==' ' || (c)=='\r' || (c)=='\n' || (c)=='\t')

/**
 * Count how many whitespace-delimited tokens in a file exactly match any of
 * the given words (case-sensitive, whole-token comparison).
 *
 * @param fileName    path of the text file to scan
 * @param numOfWords  number of entries in words
 * @param words       array of numOfWords NUL-terminated strings to look for
 * @return total number of matching tokens, or -1 if the file cannot be
 *         opened or read, or memory cannot be allocated
 */
int CountWords(const char* fileName,int numOfWords,const char* words[])
{
    int count = 0;
    FILE* fp = fopen(fileName,"r");
    if (fp == NULL)
        return -1;

    /* Determine the file size so the whole file can be read in one go. */
    long size = -1;
    if (fseek(fp,0,SEEK_END) == 0)
        size = ftell(fp);
    if (size < 0 || fseek(fp,0,SEEK_SET) != 0)
    {
        fclose(fp);
        return -1;
    }

    /* +1 keeps the request non-zero for an empty file. */
    char* buf = malloc((size_t)size + 1);
    if (buf == NULL)
    {
        fclose(fp);
        return -1;
    }

    /* In text mode fewer bytes than `size` may be delivered, so trust the
       count returned by fread rather than the value reported by ftell. */
    size_t len = fread(buf,1,(size_t)size,fp);
    fclose(fp);

    size_t pos = 0;
    while (pos < len)
    {
        /* Skip the separators in front of the next token. */
        while (pos < len && WHITE_SPACE(buf[pos]))
            pos++;

        size_t start = pos;
        while (pos < len && !WHITE_SPACE(buf[pos]))
            pos++;

        size_t tokenLen = pos - start;
        if (tokenLen == 0)
            break;  /* only trailing separators were left */

        for (int n=0; n<numOfWords; n++)
        {
            size_t wordLen = strlen(words[n]);
            if (wordLen == tokenLen && memcmp(buf+start,words[n],wordLen) == 0)
                count++;
        }
    }

    free(buf);
    return count;
}

但是，请注意，我没有像上面所说的那样编译或测试它，这是我头脑中的想法…

请看一看

您还可以在github中找到的实现示例

if((fgets(ch,3,fp))=="the" || (fgets(ch,3,fp))=="and")

有几个问题：

不能用 == 运算符比较字符串的值，您需要使用 strcmp 库函数；您也没有把同一份输入分别与 "the" 和 "and" 进行比较：当第一次比较失败时，您又从输入中读取了接下来的字符。如果你把输入和比较操作分别抽象成函数，生活会更轻松；在高层次上，它看起来是这样的：

#define MAX_WORD_LENGTH 10 // or however big it needs to be
...
char word[MAX_WORD_LENGTH + 1];
...
while ( getNextWord( word, sizeof word, fp )) // will loop until getNextWord 
{                                             // returns false (error or EOF)
  if ( match( word ) )
    count++;
}

getNextWord 函数处理所有输入；它从输入流中读取字符，直到识别出一个单词或目标缓冲区中没有剩余空间为止。在这个特殊的例子中，我们假设一个单词只是一串连续的非空白字符，这意味着标点符号会被视为单词的一部分。如果你还想识别标点符号，事情就会难一些；例如，单引号 ' 可能是引用字符（如 'hello'），在这种情况下它不应算作单词的一部分；它也可能是缩略语或所有格（possessive）的一部分（如 it's、Joe's），在这种情况下它应算作单词的一部分。

#include <ctype.h>
...
/**
 * Read the next whitespace-delimited word from fp into target.
 *
 * Leading whitespace is skipped. Non-whitespace characters are then copied
 * into target until trailing whitespace, EOF, or a full buffer is reached.
 * The result is always NUL-terminated. A word longer than targetSize - 1
 * characters is returned in pieces over successive calls.
 *
 * @param target      destination buffer
 * @param targetSize  size of target in bytes, including room for the '\0'
 * @param fp          input stream to read from
 * @return 1 if at least one character was stored, 0 on EOF/error or when
 *         the buffer cannot hold any character
 */
int getNextWord( char *target, size_t targetSize, FILE *fp )
{
  size_t i = 0;
  int c;

  /* Without this guard, targetSize - 1 would wrap around for size 0. */
  if ( target == NULL || targetSize == 0 )
  {
    return 0;
  }

  /*
   * Check the remaining capacity BEFORE reading: otherwise a character is
   * consumed from the stream and silently dropped once the buffer is full.
   */
  while ( i < targetSize - 1 && (c = fgetc( fp )) != EOF )
  {
    if ( isspace( c ) )
    {
      if ( i == 0 )
      {
        continue;     // still skipping leading whitespace
      }
      break;          // trailing whitespace ends the word
    }

    target[i++] = (char) c;
  }

  target[i] = 0;      // add 0 terminator to string
  return i > 0;       // if i == 0, then we did not successfully read a word
}

请注意，直接用 strcmp 比较是区分大小写的；"The" 与 "the" 比较的结果是不相等。如果需要不区分大小写的比较，则必须复制输入字符串并将其全部转换为小写，然后将该副本与目标进行比较：

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* ... */
/**
 * Case-insensitively test whether word is one of the target words
 * ("and" or "the").
 *
 * A lowercase copy of word is compared against each target in turn.
 * If the copy cannot be allocated, an error is printed to stderr and the
 * program exits with a failure status.
 *
 * @param word  NUL-terminated word to test
 * @return non-zero if word matches a target, 0 otherwise
 */
int match( const char *word )
{
  static const char *const targets[] = {"and", "the", NULL};
  const char *const *t = targets;
  int found;

  char *wcopy = malloc( strlen( word ) + 1 );
  if ( !wcopy )
  {
    fprintf( stderr, "malloc failure in match: fatal error, exiting\n" );
    exit( EXIT_FAILURE );
  }

  const char *w = word;
  char *c = wcopy;

  /* tolower requires a value representable as unsigned char (or EOF). */
  while ( *w )
  {
    *c++ = (char) tolower( (unsigned char) *w++ );
  }
  *c = '\0';          // the copy loop stops before the terminator, so add it

  /* Walk the NULL-terminated target list until a match or the sentinel. */
  while ( *t && strcmp( *t, wcopy ) != 0 )
  {
    t++;
  }

  found = ( *t != NULL );  // true if we matched either "the" or "and"
  free( wcopy );
  return found;
}

“没有得到任何结果”是什么意思？您的输出是什么？请先让您的代码至少达到能够编译的状态，这会让您离目标更近一步。此外，您需要完全重新考虑您的算法，因为单词不一定出现在三个字符的边界上。最后，您的代码没有检查单词是否独立出现，因此像 these 或 stand 这样的单词可能会被错误地计数。我认为待比较的单词可能包含在另一个字符串中，因此有必要先把各个单词切分出来。要处理单个单词，您可能应该使用类似于 strtok 或 strtok_r 的函数。另见：

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* ... */
/**
 * Case-insensitively test whether word is one of the target words
 * ("and" or "the").
 *
 * A lowercase copy of word is compared against each target in turn.
 * If the copy cannot be allocated, an error is printed to stderr and the
 * program exits with a failure status.
 *
 * @param word  NUL-terminated word to test
 * @return non-zero if word matches a target, 0 otherwise
 */
int match( const char *word )
{
  static const char *const targets[] = {"and", "the", NULL};
  const char *const *t = targets;
  int found;

  char *wcopy = malloc( strlen( word ) + 1 );
  if ( !wcopy )
  {
    fprintf( stderr, "malloc failure in match: fatal error, exiting\n" );
    exit( EXIT_FAILURE );
  }

  const char *w = word;
  char *c = wcopy;

  /* tolower requires a value representable as unsigned char (or EOF). */
  while ( *w )
  {
    *c++ = (char) tolower( (unsigned char) *w++ );
  }
  *c = '\0';          // the copy loop stops before the terminator, so add it

  /* Walk the NULL-terminated target list until a match or the sentinel. */
  while ( *t && strcmp( *t, wcopy ) != 0 )
  {
    t++;
  }

  found = ( *t != NULL );  // true if we matched either "the" or "and"
  free( wcopy );
  return found;
}