用C语言对文件进行排序_C_Text Files_Stdio

用C语言对文件进行排序

用C语言对文件进行排序,c,text-files,stdio,C,Text Files,Stdio,我正试图编写一个程序，打开一个文本文件，读取该文件，将大写字母改为小写字母，然后计算该单词在该文件中出现的次数，并将结果打印到一个新的文本文件中我的代码如下： #include <stdio.h> #include <stdlib.h> #include <conio.h> #include <ctype.h> #include <string.h> int main() { FILE *fileIN; FILE

我正试图编写一个程序，打开一个文本文件，读取该文件，将大写字母改为小写字母，然后计算该单词在该文件中出现的次数，并将结果打印到一个新的文本文件中

我的代码如下：

#include <stdio.h>
#include <stdlib.h>
#include <conio.h>
#include <ctype.h>
#include <string.h>

/*
 * Reads whitespace-delimited words from "input.txt", lowercases the first
 * character of each word when it is an uppercase letter, echoes every word
 * to stdout followed by a space, and writes every word on its own line to
 * "output.txt".  Finally waits for a key press via getch().
 *
 * NOTE(review): only str[0] is ever examined, so uppercase letters later in
 * a word are left unchanged.
 */
int main()
{

    FILE *fileIN;
    FILE *fileOUT;
    char str[255];
    char c;
    int i = 0;

    fileIN = fopen ("input.txt", "r");
    fileOUT = fopen ("output.txt", "w");

    /* If either open failed, only a message is printed; a stream that did
       open successfully is not closed on this path. */
    if (fileIN == NULL || fileOUT == NULL)
    {
        printf("Error opening files\n");
    }

    else
    {
        /* NOTE(review): feof() only reports end-of-file after a read has
           already hit it, and the fscanf() result below is not checked, so
           when the input ends with trailing whitespace the last word is
           processed a second time. */
        while(! feof(fileIN)) // read one word, transform it, write it out
        {
            /* NOTE(review): "%s" has no field width, so a word of 255 or
               more characters overflows str. */
            fscanf(fileIN, "%s", str); // read the next whitespace-delimited word


            i = 0;
            c = str[i];
            if (isupper(c)) // lowercase the first character only
            {
                c =(tolower(c));
                /* putchar() writes c to stdout and returns it, so the
                   lowercase letter is stored back AND printed once extra
                   on stdout just before the word itself. */
                str[i] = putchar(c);
            }

            printf("%s ", str); // echo the word to stdout

                            fprintf(fileOUT, "%s\n", str); // one word per line in output.txt
        }




        fclose(fileIN);
        fclose(fileOUT);
    }
    /* getch() comes from the non-standard <conio.h>; presumably used to keep
       the console window open until a key is pressed. */
    getch();
}

#包括
#包括
#包括
#包括
#包括
int main（）
{
文件*fileIN；
文件*文件输出；
char-str[255]；
字符c；
int i=0；
fileIN=fopen（“input.txt”，“r”）；
fileOUT=fopen（“output.txt”，“w”）；
if（fileIN==NULL | | fileOUT==NULL）
{
printf（“打开文件时出错\n”）；
}
其他的
{
while（！feof（fileIN））//读写循环
{
fscanf（fileIN，“%s”，str）；//正在读取文件
i=0；
c=str[i]；
if（isupper（c））//将任何大写更改为小写
{
c=（tolower（c））；
str[i]=putchar（c）；
}
printf（“%s”，str）；//打印输出
fprintf（fileOUT，“%s\n”，str）；//打印到文件中
}
fclose（fileIN）；
fclose（fileOUT）；
}
getch（）；
}

input.txt文件包含以下内容：“The rain in Spain falls mainly in the plane”。不要问为什么。程序按原样运行后，输出如下所示：the rain in spain falls mainly in the plane

我已设法把大写的字母改成小写。我现在很难理解如何计算每个单词的出现次数。例如，在输出中，我希望它显示“the 2”，意思是“the”已经出现了2次，这也意味着我不希望在该文件中再重复存储“the”

我在考虑strcmp和strcpy，但不确定如何以我想要的方式使用它们

非常感谢您的帮助

（如果格式不正确，很抱歉）

您可能需要创建一个哈希表，其中单词作为键，频率作为值

草图构思：

识别单词，即用空格分隔的字母数字字符串，尝试使用strtok（）
对于每一个单词：
- 在基于哈希表的词典中搜索单词
  - 如果找到：增加频率
  - 如果找不到：在字典中插入一个新条目作为（单词，1）

最后，打印词典的内容，即对于所有条目，

entry.word

和

entry.frequency

请参阅此问答了解详细信息：它基于C语言的“圣经”——《C程序设计语言》（The C Programming Language）第6.6节

根据OP的评论更新：

哈希表只是一种高效的表；如果您不想使用它，仍然可以使用普通的表。下面是一些思路：

/* A single table slot: the text of a word plus its occurrence tally. */
struct WordFreq {
    char word[N];   /* storage for the word; N bounds its length */
    int  freq;      /* how many times the word has been counted */
};
typedef struct WordFreq WordFreq;

/* Flat lookup table with room for T distinct words. */
WordFreq wordFreqTable[T];

(N is the maximum length of a single word, T is the maximum number of unique words)

对于搜索和插入，您可以在表中进行线性搜索：

for (int i = 0; i != T; ++i) {

简单示例（需要错误捕获、释放内存、使用qsort排序等）

如果你不关心性能，可以用蛮力计数（创建一个字符指针列表，指向动态分配的单词副本；在把单词加入列表之前先在列表中搜索它，如果找到就更新计数器，否则以初始计数1将其加入）。如果有时间，您还可以在创建列表时对其进行二分插入排序，从而进一步提高性能。但在（a）您确定要处理大文件（成百上千个字）并且（b）您已经先把基本算法写出来之前，不必这样做。

另外，你对 feof 的用法是错误的。

我用的是老师演示过的、以及发给我们的幻灯片里的写法，我没有遇到你所链接的问题中描述的错误。恕我直言，认为教授在软件工程方面绝对正确的想法，在你离开学术界后会让你像伤口上撒盐一样痛。在这里，你可以请教成千上万的工程师，我敢保证，其中一些人已经忘掉的关于这门语言及其标准、标准库以及两者行为的知识，比大多数教师知道的还要多。无论如何，你完全可以质疑你读到的内容。我并不是说他没有错，我只是说我还没有遇到这个问题，在遇到之前我不会担心它，尤其是考虑到我才上了8个星期的课。我还没有在课堂上学过哈希表，所以我想老师不会允许我使用它。谢谢你的更新，我会努力实现并回复你，告诉你我成功了还是失败了 xD。@TheAngryBr1t 现在进展如何？希望你已经解决了这个问题。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#define BUFFSIZE 1024

/* One row of the word table: a heap-allocated copy of a word and the number
 * of times that word has been passed to WordCount(). */
typedef struct _wc {
    char *word;
    int count;
} WordCounter;

/* Table of distinct words in order of first appearance; grown on demand by
 * WordCount().  NULL until the first word is recorded. */
WordCounter *WordCounters = NULL;
/* Number of rows of WordCounters currently in use. */
int WordCounters_size = 0;

/* Rows added to the table each time it runs out of room. */
enum { WORDCOUNT_GROW_STEP = 4 };

/* Reports an allocation failure on stderr and terminates the program.
 * WordCount() returns void, so there is no way to hand the error back. */
static void WordCount_fail(void){
    fprintf(stderr, "WordCount: out of memory\n");
    exit(EXIT_FAILURE);
}

/* Returns a freshly malloc'ed copy of `word` (never NULL).  Written by hand
 * because strdup() is POSIX rather than ISO C11. */
static char *WordCount_dup(const char *word){
    size_t bytes = strlen(word) + 1;
    char *copy = malloc(bytes);

    if(NULL==copy){
        WordCount_fail();
    }
    memcpy(copy, word, bytes);
    return copy;
}

/*
 * Records one occurrence of `word` in the global WordCounters table.
 *
 * If the word is already present (exact, case-sensitive strcmp match) its
 * count is incremented; otherwise a new row holding a private copy of the
 * word is appended with count 1.  The caller's buffer is not retained.
 *
 * word: NUL-terminated string; a NULL pointer is ignored.
 *
 * The table and every stored word are heap-allocated and owned by the
 * table; whoever tears the table down must free each .word and then
 * WordCounters itself.  On allocation failure a message is printed to
 * stderr and the program exits with EXIT_FAILURE.
 */
void WordCount(char *word){
    static int size = 0;    /* rows currently allocated for WordCounters */
    int i;

    if(NULL==word){
        return;
    }
    if(NULL==WordCounters){
        /* A NULL table means "empty", including after a caller has freed
           and reset it, so start again from zero capacity. */
        size = 0;
        WordCounters_size = 0;
    }
    for(i=0;i<WordCounters_size;++i){
        if(0==strcmp(WordCounters[i].word, word)){
            WordCounters[i].count += 1;
            return;
        }
    }
    if(WordCounters_size >= size){
        int new_size = WordCounters_size + WORDCOUNT_GROW_STEP;
        /* Go through a temporary so the existing table is not lost if
           realloc() fails; realloc(NULL, n) behaves like malloc(n). */
        WordCounter *grown = realloc(WordCounters, sizeof *grown * (size_t)new_size);

        if(NULL==grown){
            WordCount_fail();
        }
        WordCounters = grown;
        size = new_size;
    }
    WordCounters[WordCounters_size].word = WordCount_dup(word);
    WordCounters[WordCounters_size].count = 1;
    ++WordCounters_size;
}

/*
 * Reads text from standard input line by line, lowercases it, splits it
 * into tokens on punctuation and whitespace, counts every token that starts
 * with a letter via WordCount(), and finally prints one "word:count" line
 * per distinct word in order of first appearance.
 *
 * Returns 0.
 *
 * NOTE: a line longer than BUFFSIZE-1 characters is consumed in several
 * fgets() chunks, so a word straddling a chunk boundary is counted as two.
 */
int main(void){
    static const char delimiters[] = ".,!?\"'#$%&()=@ \t\n\\;:[]/*-+<>";
    char buff[BUFFSIZE];
    char *wordp;
    char *p;
    int i;

    while(fgets(buff, BUFFSIZE, stdin)){
        /* Portable replacement for the non-standard strlwr().  The cast to
           unsigned char keeps <ctype.h> calls defined for bytes >= 0x80. */
        for(p=buff; *p; ++p){
            *p = (char)tolower((unsigned char)*p);
        }
        for(wordp=strtok(buff, delimiters); NULL!=wordp; wordp=strtok(NULL, delimiters)){
            /* Only tokens that begin with a letter are counted; isalpha()
               already excludes digits. */
            if(isalpha((unsigned char)*wordp)){
                WordCount(wordp);
            }
        }
    }
    for(i=0;i<WordCounters_size;++i){
        printf("%s:%d\n", WordCounters[i].word, WordCounters[i].count);
        free(WordCounters[i].word);     /* WordCount() stored a heap copy */
    }
    /* Release the table itself and leave the globals in the empty state. */
    free(WordCounters);
    WordCounters = NULL;
    WordCounters_size = 0;

    return 0;
}

>WordCount.exe
The rain in Spain falls mainly in the plane
^Z
the:2
rain:1
in:2
spain:1
falls:1
mainly:1
plane:1