C 内存泄漏-如何为在另一个结构中作为线程参数传递的Typdef结构分配内存?

C 内存泄漏-如何为在另一个结构中作为线程参数传递的Typdef结构分配内存?,c,linux,C,Linux,我是C语言的新手,正在努力进行正确的内存管理,并且在我的程序中遇到了很多seg错误 if (strcmp(words[i].word, token) == 0) { inArray = i; } 我在这里的最终目标是将文本文件和输入的线程数作为用户参数,获取文件大小,然后根据用户输入的线程数拆分文件 if (strcmp(words[i].word, token) == 0) { inArray = i; } 然后,每个线程将读取文件的一部分,然后从其读取的块中提取令牌。如果令牌大于

我是C语言的新手,正在努力进行正确的内存管理,并且在我的程序中遇到了很多seg错误

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
我在这里的最终目标是将文本文件和输入的线程数作为用户参数,获取文件大小,然后根据用户输入的线程数拆分文件

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
然后,每个线程将读取文件的一部分,然后从其读取的块中提取令牌。如果令牌大于5个字符,则将其与该令牌在整个文本中出现的次数一起添加到数组中。因此,我希望最终能得到一个列表,列出文本中使用的前n个单词>5个

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
然而,这可能是我第三次使用C语言了,我有点纠结,并且犯了不少错误。我一直在尝试使用valgrind和其他调试工具来解决这个问题,但我很迷茫

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
从valgrind,我得到以下信息:

==27634== 1 errors in context 1 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634==    at 0x50B7B4C: vfprintf (vfprintf.c:1642)
==27634==    by 0x50BFF25: printf (printf.c:33)
==27634==    by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634== 
==27634== 
==27634== 1 errors in context 2 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634==    at 0x50B7014: vfprintf (vfprintf.c:1642)
==27634==    by 0x50BFF25: printf (printf.c:33)
==27634==    by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634== 
==27634== 
==27634== 1 errors in context 3 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634==    at 0x50B3875: _itoa_word (_itoa.c:179)
==27634==    by 0x50B6F0D: vfprintf (vfprintf.c:1642)
==27634==    by 0x50BFF25: printf (printf.c:33)
==27634==    by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634== 
==27634== 
==27634== 1 errors in context 4 of 5:
==27634== Use of uninitialised value of size 8
==27634==    at 0x50B386B: _itoa_word (_itoa.c:179)
==27634==    by 0x50B6F0D: vfprintf (vfprintf.c:1642)
==27634==    by 0x50BFF25: printf (printf.c:33)
==27634==    by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634== 
==27634== 
==27634== 1 errors in context 5 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634==    at 0x50B78DA: vfprintf (vfprintf.c:1642)
==27634==    by 0x50BFF25: printf (printf.c:33)
==27634==    by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634== 
==27634== ERROR SUMMARY: 5 errors from 5 contexts (suppressed: 0 from 0)
if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
我还看到这样一条消息:“地址xxx是块大小60 alloc'd后的0字节”

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
我认为我的问题在于:

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
这是因为我没有为wordStruct单词正确分配内存吗?我不知道如何修复它,任何帮助都将不胜感激

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
谢谢

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
完整代码如下

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
#include <time.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>


typedef struct {
    char word[50];
    int count;
    int totalWords;
} wordsStruct ;



struct argStruct {
    FILE *file;
    int start;
    int end;
    int count;
    wordsStruct *words;
};



int stringLength(char s[]) {
    int c = 0;
    while (s[c] != '\0')
        c++;
    return c;
}



void groomString(char *line){

    for (int i = 0; line[i] != '\0'; ++i) {
        line[i] = tolower(line[i]);

        while (!((line[i] >= 'a' && line[i] <= 'z') || line[i] == '\0')) {


            for (int j = i; line[j] != '\0'; j++) {
                line[j] = line[j + 1];
                line[j] = '\0';
            }
        }
    }
}

int counter(int n){
    static int test;
    test = n;

    if(n = 0){
        return test;
    } else {
        n++;
        return n;
    }

}


void processFile(void *input) {
    struct argStruct params = *(struct argStruct *) input;

    wordsStruct *words = params.words;
    FILE *textFile = params.file;
    int start = params.start;
    int end = params.end;
    int count = params.count;

    int size = (end - start) + 10;
    char delim[] = " \t\v\n\r";


    char *readFile = (malloc(sizeof(char) * size +10));
    fread(readFile, 1, size, textFile);


    char *copy = (malloc(sizeof(char) * size +10));
    strcpy(copy, readFile);

    char *saveptr;

    int inArray;
    int length;
    static int added;

    char *token = strtok_r(copy, delim, &saveptr);

    while (token) {

        groomString(token);
        length = stringLength(token);

        if (length > 5) {

            inArray = 0;


            for (int i = 0; i < added; i++) {
                if (strcmp(words[i].word, token) == 0) {
                    inArray = i;
                }
            }

            if (inArray == 0) {
                added++;
                strcpy(words[added].word, token);
                words[added].count = 1;
            } else {
                words[inArray].count++;
            }
        }

        token = strtok_r(NULL, delim, &saveptr);
    }


     words->totalWords = added;

    free(token);
    free(readFile);
}




int main(int argc, char *argv[])
{
    FILE *pfile;
    int threadCount = 0, fileSize = 0, divide = 0;
    wordsStruct *allWords = (wordsStruct *) malloc( sizeof(wordsStruct));


    if (argc > 2)
    {
        pfile = fopen( argv[1], "r");
        if (pfile == NULL){
            perror("FILE OPEN FAILURE");
        }

        threadCount = atoi(argv[2]);

        pthread_t * thread = malloc(sizeof(pthread_t)* threadCount *10);

        fseek(pfile, 0, SEEK_END);
        fileSize= ftell(pfile);
        fseek(pfile, 0, SEEK_SET);


        divide = (fileSize/threadCount);

        struct argStruct arguments;
        arguments.file = pfile;
        arguments.words = allWords;

        int j = 0;
        for(int i = 0; i < threadCount; i++) {

            arguments.start = j;
            arguments.end = j+divide;

            arguments.count = i;

            struct argStruct *passArgs = malloc(sizeof *passArgs);
            *passArgs = arguments;

            pthread_create(&thread[i], NULL, (void *) processFile, passArgs);

            j+=divide;
        }


        for (int i = 0; i < threadCount +1; i++){
            pthread_join(thread[i], NULL);
        }

        fclose(pfile);

    } else {
        printf("Please enter text file name and number of threads");
    }

return 0;
}

#包括
#包括
#包括
#包括
#包括
#包括
类型定义结构{
字符字[50];
整数计数;
整句话;
}文字结构;
结构argStruct{
文件*文件;
int启动;
内端;
整数计数;
单词结构*单词;
};
int stringLength(字符s[]{
int c=0;
而(s[c]!='\0')
C++;
返回c;
}
无效字符串(字符*行){
对于(int i=0;行[i]!='\0';++i){
第[i]行=tolower(第[i]行);
而(!((第[i]>='a'行和第[i]5行){
inArray=0;
对于(int i=0;itotalWords=已添加;
免费(代币);
免费(readFile);
}
int main(int argc,char*argv[])
{
文件*pfile;
int threadCount=0,fileSize=0,divide=0;
wordsStruct*allWords=(wordsStruct*)malloc(sizeof(wordsStruct));
如果(argc>2)
{
pfile=fopen(argv[1],“r”);
if(pfile==NULL){
perror(“文件打开失败”);
}
threadCount=atoi(argv[2]);
pthread_t*thread=malloc(sizeof(pthread_t)*threadCount*10);
fseek(pfile,0,SEEK_END);
fileSize=ftell(pfile);
fseek(pfile,0,SEEK_SET);
divide=(文件大小/线程计数);
结构argStruct参数;
arguments.file=pfile;
arguments.words=所有单词;
int j=0;
对于(int i=0;i
对于初学者,在最后一个线程上,您需要[设置
结束后
]:

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
if(i==(threadCount-1))arguments.end=fileSize;

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
获取最后一段中的所有字节,并且不超过EOF。每当文件大小不是线程数的精确倍数时,就需要这样做

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}

为了防止传递给线程的
argStruct
内存泄漏,在
processFile
的底部,您需要
free(input)

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
此外,线程共享某些资源(请参见
manpthreads
)。尤其是打开文件描述符。因此,在访问
textFile
(例如
pthread\u mutex\u lock(&text\u mutex);

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
而且,每个线程必须对其试图访问的文件部分执行自己的
fseek

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
而且,
size
的“斜率因子”为10。这对于分配的区域来说是安全的,但会导致读取太多数据。最好不要使用
+10
,或者使用:
int-exact\u size=end-start;
。另外,请注意,您正在执行“斜率”当设置
大小时
malloc
中的额外金额,因此不需要这样做

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
另外,请注意,
fread
不能像
fgets
那样保证缓冲区末尾的EOS字符(0x00)。因此,如果要在缓冲区上执行字符串操作,您需要自己强制执行此操作(并且需要“slop”至少为1):

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
因此,我们需要:

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
pthread_mutex_lock(&text_mutex);

fseek(textFile,start,0);
fread(readFile,1,exact_size,textFile);   
readFile[exact_size] = 0;

pthread_mutex_unlock(&text_mutex);
记住,
main
在执行
pthread\u mutex\u create
之前,必须使用
pthread\u mutex\u init
初始化
text\u mutex

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
但是…

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
在此处使用
fread
可能会有问题。当您将文件分段为长度为
divide
的块时,您[可能]正在分割文件,这样第一个线程的最后一行被截断,下一个线程将在看到第一整行之前看到此行的其余部分,依此类推

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}
您最好在
main
中对整个文件执行一个
mmap
,让它扫描缓冲区,查找换行符[或空格],并为每个线程指定一个可用的段