C strtok正在创建大小为1的无效读取，无法释放（令牌）_C_Valgrind_Free_Strtok

C strtok正在创建大小为1的无效读取，无法释放（令牌）

C strtok正在创建大小为1的无效读取，无法释放（令牌）,c,valgrind,free,strtok,C,Valgrind,Free,Strtok,正在尝试调试此简单c程序： #include <stdbool.h> #include <stdlib.h> #include <stdio.h> #include <string.h> #define MAX_WORD_SIZE 60 int wordCnt = 0; int main(void){ //open dictionary FILE *ptr = fopen("large", "r"); if(ptr == NULL){

正在尝试调试此简单c程序：

#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define MAX_WORD_SIZE 60
int wordCnt = 0;

int main(void){

//open dictionary 
FILE *ptr = fopen("large", "r");
if(ptr == NULL){
  printf("unable to open %s","large");
}

//get file size 
int fileSize;
fseek(ptr, 0 , SEEK_END);
fileSize=ftell(ptr) ;

//get memory for file buffer (read in whole file at once, faster) 
char * buffer = malloc(sizeof(char)*fileSize);

//rewind and read in file
fseek(ptr, 0 , SEEK_SET);
fread(buffer, fileSize, 1, ptr);

//get memory for longest word
char * token = malloc(sizeof(char)*MAX_WORD_SIZE);

以下是valgrind发出的错误消息：

valgrind ./test
==16233== Memcheck, a memory error detector
==16233== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
==16233== Using Valgrind-3.10.1 and LibVEX; rerun with -h for copyright info
==16233== Command: ./test
==16233== 
==16233== Invalid read of size 1
==16233==    at 0x5E4496C: strtok (strtok.S:137)
==16233==    by 0x42D848: main (test.c:43)
==16233==  Address 0x62dd8bc is 0 bytes after a block of size 1,439,228 alloc'd
==16233==    at 0x4C2AB80: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==16233==    by 0x42D728: main (test.c:29)
==16233== 
==16233== Invalid read of size 1
==16233==    at 0x5E4499C: strtok (strtok.S:163)
==16233==    by 0x42D848: main (test.c:43)
==16233==  Address 0x62dd8bc is 0 bytes after a block of size 1,439,228 alloc'd
==16233==    at 0x4C2AB80: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==16233==    by 0x42D728: main (test.c:29)
==16233== 
==16233== 
==16233== HEAP SUMMARY:
==16233==     in use at exit: 60 bytes in 1 blocks
==16233==   total heap usage: 3 allocs, 2 frees, 1,439,856 bytes allocated
==16233== 
==16233== 60 bytes in 1 blocks are definitely lost in loss record 1 of 1
==16233==    at 0x4C2AB80: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==16233==    by 0x42D7B1: main (test.c:36)
==16233== 
==16233== LEAK SUMMARY:
==16233==    definitely lost: 60 bytes in 1 blocks
==16233==    indirectly lost: 0 bytes in 0 blocks
==16233==      possibly lost: 0 bytes in 0 blocks
==16233==    still reachable: 0 bytes in 0 blocks
==16233==         suppressed: 0 bytes in 0 blocks
==16233== 
==16233== For counts of detected and suppressed errors, rerun with: -v
==16233== ERROR SUMMARY: 3 errors from 3 contexts (suppressed: 0 from 0)

strtok 函数返回的指针指向您在初次调用时提供的缓冲区内部，因此不应对该指针调用 `free`。报告的内存泄漏是因为您先分配了内存并让 token 指向它，然后在标记化循环中又让 token 指向 buffer 内部的内存，原先分配的那块内存就再也无法释放了。

一个典型的循环使用
char *token = strtok(buffer, "\r\n");
while (token != NULL)
{
    ++wordCnt;
    token = strtok(NULL, "\r\n");
}

+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
                                        ^
+-------+                               |
| token | ------------------------------/
+-------+

假设缓冲区包含字符串“Hello\nWorld”

在内存中，它看起来像这样：
char *token = strtok(buffer, "\r\n");
while (token != NULL)
{
    ++wordCnt;
    token = strtok(NULL, "\r\n");
}

+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
                                        ^
+-------+                               |
| token | ------------------------------/
+-------+
那你就有
char *token = strtok(buffer, "\r\n");
while (token != NULL)
{
    ++wordCnt;
    token = strtok(NULL, "\r\n");
}

+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
                                        ^
+-------+                               |
| token | ------------------------------/
+-------+
注意，我没有乘以 `sizeof(char)`，因为规范将其定义为始终等于 1。strtok 函数返回的指针指向您在初次调用时提供的缓冲区内部，不应对该指针调用 `free`。
报告的内存泄漏是因为您分配内存并使令牌最初指向该内存。然后在标记化循环中，使标记
指向缓冲区内的内存

一个典型的循环使用
char *token = strtok(buffer, "\r\n");
while (token != NULL)
{
    ++wordCnt;
    token = strtok(NULL, "\r\n");
}

+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
                                        ^
+-------+                               |
| token | ------------------------------/
+-------+

假设缓冲区包含字符串“Hello\nWorld”

在内存中，它看起来像这样：
char *token = strtok(buffer, "\r\n");
while (token != NULL)
{
    ++wordCnt;
    token = strtok(NULL, "\r\n");
}

+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
                                        ^
+-------+                               |
| token | ------------------------------/
+-------+
那你就有
char *token = strtok(buffer, "\r\n");
while (token != NULL)
{
    ++wordCnt;
    token = strtok(NULL, "\r\n");
}

+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+     +---+---+---+---+---+----+---+---+---+---+---+----+
                                        ^
+-------+                               |
| token | ------------------------------/
+-------+
请注意，我没有乘以 sizeof(char)，因为规范规定它始终等于 1。
作为后续解释，您可能会问：为什么不能把指向字符串任意位置的指针传给 free() 来释放该字符串？free 函数难道不能自行遍历到分配块的开头或结尾吗？
malloc以块的形式分配数据。每个块都有一个标头，用于跟踪分配的大小。为了能够释放该块，每个free（）必须能够访问标头，以某种方式将其标记为free，可以将长度设置为0，也可以将整个标头归零，这取决于实现
问题在于，free() 是根据传给它的指针来推定标头位置的。它无法从其余数据中辨认出标头，必须知道标头相对于该指针的位置。
为了演示，让我们制作一个假装的、微不足道的内存分配器
/* Header of the pretend allocator: a bookkeeping field followed by the caller's bytes. */
typedef struct s_memory_block {
    int Size;       /* size requested by the caller; written by AllocateMemory */
    char Memory[1]; /* first byte of the caller-visible data; AllocateMemory returns its address */
} memory_block;

/*
 * Pretend allocation routine: obtains room for the int bookkeeping field plus
 * Size data bytes, records Size in the header, and hands back the address of
 * the data area that follows the header.
 */
char *AllocateMemory(size_t Size)
{
    /* sizeof(char) is 1 by definition, so the data area needs exactly Size bytes. */
    memory_block *Header = SomeOperatingSystemMemoryAllocator(sizeof(int) + Size);

    Header->Size = Size;
    return Header->Memory;
}

void FreeMemory(char *Memory)
{
    memory_block *Block;
    Block = Memory - sizeof(int); // assume the header is right in front of the pointer
    Block->Length = 0;
}

显然，这是一个愚蠢而琐碎的例子，但它可能会帮助您理解。把簿记信息存放在所返回指针的紧前方，在很多方面都非常有用。以肖恩·巴雷特（Sean Barrett）的解释为例。
作为后续解释，您可能会问：为什么不能把指向字符串任意位置的指针传给 free() 来释放该字符串？free 函数难道不能自行遍历到分配块的开头或结尾吗？
malloc以块的形式分配数据。每个块都有一个标头，用于跟踪分配的大小。为了能够释放该块，每个free（）必须能够访问标头，以某种方式将其标记为free，可以将长度设置为0，也可以将整个标头归零，这取决于实现
问题在于，free() 是根据传给它的指针来推定标头位置的。它无法从其余数据中辨认出标头，必须知道标头相对于该指针的位置。
为了演示，让我们制作一个假装的、微不足道的内存分配器
/* Header of the pretend allocator: a bookkeeping field followed by the caller's bytes. */
typedef struct s_memory_block {
    int Size;       /* size requested by the caller; written by AllocateMemory */
    char Memory[1]; /* first byte of the caller-visible data; AllocateMemory returns its address */
} memory_block;

/*
 * Pretend allocation routine: obtains room for the int bookkeeping field plus
 * Size data bytes, records Size in the header, and hands back the address of
 * the data area that follows the header.
 */
char *AllocateMemory(size_t Size)
{
    /* sizeof(char) is 1 by definition, so the data area needs exactly Size bytes. */
    memory_block *Header = SomeOperatingSystemMemoryAllocator(sizeof(int) + Size);

    Header->Size = Size;
    return Header->Memory;
}

void FreeMemory(char *Memory)
{
    memory_block *Block;
    Block = Memory - sizeof(int); // assume the header is right in front of the pointer
    Block->Length = 0;
}

显然，这是一个愚蠢而琐碎的例子，但它可能会帮助您理解。把簿记信息存放在所返回指针的紧前方，在很多方面都非常有用。可以参考肖恩·巴雷特（Sean Barrett）的做法。
`char *buffer = malloc(sizeof(char) * fileSize);` --> `char *buffer = malloc(fileSize + 1);`；`fread(buffer, fileSize, 1, ptr);` --> `fread(buffer, fileSize, 1, ptr); buffer[fileSize] = 0;`；`char *token = malloc(sizeof(char) * MAX_WORD_SIZE);` --> `char *token = buffer; // set dummy`；`free(token);`：删除这一行。太棒了，谢谢你。`char *buffer = malloc(sizeof(char) * fileSize);` --> `char *buffer = malloc(fileSize + 1);`；`fread(buffer, fileSize, 1, ptr);` --> `fread(buffer, fileSize, 1, ptr); buffer[fileSize] = 0;`；`char *token = malloc(sizeof(char) * MAX_WORD_SIZE);` --> `char *token = buffer; // set dummy`；`free(token);`：删除这一行。太棒了，谢谢。我不明白。我只为 token 分配了一次内存，最后也只释放了一次。@DCR 请查看我更新的答案，希望它能让我的回答更清楚。当我执行 `token = strtok(buffer, "\r\n")` 时，我会让 token 中的第一行以空字符结尾。在随后的调用中，我会让 token 中的每一行都以空字符结尾，即使在第一次调用strtok
，尝试打印出buffer
后，缓冲区没有任何空值（可能在末尾除外）@DCR。strtok
函数通过用终止符替换找到的令牌来修改缓冲区。你应该阅读更多的信息，我不明白。我只为令牌分配了一次内存，最后只释放了一次。@DCR请查看我更新的答案，希望它能让我的回答更清楚。当我执行token=strtok（缓冲区“\r\n”）时，我会终止令牌null中的第一行。在随后的调用中，我会终止令牌null中的每一行，即使在第一次调用strtok
，尝试打印出buffer
后，缓冲区没有任何空值（可能在末尾除外）@DCR。strtok
函数会把找到的分隔符替换为字符串终止符，从而修改缓冲区。您应该阅读相关文档（例如 strtok 的参考手册）以了解更多信息。为什么使用 char Memory[1]？这个数组只有一个元素的空间。C 不执行边界检查，因此您可以访问超出数组边界的内存。