C strtok正在创建大小为1的无效读取,无法释放(令牌)
正在尝试调试此简单c程序:C strtok正在创建大小为1的无效读取,无法释放(令牌),c,valgrind,free,strtok,C,Valgrind,Free,Strtok,正在尝试调试此简单c程序: #include <stdbool.h> #include <stdlib.h> #include <stdio.h> #include <string.h> #define MAX_WORD_SIZE 60 int wordCnt = 0; int main(void){ //open dictionary FILE *ptr = fopen("large", "r"); if(ptr == NULL){
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define MAX_WORD_SIZE 60
int wordCnt = 0;
int main(void){
//open dictionary
FILE *ptr = fopen("large", "r");
if(ptr == NULL){
printf("unable to open %s","large");
}
//get file size
int fileSize;
fseek(ptr, 0 , SEEK_END);
fileSize=ftell(ptr) ;
//get memory for file buffer (read in whole file at once, faster)
char * buffer = malloc(sizeof(char)*fileSize);
//rewind and read in file
fseek(ptr, 0 , SEEK_SET);
fread(buffer, fileSize, 1, ptr);
//get memory for longest word
char * token = malloc(sizeof(char)*MAX_WORD_SIZE);
以下是valgrind发出的错误消息:
valgrind ./test
==16233== Memcheck, a memory error detector
==16233== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
==16233== Using Valgrind-3.10.1 and LibVEX; rerun with -h for copyright info
==16233== Command: ./test
==16233==
==16233== Invalid read of size 1
==16233== at 0x5E4496C: strtok (strtok.S:137)
==16233== by 0x42D848: main (test.c:43)
==16233== Address 0x62dd8bc is 0 bytes after a block of size 1,439,228 alloc'd
==16233== at 0x4C2AB80: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==16233== by 0x42D728: main (test.c:29)
==16233==
==16233== Invalid read of size 1
==16233== at 0x5E4499C: strtok (strtok.S:163)
==16233== by 0x42D848: main (test.c:43)
==16233== Address 0x62dd8bc is 0 bytes after a block of size 1,439,228 alloc'd
==16233== at 0x4C2AB80: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==16233== by 0x42D728: main (test.c:29)
==16233==
==16233==
==16233== HEAP SUMMARY:
==16233== in use at exit: 60 bytes in 1 blocks
==16233== total heap usage: 3 allocs, 2 frees, 1,439,856 bytes allocated
==16233==
==16233== 60 bytes in 1 blocks are definitely lost in loss record 1 of 1
==16233== at 0x4C2AB80: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==16233== by 0x42D7B1: main (test.c:36)
==16233==
==16233== LEAK SUMMARY:
==16233== definitely lost: 60 bytes in 1 blocks
==16233== indirectly lost: 0 bytes in 0 blocks
==16233== possibly lost: 0 bytes in 0 blocks
==16233== still reachable: 0 bytes in 0 blocks
==16233== suppressed: 0 bytes in 0 blocks
==16233==
==16233== For counts of detected and suppressed errors, rerun with: -v
==16233== ERROR SUMMARY: 3 errors from 3 contexts (suppressed: 0 from 0)
strtok 函数返回的指针指向您在初次调用时提供的缓冲区的内部,因此不应该对该指针调用 `free`。
报告的内存泄漏是因为您分配内存并使令牌最初指向该内存。然后在标记化循环中,使标记
指向缓冲区内的内存
一个典型的循环使用
char *token = strtok(buffer, "\r\n");
while (token != NULL)
{
++wordCnt;
token = strtok(NULL, "\r\n");
}
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
^
+-------+ |
| token | ------------------------------/
+-------+
假设缓冲区包含字符串“Hello\nWorld”
在记忆中,它看起来像
char *token = strtok(buffer, "\r\n");
while (token != NULL)
{
++wordCnt;
token = strtok(NULL, "\r\n");
}
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
^
+-------+ |
| token | ------------------------------/
+-------+
那你就有
char *token = strtok(buffer, "\r\n");
while (token != NULL)
{
++wordCnt;
token = strtok(NULL, "\r\n");
}
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
^
+-------+ |
| token | ------------------------------/
+-------+
注意,我没有乘以 `sizeof(char)`,因为规范规定它始终等于 `1`。
strtok 函数返回的指针指向您在初次调用时提供的缓冲区的内部,因此不应该对该指针调用 `free`。
报告的内存泄漏是因为您分配内存并使令牌最初指向该内存。然后在标记化循环中,使标记
指向缓冲区内的内存
一个典型的循环使用
char *token = strtok(buffer, "\r\n");
while (token != NULL)
{
++wordCnt;
token = strtok(NULL, "\r\n");
}
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
^
+-------+ |
| token | ------------------------------/
+-------+
假设缓冲区包含字符串“Hello\nWorld”
在记忆中,它看起来像
char *token = strtok(buffer, "\r\n");
while (token != NULL)
{
++wordCnt;
token = strtok(NULL, "\r\n");
}
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
^
+-------+ |
| token | ------------------------------/
+-------+
那你就有
char *token = strtok(buffer, "\r\n");
while (token != NULL)
{
++wordCnt;
token = strtok(NULL, "\r\n");
}
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
| buffer | --> | H | e | l | l | o | \n | W | o | r | l | d | \0 |
+--------+ +---+---+---+---+---+----+---+---+---+---+---+----+
^
+-------+ |
| token | ------------------------------/
+-------+
请注意,我不会与sizeof(char)
相乘,因为规范中定义它总是等于1
作为后续解释,您可能会问:为什么不能通过向 free() 传递指向字符串任意位置的指针来释放该字符串?free 函数难道不能自行向前或向后找到已分配块的开头或结尾吗?
malloc以块的形式分配数据。每个块都有一个标头,用于跟踪分配的大小。为了能够释放该块,每个free()必须能够访问标头,以某种方式将其标记为free,可以将长度设置为0,也可以将整个标头归零,这取决于实现
问题是,free()根据传递给它的指针来假定标头的位置。它不能仅仅从其余的数据中挑出标题。它必须知道它相对于指针的位置
为了演示,让我们制作一个假装的、微不足道的内存分配器
/* Header-plus-payload layout used by the toy allocator below. */
typedef struct s_memory_block {
int Size; /* bookkeeping field: AllocateMemory stores the requested size here */
char Memory[1]; /* first byte of the payload; AllocateMemory returns the address of Memory[0] */
} memory_block;
/*
 * Toy allocator: obtains a block from the (hypothetical) operating-system
 * allocator, records the requested size in the header and returns a pointer
 * to the payload that follows the header.
 *
 * Size: number of payload bytes requested.
 * Returns the address of the payload, or NULL when the underlying allocator
 * fails (assumes SomeOperatingSystemMemoryAllocator reports failure by
 * returning NULL -- confirm against the real allocator).
 */
char *AllocateMemory(size_t Size)
{
    memory_block *Block;

    Block = SomeOperatingSystemMemoryAllocator(sizeof(int) + sizeof(char) * Size);
    if (Block == NULL) {
        return NULL; /* never write the header through a null pointer */
    }
    Block->Size = (int)Size; /* header field is an int; the narrowing is explicit */
    return &Block->Memory[0];
}
void FreeMemory(char *Memory)
{
memory_block *Block;
Block = Memory - sizeof(int); // assume the header is right in front of the pointer
Block->Length = 0;
}
显然,这是一个愚蠢而琐碎的例子,但它可能会帮助您理解。在返回指针之前存储簿记内存在各种方面都非常有用。以肖恩·巴雷特的解释为例。作为后续解释,您可能会问,为什么不能通过向free()传递指向字符串任何部分的指针来释放字符串。自由函数不能迭代到分配块的开始或结束吗
malloc以块的形式分配数据。每个块都有一个标头,用于跟踪分配的大小。为了能够释放该块,每个free()必须能够访问标头,以某种方式将其标记为free,可以将长度设置为0,也可以将整个标头归零,这取决于实现
问题是,free()根据传递给它的指针来假定标头的位置。它不能仅仅从其余的数据中挑出标题。它必须知道它相对于指针的位置
为了演示,让我们制作一个假装的、微不足道的内存分配器
/* Header-plus-payload layout used by the toy allocator below. */
typedef struct s_memory_block {
int Size; /* bookkeeping field: AllocateMemory stores the requested size here */
char Memory[1]; /* first byte of the payload; AllocateMemory returns the address of Memory[0] */
} memory_block;
/*
 * Toy allocator: obtains a block from the (hypothetical) operating-system
 * allocator, records the requested size in the header and returns a pointer
 * to the payload that follows the header.
 *
 * Size: number of payload bytes requested.
 * Returns the address of the payload, or NULL when the underlying allocator
 * fails (assumes SomeOperatingSystemMemoryAllocator reports failure by
 * returning NULL -- confirm against the real allocator).
 */
char *AllocateMemory(size_t Size)
{
    memory_block *Block;

    Block = SomeOperatingSystemMemoryAllocator(sizeof(int) + sizeof(char) * Size);
    if (Block == NULL) {
        return NULL; /* never write the header through a null pointer */
    }
    Block->Size = (int)Size; /* header field is an int; the narrowing is explicit */
    return &Block->Memory[0];
}
void FreeMemory(char *Memory)
{
memory_block *Block;
Block = Memory - sizeof(int); // assume the header is right in front of the pointer
Block->Length = 0;
}
显然,这是一个愚蠢而琐碎的例子,但它可能会帮助您理解。在返回的指针之前存放簿记信息,在很多方面都非常有用。以肖恩·巴雷特的为例。
`char *buffer = malloc(sizeof(char)*fileSize);` --> `char *buffer = malloc(fileSize + 1);`
`fread(buffer, fileSize, 1, ptr);` --> `fread(buffer, fileSize, 1, ptr); buffer[fileSize] = 0;`
`char *token = malloc(sizeof(char)*MAX_WORD_SIZE);` --> `char *token = buffer;` // 设置一个占位初值
`free(token);`:删除这一行。
太棒了,谢谢你
`char *buffer = malloc(sizeof(char)*fileSize);` --> `char *buffer = malloc(fileSize + 1);`
`fread(buffer, fileSize, 1, ptr);` --> `fread(buffer, fileSize, 1, ptr); buffer[fileSize] = 0;`
`char *token = malloc(sizeof(char)*MAX_WORD_SIZE);` --> `char *token = buffer;` // 设置一个占位初值
`free(token);`:删除这一行。
太棒了,谢谢。我不明白。我只为 token 分配了一次内存,最后也只释放了一次。@DCR 请查看我更新的答案,希望它能让我的回答更清楚。当我执行 `token = strtok(buffer, "\r\n")` 时,我会用空字符终止 token 中的第一行。在随后的调用中,我会用空字符终止 token 中的每一行,即使在第一次调用strtok
,尝试打印出buffer
后,缓冲区没有任何空值(可能在末尾除外)@DCR。strtok
函数通过用终止符替换找到的令牌来修改缓冲区。你应该阅读更多的信息,我不明白。我只为令牌分配了一次内存,最后只释放了一次。@DCR请查看我更新的答案,希望它能让我的回答更清楚。当我执行token=strtok(缓冲区“\r\n”)时,我会终止令牌null中的第一行。在随后的调用中,我会终止令牌null中的每一行,即使在第一次调用strtok
,尝试打印出buffer
后,缓冲区没有任何空值(可能在末尾除外)@DCR。strtok
函数通过用终止符替换找到的分隔符来修改缓冲区。您应该阅读相关参考资料(例如 strtok 的文档)以了解更多信息。为什么使用 `char Memory[1]`?这个数组只有一个元素。C 不执行边界检查,因此您可以访问超出其边界的内存。