如何使用fscanf从忽略标点符号的输入文件中读取单词?
我试图使用如何使用fscanf从忽略标点符号的输入文件中读取单词?,c,C,我试图使用fscanf从输入文件中读入,同时只读入字母,而忽略逗号、句点等特殊字符。 我尝试了下面的代码,但当我尝试打印每个输入字时,它不会打印任何内容 我还尝试了%20[a-zA-Z]“和%20[a-zA-Z]”中的fscanf char** input; input = (char **)malloc(numWordsInput*sizeof(char*)); for (i = 0; i < numWordsInput; i++) { fscanf(in_file, "%s",
fscanf
从输入文件中读入,同时只读入字母,而忽略逗号、句点等特殊字符。
我尝试了下面的代码,但当我尝试打印每个输入字时,它不会打印任何内容
我还尝试了%20[a-zA-Z]“
和%20[a-zA-Z]”
中的fscanf
/* Array of numWordsInput pointers, one slot per word to be stored. */
char** input;
input = (char **)malloc(numWordsInput*sizeof(char*));
/* First pass: read each whitespace-delimited token into buffer only to
 * learn its length, then allocate strlen + 1 bytes for the matching slot. */
for (i = 0; i < numWordsInput; i++)
{
fscanf(in_file, "%s", buffer);
sLength = strlen(buffer)+1;
input[i] = (char *)malloc(sLength*sizeof(char));
}
/* Go back to the start of the file for the second pass. */
rewind(in_file);
/* Second pass: store at most 20 letters in input[i], then read and discard
 * any further letters.
 * NOTE(review): unlike %s, a %[...] scanset conversion does not skip leading
 * whitespace. Once the next character in the stream is not a letter (a
 * space, comma, newline, ...) the conversion fails, nothing is stored in
 * input[i], and the offending character is left unread, so every later call
 * fails the same way. The return value of fscanf is not checked here. */
for (i = 0; i < numWordsInput; i++)
{
fscanf(in_file, "%20[a-zA-Z]%*[a-zA-Z]", input[i]);
}
char**输入;
输入=(字符**)malloc(numWordsInput*sizeof(字符*);
对于(i=0;i
不清楚为什么要为每个单词创建指向char
的指针,然后为每个单词分配,但为了简单地对[a-zA-Z]
字符进行分类,C库在ctype.h
中提供了许多宏,比如isalpha()
(好的,您关于存储单词的评论是在我完成回答的这一部分时提出的,因此我将在一分钟内添加单词处理)
要处理文件输入并检查每个字符是否为[a-zA-Z]
,只需打开文件并使用面向字符的输入函数,如fgetc
,然后使用isalpha()
测试每个字符。一个简单的例子就是:
#include <stdio.h>
#include <ctype.h>
/** Copy the file named by the 1st argument (stdin when no argument is given)
 *  to stdout one character at a time, passing through only alphabetic
 *  characters and newlines.
 *
 *  Returns 0 on success, 1 if the file cannot be opened.
 */
int main (int argc, char **argv) {
    FILE *stream = stdin;               /* default input source */
    int ch;

    if (argc > 1)                       /* filename given: read from it */
        stream = fopen (argv[1], "r");

    if (stream == NULL) {               /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    for (;;) {                          /* consume input until EOF */
        ch = fgetc (stream);
        if (ch == EOF)
            break;
        if (ch == '\n' || isalpha (ch)) /* keep letters and line breaks */
            putchar (ch);
    }

    if (stream != stdin)                /* only close what was opened here */
        fclose (stream);

    return 0;
}
示例使用/输出
$ ./bin/readalphadyn ../dat/10intmess.txt
word[ 0]: a
word[ 1]: a
word[ 2]: a
word[ 3]: a
word[ 4]: The
word[ 5]: Quick
word[ 6]: Brown
word[ 7]: Fox
word[ 8]: Jumps
word[ 9]: Over
word[ 10]: A
word[ 11]: Lazy
word[ 12]: a
word[ 13]: Dog
。。。只需从中选择[a-zA-Z]
字符(以及'\n'
字符以保留示例中的行距),您将获得:
$ ./bin/readalpha ../dat/10intmess.txt
aa
aa
TheQuick
BrownFox
JumpsOver
A
Lazya
Dog
如果您还想包括[0-9]
,只需使用isalnum(c)
而不是isalpha(c)
你也可以自由地一次读一行(或者一次读一个单词),然后简单地沿着缓冲区走一个指针做同样的事情。例如,你可以做:
#include <stdio.h>
#include <ctype.h>
#define MAXC 4096
/** Read the file named by the 1st argument (stdin by default) a line at a
 *  time and write only its alphabetic characters and newlines to stdout.
 *
 *  Returns 0 on success, 1 if the file cannot be opened.
 */
int main (int argc, char **argv) {
    char buf[MAXC];
    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    while (fgets (buf, sizeof buf, fp)) {       /* read each line in file */
        for (const char *p = buf; *p; p++) {    /* loop over each char */
            /* <ctype.h> functions require a value representable as
             * unsigned char (or EOF); plain char may be negative, so
             * convert before classifying. */
            if (isalpha ((unsigned char)*p) || *p == '\n') {
                putchar (*p);                   /* output it */
            }
        }
    }

    if (fp != stdin) {  /* close file if not stdin */
        fclose (fp);
    }

    return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define NPTR 8 /* initial number of pointers */
#define MAXC 1024
void *xrealloc2 (void *ptr, size_t psz, size_t *nelem);
char *dupstr (const char *s);
/** Read whitespace-delimited words from the file named by the 1st argument
 *  (stdin by default), keep only the alphabetic characters of each word,
 *  store every non-empty result in a dynamically grown array of strings,
 *  then print and free the stored words.
 *
 *  Returns 0 on success, 1 if the file cannot be opened, the pointer array
 *  cannot be allocated, or storage for the very first word fails.
 */
int main (int argc, char **argv) {
    char **input,               /* pointers to words */
          buf[MAXC];            /* read buffer */
    size_t nptr = NPTR,         /* number of allocated pointers */
           used = 0;            /* number of used pointers */
    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    input = malloc (nptr * sizeof *input);  /* allocate nptr pointers */
    if (!input) {                           /* validate every allocation */
        perror ("malloc-input");
        if (fp != stdin) {
            fclose (fp);
        }
        return 1;
    }

    /* The field width (MAXC - 1 = 1023) bounds the read so an over-long
     * token can never overrun buf; such a token is split across reads.
     * Keep the width in sync with MAXC. */
    while (fscanf (fp, "%1023s", buf) == 1) {   /* read each word in file */
        size_t ndx = 0;                         /* alpha char index */
        char tmp[MAXC];                         /* temp buffer for alpha */

        if (used == nptr) {                     /* check if realloc needed */
            input = xrealloc2 (input, sizeof *input, &nptr);
        }

        for (size_t i = 0; buf[i]; i++) {       /* loop over each char */
            /* convert to unsigned char: <ctype.h> functions are undefined
             * for negative plain-char values */
            if (isalpha ((unsigned char)buf[i])) {
                tmp[ndx++] = buf[i];            /* store alpha chars */
            }
        }

        if (!ndx) {                             /* if no alpha-chars */
            continue;                           /* get next word */
        }
        tmp[ndx] = 0;                           /* nul-terminate chars */

        input[used] = dupstr (tmp);             /* allocate/copy tmp */
        if (!input[used]) {                     /* validate word storage */
            if (used) {                         /* earlier words still good */
                break;
            }
            fputs ("error: allocating 1st word.\n", stderr);
            free (input);                       /* nothing stored: clean up */
            if (fp != stdin) {
                fclose (fp);
            }
            return 1;
        }
        used++;                                 /* increment used count */
    }

    if (fp != stdin) {  /* close file if not stdin */
        fclose (fp);
    }

    for (size_t i = 0; i < used; i++) {
        printf ("word[%3zu]: %s\n", i, input[i]);
        free (input[i]);    /* free storage when done with word */
    }
    free (input);           /* free pointers */

    return 0;
}
/** Double the storage pointed to by 'ptr', which currently holds '*nelem'
 *  elements of 'psz' bytes each.
 *
 *  On success the newly added upper half of the block is zero-filled,
 *  '*nelem' is updated to the new element count, and the (possibly moved)
 *  block is returned; the return must be assigned to the original pointer
 *  in the caller.
 *
 *  If the doubled byte count cannot be represented in a size_t, or if
 *  realloc fails, an error is reported and the program exits with
 *  EXIT_FAILURE (this function does not return NULL).
 */
void *xrealloc2 (void *ptr, size_t psz, size_t *nelem)
{
    size_t oldsz, newsz;
    void *memptr;

    /* refuse sizes whose doubled byte count would wrap around size_t;
     * a wrapped size would make the memset below write out of bounds */
    if (psz && *nelem > (size_t)-1 / 2 / psz) {
        fputs ("xrealloc2(): requested size overflows size_t.\n", stderr);
        exit (EXIT_FAILURE);
    }

    oldsz = *nelem * psz;       /* bytes currently allocated */
    newsz = oldsz * 2;          /* bytes after doubling */

    memptr = realloc (ptr, newsz);
    if (!memptr) {
        perror ("realloc(): virtual memory exhausted.");
        exit (EXIT_FAILURE);
        /* return NULL; */
    }

    /* zero new memory (optional) */
    memset ((char *)memptr + oldsz, 0, oldsz);
    *nelem *= 2;

    return memptr;
}
/** Duplicate the nul-terminated string 's' into freshly allocated storage.
 *  Returns a pointer to the new copy (to be released with free()) on
 *  success, or NULL if the allocation fails.
 */
char *dupstr (const char *s)
{
    size_t nbytes = strlen (s) + 1;     /* chars plus nul-terminator */
    char *copy = malloc (nbytes);

    if (copy != NULL)                   /* copy only into valid storage */
        memcpy (copy, s, nbytes);

    return copy;
}
(输出相同)
仔细检查一下,如果有问题请告诉我。如果您确实需要一次只做一个字,那么您必须在代码中添加大量内容,以保护指针的数量,并根据需要添加realloc
。给我一点时间,我会帮你理解上面的基本字符分类
仅分配和存储字母字符的单个单词
正如您可以想象的那样,动态分配指针,然后为每个仅由字母字符组成的单词分配存储并保存它,要稍微复杂一些。但这并不更难,您只需跟踪已分配的指针数量,如果已经用完了所有分配的指针,则重新分配并继续
新的C程序员通常会遇到麻烦的地方是无法验证所需的每个步骤,以确保每个分配都成功,从而避免调用未定义的行为写入不属于自己的内存
使用fscanf
读取单个单词是可以的。然后,为了确保确实有字母字符需要存储,在为该单词分配存储之前,先将字母字符提取到单独的临时缓冲区,并检查是否确实提取到了字符,这样做是合理的。非医学类未删节词典(Unabridged Dictionary)中最长的单词为29个字符,因此一个大于该长度的固定缓冲区就足够了(下面使用了1024
chars——不要吝啬缓冲区大小!)
因此,存储每个单词、跟踪分配的指针数量和使用的指针数量以及要读取的固定缓冲区所需的内容类似于:
#define NPTR 8 /* initial number of pointers */
#define MAXC 1024
...
char **input, /* array of pointers, one per stored word */
buf[MAXC]; /* fixed buffer each word is read into */
size_t nptr = NPTR, /* number of allocated pointers */
used = 0; /* number of pointers currently in use */
分配初始指针数后,您可以读取每个单词,然后解析其中的字母字符,如下所示:
/* Read each whitespace-delimited word. The field width (MAXC - 1 = 1023)
 * bounds the read so an over-long token can never overrun buf; keep the
 * width in sync with MAXC. */
while (fscanf (fp, "%1023s", buf) == 1) {
    size_t ndx = 0;                         /* alpha char index */
    char tmp[MAXC];                         /* temp buffer for alpha */

    if (used == nptr) {                     /* check if realloc needed */
        input = xrealloc2 (input, sizeof *input, &nptr);
    }

    for (size_t i = 0; buf[i]; i++) {       /* loop over each char */
        /* convert to unsigned char: <ctype.h> functions are undefined
         * for negative plain-char values */
        if (isalpha ((unsigned char)buf[i])) {
            tmp[ndx++] = buf[i];            /* store alpha chars */
        }
    }

    if (!ndx) {                             /* if no alpha-chars */
        continue;                           /* get next word */
    }
    tmp[ndx] = 0;                           /* nul-terminate chars */

    input[used] = dupstr (tmp);             /* allocate/copy tmp */
    if (!input[used]) {                     /* validate word storage */
        if (used) {                         /* if words already stored */
            break;                          /* earlier words still good */
        }
        else {                              /* otherwise bail */
            fputs ("error: allocating 1st word.\n", stderr);
            return 1;
        }
    }
    used++;                                 /* increment used count */
}
(注意:当使用的指针数量等于分配的数量时,输入将重新分配到当前指针数量的两倍)
xrealloc2
和dupstr
函数只是辅助函数xrealloc2
只需调用realloc
并将当前分配的大小加倍,验证分配并在成功时返回重新分配的指针,失败时目前会直接退出程序——如果愿意,可以将其更改为return NULL
,以处理错误
/** Double the storage pointed to by 'ptr', which currently holds '*nelem'
 *  elements of 'psz' bytes each.
 *
 *  On success the newly added upper half of the block is zero-filled,
 *  '*nelem' is updated to the new element count, and the (possibly moved)
 *  block is returned; the return must be assigned to the original pointer
 *  in the caller.
 *
 *  If the doubled byte count cannot be represented in a size_t, or if
 *  realloc fails, an error is reported and the program exits with
 *  EXIT_FAILURE (this function does not return NULL).
 */
void *xrealloc2 (void *ptr, size_t psz, size_t *nelem)
{
    size_t oldsz, newsz;
    void *memptr;

    /* refuse sizes whose doubled byte count would wrap around size_t;
     * a wrapped size would make the memset below write out of bounds */
    if (psz && *nelem > (size_t)-1 / 2 / psz) {
        fputs ("xrealloc2(): requested size overflows size_t.\n", stderr);
        exit (EXIT_FAILURE);
    }

    oldsz = *nelem * psz;       /* bytes currently allocated */
    newsz = oldsz * 2;          /* bytes after doubling */

    memptr = realloc (ptr, newsz);
    if (!memptr) {
        perror ("realloc(): virtual memory exhausted.");
        exit (EXIT_FAILURE);
        /* return NULL; */
    }

    /* zero new memory (optional) */
    memset ((char *)memptr + oldsz, 0, oldsz);
    *nelem *= 2;

    return memptr;
}
dupstr
函数只是一个普通的strdup
,但由于并非所有编译器都提供strdup
,因此它用于确保可移植性
/** Duplicate the nul-terminated string 's' into freshly allocated storage.
 *  Returns a pointer to the new copy (to be released with free()) on
 *  success, or NULL if the allocation fails.
 */
char *dupstr (const char *s)
{
    size_t nbytes = strlen (s) + 1;     /* chars plus nul-terminator */
    char *copy = malloc (nbytes);

    if (copy != NULL)                   /* copy only into valid storage */
        memcpy (copy, s, nbytes);

    return copy;
}
使用helpers只是让代码的主体稍微干净一点,而不是将其全部塞进循环中
总而言之,你可以做到:
#include <stdio.h>
#include <ctype.h>
#define MAXC 4096
/** Read the file named by the 1st argument (stdin by default) a line at a
 *  time and write only its alphabetic characters and newlines to stdout.
 *
 *  Returns 0 on success, 1 if the file cannot be opened.
 */
int main (int argc, char **argv) {
    char buf[MAXC];
    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    while (fgets (buf, sizeof buf, fp)) {       /* read each line in file */
        for (const char *p = buf; *p; p++) {    /* loop over each char */
            /* <ctype.h> functions require a value representable as
             * unsigned char (or EOF); plain char may be negative, so
             * convert before classifying. */
            if (isalpha ((unsigned char)*p) || *p == '\n') {
                putchar (*p);                   /* output it */
            }
        }
    }

    if (fp != stdin) {  /* close file if not stdin */
        fclose (fp);
    }

    return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define NPTR 8 /* initial number of pointers */
#define MAXC 1024
void *xrealloc2 (void *ptr, size_t psz, size_t *nelem);
char *dupstr (const char *s);
/** Read whitespace-delimited words from the file named by the 1st argument
 *  (stdin by default), keep only the alphabetic characters of each word,
 *  store every non-empty result in a dynamically grown array of strings,
 *  then print and free the stored words.
 *
 *  Returns 0 on success, 1 if the file cannot be opened, the pointer array
 *  cannot be allocated, or storage for the very first word fails.
 */
int main (int argc, char **argv) {
    char **input,               /* pointers to words */
          buf[MAXC];            /* read buffer */
    size_t nptr = NPTR,         /* number of allocated pointers */
           used = 0;            /* number of used pointers */
    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    input = malloc (nptr * sizeof *input);  /* allocate nptr pointers */
    if (!input) {                           /* validate every allocation */
        perror ("malloc-input");
        if (fp != stdin) {
            fclose (fp);
        }
        return 1;
    }

    /* The field width (MAXC - 1 = 1023) bounds the read so an over-long
     * token can never overrun buf; such a token is split across reads.
     * Keep the width in sync with MAXC. */
    while (fscanf (fp, "%1023s", buf) == 1) {   /* read each word in file */
        size_t ndx = 0;                         /* alpha char index */
        char tmp[MAXC];                         /* temp buffer for alpha */

        if (used == nptr) {                     /* check if realloc needed */
            input = xrealloc2 (input, sizeof *input, &nptr);
        }

        for (size_t i = 0; buf[i]; i++) {       /* loop over each char */
            /* convert to unsigned char: <ctype.h> functions are undefined
             * for negative plain-char values */
            if (isalpha ((unsigned char)buf[i])) {
                tmp[ndx++] = buf[i];            /* store alpha chars */
            }
        }

        if (!ndx) {                             /* if no alpha-chars */
            continue;                           /* get next word */
        }
        tmp[ndx] = 0;                           /* nul-terminate chars */

        input[used] = dupstr (tmp);             /* allocate/copy tmp */
        if (!input[used]) {                     /* validate word storage */
            if (used) {                         /* earlier words still good */
                break;
            }
            fputs ("error: allocating 1st word.\n", stderr);
            free (input);                       /* nothing stored: clean up */
            if (fp != stdin) {
                fclose (fp);
            }
            return 1;
        }
        used++;                                 /* increment used count */
    }

    if (fp != stdin) {  /* close file if not stdin */
        fclose (fp);
    }

    for (size_t i = 0; i < used; i++) {
        printf ("word[%3zu]: %s\n", i, input[i]);
        free (input[i]);    /* free storage when done with word */
    }
    free (input);           /* free pointers */

    return 0;
}
/** Double the storage pointed to by 'ptr', which currently holds '*nelem'
 *  elements of 'psz' bytes each.
 *
 *  On success the newly added upper half of the block is zero-filled,
 *  '*nelem' is updated to the new element count, and the (possibly moved)
 *  block is returned; the return must be assigned to the original pointer
 *  in the caller.
 *
 *  If the doubled byte count cannot be represented in a size_t, or if
 *  realloc fails, an error is reported and the program exits with
 *  EXIT_FAILURE (this function does not return NULL).
 */
void *xrealloc2 (void *ptr, size_t psz, size_t *nelem)
{
    size_t oldsz, newsz;
    void *memptr;

    /* refuse sizes whose doubled byte count would wrap around size_t;
     * a wrapped size would make the memset below write out of bounds */
    if (psz && *nelem > (size_t)-1 / 2 / psz) {
        fputs ("xrealloc2(): requested size overflows size_t.\n", stderr);
        exit (EXIT_FAILURE);
    }

    oldsz = *nelem * psz;       /* bytes currently allocated */
    newsz = oldsz * 2;          /* bytes after doubling */

    memptr = realloc (ptr, newsz);
    if (!memptr) {
        perror ("realloc(): virtual memory exhausted.");
        exit (EXIT_FAILURE);
        /* return NULL; */
    }

    /* zero new memory (optional) */
    memset ((char *)memptr + oldsz, 0, oldsz);
    *nelem *= 2;

    return memptr;
}
/** Duplicate the nul-terminated string 's' into freshly allocated storage.
 *  Returns a pointer to the new copy (to be released with free()) on
 *  success, or NULL if the allocation fails.
 */
char *dupstr (const char *s)
{
    size_t nbytes = strlen (s) + 1;     /* chars plus nul-terminator */
    char *copy = malloc (nbytes);

    if (copy != NULL)                   /* copy only into valid storage */
        memcpy (copy, s, nbytes);

    return copy;
}
内存使用/错误检查
在您编写的任何动态分配内存的代码中,对于所分配的任何内存块,您有两个责任:(1)始终保留指向内存块起始地址的指针,以便(2)在不再需要它时可以释放它
必须使用内存错误检查程序,以确保您不会试图访问内存或写入超出/超出分配的块的边界,尝试在未初始化的值上读取或建立条件跳转,最后确认释放所有已分配的内存
对于Linux,valgrind
是正常的选择。每个平台都有类似的内存检查器。它们都很容易使用,只需运行程序即可
$ valgrind ./bin/readalphadyn ../dat/10intmess.txt
==8765== Memcheck, a memory error detector
==8765== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==8765== Using Valgrind-3.12.0 and LibVEX; rerun with -h for copyright info
==8765== Command: ./bin/readalphadyn ../dat/10intmess.txt
==8765==
word[ 0]: a
word[ 1]: a
word[ 2]: a
word[ 3]: a
word[ 4]: The
word[ 5]: Quick
word[ 6]: Brown
word[ 7]: Fox
word[ 8]: Jumps
word[ 9]: Over
word[ 10]: A
word[ 11]: Lazy
word[ 12]: a
word[ 13]: Dog
==8765==
==8765== HEAP SUMMARY:
==8765== in use at exit: 0 bytes in 0 blocks
==8765== total heap usage: 17 allocs, 17 frees, 796 bytes allocated
==8765==
==8765== All heap blocks were freed -- no leaks are possible
==8765==
==8765== For counts of detected and suppressed errors, rerun with: -v
==8765== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
始终确认已释放所有已分配的内存,并且没有内存错误
(注意:不需要对malloc
的返回值进行强制类型转换,这是不必要的。请参阅:)
要跳过单个字符的单词(或选择所需的限制),只需更改:
if (ndx < 2) /* fewer than 2 alpha chars were collected for this word */
continue; /* skip storing it and move on to the next word */
不清楚你为什么要这样做
$ ./bin/readalphadyn ../dat/10intmess.txt
word[ 0]: The
word[ 1]: Quick
word[ 2]: Brown
word[ 3]: Fox
word[ 4]: Jumps
word[ 5]: Over
word[ 6]: Lazy
word[ 7]: Dog