C-在无限长的行中读取有限长的字

C-在无限长的行中读取有限长的字,c,scanf,c99,fgets,c-strings,C,Scanf,C99,Fgets,C Strings,我想读一个文件中的单词,并知道新行何时开始 我知道每行可以有三个、四个或零个单词,而且单词不能超过一定的长度。但是带空格的行长度是没有边界的,因此不可能将一行读入字符串,解析并继续。我想知道当我读到每一行时,每一行是否有三到四个单词 目前,我使用fscanf和一些特定于问题的内部逻辑来决定我读到的第四个单词是在新行中还是在前一行中的第四个。但这种方式是脆弱的,很容易打破 我想我可以逐字符读取,忽略空格并查找'\n'。还有更优雅的方式吗 多谢各位 编辑:我仅限于使用C99和标准库。以下是一些代码,

我想读一个文件中的单词,并知道新行何时开始

我知道每行可以有三个、四个或零个单词,而且单词不能超过一定的长度。但是带空格的行长度是没有边界的,因此不可能将一行读入字符串,解析并继续。我想知道当我读到每一行时,每一行是否有三到四个单词

目前,我使用fscanf和一些特定于问题的内部逻辑来决定我读到的第四个单词是在新行中还是在前一行中的第四个。但这种方式是脆弱的,很容易打破

我想我可以逐字符读取,忽略空格并查找'\n'。还有更优雅的方式吗

多谢各位


编辑:我仅限于使用C99和标准库。

以下是一些代码,它们的工作与您的请求密切相关。有两个主要区别:

  • 它不相信用户知道他们提供什么,因为数据必须遵守某些规则,所以它假设用户会滥用这些规则
  • 因此,它会记录每一行中找到的所有单词,并以完整的长度记录这些单词,因此使用动态内存分配
  • 在我发布之前,它已经通过了一些相当苛刻的测试。您可以使用
    make UFLAGS=-DTEST
    进行编译,以获得较短的行片段(64字节,而不是默认的4096字节),这也会为您提供额外的诊断输出。我将
    MAX_LINE_LEN
    设为
    6
    而不是
    64
    进行了大量测试——这有助于调试单词跨越同一行多个片段时出现的问题

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    /* Maximum number of words per line that main() prints (the full count is still reported). */
    enum { MAX_WORD_CNT = 8 };
    
    /*
     * Build-time configuration.  Compiling with -DTEST turns on the diagnostic
     * printf() calls (debug != 0) and shrinks the fgets() buffer so that lines
     * are split into many fragments; otherwise diagnostics are off and the
     * buffer is 4096 bytes.
     */
    #ifdef TEST
    static int debug = 1;
    enum { MAX_LINE_LEN = 64 };
    #else
    static int debug = 0;
    enum { MAX_LINE_LEN = 4096 };
    #endif /* TEST */
    
    /* One word found on a line, stored as an owned, NUL-terminated copy. */
    typedef struct Word
    {
        size_t length;  /* Number of bytes in word, excluding the terminating NUL */
        char  *word;    /* Heap-allocated text (malloc/realloc); released by free_wordlist() */
    } Word;
    
    /* Growable array of the words collected for one input line. */
    typedef struct WordList
    {
        size_t  num_words;  /* Entries of words[] currently in use */
        size_t  max_words;  /* Entries allocated for words[] */
        Word   *words;      /* Heap-allocated array; null until the first word is added */
    } WordList;
    
    /* Parsing state for one logical input line, carried across fgets() fragments. */
    typedef struct LineControl
    {
        size_t   line_length;  /* Bytes consumed so far for this line (newline included) */
        bool     part_word;    /* True when the last fragment ended in the middle of a word */
        size_t   part_len;     /* Bytes of that unfinished word seen so far; reported in debug output */
        WordList list;         /* Words found so far on this line */
    } LineControl;
    
    /* Put a word list into the empty state: no entries, no capacity, no storage. */
    static void init_wordlist(WordList *list)
    {
        *list = (WordList){ .num_words = 0, .max_words = 0, .words = NULL };
    }
    
    /*
     * Release every word owned by the list, then the array itself, and leave
     * the list in the empty state so that it can be reused.
     */
    static void free_wordlist(WordList *list)
    {
        assert(list != 0);
    
        const size_t count = list->num_words;
        size_t idx = 0;
        while (idx < count)
        {
            free(list->words[idx].word);
            idx++;
        }
    
        free(list->words);
        init_wordlist(list);
    }
    
    /*
     * Append ext_len bytes starting at extn to an existing word, growing its
     * buffer with realloc() and keeping it NUL-terminated.  Prints a message
     * and exits if the reallocation fails.
     */
    static void extend_word(const char *extn, size_t ext_len, Word *word)
    {
        if (debug)
        {
            printf("old (%zu) = [%s]; extra (%zu) = [%.*s]\n", word->length, word->word,
                   ext_len, (int)ext_len, extn);
        }
    
        const size_t old_len = word->length;
        const size_t needed = old_len + ext_len + 1;   /* +1 for the terminating NUL */
        char *grown = realloc(word->word, needed);
        if (grown == 0)
        {
            fprintf(stderr, "failed to reallocate %zu bytes of memory\n", needed);
            exit(EXIT_FAILURE);
        }
    
        memmove(grown + old_len, extn, ext_len);
        grown[old_len + ext_len] = '\0';
        word->word = grown;
        word->length = old_len + ext_len;
    
        if (debug)
        {
            printf("new (%zu) = [%s]\n", word->length, word->word);
        }
    }
    
    /*
     * Append a copy of the word_len bytes at word to the list as a new,
     * NUL-terminated entry.  The array is grown (capacity * 2 + 2) when it is
     * full.  Prints a message and exits if any allocation fails.
     */
    static void addword_wordlist(const char *word, size_t word_len, WordList *list)
    {
        if (list->num_words >= list->max_words)
        {
            assert(list->num_words == list->max_words);
            const size_t grown_cap = 2 * list->max_words + 2;
            const size_t grown_bytes = grown_cap * sizeof(Word);
            Word *grown = realloc(list->words, grown_bytes);
            if (grown == 0)
            {
                fprintf(stderr, "failed to allocate %zu bytes of memory\n", grown_bytes);
                exit(EXIT_FAILURE);
            }
            list->words = grown;
            list->max_words = grown_cap;
        }
    
        char *copy = malloc(word_len + 1);
        if (copy == 0)
        {
            fprintf(stderr, "failed to allocate %zu bytes of memory\n", word_len + 1);
            exit(EXIT_FAILURE);
        }
        memmove(copy, word, word_len);
        copy[word_len] = '\0';
    
        Word *slot = &list->words[list->num_words];
        slot->word = copy;
        slot->length = word_len;
        list->num_words++;
    }
    
    /* Prepare the per-line state for a new line: empty word list, zero length, no pending partial word. */
    static void init_linectrl(LineControl *ctrl)
    {
        init_wordlist(&ctrl->list);
        ctrl->part_len = 0;
        ctrl->part_word = false;
        ctrl->line_length = 0;
    }
    
    /*
     * Parse one fgets()-sized fragment of a logical input line.
     *
     * line: NUL-terminated fragment; it may end with a single newline
     *       (asserted below).
     * ctrl: per-line state carried between fragments (running length, word
     *       list, and whether the previous fragment ended in mid-word).
     *
     * Words are runs of characters other than blank and tab.  Each word is
     * appended to ctrl->list; if the previous fragment ended part-way through
     * a word and this fragment starts with non-blank characters, they are
     * appended to that word instead of starting a new one.
     *
     * Returns 0 when a newline was seen (line complete) and 1 when it was not
     * (the caller should supply the next fragment).
     */
    static int parse_fragment(const char *line, LineControl *ctrl)
    {
        char   whisp[] = " \t";
        size_t offset = 0;
        bool   got_eol = false;
    
        /* The only newline in the string is at the end, if it is there at all */
        assert(strchr(line, '\n') == strrchr(line, '\n'));
        assert(strchr(line, '\n') == 0 || *(strchr(line, '\n') + 1) == '\0');
        if (debug && ctrl->part_word)
        {
            assert(ctrl->list.num_words > 0);
            printf("Dealing with partial word on entry (%zu: [%s])\n",
                   ctrl->part_len, ctrl->list.words[ctrl->list.num_words - 1].word);
        }
    
        /* Length of the most recent non-blank run; used after the loop to detect a partial word */
        size_t o_nonsp = 0;
        while (line[offset] != '\0')
        {
            /* Split the next chunk into leading blanks/tabs and the non-blank run that follows */
            size_t n_whisp = strspn(line + offset, whisp);
            size_t n_nonsp = strcspn(line + offset + n_whisp, whisp);
            if (debug)
                printf("offset %zu, whisp %zu, nonsp %zu\n", offset, n_whisp, n_nonsp);
            got_eol = false;
            /* Count every byte, including a trailing newline (still part of n_nonsp here) */
            ctrl->line_length += n_whisp + n_nonsp;
            /* A newline is not in whisp, so it shows up as the last byte of the non-blank run */
            if (line[offset + n_whisp + n_nonsp - 1] == '\n')
            {
                assert(n_nonsp > 0);
                got_eol = true;
                n_nonsp--;
            }
            /* Nothing but the newline was left in the fragment */
            if (n_whisp + n_nonsp == 0)
            {
                o_nonsp = 0;
                break;
            }
    
            /* Leading blanks mean any word carried over from the previous fragment has ended */
            if (n_whisp != 0)
            {
                ctrl->part_word = false;
                ctrl->part_len = 0;
            }
    
            /* Record the word: extend the carried-over word if there is one, else append a new entry */
            if (n_nonsp > 0)
            {
                const char *word = line + offset + n_whisp;
                if (ctrl->part_word)
                {
                    assert(ctrl->list.num_words > 0);
                    extend_word(word, n_nonsp,
                                &ctrl->list.words[ctrl->list.num_words - 1]);
                }
                else
                {
                    addword_wordlist(word, n_nonsp, &ctrl->list);
                }
            }
    
            offset += n_whisp + n_nonsp;
            /* More text follows in this fragment, so the word just handled is complete */
            if (line[offset] != '\0')
            {
                ctrl->part_word = false;
                ctrl->part_len = 0;
            }
            o_nonsp = n_nonsp;
            if (got_eol)
                break;
        }
    
        /* Partial word detection: the fragment ended on a non-blank run with no newline */
        if (o_nonsp > 0 && !got_eol)
        {
            ctrl->part_word = true;
            ctrl->part_len += o_nonsp;
        }
        else
        {
            ctrl->part_word = false;
            ctrl->part_len = 0;
        }
    
        /* If seen newline; line complete */
        /* If No newline; line incomplete */
        return !got_eol;
    }
    
    int main(void)
    {
        char line[MAX_LINE_LEN];
        size_t lineno = 0;
    
        while (fgets(line, sizeof(line), stdin) != 0)
        {
            LineControl ctrl;
            init_linectrl(&ctrl);
            lineno++;
            if (debug)
                printf("Line %zu: (%zu) [[%s]]\n", lineno, strlen(line), line);
    
            int extra = 0;
            while (parse_fragment(line, &ctrl) != 0 &&
                   fgets(line, sizeof(line), stdin) != 0)
            {
                if (debug)
                    printf("Extra %d for line %zu: (%zu) [[%s]]\n",
                           ++extra, lineno, strlen(line), line);
            }
    
            WordList *list = &ctrl.list;
            printf("Line %zu: length %zu, words = %zu\n",
                   lineno, ctrl.line_length, list->num_words);
            size_t num_words = list->num_words;
            if (num_words > MAX_WORD_CNT)
                num_words = MAX_WORD_CNT;
            for (size_t i = 0; i < num_words; i++)
            {
                printf("  %zu: (%zu) %s\n",
                       i + 1, list->words[i].length, list->words[i].word);
            }
            putchar('\n');
            free_wordlist(&ctrl.list);
        }
    
        return 0;
    }
    
    测试数据文件中包含各种制表符,正如同一份数据的以下版本所示,其中制表符显示为
    \t

        a b   
        a b      c         d                                                        
    \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t
    1123xxsdfdsfsfdsfdssa          1234ddfxxyff            frrrdds
    1123dfdffdfdxxxxxxxxxas                        1234ydfyyyzm   knsaaass      1234asdafxxfrrrfrrrsaa    
                   1123werwetrretttrretertre       aaaa     bbbbbb      ccccc        
    k
      \t\t \t \t\t\t \t \t \t\t\t\tapoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper\t\t\t    \t\t\t\tapoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper  \t \t \t \t\t\t\t \t \tapoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper\t\t           \t\t\t\t \t \t \t \t\tapoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper\t\t\t\t\t\t    \t \t \t \t      \t \t \t 
    
    运行此
    awk
    脚本分析数据:

    $ awk '{ printf "%3d %d [%s]\n", length($0) + 1, NF, $0 }' test-data.1
      1 0 []
      5 0 [    ]
     11 2 [    a b   ]
     81 4 [    a b      c         d                                                        ]
     20 0 [                                                     ]
     63 3 [1123xxsdfdsfsfdsfdssa          1234ddfxxyff            frrrdds]
    103 4 [1123dfdffdfdxxxxxxxxxas                        1234ydfyyyzm   knsaaass      1234asdafxxfrrrfrrrsaa    ]
     82 4 [               1123werwetrretttrretertre       aaaa     bbbbbb      ccccc        ]
      2 1 [k]
    494 4 [                                                 apoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper                              apoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper                                      apoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper                      apoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper                                           ]
    $
    
    该数据文件上程序的输出为:

    Line 1: length 1, words = 0
    
    Line 2: length 5, words = 0
    
    Line 3: length 11, words = 2
      1: (1) a
      2: (1) b
    
    Line 4: length 81, words = 4
      1: (1) a
      2: (1) b
      3: (1) c
      4: (1) d
    
    Line 5: length 20, words = 0
    
    Line 6: length 63, words = 3
      1: (21) 1123xxsdfdsfsfdsfdssa
      2: (12) 1234ddfxxyff
      3: (7) frrrdds
    
    Line 7: length 103, words = 4
      1: (23) 1123dfdffdfdxxxxxxxxxas
      2: (12) 1234ydfyyyzm
      3: (8) knsaaass
      4: (22) 1234asdafxxfrrrfrrrsaa
    
    Line 8: length 82, words = 4
      1: (25) 1123werwetrretttrretertre
      2: (4) aaaa
      3: (6) bbbbbb
      4: (5) ccccc
    
    Line 9: length 2, words = 1
      1: (1) k
    
    Line 10: length 494, words = 4
      1: (98) apoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper
      2: (98) apoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper
      3: (98) apoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper
      4: (98) apoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper
    
    您可以看到输出中出现的
    awk
    脚本中的数据

    此代码在GitHub上我的SOQ(Stack Overflow Questions)存储库中,作为子目录中的文件
    scan59.c
    test-data.1
    test-data.2
    test-data.3
    提供。其中
    test-data.3
    文件尤其苛刻,它包含一行长达9955个字符、693个单词的文本,以及其他不那么苛刻的测试行


    代码使用GCC 8.2.0和Valgrind 3.14.0.GIT在运行macOS 10.13.6 High Sierra的Mac上干净地编译和运行。(尽管
    makefile
    规定了C11,但此代码中没有特定于C11的内容;它与C99完全兼容。它还可以使用
    make SFLAGS='-std=c99 -pedantic'
    干净地编译)

    以下是一些与您的请求密切相关的代码。有两个主要区别:

  • 它不相信用户知道他们提供什么,因为数据必须遵守某些规则,所以它假设用户会滥用这些规则
  • 因此,它会记录每一行中找到的所有单词,并以完整的长度记录这些单词,因此使用动态内存分配
  • 在我发布之前,它已经通过了一些相当苛刻的测试。您可以使用
    make UFLAGS=-DTEST
    进行编译,以获得较短的行片段(64字节,而不是默认的4096字节),这也会为您提供额外的诊断输出。我将
    MAX_LINE_LEN
    设为
    6
    而不是
    64
    进行了大量测试——这有助于调试单词跨越同一行多个片段时出现的问题

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    /* Maximum number of words per line that main() prints (the full count is still reported). */
    enum { MAX_WORD_CNT = 8 };
    
    /*
     * Build-time configuration.  Compiling with -DTEST turns on the diagnostic
     * printf() calls (debug != 0) and shrinks the fgets() buffer so that lines
     * are split into many fragments; otherwise diagnostics are off and the
     * buffer is 4096 bytes.
     */
    #ifdef TEST
    static int debug = 1;
    enum { MAX_LINE_LEN = 64 };
    #else
    static int debug = 0;
    enum { MAX_LINE_LEN = 4096 };
    #endif /* TEST */
    
    /* One word found on a line, stored as an owned, NUL-terminated copy. */
    typedef struct Word
    {
        size_t length;  /* Number of bytes in word, excluding the terminating NUL */
        char  *word;    /* Heap-allocated text (malloc/realloc); released by free_wordlist() */
    } Word;
    
    /* Growable array of the words collected for one input line. */
    typedef struct WordList
    {
        size_t  num_words;  /* Entries of words[] currently in use */
        size_t  max_words;  /* Entries allocated for words[] */
        Word   *words;      /* Heap-allocated array; null until the first word is added */
    } WordList;
    
    /* Parsing state for one logical input line, carried across fgets() fragments. */
    typedef struct LineControl
    {
        size_t   line_length;  /* Bytes consumed so far for this line (newline included) */
        bool     part_word;    /* True when the last fragment ended in the middle of a word */
        size_t   part_len;     /* Bytes of that unfinished word seen so far; reported in debug output */
        WordList list;         /* Words found so far on this line */
    } LineControl;
    
    /* Put a word list into the empty state: no entries, no capacity, no storage. */
    static void init_wordlist(WordList *list)
    {
        *list = (WordList){ .num_words = 0, .max_words = 0, .words = NULL };
    }
    
    /*
     * Release every word owned by the list, then the array itself, and leave
     * the list in the empty state so that it can be reused.
     */
    static void free_wordlist(WordList *list)
    {
        assert(list != 0);
    
        const size_t count = list->num_words;
        size_t idx = 0;
        while (idx < count)
        {
            free(list->words[idx].word);
            idx++;
        }
    
        free(list->words);
        init_wordlist(list);
    }
    
    /*
     * Append ext_len bytes starting at extn to an existing word, growing its
     * buffer with realloc() and keeping it NUL-terminated.  Prints a message
     * and exits if the reallocation fails.
     */
    static void extend_word(const char *extn, size_t ext_len, Word *word)
    {
        if (debug)
        {
            printf("old (%zu) = [%s]; extra (%zu) = [%.*s]\n", word->length, word->word,
                   ext_len, (int)ext_len, extn);
        }
    
        const size_t old_len = word->length;
        const size_t needed = old_len + ext_len + 1;   /* +1 for the terminating NUL */
        char *grown = realloc(word->word, needed);
        if (grown == 0)
        {
            fprintf(stderr, "failed to reallocate %zu bytes of memory\n", needed);
            exit(EXIT_FAILURE);
        }
    
        memmove(grown + old_len, extn, ext_len);
        grown[old_len + ext_len] = '\0';
        word->word = grown;
        word->length = old_len + ext_len;
    
        if (debug)
        {
            printf("new (%zu) = [%s]\n", word->length, word->word);
        }
    }
    
    /*
     * Append a copy of the word_len bytes at word to the list as a new,
     * NUL-terminated entry.  The array is grown (capacity * 2 + 2) when it is
     * full.  Prints a message and exits if any allocation fails.
     */
    static void addword_wordlist(const char *word, size_t word_len, WordList *list)
    {
        if (list->num_words >= list->max_words)
        {
            assert(list->num_words == list->max_words);
            const size_t grown_cap = 2 * list->max_words + 2;
            const size_t grown_bytes = grown_cap * sizeof(Word);
            Word *grown = realloc(list->words, grown_bytes);
            if (grown == 0)
            {
                fprintf(stderr, "failed to allocate %zu bytes of memory\n", grown_bytes);
                exit(EXIT_FAILURE);
            }
            list->words = grown;
            list->max_words = grown_cap;
        }
    
        char *copy = malloc(word_len + 1);
        if (copy == 0)
        {
            fprintf(stderr, "failed to allocate %zu bytes of memory\n", word_len + 1);
            exit(EXIT_FAILURE);
        }
        memmove(copy, word, word_len);
        copy[word_len] = '\0';
    
        Word *slot = &list->words[list->num_words];
        slot->word = copy;
        slot->length = word_len;
        list->num_words++;
    }
    
    /* Prepare the per-line state for a new line: empty word list, zero length, no pending partial word. */
    static void init_linectrl(LineControl *ctrl)
    {
        init_wordlist(&ctrl->list);
        ctrl->part_len = 0;
        ctrl->part_word = false;
        ctrl->line_length = 0;
    }
    
    /*
     * Parse one fgets()-sized fragment of a logical input line.
     *
     * line: NUL-terminated fragment; it may end with a single newline
     *       (asserted below).
     * ctrl: per-line state carried between fragments (running length, word
     *       list, and whether the previous fragment ended in mid-word).
     *
     * Words are runs of characters other than blank and tab.  Each word is
     * appended to ctrl->list; if the previous fragment ended part-way through
     * a word and this fragment starts with non-blank characters, they are
     * appended to that word instead of starting a new one.
     *
     * Returns 0 when a newline was seen (line complete) and 1 when it was not
     * (the caller should supply the next fragment).
     */
    static int parse_fragment(const char *line, LineControl *ctrl)
    {
        char   whisp[] = " \t";
        size_t offset = 0;
        bool   got_eol = false;
    
        /* The only newline in the string is at the end, if it is there at all */
        assert(strchr(line, '\n') == strrchr(line, '\n'));
        assert(strchr(line, '\n') == 0 || *(strchr(line, '\n') + 1) == '\0');
        if (debug && ctrl->part_word)
        {
            assert(ctrl->list.num_words > 0);
            printf("Dealing with partial word on entry (%zu: [%s])\n",
                   ctrl->part_len, ctrl->list.words[ctrl->list.num_words - 1].word);
        }
    
        /* Length of the most recent non-blank run; used after the loop to detect a partial word */
        size_t o_nonsp = 0;
        while (line[offset] != '\0')
        {
            /* Split the next chunk into leading blanks/tabs and the non-blank run that follows */
            size_t n_whisp = strspn(line + offset, whisp);
            size_t n_nonsp = strcspn(line + offset + n_whisp, whisp);
            if (debug)
                printf("offset %zu, whisp %zu, nonsp %zu\n", offset, n_whisp, n_nonsp);
            got_eol = false;
            /* Count every byte, including a trailing newline (still part of n_nonsp here) */
            ctrl->line_length += n_whisp + n_nonsp;
            /* A newline is not in whisp, so it shows up as the last byte of the non-blank run */
            if (line[offset + n_whisp + n_nonsp - 1] == '\n')
            {
                assert(n_nonsp > 0);
                got_eol = true;
                n_nonsp--;
            }
            /* Nothing but the newline was left in the fragment */
            if (n_whisp + n_nonsp == 0)
            {
                o_nonsp = 0;
                break;
            }
    
            /* Leading blanks mean any word carried over from the previous fragment has ended */
            if (n_whisp != 0)
            {
                ctrl->part_word = false;
                ctrl->part_len = 0;
            }
    
            /* Record the word: extend the carried-over word if there is one, else append a new entry */
            if (n_nonsp > 0)
            {
                const char *word = line + offset + n_whisp;
                if (ctrl->part_word)
                {
                    assert(ctrl->list.num_words > 0);
                    extend_word(word, n_nonsp,
                                &ctrl->list.words[ctrl->list.num_words - 1]);
                }
                else
                {
                    addword_wordlist(word, n_nonsp, &ctrl->list);
                }
            }
    
            offset += n_whisp + n_nonsp;
            /* More text follows in this fragment, so the word just handled is complete */
            if (line[offset] != '\0')
            {
                ctrl->part_word = false;
                ctrl->part_len = 0;
            }
            o_nonsp = n_nonsp;
            if (got_eol)
                break;
        }
    
        /* Partial word detection: the fragment ended on a non-blank run with no newline */
        if (o_nonsp > 0 && !got_eol)
        {
            ctrl->part_word = true;
            ctrl->part_len += o_nonsp;
        }
        else
        {
            ctrl->part_word = false;
            ctrl->part_len = 0;
        }
    
        /* If seen newline; line complete */
        /* If No newline; line incomplete */
        return !got_eol;
    }
    
    int main(void)
    {
        char line[MAX_LINE_LEN];
        size_t lineno = 0;
    
        while (fgets(line, sizeof(line), stdin) != 0)
        {
            LineControl ctrl;
            init_linectrl(&ctrl);
            lineno++;
            if (debug)
                printf("Line %zu: (%zu) [[%s]]\n", lineno, strlen(line), line);
    
            int extra = 0;
            while (parse_fragment(line, &ctrl) != 0 &&
                   fgets(line, sizeof(line), stdin) != 0)
            {
                if (debug)
                    printf("Extra %d for line %zu: (%zu) [[%s]]\n",
                           ++extra, lineno, strlen(line), line);
            }
    
            WordList *list = &ctrl.list;
            printf("Line %zu: length %zu, words = %zu\n",
                   lineno, ctrl.line_length, list->num_words);
            size_t num_words = list->num_words;
            if (num_words > MAX_WORD_CNT)
                num_words = MAX_WORD_CNT;
            for (size_t i = 0; i < num_words; i++)
            {
                printf("  %zu: (%zu) %s\n",
                       i + 1, list->words[i].length, list->words[i].word);
            }
            putchar('\n');
            free_wordlist(&ctrl.list);
        }
    
        return 0;
    }
    
    测试数据文件中包含各种制表符,正如同一份数据的以下版本所示,其中制表符显示为
    \t

        a b   
        a b      c         d                                                        
    \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t
    1123xxsdfdsfsfdsfdssa          1234ddfxxyff            frrrdds
    1123dfdffdfdxxxxxxxxxas                        1234ydfyyyzm   knsaaass      1234asdafxxfrrrfrrrsaa    
                   1123werwetrretttrretertre       aaaa     bbbbbb      ccccc        
    k
      \t\t \t \t\t\t \t \t \t\t\t\tapoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper\t\t\t    \t\t\t\tapoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper  \t \t \t \t\t\t\t \t \tapoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper\t\t           \t\t\t\t \t \t \t \t\tapoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper\t\t\t\t\t\t    \t \t \t \t      \t \t \t 
    
    运行此
    awk
    脚本分析数据:

    $ awk '{ printf "%3d %d [%s]\n", length($0) + 1, NF, $0 }' test-data.1
      1 0 []
      5 0 [    ]
     11 2 [    a b   ]
     81 4 [    a b      c         d                                                        ]
     20 0 [                                                     ]
     63 3 [1123xxsdfdsfsfdsfdssa          1234ddfxxyff            frrrdds]
    103 4 [1123dfdffdfdxxxxxxxxxas                        1234ydfyyyzm   knsaaass      1234asdafxxfrrrfrrrsaa    ]
     82 4 [               1123werwetrretttrretertre       aaaa     bbbbbb      ccccc        ]
      2 1 [k]
    494 4 [                                                 apoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper                              apoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper                                      apoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper                      apoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper                                           ]
    $
    
    该数据文件上程序的输出为:

    Line 1: length 1, words = 0
    
    Line 2: length 5, words = 0
    
    Line 3: length 11, words = 2
      1: (1) a
      2: (1) b
    
    Line 4: length 81, words = 4
      1: (1) a
      2: (1) b
      3: (1) c
      4: (1) d
    
    Line 5: length 20, words = 0
    
    Line 6: length 63, words = 3
      1: (21) 1123xxsdfdsfsfdsfdssa
      2: (12) 1234ddfxxyff
      3: (7) frrrdds
    
    Line 7: length 103, words = 4
      1: (23) 1123dfdffdfdxxxxxxxxxas
      2: (12) 1234ydfyyyzm
      3: (8) knsaaass
      4: (22) 1234asdafxxfrrrfrrrsaa
    
    Line 8: length 82, words = 4
      1: (25) 1123werwetrretttrretertre
      2: (4) aaaa
      3: (6) bbbbbb
      4: (5) ccccc
    
    Line 9: length 2, words = 1
      1: (1) k
    
    Line 10: length 494, words = 4
      1: (98) apoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper
      2: (98) apoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper
      3: (98) apoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper
      4: (98) apoplectic-catastrophe-mongers-of-the-world-unite-for-you-have-nothing-to-lose-but-your-bad-temper
    
    您可以看到输出中出现的
    awk
    脚本中的数据

    此代码在GitHub上我的SOQ(Stack Overflow Questions)存储库中,作为子目录中的文件
    scan59.c
    test-data.1
    test-data.2
    test-data.3
    提供。其中
    test-data.3
    文件尤其苛刻,它包含一行长达9955个字符、693个单词的文本,以及其他不那么苛刻的测试行


    代码使用GCC 8.2.0和Valgrind 3.14.0.GIT在运行macOS 10.13.6 High Sierra的Mac上干净地编译和运行。(虽然
    makefile
    规定了C11,但此代码中没有特定于C11的内容;它与C99完全兼容。它还可以使用
    make SFLAGS='-std=c99 -pedantic'
    干净地编译)

    您看过
    flex
    和
    bison
    吗?POSIX 的 getline() 呢?它用于读取最大长度未知的行。请记住,一次处理一个字符并不意味着每次I/O操作只读取一个字节。您可以读取一个较大但有限的块,然后在其中循环。即使逐个字符读取,文件输入默认也是带缓冲的,因此效率不会太低。使用
    fgets()
    读取(可能不完整的)行;对每行(或行片段)进行后处理,以丢弃空白并统计单词数。如果
    fgets()
    返回的内容没有以换行符结尾,则继续读取下一个缓冲区的数据,并处理当前行的其余部分。如果
    fgets()
    返回的内容以换行符结尾,则继续处理下一行。您可以考虑使用 strspn() 和 strcspn()——标准C(甚至C89)中不太为人所知的函数——来解析 fgets() 返回的数据。您看过 flex 和 bison 吗?POSIX 的 getline() 呢?它用于读取最大长度未知的行。请记住,一次处理一个字符并不意味着每次I/O操作只读取一个字节。您可以读取一个较大但有限的块,然后在其中循环。即使逐字符读取,文件输入默认也是带缓冲的