But this might be overkill.

If you want to parse a file like this,

2409, blah, blah, blah
   0x10,foo,    bar,    baz, qux
# This is more difficult.
    010   , a\
a,   b  b\#\\\,still b,c
it might be better to use a parser generator; my favourite, re2c, is used in the listing below.
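For readers who have not met it, re2c is a lexer generator: it reads a C source file containing /*!re2c ... */ blocks and rewrites those blocks into a scanner, leaving the surrounding C untouched. The fragment below is a minimal, hypothetical sketch of that workflow; the file name sketch.re and the is_number helper are mine for illustration, not part of the answer's program, which uses the same mechanism inside lex().

/* sketch.re: hypothetical minimal example. Generate C with
   `re2c -o sketch.c sketch.re`, then compile sketch.c as usual. */
#include <stdio.h>

/* Returns 1 if `s` is a decimal number terminated by '\0', else 0. */
static int is_number(const char *s) {
    const char *YYCURSOR = s, *YYMARKER = s;
    /*!re2c
        re2c:yyfill:enable   = 0;
        re2c:define:YYCTYPE  = char;
        [1-9][0-9]* "\x00" { return 1; }
        *                  { return 0; }
    */
}

int main(void) {
    printf("%d %d\n", is_number("2409"), is_number("24a9")); /* Prints 1 0. */
    return 0;
}

The complete program follows.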

#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <limits.h>
#include <assert.h>

/* Tokens. */
#define PARAM(A) A
#define STRINGISE(A) #A
#define TOKENS(X) X(ERROR), X(END), X(COMMA), X(NEWLINE), \
    X(ESCAPE), X(WSP), X(NUMBER), X(WORD)
enum Token { TOKENS(PARAM) };
static const char *const tokens[] = { TOKENS(STRINGISE) };

/* [from, cursor) brackets the current lexeme; marker is re2c's backtracking
   position; line is the current line number, used in error reports. */
struct Lexer { size_t line; char *marker, *from, *cursor; };

static enum Token lex(struct Lexer *lexer) {
    assert(lexer);
/*!re2c
    re2c:yyfill:enable   = 0;
    re2c:define:YYCTYPE  = char;
    re2c:define:YYCURSOR = lexer->cursor;
    re2c:define:YYMARKER = lexer->marker; // Rules overlap.

    newline = "\n" | ("\r" "\n"?);
    oct = "0" [0-7]*;
    dec = [1-9][0-9]*;
    hex = '0x' [0-9a-fA-F]+;
    num = oct | dec | hex;
    word = [^\x00\\\n\r \t\v\f,0-9]+;
    comment = "#" [^\x00\n\r]* newline;
*/
scan:
    lexer->from = lexer->cursor;
/*!re2c
    * { return ERROR; }
    "\x00" { return END; }
    [ \t\v\f]+ { return WSP; }
    newline { lexer->line++; return NEWLINE; }
    "\\\n" | comment { lexer->line++; goto scan; }
    "\\\\" | "\\," | "\\ " | "\\n" | "\\#" { return ESCAPE; }
    "," { return COMMA; }
    word { return WORD; }
    num { return NUMBER; }
*/
}

struct Buffer {
    char *data;
    size_t size, capacity;
};

/* Ensures `reserve` more bytes are available past `buf->size` and returns a
   pointer to them; returns null and sets errno on overflow or allocation
   failure. The caller advances `buf->size` after writing. */
static char *buffer_reserve(struct Buffer *const buf, const size_t reserve) {
    const size_t min = buf->size + reserve;
    size_t c = buf->capacity;
    char *data;
    assert(buf);
    if(reserve > (size_t)-1 - buf->size || min > ((size_t)-1 >> 1) + 1)
        { errno = ERANGE; return 0; }
    if(min > c) {
        if(!c) c = 1;
        while(min > c) c <<= 1; /* Grow geometrically until it fits. */
        if(!(data = realloc(buf->data, c))) return 0;
        buf->data = data;
        buf->capacity = c;
    }
    return buf->data + buf->size;
}

struct Word { char *start, *end; };

struct Parser {
    int id, id_set, first_comma;
    size_t num_words;
    struct Word words[64]; /* Lazy. */
    char *start_words, *end_words;
};
/* Number of elements in Parser.words, computed without an instance. */
static size_t parser_max_words = sizeof ((struct Parser *)0)->words
    / sizeof *((struct Parser *)0)->words;

static void clear_parser(struct Parser *const parser) {
    assert(parser);
    parser->id_set = 0;
    parser->first_comma = 1;
    parser->num_words = 0;
    parser->start_words = parser->end_words = 0;
}
static void print_parser(const struct Parser *const parser) {
    const struct Word *word = parser->words,
        *word_end = parser->words + parser->num_words;
    assert(parser && parser->id_set && parser->num_words <= parser_max_words);
    printf("#%d: ", parser->id);
    for( ; word < word_end; word++) {
        if(word != parser->words) printf(", ");
        if(!word->start) { printf("<null>"); continue; }
        assert(word->start <= word->end);
        if(word->start == word->end) { printf("<empty>"); continue; }
        printf("<%.*s>", (int)(word->end - word->start), word->start);
    }
    fputc('\n', stdout);
}
/* Starts or extends the span of the word being accumulated so that it covers
   the lexeme just scanned; the length is clamped to INT_MAX so it can later
   be printed with "%.*s". */
static void expand_word(struct Parser *const parser,
    const struct Lexer *const lexer) {
    assert(parser && lexer && lexer->from < lexer->cursor);
    if(!parser->start_words) {
        assert(!parser->end_words);
        parser->start_words = lexer->from;
    }
    parser->end_words = (lexer->from + INT_MAX >= lexer->cursor) ?
        lexer->cursor : lexer->from + INT_MAX;
}
/* Pushes the accumulated span as the next word and resets it; fails with
   EILSEQ when the fixed-size words array is full. */
static int store_word(struct Parser *const parser) {
    struct Word *word;
    assert(parser);
    if(parser->num_words >= parser_max_words) return errno = EILSEQ, 0;
    word = parser->words + parser->num_words++;
    word->start = parser->start_words;
    word->end = parser->end_words;
    parser->start_words = parser->end_words = 0;
    return 1;
}

int main(int argc, char **argv) {
    const size_t granularity = 1024;
    struct Lexer lexer = { 1, 0, 0, 0 };
    struct Parser parser;
    size_t nread;
    struct Buffer buf = { 0, 0, 0 };
    char *b;
    FILE *fp = 0;
    int success = EXIT_FAILURE, end_of_buffer = 0;

    /* Open. */
    if(argc != 2) return fprintf(stderr, "Needs filename.\n"), EXIT_FAILURE;
    if(!(fp = fopen(argv[1], "r"))) goto catch;

    /* Read. */
    do {
        if(!(b = buffer_reserve(&buf, granularity))) goto catch;
        nread = fread(b, 1, granularity, fp);
        buf.size += nread;
    } while(nread == granularity);
    if(ferror(fp)) goto catch;
    fclose(fp), fp = 0;
    if(!(b = buffer_reserve(&buf, 1))) goto catch;
    *b = '\0'; /* Make sure it's a string. */

    /* Parse. */
    lexer.cursor = buf.data;
    clear_parser(&parser);
    do {
        enum Token tok;
        switch((tok = lex(&lexer))) {
        case ERROR: goto catch;
        case END: end_of_buffer = 1; break;
        case COMMA:
            if(!parser.id_set) { errno = EILSEQ; goto catch; }
            if(parser.first_comma) { parser.first_comma = 0; break; }
            if(!store_word(&parser)) goto catch;
            break;
        case NEWLINE:
            if(parser.id_set) {
                /* We require at least key, data. */
                if(!store_word(&parser)) goto catch;
                print_parser(&parser);
                clear_parser(&parser);
            } else if(parser.start_words) {
                errno = EILSEQ; goto catch;
            }
            break;
        case ESCAPE:
            if(!parser.id_set) { errno = EILSEQ; goto catch; }
            expand_word(&parser, &lexer);
            break;
        case WSP: break;
        case NUMBER:
            if(parser.id_set) {
                expand_word(&parser, &lexer);
            } else {
                char *end;
                long i = strtol(lexer.from, &end, 0);
                if(end != lexer.cursor || i < INT_MIN || i > INT_MAX)
                    { errno = EDOM; goto catch; }
                parser.id = (int)i;
                parser.id_set = 1;
            }
            break;
        case WORD:
            expand_word(&parser, &lexer);
            break;
        }
    } while(!end_of_buffer);
    success = EXIT_SUCCESS;
    goto finally;
catch:
    fprintf(stderr, "While on line %lu.\n", (unsigned long)lexer.line);
    perror("parsing");
    assert(!lexer.from || (lexer.from < lexer.cursor
        && lexer.from + INT_MAX >= lexer.cursor));
    if(lexer.from) fprintf(stderr, "While on %.*s.\n",
        (int)(lexer.cursor - lexer.from), lexer.from);
finally:
    free(buf.data);
    if(fp) fclose(fp);
    return success;
}

prints

#2409: <blah>, <blah>, <blah>
#16: <foo>, <bar>, <baz>, <qux>
#8: <a\
a>, <b  b\#\\\,still b>, <c>
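Note that the words are stored with their escape sequences intact, which is why the output above shows <b  b\#\\\,still b> verbatim. If the unescaped text is wanted, one could post-process each stored struct Word (its start and end pointers); the helper below is a hypothetical sketch along those lines, not part of the listing above.

#include <stdio.h>

/* Hypothetical helper: copies the half-open range [start, end) into `out`,
   dropping the backslash of each two-character escape the lexer accepted,
   e.g. "\," -> ",", "\\" -> "\", "\#" -> "#", "\ " -> " ".
   `out` must have room for end - start + 1 bytes. */
static void unescape_word(const char *start, const char *end, char *out) {
    while(start < end) {
        if(*start == '\\' && start + 1 < end) start++; /* Skip the backslash. */
        *out++ = *start++;
    }
    *out = '\0';
}

int main(void) {
    char plain[32];
    const char raw[] = "b  b\\#\\\\\\,still b";
    unescape_word(raw, raw + sizeof raw - 1, plain);
    printf("%s\n", plain); /* Prints: b  b#\,still b */
    return 0;
}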