C 试着加上撇号_C_Dictionary_Nodes_Trie

C 试着加上撇号

c dictionary

C 试着加上撇号,c,dictionary,nodes,trie,C,Dictionary,Nodes,Trie,我试图用C编写一个trie来读取一个文件，并将文件中的所有单词添加到trie中，效果很好，但我无法让它接受撇号： typedef struct node { bool wordBool; struct node* next[27]; // 26 letters and one space for the apostrophe } node; node* base; int numWords = 0; bool load(const char* dictionary) {

我试图用C编写一个trie来读取一个文件，并将文件中的所有单词添加到trie中，效果很好，但我无法让它接受撇号：

 /* One trie node: an end-of-word flag plus one child link per symbol. */
struct node
{
    bool wordBool;          /* true once a word has been marked as ending here */
    struct node *next[27];  /* 26 letters and one slot for the apostrophe */
};
typedef struct node node;

/* Root of the trie; load() allocates it. */
node *base;
/* Running count of distinct words marked in the trie. */
int numWords = 0;

/*
 * Read the word list `dictionary` one character at a time and insert each
 * newline-terminated word into the trie rooted at `base`, counting newly
 * marked words in `numWords`. Prints the count and always returns true.
 *
 * NOTE(review): several defects are visible in this version; they are
 * flagged inline below and the code itself is left unchanged.
 */
bool load(const char* dictionary)
{

// NOTE(review): the result of fopen() is never checked before fgetc() uses it.
FILE* dictionaryf = fopen(dictionary, "r"); // the file to read

// NOTE(review): malloc() does not zero the node, yet wordBool and next[] are
// read below before anything is stored in them; the result is not checked.
base = malloc(sizeof(node));

// NOTE(review): traversal starts at an uninitialised local node rather than
// at base, so the first word is not attached to the trie root.
node variable;
node *currNode = &variable;

int n = 0;

while((n = fgetc(dictionaryf)) != EOF)
{
   // A newline ends the current word: mark the node and restart at the root.
   if (n == '\n')
   {
      if (!currNode->wordBool)
      {
          currNode->wordBool = true;
          numWords++;
      }
      currNode = base;
   }
   // 39 is the ASCII code of the apostrophe, so n-39 selects slot 0.
   else if (n == 39) //I tried putting this in so it would accept apostrophes
    {
         if(currNode->next[n-39] == NULL)
        {
            // NOTE(review): the new node is not zeroed either (see base above).
            currNode->next[n-39] = malloc(sizeof(node));
        } 
        currNode = currNode->next[n-39];
    }
   // Every other character: n-96 maps ASCII 'a'..'z' onto slots 1..26.
   // NOTE(review): there is no range check, so any character outside
   // 'a'..'z' (upper case, digits, '\r', ...) indexes outside next[].
   else {
        if(currNode->next[n-96] == NULL)
        {
            currNode->next[n-96] = malloc(sizeof(node));
        }      
        currNode = currNode->next[n-96];
   }
}
// Handle a final word that is not followed by a newline.
if (currNode!= base && !currNode->wordBool)
{
    currNode->wordBool = true; 
    numWords++;
}
printf("%i\n", numWords);
fclose(dictionaryf);
return true;
}

这是生成trie的代码，但它不会在trie中添加撇号

这段代码紧密基于您的代码，但解决了一个稍有不同的问题，即它接受任意文本文件并处理它，而不考虑其中的字符。在英语使用者的典型风格中，它将重音字符视为“非字母字符”（部分原因是它不使用

setlocale()

，部分原因是它不处理多字节或宽字符）。它统计每个字出现的次数（在64位机器上，它不会占用数据结构中的额外空间）。它包括一个打印功能，这对于检查它是否正确完成了工作很重要

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One trie node: word flag, occurrence counter and one child link per symbol. */
struct node
{
    bool wordBool;          /* true once a word has been marked as ending here */
    int  wordCount;         /* how many times the word ending here was seen */
    struct node *next[27];  /* 26 letters and one slot for the apostrophe */
};
typedef struct node node;

/* Trie alphabet: a character's position in this string is its child-slot
 * index (apostrophe first, then 'a' through 'z'). */
static const char trie_map[] = "'abcdefghijklmnopqrstuvwxyz";

/* Root of the trie; load() allocates it. */
static node *base = NULL;

/* Running count of distinct words marked in the trie. */
static int numWords = 0;

/* Report memory exhaustion on stderr and terminate with a failure status. */
static void oom(void)
{
    fputs("Out of memory\n", stderr);
    exit(EXIT_FAILURE);
}

/*
 * Map a character to its child-slot index in a trie node.
 *
 * c: character to classify; letters are folded to lower case first.
 *
 * Returns the position of the character in trie_map (0 for the apostrophe,
 * 1..26 for 'a'..'z'), or -1 if the character is not in the trie alphabet.
 */
static int trie_index(char c)
{
    /* strchr() treats the terminating NUL as part of the string, so a NUL
     * byte would "match" at an index one past the last child slot. */
    if (c == '\0')
        return -1;

    /* <ctype.h> functions need a value representable as unsigned char (or
     * EOF); plain char may be negative for bytes above 127. */
    const char *hit = strchr(trie_map, tolower((unsigned char)c));
    if (hit == NULL)
        return -1;

    return (int)(hit - trie_map);
}

static
bool load(const char *dictionary)
{
    FILE *dictionaryf = fopen(dictionary, "r"); // the file to read
    if (dictionaryf == 0)
        return false;

    base = calloc(sizeof(node), 1);

    node *currNode = base;

    int n;

    while ((n = fgetc(dictionaryf)) != EOF)
    {
        n = trie_index(n);
        if (n >= 0)
        {
            if (currNode->next[n] == NULL)
            {
                currNode->next[n] = calloc(sizeof(node), 1);
                if (currNode->next[n] == NULL)
                    oom();
            }
            currNode = currNode->next[n];
        }
        else if (currNode != base)
        {
            if (!currNode->wordBool)
            {
                currNode->wordBool = true;
                numWords++;
            }
            currNode->wordCount++;
            currNode = base;
        }
        /* else: consecutive non-letters, non-apostrophes */
    }

    if (currNode != base && !currNode->wordBool)
    {
        currNode->wordBool = true;
        numWords++;
    }
    printf("%i distinct words\n", numWords);
    fclose(dictionaryf);
    return true;
}

/*
 * Print every word stored in the subtree rooted at `trie`, depth first in
 * trie_map order, one "Word: <count> [<word>]" line per word.
 *
 * trie:   subtree to print; NULL is allowed and prints nothing.
 * buffer: NUL-terminated prefix spelled by the path leading to `trie` (pass
 *         "" for the root). It is used as scratch space while descending and
 *         holds the same prefix again when the function returns.
 * buflen: total size of `buffer` in bytes.
 *
 * Prints a diagnostic on stderr and exits with a failure status if a longer
 * word would not fit in `buffer`.
 */
static void print_trie(node *trie, char *buffer, size_t buflen)
{
    if (trie == NULL)
        return;

    if (trie->wordBool)
        printf("Word: %3d [%s]\n", trie->wordCount, buffer);

    const size_t len = strlen(buffer);
    const size_t fanout = sizeof trie->next / sizeof trie->next[0];

    for (size_t i = 0; i < fanout; i++)
    {
        if (trie->next[i] == NULL)
            continue;

        /* Room is needed for one more character plus the terminator. The
         * check is written as an addition so it cannot wrap for tiny buflen,
         * and it only runs when a child actually has to be appended. */
        if (len + 2 > buflen)
        {
            fprintf(stderr, "Word too long!\n[%s]\n", buffer);
            exit(EXIT_FAILURE);
        }

        buffer[len] = trie_map[i];
        buffer[len + 1] = '\0';
        print_trie(trie->next[i], buffer, buflen);

        /* Restore the caller's prefix. */
        buffer[len] = '\0';
    }
}

/*
 * Load a text file into the trie and print every word with its count.
 *
 * The file is the sole command-line argument when exactly one is given,
 * otherwise the default name "data" is used.
 *
 * Returns EXIT_SUCCESS after printing the trie, or EXIT_FAILURE (with a
 * message on stderr) if the file could not be loaded.
 */
int main(int argc, char **argv)
{
    const char *data = "data";
    if (argc == 2)
        data = argv[1];

    if (!load(data))
    {
        /* Report failure on stderr and through the exit status so that
         * scripts can detect it. */
        fprintf(stderr, "Load failed!\n");
        return EXIT_FAILURE;
    }

    printf("Loaded file '%s' OK\n", data);
    char buffer[256] = "";
    print_trie(base, buffer, sizeof(buffer));

    return EXIT_SUCCESS;
}

一些单词包括撇号（有

单独的撇号 ' 本身、

'abcdefghijklmnopqrstuvwxyz

和

s'

）。对于包含以下内容的文件：

So she went into the garden
to cut a cabbage-leaf
to make an apple-pie
and at the same time
a great she-bear coming down the street
pops its head into the shop
What no soap
So he died
and she very imprudently married the Barber
and there were present
the Picninnies
and the Joblillies
and the Garyulies
and the great Panjandrum himself
with the little round button at top
and they all fell to playing the game of catch-as-catch-can
till the gunpowder ran out at the heels of their boots

输出为：

66 distinct words
Loaded file 'great.panjandrum' OK
Word:   2 [a]
Word:   1 [all]
Word:   1 [an]
Word:   7 [and]
Word:   1 [apple]
Word:   1 [as]
Word:   3 [at]
Word:   1 [barber]
Word:   1 [bear]
Word:   1 [boots]
Word:   1 [button]
Word:   1 [cabbage]
Word:   1 [can]
Word:   2 [catch]
Word:   1 [coming]
Word:   1 [cut]
Word:   1 [died]
Word:   1 [down]
Word:   1 [fell]
Word:   1 [game]
Word:   1 [garden]
Word:   1 [garyulies]
Word:   2 [great]
Word:   1 [gunpowder]
Word:   1 [he]
Word:   1 [head]
Word:   1 [heels]
Word:   1 [himself]
Word:   1 [imprudently]
Word:   2 [into]
Word:   1 [its]
Word:   1 [joblillies]
Word:   1 [leaf]
Word:   1 [little]
Word:   1 [make]
Word:   1 [married]
Word:   1 [no]
Word:   2 [of]
Word:   1 [out]
Word:   1 [panjandrum]
Word:   1 [picninnies]
Word:   1 [pie]
Word:   1 [playing]
Word:   1 [pops]
Word:   1 [present]
Word:   1 [ran]
Word:   1 [round]
Word:   1 [same]
Word:   3 [she]
Word:   1 [shop]
Word:   2 [so]
Word:   1 [soap]
Word:   1 [street]
Word:  13 [the]
Word:   1 [their]
Word:   1 [there]
Word:   1 [they]
Word:   1 [till]
Word:   1 [time]
Word:   3 [to]
Word:   1 [top]
Word:   1 [very]
Word:   1 [went]
Word:   1 [were]
Word:   1 [what]
Word:   1 [with]

硬编码的数字通常（当然在这种情况下）是一件可怕的事情。幸运的是，该语言允许您使用诸如

'a'

和

'\''

之类的字符常量来表示字符对应的数值。也就是说，看起来你的逻辑是把撇号放在 0 号槽位，但我担心你缺少范围检查……如果字符既不是 '\''，也不在 'a' 到 'z' 之间（含两端），该怎么办？谢谢，我已经解决了您提到的两个问题。我使用的字典文件只包含撇号和字母表中的字符，所以这本来不会出问题，但我还是做了处理，因为如果换用其他加载文件，可能会导致错误。:) 请注意，

malloc

不会清除内存，因此您需要自己清除内存，或者使用

calloc

分配内存。此外，您为

base

分配内存，但随后将

currNode

指向

variable

。这不太可能有好的结局。

66 distinct words
Loaded file 'great.panjandrum' OK
Word:   2 [a]
Word:   1 [all]
Word:   1 [an]
Word:   7 [and]
Word:   1 [apple]
Word:   1 [as]
Word:   3 [at]
Word:   1 [barber]
Word:   1 [bear]
Word:   1 [boots]
Word:   1 [button]
Word:   1 [cabbage]
Word:   1 [can]
Word:   2 [catch]
Word:   1 [coming]
Word:   1 [cut]
Word:   1 [died]
Word:   1 [down]
Word:   1 [fell]
Word:   1 [game]
Word:   1 [garden]
Word:   1 [garyulies]
Word:   2 [great]
Word:   1 [gunpowder]
Word:   1 [he]
Word:   1 [head]
Word:   1 [heels]
Word:   1 [himself]
Word:   1 [imprudently]
Word:   2 [into]
Word:   1 [its]
Word:   1 [joblillies]
Word:   1 [leaf]
Word:   1 [little]
Word:   1 [make]
Word:   1 [married]
Word:   1 [no]
Word:   2 [of]
Word:   1 [out]
Word:   1 [panjandrum]
Word:   1 [picninnies]
Word:   1 [pie]
Word:   1 [playing]
Word:   1 [pops]
Word:   1 [present]
Word:   1 [ran]
Word:   1 [round]
Word:   1 [same]
Word:   3 [she]
Word:   1 [shop]
Word:   2 [so]
Word:   1 [soap]
Word:   1 [street]
Word:  13 [the]
Word:   1 [their]
Word:   1 [there]
Word:   1 [they]
Word:   1 [till]
Word:   1 [time]
Word:   3 [to]
Word:   1 [top]
Word:   1 [very]
Word:   1 [went]
Word:   1 [were]
Word:   1 [what]
Word:   1 [with]