C 线性探测哈希表

C 线性探测哈希表,c,C,我正在用线性探测编写一个哈希表,但我的程序有一个错误。我的任务是在文本中写出每个单词出现的次数。例如,我的文件包含以下文字: lol lol lol a c d 输出为: lol = 3, a = 1, c = 1, d = 2. (但是d不应该是2!)当_表的大小为10时会发生这种情况。当表的大小为2时,程序就不工作了。真正的结果必须是: lol = 3, a = 1, c = 1, d = 1. 我的代码: #include <stdio.h> #include <s

我正在用线性探测编写一个哈希表,但我的程序有一个错误。我的任务是在文本中写出每个单词出现的次数。例如,我的文件包含以下文字:

lol lol lol a c d
输出为:

lol = 3, a = 1, c = 1, d = 2.
(但是
d
不应该是2!)当_表的大小为10时会发生这种情况。当表的大小为2时,程序就不工作了。真正的结果必须是:

lol = 3, a = 1, c = 1, d = 1.
我的代码:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>

#define MAX 10  
#define SIZE_OF_TABLE 10
#define MAX_STRING 256
#define THAT_OCCUP 0

struct HT{
    int amount;
    int occup;//occupancy   
    char string[MAX_STRING];
};

unsigned int long hash(const char *str);
struct HT* init(int size);
struct HT* reHT(struct HT* table,int* size,char* word, int* occup);
struct HT* put(struct HT* table,char* word, int* size,int* occup);
int take(struct HT* table,char* word, int* size);



unsigned int long hash(const char *str) // hash function
{
    int long hash = 5381;
    int c = 0;

    while (c == *str++)
        hash = ((hash << 5) + hash) + c; /* hash * 33 + c */

    return hash;
}

struct HT* init(int size) // create a hash table
{
    struct HT* table = (struct HT*)calloc(sizeof(struct HT*),size);
    int i = 0;

    if (size < 1)
        return NULL;

    if(NULL == table)
        return  NULL;

    for (i = 0; i < size; ++i)
    {
        table[i].amount = 0;
        table[i].occup = 1;
    }

    return table;
}
struct HT* reHT(struct HT* table,int* size,char* word, int* occup) //rehash
{
    assert(table);
    assert(word);
    assert(size);
    assert(occup);

    table = (struct HT*)calloc(sizeof(struct HT*),(*size)*MAX);
    int i = 0;

    while ( i < (*size)/MAX)
    {   
        table = put(table, table[i].string, size, occup);
        i++;
    }
    return table;
}

struct HT* put(struct HT* table,char* word, int* size,int* occup)
{
    assert(table);
    assert(word);
    assert(size);
    assert(occup);

    int i = 0;

    i = hash(word) % (*size);

    if ((*occup) > ((*size) / 2))
        table = reHT(table, size, word, occup);


    if(1 == table[i].occup ) // if free put it
    {
        strcpy(table[i].string,word);
        table[i].amount++;
        table[i].occup = -1;
        (*occup)++;
    }

    else if (-1 == table[i].occup && strstr(table[i].string,word)) // if place isnt free and it is a similar world  just increase amount
        table[i].amount++;

    else if (-1 == table[i].occup && !strstr(table[i].string,word)) // if place isnt free and  it the words arent similar then use linear probing
    {
        i++;
        while(1)
        {
            i = (i + 1) % (*size);
            if(table[i].occup == 1)
            {
                strcpy(table[i].string,word);
                table[i].amount++;
                table[i].occup = -1;
                (*occup)++;
                break;
            }
            else if ( -1 == table[i].occup && strstr(table[i].string,word))
            {
                table[i].amount++;
                break;
            }
        }

        i = 0; // go to start and do the same thing

        while(1)
        {
            if(1 == table[i].occup)
            {
                strcpy(table[i].string,word);
                table[i].amount++;
                table[i].occup = -1;
                (*occup)++;
                break;
            }

            else if (strstr(table[i].string,word) && table[i].occup == -1)
            {
                table[i].amount++;
                break;
            }

            i++;
        }
    }

    return table;
}


int take(struct HT* table,char* word, int* size) // take amount
{
    assert(size);
    assert(word);
    assert(table);

    int i = 0;

    i = hash(word) % (*size);

    while( i < (*size))
    {
        if(strstr(table[i].string, word))
            return table[i].amount;
        i++;
    }

    i = 0;

    while( i < (*size))
    {
        if(strstr(table[i].string, word))
            return table[i].amount;
        i++;
    }

    return 0;
}

int main(int argc, char *argv[])
{
    FILE* file = fopen("text.txt","r");
    struct HT* table = NULL;
    char string[256] = {0};

    int size = SIZE_OF_TABLE;
    int occup = THAT_OCCUP;

    if (NULL == file)
        return -1;

    table = init(size); //create hash

    if ( NULL == table)
        return - 1;

    while(1 == fscanf(file, "%s", string)) // put words to hash table
    {
        table = put(table,string,&size,&occup);
    }

    printf("HASH_TABLE IS READY!!!!11111\n");
    printf("Enter WORDS!!!1111\n");

    while(1)
    {
        scanf("%s",string);
        if(strstr(string,"END")) // if you want to stop just enter "END"
            break;
        printf(" KOLI4ESTVO!! = %d\n", take(table, string, &size)); 
    }

    free(table);
    return 0;
}
#包括
#包括
#包括
#包括
#定义最大值10
#定义表10的尺寸
#定义最大字符串256
#定义_占用0
结构HT{
整数金额;
int-occup;//占用率
字符字符串[最大字符串];
};
无符号整数长散列(const char*str);
结构HT*init(整数大小);
struct HT*reHT(struct HT*table,int*size,char*word,int*occup);
结构HT*put(结构HT*table,char*word,int*size,int*occup);
int take(结构HT*表,字符*字,int*大小);
无符号整数长哈希(const char*str)//哈希函数
{
int-long散列=5381;
int c=0;
而(c==*str++)
哈希=((哈希((*size)/2))
表=reHT(表、大小、字、占用率);
if(1==表[i].occup)//if释放它
{
strcpy(表[i]。字符串,单词);
表[i].金额++;
表[i]。占空比=-1;
(*占用)++;
}
else if(-1==table[i].occup&&strstr(table[i].string,word))//如果位置不是空闲的,并且是一个类似的世界,只需增加数量即可
表[i].金额++;
else if(-1==table[i].occup&&!strstr(table[i].string,word))//如果place不可用且单词不相似,则使用线性探测
{
i++;
而(1)
{
i=(i+1)%(*大小);
if(表[i]。占用率==1)
{
strcpy(表[i]。字符串,单词);
表[i].金额++;
表[i]。占空比=-1;
(*占用)++;
打破
}
else if(-1==表[i].occup&&strstr(表[i].string,word))
{
表[i].金额++;
打破
}
}
i=0;//转到开始并执行相同的操作
而(1)
{
如果(1==表[i]。占用)
{
strcpy(表[i]。字符串,单词);
表[i].金额++;
表[i]。占空比=-1;
(*占用)++;
打破
}
else if(strstr(表[i]。字符串,单词)&&table[i]。占用==-1)
{
表[i].金额++;
打破
}
i++;
}
}
返回表;
}
int take(struct HT*table,char*word,int*size)//take amount
{
断言(大小);
断言(词);
断言(表);
int i=0;
i=散列(字)%(*大小);
而(i<(*尺寸))
{
if(strstr(表[i].字符串,字))
返回表[i]。金额;
i++;
}
i=0;
而(i<(*尺寸))
{
if(strstr(表[i].字符串,字))
返回表[i]。金额;
i++;
}
返回0;
}
int main(int argc,char*argv[])
{
FILE*FILE=fopen(“text.txt”、“r”);
struct HT*table=NULL;
字符字符串[256]={0};
int size=表的大小;
int-occup=该_-occup;
if(NULL==文件)
返回-1;
table=init(size);//创建哈希
if(NULL==表)
返回-1;
while(1==fscanf(文件“%s”,字符串))//将单词放入哈希表
{
table=put(表、字符串、大小和占用);
}
printf(“哈希表准备就绪!!!!11111\n”);
printf(“输入单词!!!1111\n”);
而(1)
{
scanf(“%s”,字符串);
if(strstr(string,“END”)//如果要停止,只需输入“END”
打破
printf(“KOLI4ESTVO!!=%d\n”,take(表、字符串和大小));
}
免费(餐桌);
返回0;
}

当我第一次编译您的代码时,我只收到两个警告-即
argc
argv
main()
未使用。这做得很好-很少有程序能如此干净地编译

我创建了一个文件
text.txt
,其中包含:

lol
abracadabra
a
a
d
d
d
a
d
c
当我运行该程序时,我得到了以下信息:

这很奇怪;我没有为任何一个单词创建那么多条目。在下运行时,会出现很多问题:

==98849== Invalid write of size 4
==98849==    at 0x100001173: init (ht.c:49)
==98849==    by 0x1000019A9: main (ht.c:188)
==98849==  Address 0x10080b588 is 120 bytes inside an unallocated block of size 2,736 in arena "client"
==98849== 
==98849== Invalid write of size 4
==98849==    at 0x100001180: init (ht.c:50)
==98849==    by 0x1000019A9: main (ht.c:188)
==98849==  Address 0x10080b58c is 124 bytes inside an unallocated block of size 2,736 in arena "client"
==98849== 
Adding [lol]
==98849== Invalid read of size 4
==98849==    at 0x1000015C9: put (ht.c:89)
==98849==    by 0x1000019F1: main (ht.c:196)
==98849==  Address 0x10080b58c is 124 bytes inside an unallocated block of size 2,736 in arena "client"
==98849== 
==98849== Invalid write of size 1
==98849==    at 0x1003FE3A0: _platform_memmove$VARIANT$Nehalem (in /usr/lib/system/libsystem_platform.dylib)
==98849==    by 0x1001B4113: strcpy (in /usr/lib/system/libsystem_c.dylib)
==98849==    by 0x10000175A: put (ht.c:91)
==98849==    by 0x1000019F1: main (ht.c:196)
==98849==  Address 0x10080b590 is 128 bytes inside an unallocated block of size 2,736 in arena "client"
==98849== 
==98849== Invalid write of size 4
==98849==    at 0x10000175B: put (ht.c:93)
==98849==    by 0x1000019F1: main (ht.c:196)
==98849==  Address 0x10080b58c is 124 bytes inside an unallocated block of size 2,736 in arena "client"
==98849== 
…and a whole lot more in a similar vein…
快速查看
init()
可以发现一些问题:

struct HT* init(int size) // create a hash table
{
    struct HT* table = (struct HT*)calloc(sizeof(struct HT*),size);
    int i = 0;

    if (size < 1)
        return NULL;

    if(NULL == table)
        return  NULL;

    for (i = 0; i < size; ++i)
    {
        table[i].amount = 0;
        table[i].occup = 1;
    }
这将分配一个结构数组,而不是指针数组

正在修复删除
valgrind
错误的问题。输出仍然不正确,但:

$ ./ht
Adding [lol]
Adding [abracadabra]
Adding [a]
Adding [a]
Adding [d]
Adding [d]
Adding [d]
Adding [a]
Adding [d]
Adding [c]
HASH_TABLE IS READY!!!!11111
Enter WORDS!!!1111
a
 KOLI4ESTVO!! [a] = 9
lol
 KOLI4ESTVO!! [lol] = 1
abracadabra
 KOLI4ESTVO!! [abracadabra] = 9
b
 KOLI4ESTVO!! [b] = 9
c
 KOLI4ESTVO!! [c] = 9
d
 KOLI4ESTVO!! [d] = 9
e
 KOLI4ESTVO!! [e] = 0
antimony
 KOLI4ESTVO!! [antimony] = 0
$
我想我已经确定了9不是巧合,因为数据文件中有10行。当我将数据减少到6行时,仅重复
a
,输出为:

$ ./ht
Adding [lol]
Adding [abracadabra]
Adding [a]
Adding [d]
Adding [a]
Adding [c]
HASH_TABLE IS READY!!!!11111
Enter WORDS!!!1111
a
 KOLI4ESTVO!! [a] = 5
d
 KOLI4ESTVO!! [d] = 5
c
 KOLI4ESTVO!! [c] = 5
abracadabra
 KOLI4ESTVO!! [abracadabra] = 5
lol
 KOLI4ESTVO!! [lol] = 1
$
我还尝试了大量重复的行(15行),程序崩溃了。我认为命理学应该给你一些提示。我不会惊讶地发现重建哈希表的代码与
init()
中的代码有类似的大小错误

我认为您需要自己创建一个函数,该函数可以打印哈希表的详细信息和内容,以便您可以使用该函数查看正在创建的内容。

$ ./ht
Adding [lol]
Adding [abracadabra]
Adding [a]
Adding [a]
Adding [d]
Adding [d]
Adding [d]
Adding [a]
Adding [d]
Adding [c]
HASH_TABLE IS READY!!!!11111
Enter WORDS!!!1111
a
 KOLI4ESTVO!! [a] = 9
lol
 KOLI4ESTVO!! [lol] = 1
abracadabra
 KOLI4ESTVO!! [abracadabra] = 9
b
 KOLI4ESTVO!! [b] = 9
c
 KOLI4ESTVO!! [c] = 9
d
 KOLI4ESTVO!! [d] = 9
e
 KOLI4ESTVO!! [e] = 0
antimony
 KOLI4ESTVO!! [antimony] = 0
$
$ ./ht
Adding [lol]
Adding [abracadabra]
Adding [a]
Adding [d]
Adding [a]
Adding [c]
HASH_TABLE IS READY!!!!11111
Enter WORDS!!!1111
a
 KOLI4ESTVO!! [a] = 5
d
 KOLI4ESTVO!! [d] = 5
c
 KOLI4ESTVO!! [c] = 5
abracadabra
 KOLI4ESTVO!! [abracadabra] = 5
lol
 KOLI4ESTVO!! [lol] = 1
$