C 线性探测哈希表_C - Fatal编程技术网

C 线性探测哈希表

C 线性探测哈希表,c,C,我正在用线性探测编写一个哈希表，但我的程序有一个错误。我的任务是在文本中写出每个单词出现的次数。例如，我的文件包含以下文字： lol lol lol a c d 输出为： lol = 3, a = 1, c = 1, d = 2. （但是d不应该是2！）当_表的大小为10时会发生这种情况。当表的大小为2时，程序就不工作了。真正的结果必须是： lol = 3, a = 1, c = 1, d = 1. 我的代码： #include <stdio.h> #include <s

我正在用线性探测编写一个哈希表，但我的程序有一个错误。我的任务是在文本中写出每个单词出现的次数。例如，我的文件包含以下文字：

lol lol lol a c d

输出为：

lol = 3, a = 1, c = 1, d = 2.

（但是 d 不应该是 2！）当 SIZE_OF_TABLE 为 10 时会发生这种情况。当表的大小为 2 时，程序就完全不工作了。正确的结果应该是：

lol = 3, a = 1, c = 1, d = 1.

我的代码：

#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Factor by which reHT() multiplies the table size. */
#define MAX 10  
/* Number of slots main() asks init() for. */
#define SIZE_OF_TABLE 10
/* Capacity, in bytes, of the word buffer inside each struct HT. */
#define MAX_STRING 256
/* Starting value of the occupied-slot counter kept by main(). */
#define THAT_OCCUP 0

/* One slot of the word-count hash table (the table is an array of these). */
struct HT{
    int amount; // how many times the stored word has been counted
    int occup;// occupancy marker: init() writes 1 (free), put() writes -1 once a word is stored
    char string[MAX_STRING]; // the stored word, copied in with strcpy()
};

/* Hash of a NUL-terminated string; callers reduce it modulo the table size. */
unsigned int long hash(const char *str);
/* Allocates a table with `size` slots; returns NULL when size < 1 or allocation fails. */
struct HT* init(int size);
/* Rehash step; put() calls it when *occup exceeds half of *size. Returns the table to use afterwards. */
struct HT* reHT(struct HT* table,int* size,char* word, int* occup);
/* Records one occurrence of `word`; returns the table pointer the caller must keep using. */
struct HT* put(struct HT* table,char* word, int* size,int* occup);
/* Returns the count recorded for `word`, or 0 when no entry is found. */
int take(struct HT* table,char* word, int* size);



/*
 * djb2 string hash: start from 5381 and fold in every byte as
 * value = value * 33 + byte.
 *
 * str: NUL-terminated string to hash (must not be NULL).
 * Returns the hash value; callers reduce it modulo the table size.
 *
 * The accumulator is unsigned so that wrap-around on long strings is
 * well defined (signed overflow would be undefined behaviour).
 */
unsigned int long hash(const char *str) // hash function
{
    unsigned long value = 5381;
    int c = 0;

    /* Assignment, not comparison: read the next byte and stop at the terminator. */
    while ((c = (unsigned char)*str++) != 0)
        value = ((value << 5) + value) + (unsigned long)c; /* value * 33 + c */

    return value;
}

struct HT* init(int size) // create a hash table
{
    struct HT* table = (struct HT*)calloc(sizeof(struct HT*),size);
    int i = 0;

    if (size < 1)
        return NULL;

    if(NULL == table)
        return  NULL;

    for (i = 0; i < size; ++i)
    {
        table[i].amount = 0;
        table[i].occup = 1;
    }

    return table;
}
struct HT* reHT(struct HT* table,int* size,char* word, int* occup) //rehash
{
    assert(table);
    assert(word);
    assert(size);
    assert(occup);

    table = (struct HT*)calloc(sizeof(struct HT*),(*size)*MAX);
    int i = 0;

    while ( i < (*size)/MAX)
    {   
        table = put(table, table[i].string, size, occup);
        i++;
    }
    return table;
}

struct HT* put(struct HT* table,char* word, int* size,int* occup)
{
    assert(table);
    assert(word);
    assert(size);
    assert(occup);

    int i = 0;

    i = hash(word) % (*size);

    if ((*occup) > ((*size) / 2))
        table = reHT(table, size, word, occup);


    if(1 == table[i].occup ) // if free put it
    {
        strcpy(table[i].string,word);
        table[i].amount++;
        table[i].occup = -1;
        (*occup)++;
    }

    else if (-1 == table[i].occup && strstr(table[i].string,word)) // if place isnt free and it is a similar world  just increase amount
        table[i].amount++;

    else if (-1 == table[i].occup && !strstr(table[i].string,word)) // if place isnt free and  it the words arent similar then use linear probing
    {
        i++;
        while(1)
        {
            i = (i + 1) % (*size);
            if(table[i].occup == 1)
            {
                strcpy(table[i].string,word);
                table[i].amount++;
                table[i].occup = -1;
                (*occup)++;
                break;
            }
            else if ( -1 == table[i].occup && strstr(table[i].string,word))
            {
                table[i].amount++;
                break;
            }
        }

        i = 0; // go to start and do the same thing

        while(1)
        {
            if(1 == table[i].occup)
            {
                strcpy(table[i].string,word);
                table[i].amount++;
                table[i].occup = -1;
                (*occup)++;
                break;
            }

            else if (strstr(table[i].string,word) && table[i].occup == -1)
            {
                table[i].amount++;
                break;
            }

            i++;
        }
    }

    return table;
}


/*
 * Looks up how many times `word` was recorded.
 *
 * table: table to search.
 * word:  NUL-terminated word to look for (compared exactly).
 * size:  pointer to the table's slot count.
 *
 * Returns the stored count, or 0 when the word is not in the table or the
 * table has no slots.
 */
int take(struct HT* table,char* word, int* size) // take amount
{
    assert(size);
    assert(word);
    assert(table);

    int slots = *size;
    int start = 0;
    int step = 0;

    if (slots < 1)
        return 0; /* avoids a modulo by zero below */

    start = (int)(hash(word) % (unsigned long)slots);

    /*
     * Start at the word's home slot and visit every slot exactly once,
     * wrapping around. The scan does not stop at a free slot, so it does
     * not depend on the insertion probe sequence being gap-free.
     */
    for (step = 0; step < slots; ++step)
    {
        int i = (start + step) % slots;

        /* Only occupied slots count, and only an exact match is a hit. */
        if (-1 == table[i].occup && 0 == strcmp(table[i].string, word))
            return table[i].amount;
    }

    return 0;
}

/*
 * Counts the words of "text.txt" in a hash table, then answers queries
 * read from standard input until the word "END" or end of input.
 *
 * Returns 0 on success, -1 when the file cannot be opened or the table
 * cannot be created.
 */
int main(int argc, char *argv[])
{
    FILE* file = fopen("text.txt","r");
    struct HT* table = NULL;
    char string[256] = {0};

    int size = SIZE_OF_TABLE;
    int occup = THAT_OCCUP;

    (void)argc;
    (void)argv;

    if (NULL == file)
        return -1;

    table = init(size); //create hash

    if ( NULL == table)
    {
        fclose(file);
        return - 1;
    }

    /* %255s keeps every word inside the 256-byte buffer. */
    while(1 == fscanf(file, "%255s", string)) // put words to hash table
    {
        table = put(table,string,&size,&occup);
    }

    fclose(file);

    printf("HASH_TABLE IS READY!!!!11111\n");
    printf("Enter WORDS!!!1111\n");

    /* Stop on end of input as well, instead of re-querying a stale buffer forever. */
    while(1 == scanf("%255s",string))
    {
        if(0 == strcmp(string,"END")) // only the exact word "END" stops the loop
            break;
        printf(" KOLI4ESTVO!! = %d\n", take(table, string, &size)); 
    }

    free(table);
    return 0;
}

#包括
#包括
#包括
#包括
#定义最大值10
#定义表10的尺寸
#定义最大字符串256
#定义_占用0
结构HT{
整数金额；
int-occup；//占用率
字符字符串[最大字符串]；
};
无符号整数长散列（const char*str）；
结构HT*init（整数大小）；
struct HT*reHT（struct HT*table，int*size，char*word，int*occup）；
结构HT*put（结构HT*table，char*word，int*size，int*occup）；
int take（结构HT*表，字符*字，int*大小）；
无符号整数长哈希（const char*str）//哈希函数
{
int-long散列=5381；
int c=0；
而（c==*str++）
哈希=（（哈希（（*size）/2））
表=reHT（表、大小、字、占用率）；
if（1==表[i].occup）//if释放它
{
strcpy（表[i]。字符串，单词）；
表[i].金额++；
表[i]。占空比=-1；
（*占用）++；
}
else if（-1==table[i].occup&&strstr（table[i].string，word））//如果位置不是空闲的，并且是一个类似的世界，只需增加数量即可
表[i].金额++；
else if（-1==table[i].occup&&！strstr（table[i].string，word））//如果place不可用且单词不相似，则使用线性探测
{
i++；
而(1)
{
i=（i+1）%（*大小）；
if（表[i]。占用率==1）
{
strcpy（表[i]。字符串，单词）；
表[i].金额++；
表[i]。占空比=-1；
（*占用）++；
打破
}
else if（-1==表[i].occup&&strstr（表[i].string，word））
{
表[i].金额++；
打破
}
}
i=0；//转到开始并执行相同的操作
而(1)
{
如果（1==表[i]。占用）
{
strcpy（表[i]。字符串，单词）；
表[i].金额++；
表[i]。占空比=-1；
（*占用）++；
打破
}
else if（strstr（表[i]。字符串，单词）&&table[i]。占用==-1）
{
表[i].金额++；
打破
}
i++；
}
}
返回表；
}
int take（struct HT*table，char*word，int*size）//take amount
{
断言（大小）；
断言（词）；
断言（表）；
int i=0；
i=散列（字）%（*大小）；
而（i<（*尺寸））
{
if（strstr（表[i].字符串，字））
返回表[i]。金额；
i++；
}
i=0；
而（i<（*尺寸））
{
if（strstr（表[i].字符串，字））
返回表[i]。金额；
i++；
}
返回0；
}
int main（int argc，char*argv[]）
{
FILE*FILE=fopen（“text.txt”、“r”）；
struct HT*table=NULL；
字符字符串[256]={0}；
int size=表的大小；
int-occup=该_-occup；
if（NULL==文件）
返回-1；
table=init（size）；//创建哈希
if（NULL==表）
返回-1；
while（1==fscanf（文件“%s”，字符串））//将单词放入哈希表
{
table=put（表、字符串、大小和占用）；
}
printf（“哈希表准备就绪！！！！11111\n”）；
printf（“输入单词！！！1111\n”）；
而(1)
{
scanf（“%s”，字符串）；
if（strstr（string，“END”）//如果要停止，只需输入“END”
打破
printf（“KOLI4ESTVO！！=%d\n”，take（表、字符串和大小））；
}
免费（餐桌）；
返回0；
}

当我第一次编译您的代码时，我只收到两个警告——即 main() 的参数 argc 和 argv 未被使用。这做得很好——很少有程序能如此干净地编译。

我创建了一个文件

text.txt

，其中包含：

lol
abracadabra
a
a
d
d
d
a
d
c

当我运行该程序时，我得到了以下信息：

这很奇怪；我没有为任何一个单词创建那么多条目。在 valgrind 下运行时，会报告很多问题：

==98849== Invalid write of size 4
==98849==    at 0x100001173: init (ht.c:49)
==98849==    by 0x1000019A9: main (ht.c:188)
==98849==  Address 0x10080b588 is 120 bytes inside an unallocated block of size 2,736 in arena "client"
==98849== 
==98849== Invalid write of size 4
==98849==    at 0x100001180: init (ht.c:50)
==98849==    by 0x1000019A9: main (ht.c:188)
==98849==  Address 0x10080b58c is 124 bytes inside an unallocated block of size 2,736 in arena "client"
==98849== 
Adding [lol]
==98849== Invalid read of size 4
==98849==    at 0x1000015C9: put (ht.c:89)
==98849==    by 0x1000019F1: main (ht.c:196)
==98849==  Address 0x10080b58c is 124 bytes inside an unallocated block of size 2,736 in arena "client"
==98849== 
==98849== Invalid write of size 1
==98849==    at 0x1003FE3A0: _platform_memmove$VARIANT$Nehalem (in /usr/lib/system/libsystem_platform.dylib)
==98849==    by 0x1001B4113: strcpy (in /usr/lib/system/libsystem_c.dylib)
==98849==    by 0x10000175A: put (ht.c:91)
==98849==    by 0x1000019F1: main (ht.c:196)
==98849==  Address 0x10080b590 is 128 bytes inside an unallocated block of size 2,736 in arena "client"
==98849== 
==98849== Invalid write of size 4
==98849==    at 0x10000175B: put (ht.c:93)
==98849==    by 0x1000019F1: main (ht.c:196)
==98849==  Address 0x10080b58c is 124 bytes inside an unallocated block of size 2,736 in arena "client"
==98849== 
…and a whole lot more in a similar vein…

快速查看

init（）

可以发现一些问题：

struct HT* init(int size) // create a hash table
{
    struct HT* table = (struct HT*)calloc(sizeof(struct HT*),size);
    int i = 0;

    if (size < 1)
        return NULL;

    if(NULL == table)
        return  NULL;

    for (i = 0; i < size; ++i)
    {
        table[i].amount = 0;
        table[i].occup = 1;
    }

calloc(sizeof(struct HT*), size) 只按指针的大小分配空间；应改为 calloc(size, sizeof(struct HT))，这样分配的才是结构体数组，而不是指针数组。

修复这一点后，valgrind 报告的错误就消失了。但输出仍然不正确：

$ ./ht
Adding [lol]
Adding [abracadabra]
Adding [a]
Adding [a]
Adding [d]
Adding [d]
Adding [d]
Adding [a]
Adding [d]
Adding [c]
HASH_TABLE IS READY!!!!11111
Enter WORDS!!!1111
a
 KOLI4ESTVO!! [a] = 9
lol
 KOLI4ESTVO!! [lol] = 1
abracadabra
 KOLI4ESTVO!! [abracadabra] = 9
b
 KOLI4ESTVO!! [b] = 9
c
 KOLI4ESTVO!! [c] = 9
d
 KOLI4ESTVO!! [d] = 9
e
 KOLI4ESTVO!! [e] = 0
antimony
 KOLI4ESTVO!! [antimony] = 0
$

我想可以确定 9 不是巧合，因为数据文件中有 10 行。当我将数据减少到 6 行、且只有 a 重复时，输出为：

$ ./ht
Adding [lol]
Adding [abracadabra]
Adding [a]
Adding [d]
Adding [a]
Adding [c]
HASH_TABLE IS READY!!!!11111
Enter WORDS!!!1111
a
 KOLI4ESTVO!! [a] = 5
d
 KOLI4ESTVO!! [d] = 5
c
 KOLI4ESTVO!! [c] = 5
abracadabra
 KOLI4ESTVO!! [abracadabra] = 5
lol
 KOLI4ESTVO!! [lol] = 1
$

我还尝试了包含大量重复行的数据（15 行），程序崩溃了。我认为这些数字上的规律应该能给你一些提示。如果重建哈希表的代码与 init() 中的代码有类似的大小错误，我不会感到惊讶。

我认为您需要自己创建一个函数，该函数可以打印哈希表的详细信息和内容，以便您可以使用该函数查看正在创建的内容。

？

$ ./ht
Adding [lol]
Adding [abracadabra]
Adding [a]
Adding [a]
Adding [d]
Adding [d]
Adding [d]
Adding [a]
Adding [d]
Adding [c]
HASH_TABLE IS READY!!!!11111
Enter WORDS!!!1111
a
 KOLI4ESTVO!! [a] = 9
lol
 KOLI4ESTVO!! [lol] = 1
abracadabra
 KOLI4ESTVO!! [abracadabra] = 9
b
 KOLI4ESTVO!! [b] = 9
c
 KOLI4ESTVO!! [c] = 9
d
 KOLI4ESTVO!! [d] = 9
e
 KOLI4ESTVO!! [e] = 0
antimony
 KOLI4ESTVO!! [antimony] = 0
$

$ ./ht
Adding [lol]
Adding [abracadabra]
Adding [a]
Adding [d]
Adding [a]
Adding [c]
HASH_TABLE IS READY!!!!11111
Enter WORDS!!!1111
a
 KOLI4ESTVO!! [a] = 5
d
 KOLI4ESTVO!! [d] = 5
c
 KOLI4ESTVO!! [c] = 5
abracadabra
 KOLI4ESTVO!! [abracadabra] = 5
lol
 KOLI4ESTVO!! [lol] = 1
$