Data structures 双向哈希表_Data Structures_Hash_Hashmap_Hashtable

Data structures 双向哈希表

data-structures hash

Data structures 双向哈希表,data-structures,hash,hashmap,hashtable,Data Structures,Hash,Hashmap,Hashtable,对于某些编译语言，是否有一个高效的哈希表实现，它将键（整数）映射到值（字符串），反之亦然当然，一个表可能总是有两个表，一个用于key=>value映射，另一个用于value=>key。然而，这不是很有效，至少在内存方面不是这样。如果类型系统和预期用途允许，两个映射可能都可以在一个表中。其中一个名称是BiMap（如双向映射）。明显的限制是键是不同的（就像在普通字典/映射中一样），但值也是不同的对于Java，有一个简单的定义，但一般的建议是 < C和C++，在内部，您提到的“低效”实现保留了两

对于某些编译语言，是否有一个高效的哈希表实现，它将键（整数）映射到值（字符串），反之亦然

当然，一个表可能总是有两个表，一个用于key=>value映射，另一个用于value=>key。然而，这不是很有效，至少在内存方面不是这样。如果类型系统和预期用途允许，两个映射可能都可以在一个表中。

其中一个名称是BiMap（如双向映射）。明显的限制是键是不同的（就像在普通字典/映射中一样），但值也是不同的

对于Java，有一个简单的定义，但一般的建议是

对于C和C++，可以使用Boost.Bimap。

在内部，它保留了两个哈希表，也就是您提到的“低效”实现。但实际上它是高效的：为二级查找结构多用一倍内存，很少会成为大问题。

这是我用于bihash的数据结构：开销是每个条目四个整数（对于索引）

在下面的示例中，使用

typedef unsigned char Index，开销为四个字符，最大表容量为255

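/* 出于演示目的，这些类型非常小。
** 通常会使用 unsigned short 或 unsigned int。
*/
typedef unsigned char Index;
typedef unsigned short Hashval;
/* 使用最大可表示值作为哨兵值 */
#define NIL ((Index)-1)
struct entry {
        Index head_key          /* 链头指针（键） */
                , head_val;     /* ……以及值的链头 */
        Index next_key          /* 用于链接键的链表 */
                , next_val;     /* ……以及值的链表 */
        };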
struct table {
        unsigned totlen, keylen;
                /* free 指向空闲链表的头部 */
        Index size, free;
                /* 键和值的完整有效负载。
                 * 布局 = [key0|val0|key1|val1|..]（无填充/对齐）
                 */
        char *data;
        struct entry *entries; /* 所有条目。不是指针，而是实际条目 */
        };
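/* 用于访问有效负载池的宏 */
#define NODE_KEY(p,n) ((p)->data + (n) * (p)->totlen)
#define NODE_VAL(p,n) ((p)->data + (n) * ((p)->totlen+(p)->keylen))
#define TH_OK 0
#define TH_KEY_NOT_FOUND 1
#define TH_VAL_NOT_FOUND 2
#define TH_BOTH_NOT_FOUND 3
#define TH_TABLE_FULL 4
#define TH_KEY_DUPLICATE 5
#define TH_VAL_DUPLICATE 6
#define TH_BOTH_DUPLICATE 7
#define TH_TOTAL_ECLIPSE 8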
/********************************************/
/* 分配并初始化哈希表。
** 注：给定固定大小，表和有效负载可以静态分配，
** （但我们仍然需要进行初始化）
*/
struct table * table_new( unsigned keylen, unsigned vallen, unsigned totcount )
{
Index idx;
struct table *this;
if (totcount > NIL) {
        fprintf(stderr, "Table_new(%zu,%zu,%zu): totcount(%zu) larger than largest Index(%zu)\n"
                , (size_t) keylen, (size_t) vallen, (size_t) totcount
                , (size_t) totcount, ((size_t)NIL) -1 );
        return NULL;
        }
this = malloc (sizeof *this);
this->size = totcount;
this->keylen = keylen;
this->totlen = keylen+vallen;
this->data = malloc (totcount * this->totlen );
this->entries = malloc (totcount * sizeof *this->entries );
this->free = 0; /* 空闲链表的开始 */
for( idx=0; idx < this->size; idx++ ) {
        this->entries[idx].head_key = NIL;
        this->entries[idx].head_val = NIL;
        this->entries[idx].next_key = NIL;
        this->entries[idx].next_val = idx+1; /* 未使用的 next 指针被重新用作空闲链表 */
        };
this->entries[idx-1].next_val = NIL; /* 空闲链表结束 */
fprintf(stderr, "Table_new(%zu,%zu,%zu) size = %zu+%zu+%zu\n"
                , (size_t) keylen, (size_t) vallen, (size_t) totcount
        , sizeof *this, (size_t)totcount * this->totlen, totcount * sizeof *this->entries
         );
return this;
}

您如何知道它内部保存了两个哈希表？你说的“它”是什么意思？因为“这两个BIMAP都有相同的数据支持”，从中我可以推测，没有双重性。说“Boost.MultiIndex实际上是bimap容器的核心”，在我看来，只有一组数据有两个索引。请注意两个内部哈希表。“相同数据”的意思是存储在这两个映射中的引用是相同的，因此数据实际上没有被复制，只是引用而已。实现起来并不难。只要用payload+（2组{head，next}指针）创建条目，再加上一些处理它的机制就可以了。对，这似乎比使用两个哈希表更节省内存。另一方面，指针在实现表的重新分配时可能会引起头痛。固定大小与可变大小可能是一种设计选择。OTOH调整大小/倍增并不是那么难（如果不使用存储指针，它相对容易），但在您的方法中，您会，不？不，在这种情况下，我更喜欢为{head，next}存储索引，因为在调整大小/realloc（）之后，这些索引是稳定的
        /* Index and hash-value types. They are deliberately tiny here to keep
        ** the demonstration small; unsigned short or unsigned int would be
        ** the usual choice.
        */
typedef unsigned short Hashval;
typedef unsigned char Index;
        /* Sentinel: the largest (all-ones) Index value. */
#define NIL ((Index)~(Index)0)

/* One slot of the table's entry array. All four links are Index values
 * (positions in that array), not pointers; NIL means "none".
 */
struct entry {
        Index head_key;         /* head of the chain for keys */
        Index head_val;         /* head of the chain for values */
        Index next_key;         /* next entry in the key chain */
        Index next_val;         /* next entry in the value chain */
};

/* The table header: sizes, free-list head, payload pool and entry array. */
struct table {
        unsigned totlen;        /* bytes per record (key plus value) */
        unsigned keylen;        /* bytes of key at the start of each record */
        Index size;             /* number of entries */
        Index free;             /* first entry on the free list */
        /* The complete payload, for both keys and values.
         * layout = [key0|val0|key1|val1|..] (without padding/alignment)
         */
        char *data;
        struct entry *entries;  /* array of actual entries, not of pointers */
};
        /* Macros for accessing the pool of payload.
        ** Record n starts at data + n*totlen; the key comes first and the
        ** value follows keylen bytes later, matching the documented layout
        ** [key0|val0|key1|val1|..].
        ** Both macros evaluate p more than once, so p must have no side effects.
        */
#define NODE_KEY(p,n) ((p)->data + (n) * (p)->totlen)
#define NODE_VAL(p,n) ((p)->data + (n) * (p)->totlen + (p)->keylen)

/* Status codes. The functions that return them are not visible in this
 * excerpt, so the notes below are read off the names only (TODO confirm).
 * In 1..3 and 5..7 the low two bits follow the same pattern:
 * 1 = key, 2 = value, 3 = both.
 */
#define TH_OK              0    /* presumably: success */
#define TH_KEY_NOT_FOUND   1
#define TH_VAL_NOT_FOUND   2
#define TH_BOTH_NOT_FOUND  3
#define TH_TABLE_FULL      4    /* presumably: no free entry left */
#define TH_KEY_DUPLICATE   5
#define TH_VAL_DUPLICATE   6
#define TH_BOTH_DUPLICATE  7
#define TH_TOTAL_ECLIPSE   8    /* meaning not evident from this excerpt */

/********************************************/

    /* Allocate and initialise the hash table.
    ** Note: given fixed size, the table and the payload could be statically allocated,
    ** (but we'd still need to do the initialisation)
    */


struct table * table_new( unsigned keylen, unsigned vallen, unsigned totcount )
{
Index idx;
struct table *this;

if (totcount > NIL) {
        fprintf(stderr, "Table_new(%zu,%zu,%zu): totcount(%zu) larger than largest Index(%zu)\n"
                , (size_t) keylen, (size_t) vallen, (size_t) totcount
                , (size_t) totcount, ((size_t)NIL) -1 );
        return NULL;
        }
this = malloc (sizeof *this);
this->size = totcount;
this->keylen = keylen;
this->totlen = keylen+vallen;
this->data = malloc (totcount * this->totlen );
this->entries = malloc (totcount * sizeof *this->entries );

this->free = 0; /* start of freelist */
for( idx=0; idx < this->size; idx++ ) {
        this->entries[idx].head_key = NIL;
        this->entries[idx].head_val = NIL;
        this->entries[idx].next_key = NIL;
        this->entries[idx].next_val = idx+1; /* unused next pointer reused as freelist */
        };
this-> entries[idx-1].next_val = NIL; /* end of freelist */

fprintf(stderr, "Table_new(%zu,%zu,%zu) size = %zu+%zu+%zu\n"
                , (size_t) keylen, (size_t) vallen, (size_t) totcount
        , sizeof *this, (size_t)totcount * this->totlen, totcount * sizeof *this->entries
         );

return this;
}