redis7.x源码分析:(3) dict字典

dict字典采用经典hash表数据结构实现，由键值对组成，类似于C++中的unordered_map。两者在代码实现层面存在一些差异：比如gnustl的unordered_map分配的桶数组个数是质数n，而dict分配的桶数组个数是2^n；另外，dict对hash值相同的key采用了常规的开链法存储，而unordered_map在采用开链法的前提下，又使用了_M_before_begin将不同桶中的链表串联成了一个大链表，从而将遍历算法复杂度优化为O(n)；还有就是，dict为应对服务器性能上的特殊要求，设计成了双hash表的形式，这也使得它在rehash以及各种操作上存在一些特殊性，我会在下面的代码分析中说到。

dict在redis里面的用途十分广泛，几乎所有的模块都会用到，其中的两大核心用途是：

16个数据库空间
hash和zset类型数据的存储

dict相关结构定义：

// A single hash table node (one key/value pair).
typedef struct dictEntry {
    // Key of arbitrary type
    void *key;
    // Stored value; the union lets a pointer, a 64-bit integer or a double
    // share the same storage
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    // Next element of the linked list that belongs to the same bucket
    struct dictEntry *next;     /* Next entry in the same hash bucket. */
    void *metadata[];           /* An arbitrary number of bytes (starting at a
                                 * pointer-aligned address) of size as returned
                                 * by dictType's dictEntryMetadataBytes(). */
} dictEntry;

typedef struct dict dict;

// Per-type callback table: dictionaries that store different kinds of data
// install different handler functions here.
// NOTE(review): the callers of these hooks are not part of this excerpt; the
// roles suggested by the member names (hash, duplicate, compare, destroy,
// expansion policy) should be confirmed against dict.c.
typedef struct dictType {
    uint64_t (*hashFunction)(const void *key);
    void *(*keyDup)(dict *d, const void *key);
    void *(*valDup)(dict *d, const void *obj);
    int (*keyCompare)(dict *d, const void *key1, const void *key2);
    void (*keyDestructor)(dict *d, void *key);
    void (*valDestructor)(dict *d, void *obj);
    int (*expandAllowed)(size_t moreMem, double usedRatio);
    /* Allow a dictEntry to carry extra caller-defined metadata.  The
     * extra memory is initialized to 0 when a dictEntry is allocated. */
    size_t (*dictEntryMetadataBytes)(dict *d);
} dictType;

/* Number of buckets for a size exponent: 2^exp, or 0 when exp is the
 * sentinel -1. The result has type unsigned long. */
#define DICTHT_SIZE(exp) ((exp) != -1 ? 1UL << (exp) : 0UL)
/* Bit mask for a size exponent: DICTHT_SIZE(exp) - 1, or 0 when exp is the
 * sentinel -1. */
#define DICTHT_SIZE_MASK(exp) ((exp) != -1 ? DICTHT_SIZE(exp) - 1 : 0UL)

struct dict {
    // Dictionary type; each type supplies its own hash function, dup
    // functions and so on
    dictType *type;

    // Bucket arrays of the two hash tables
    dictEntry **ht_table[2];
    // Number of entries stored in each table
    unsigned long ht_used[2];
    // Rehash index (a bucket index into the hash table); a value greater
    // than -1 means a rehash is in progress
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */

    /* Keep small vars at end for optimal (minimal) struct padding */
    // Rehash pause flag
    int16_t pauserehash; /* If >0 rehashing is paused (<0 indicates coding error) */
    // Power-of-two exponent of each table: table size = 2^ht_size_exp
    signed char ht_size_exp[2]; /* exponent of size. (size = 1<<exp) */
};

hash表的创建比较简单，直接略过。先看下 _dictExpand 的实现，它在hash表扩容、缩容和创建时都会用到。通过 dictAdd 添加节点时，如果已存储的节点数与桶数之比 used / size >= 1，就需要调用它进行扩容。

/* Install a new bucket array able to hold at least `size` entries. Also used
 * to create the very first table of an empty dictionary.
 *
 * d             - dictionary to resize.
 * size          - requested capacity; rejected when it is smaller than the
 *                 number of entries already stored in table 0.
 * malloc_failed - optional out-flag. When non-NULL it is cleared on entry,
 *                 the array is obtained with ztrycalloc(), and the flag is
 *                 set to 1 if that call returns NULL. When NULL, zcalloc()
 *                 is used instead.
 *
 * Returns DICT_OK when a table was installed, DICT_ERR otherwise. */
int _dictExpand(dict *d, unsigned long size, int* malloc_failed)
{
    if (malloc_failed) *malloc_failed = 0;

    /* Refuse while a rehash is running, or when the requested size cannot
     * hold the entries already present in the first table. */
    if (dictIsRehashing(d) || d->ht_used[0] > size)
        return DICT_ERR;

    /* Exponent of the new table size. NOTE(review): _dictNextExp is defined
     * outside this excerpt; presumably it yields the smallest exponent whose
     * power of two is >= size. Confirm against dict.c. */
    signed char exp = _dictNextExp(size);

    /* Bucket count is 2^exp; bail out if either the count or the byte size
     * of the array wrapped around. */
    size_t buckets = 1ul<<exp;
    if (buckets < size || buckets * sizeof(dictEntry*) < buckets)
        return DICT_ERR;

    /* Rehashing to the same table size is not useful. */
    if (exp == d->ht_size_exp[0])
        return DICT_ERR;

    /* Allocate the zero-initialized bucket array. */
    dictEntry **table;
    if (malloc_failed == NULL) {
        table = zcalloc(buckets*sizeof(dictEntry*));
    } else {
        table = ztrycalloc(buckets*sizeof(dictEntry*));
        if (table == NULL) {
            *malloc_failed = 1;
            return DICT_ERR;
        }
    }

    /* First initialization fills slot 0 so the dictionary can accept keys;
     * otherwise the new array becomes slot 1, the target of the incremental
     * rehash. */
    int slot = (d->ht_table[0] == NULL) ? 0 : 1;
    d->ht_size_exp[slot] = exp;
    d->ht_used[slot] = 0;
    d->ht_table[slot] = table;

    /* Only a real resize starts rehashing. */
    if (slot == 1)
        d->rehashidx = 0;

    return DICT_OK;
}

/* Decide whether a dictionary should be shrunk.
 *
 * Returns 1 when the table is non-empty, has more slots than
 * DICT_HT_INITIAL_SIZE, and its fill percentage (used*100/size) is below
 * REDIS_HT_MINFILL; returns 0 otherwise. */
int htNeedsResize(dict *dict) {
    /* Total number of slots and number of stored entries. */
    long long slots = dictSlots(dict);
    long long entries = dictSize(dict);

    /* Nothing to shrink when there is no table or no entries. */
    if (!slots || !entries)
        return 0;

    /* Never shrink a table that is already at (or below) the initial size. */
    if (!(slots > DICT_HT_INITIAL_SIZE))
        return 0;

    /* Shrink only when the fill percentage is under the minimum. */
    return (entries*100/slots < REDIS_HT_MINFILL);
}

在执行databasesCron时，如果数据库满足 htNeedsResize 会进行缩容，另外hash和zset类型数据在执行删除操作时，也会判断是否需要缩容。

redis7.x源码分析:(3) dict字典

全站热榜