dict.c

Redis的字典结构，说起来和Java的HashMap有点相似。
主要是由dict.c来实现，在dict.h中进行了定义。
包含了dict、dictType、dictEntry、dictht四种结构。

字典的结构定义如下：

//字典结构
typedef struct dict {
    //字典类型
    dictType *type;
    //私有数据
    void *privdata;.
    //哈希表2个，0号和1号
    dictht ht[2];
    //用来记录rehash进度的标记，如果没有在rehash，则值为-1
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    //当前正在迭代的迭代器数
    unsigned long iterators; /* number of iterators currently running */
} dict;

字典结构包含了:

字典类型dictType
私有数据privdata
哈希表ht，有2个，一个正常使用，一个rehash的时候使用
rehash进度rehashidx
当前正在迭代的迭代器数iterators

字典类型的定义如下：

//字典类型
typedef struct dictType {
    //hash函数
    uint64_t (*hashFunction)(const void *key);
    //复制key
    void *(*keyDup)(void *privdata, const void *key);
    //复制value
    void *(*valDup)(void *privdata, const void *obj);
    //key比较器
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    //回收key
    void (*keyDestructor)(void *privdata, void *key);
    //回收value
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

字典类型结构包含了:

hash函数hashFunction，传入key计算出hash值
复制keykeyDup
复制valuevalueDup
key的比较器keyCompare
回收key的析构函数keyDestructor
回收value的析构函数valueDestructor

哈希表的结构如下：


/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
//哈希表结构
typedef struct dictht {
    //哈希表数组，内部采用拉链法
    dictEntry **table
    //哈希表大小
    unsigned long size;
    //哈希表大小掩码
    unsigned long sizemask;
    //哈希表现有节点的数量
    unsigned long used;
} dictht;

哈希表结构包含了:

哈希表数组dictEntry **table，哈希桶dictEntry内部则是链表结构。
哈希表大小size
哈希表大小掩码sizemask
哈希表现有节点数used

哈希桶的定义如下：

//哈希桶，k-v，链表
typedef struct dictEntry {
    //key
    void *key;
    //value 支持4种数据类型
    union {
        //自定义
        void *val;
        //无符号64位整型
        uint64_t u64;
        //64位整型
        int64_t s64;
        //浮点型
        double d;
    } v;
    //next下一个节点
    struct dictEntry *next;
} dictEntry;

哈希桶结构包含了:

键名key
值v包含了4种可选类型
下一个节点指针next

dict.png

从整体上来看，dict中包含了2个dictht哈希表，每个哈希表中包含了一个dictEntry哈希桶数组，以及哈希桶的统计属性、rehash进度标记，而每个哈希桶内部是一个k-v结构，且包含一个next指针，多个哈希桶挂在一个链表上。

具体的用法如下：
1. 字典的创建

//创建一个字典
dict *dictCreate(dictType *type,
        void *privDataPtr)
{
    dict *d = zmalloc(sizeof(*d));

    _dictInit(d,type,privDataPtr);
    return d;
}

/* Initialize the hash table */
int _dictInit(dict *d, dictType *type,
        void *privDataPtr)
{
    //0号哈希表
    _dictReset(&d->ht[0]);
    //1号哈希表，渐进式扩容的时候用到
    _dictReset(&d->ht[1]);
    //通过这个type指定数据类型，如hash、zset
    d->type = type;
    d->privdata = privDataPtr;
    // -1 表示处于非rehash状态
    d->rehashidx = -1;
    d->iterators = 0;
    return DICT_OK;
}

static void _dictReset(dictht *ht)
{
    ht->table = NULL;
    ht->size = 0;
    ht->sizemask = 0;
    ht->used = 0;
}

主要是进行了内存的分配及2个哈希表的初始化及一些属性的赋值操作。
type在Redis的数据结构中会用到。

2. 添加元素

/* Low level add or find:
 * This function adds the entry but instead of setting a value returns the
 * dictEntry structure to the user, that will make sure to fill the value
 * field as he wishes.
 *
 * This function is also directly exposed to the user API to be called
 * mainly in order to store non-pointers inside the hash value, example:
 *
 * entry = dictAddRaw(dict,mykey,NULL);
 * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
 *
 * Return values:
 *
 * If key already exists NULL is returned, and "*existing" is populated
 * with the existing entry if existing is not NULL.
 *
 * If key was added, the hash entry is returned to be manipulated by the caller.
 */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    long index;
    dictEntry *entry;
    dictht *ht;
    //如果正在rehash，则执行rehash步骤
    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Get the index of the new element, or -1 if
     * the element already exists. */
    //查找当前key是否存在，利用hashFunction将key进行hash，如果存在，则返回NULL
    if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
        return NULL;

    /* Allocate the memory and store the new entry.
     * Insert the element in top, with the assumption that in a database
     * system it is more likely that recently added entries are accessed
     * more frequently. */
    //检查是否正在rehash，是的话，使用新的哈希表
    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
    entry = zmalloc(sizeof(*entry));
    //将新的entry插入链表头部
    entry->next = ht->table[index];
    ht->table[index] = entry;
    //节点数+1
    ht->used++;

    /* Set the hash entry fields. */
    //设置新节点的key
    dictSetKey(d, entry, key);
    return entry;
}

需要判断是否处于rehash状态及key是否存在。
key的寻找过程类似HashMap。
不同的是，存在着2个哈希表。
采用头插法，添加一个key。

3. 删除及断开元素

/* Remove an element, returning DICT_OK on success or DICT_ERR if the
 * element was not found. */
int dictDelete(dict *ht, const void *key) {
    return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;
}
/* Remove an element from the table, but without actually releasing
 * the key, value and dictionary entry. The dictionary entry is returned
 * if the element was found (and unlinked from the table), and the user
 * should later call `dictFreeUnlinkedEntry()` with it in order to release it.
 * Otherwise if the key is not found, NULL is returned.
 *
 * This function is useful when we want to remove something from the hash
 * table but want to use its value before actually deleting the entry.
 * Without this function the pattern would require two lookups:
 *
 *  entry = dictFind(...);
 *  // Do something with entry
 *  dictDelete(dictionary,entry);
 *
 * Thanks to this function it is possible to avoid this, and use
 * instead:
 *
 * entry = dictUnlink(dictionary,entry);
 * // Do something with entry
 * dictFreeUnlinkedEntry(entry); // <- This does not need to lookup again.
 */
dictEntry *dictUnlink(dict *ht, const void *key) {
    return dictGenericDelete(ht,key,1);
}

/* You need to call this function to really free the entry after a call
 * to dictUnlink(). It's safe to call this function with 'he' = NULL. */
void dictFreeUnlinkedEntry(dict *d, dictEntry *he) {
    if (he == NULL) return;
    dictFreeKey(d, he);
    dictFreeVal(d, he);
    zfree(he);
}

/* Search and remove an element. This is an helper function for
 * dictDelete() and dictUnlink(), please check the top comment
 * of those functions. */
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
    uint64_t h, idx;
    dictEntry *he, *prevHe;
    int table;
    //如果节点数量为0，则返回NULL
    if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;

    //如果正在rehash，则执行rehash
    if (dictIsRehashing(d)) _dictRehashStep(d);

    //通过hash函数计算key的hash值
    h = dictHashKey(d, key);

    //遍历2个dictht
    for (table = 0; table <= 1; table++) {
        //计算key所在哈希桶数组的下标
        idx = h & d->ht[table].sizemask;
        //dictEntry
        he = d->ht[table].table[idx];
        prevHe = NULL;
        //循环遍历链表查找key
        while(he) {
            if (key==he->key || dictCompareKeys(d, key, he->key)) {
                /* Unlink the element from the list */
                //将找到的节点存储在prevHe中，然后删除
                if (prevHe)
                    prevHe->next = he->next;
                else
                    d->ht[table].table[idx] = he->next;
                //通过nofree参数控制是否要释放内存空间，delete的时候nofree=0，unlink的时候nofree=1
                if (!nofree) {
                    dictFreeKey(d, he);
                    dictFreeVal(d, he);
                    zfree(he);
                }
                //节点数-1
                d->ht[table].used--;
                return he;
            }
            prevHe = he;
            //继续查找下一个节点
            he = he->next;
        }
        if (!dictIsRehashing(d)) break;
    }
    return NULL; /* not found */
}

删除元素的时候，需要同时释放内存。
断开元素的时候，不需要同时释放内存。
对于小对象key，可以直接删除元素dictDelete。
对于大对象key，一般是断开元素dictUnlink，再慢慢回收内存dictFreeUnlinkedEntry。

同样，都需要判断是否rehash状态，对key进行hash，再定位哈希桶数组下标，再遍历链表查找到最终的key。

4. 扩容

/* Resize the table to the minimal size that contains all the elements,
 * but with the invariant of a USED/BUCKETS ratio near to <= 1 */
//扩容
int dictResize(dict *d)
{
    int minimal;
    //通过dictEnableResize、dictDisableResize修改dict_can_resize
    //判断是否允许resize或者是否处于渐进式hash状态，如果不允许或者正在rehash，则返回错误。
    if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;
    minimal = d->ht[0].used;
    //最小容量
    if (minimal < DICT_HT_INITIAL_SI***imal = DICT_HT_INITIAL_SIZE;
    //扩容逻辑
    return dictExpand(d, minimal);
}

/* Expand or create the hash table */
int dictExpand(dict *d, unsigned long size)
{
    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    //如果正在rehash或者新的size过小，则报错。
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    dictht n; /* the new hash table */
    //在未达到unsigned long阈值的情况下，2倍扩容。
    unsigned long realsize = _dictNextPower(size);

    /* Rehashing to the same table size is not useful. */
    if (realsize == d->ht[0].size) return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    //调整参数值，分配内存
    n.size = realsize;
    n.sizemask = realsize-1;
    n.table = zcalloc(realsize*sizeof(dictEntry*));
    n.used = 0;

    /* Is this the first initialization? If so it's not really a rehashing
     * we just set the first hash table so that it can accept keys. */
    //如果不是因为rehash而去扩容，则判定为初始化，那么直接赋值即可。
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* Prepare a second hash table for incremental rehashing */
    //将新建的哈希表指向1号哈希表，为rehash做准备。
    d->ht[1] = n;
    //修改进度为0,进度为-1的话则表示非rehash状态。
    d->rehashidx = 0;
    return DICT_OK;
}

5. 渐进式哈希rehash

/* Performs N steps of incremental rehashing. Returns 1 if there are still
 * keys to move from the old to the new hash table, otherwise 0 is returned.
 *
 * Note that a rehashing step consists in moving a bucket (that may have more
 * than one key as we use chaining) from the old to the new hash table, however
 * since part of the hash table may be composed of empty spaces, it is not
 * guaranteed that this function will rehash even a single bucket, since it
 * will visit at max N*10 empty buckets in total, otherwise the amount of
 * work it does  would be unbound and the function may block for a long time. */
//渐进式哈希
int dictRehash(dict *d, int n) {
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    if (!dictIsRehashing(d)) return 0;

    //n表示要移动的槽数
    while(n-- && d->ht[0].used != 0) {
        dictEntry *de, *nextde;
        assert(d->ht[0].size > (unsigned long)d->rehashidx);
        //如果还有元素，查找哈希槽
        while(d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0) return 1;
        }
        //获取下标为rehashidx哈希桶链表
        de = d->ht[0].table[d->rehashidx];
        //遍历，移动元素到新的哈希表
        while(de) {
            uint64_t h;
            //移动到下一个节点
            nextde = de->next;
           //获取数组下标 hash&sizemask
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            //0号哈希表元素个数-1
            d->ht[0].used--;
            //1号哈希表元素个数+1
            d->ht[1].used++;
            de = nextde;
        }
        d->ht[0].table[d->rehashidx] = NULL;
        //rehash进度+1
        d->rehashidx++;
    }

    //校验元素是否为0，是否都迁移完毕了
    if (d->ht[0].used == 0) {
        //移动完所有的元素，就把原来的哈希表释放
        zfree(d->ht[0].table);
        //把1号哈希表指向0号哈希表
        d->ht[0] = d->ht[1];
        _dictReset(&d->ht[1]);
        //rehash状态清除
        d->rehashidx = -1;
        return 0;
    }

    /* More to rehash... */
    return 1;
}

//该方法在server.c启动的时候，启动 databasesCron() 定时任务
//调用incrementallyRehash，进而调用。
int dictRehashMilliseconds(dict *d, int ms) {
    long long start = timeInMilliseconds();
    int rehashes = 0;
    //每次rehash 100 steps，rehash时间为ms毫秒
    while(dictRehash(d,100)) {
        rehashes += 100;
        if (timeInMilliseconds()-start > ms) break;
    }
    return rehashes;
}