Redis dict 实现相关源码阅读笔记,源码文件 dict.h & dict.c


数据结构

在 Redis 中,dict 不仅用于实现不同的对象类型,还用于数据库的实现,其数据结构如下所示。

1
2
3
4
5
6
7
typedef struct dict {
    dictType *type;
    void *privdata;
    dictht ht[2];
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    unsigned long iterators; /* number of iterators currently running */
} dict;

dictType

其中字典类型 dictType 的结构如下,可以看出,用户可以传入各种自定义的操作函数以实现不同的字典类型。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
typedef struct dictType {
    /* 自定义哈希值计算函数 */
    uint64_t (*hashFunction)(const void *key);
    /* 自定义键复制函数 */
    void *(*keyDup)(void *privdata, const void *key);
    /* 自定义值复制函数 */
    void *(*valDup)(void *privdata, const void *obj);
    /* 自定义键比较函数 */
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    /* 自定义键析构函数 */
    void (*keyDestructor)(void *privdata, void *key);
    /* 自定义值析构函数 */
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

dictht

可以看到在 dict 中有两个哈希表 dictht,方便实现渐进式 Rehash,以免在字典中数据量较大时 Rehash 操作带来较大延时,关于 Rehash 的详细内容将在之后描述。

dictht 的结构如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
typedef struct dictht {
    dictEntry **table;
    unsigned long size; /* size of table */
    unsigned long sizemask; /* size - 1 */
    unsigned long used; /* total dictEntries in table */
} dictht;

typedef struct dictEntry {
    void *key;
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next;
} dictEntry;

Redis 哈希表使用单向链表解决哈希冲突,新加入的节点 dictEntry 置于 table 中相应 bucket 的头部。

  • dictht 中,table 的长度 size 始终为 2 的整数倍(在 _dictNextPower 函数中实现),而 sizemask = size - 1,从而可以将取余操作转化为位运算
  • dictEntry 中,使用 union 来存储不同类型的值 v

Tips: 对 $b = 2^n$ 取余

1
a % b = a & (b - 1)

迭代器

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
/* If safe is set to 1 this is a safe iterator, that means, you can call
 * dictAdd, dictFind, and other functions against the dictionary even while
 * iterating. Otherwise it is a non safe iterator, and only dictNext()
 * should be called while iterating. */
typedef struct dictIterator {
    dict *d;
    long index;
    int table, safe;
    dictEntry *entry, *nextEntry;
    /* unsafe iterator fingerprint for misuse detection. */
    long long fingerprint;
} dictIterator;

dict 结构中的 iterators 表示的是当前正在运行的安全(safe=1)迭代器个数,其不为 0,意味着此时不能进行 Rehash 操作。

dict APIs

Get/Set Interfaces

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/* Enable resizing of the hash table  */
void dictEnableResize(void);

/* Disable resizing of the hash table  */
void dictDisableResize(void);

/* Get hash value of the specified key
 * Use dictType->hashFunction */
uint64_t dictGetHash(dict *d, const void *key);

/* The default hashing function uses SipHash implementation
 * in siphash.c. */
void dictSetHashFunctionSeed(uint8_t *seed) {
    memcpy(dict_hash_function_seed,seed,sizeof(dict_hash_function_seed));
}
uint8_t *dictGetHashFunctionSeed(void) {
    return dict_hash_function_seed;
}
uint64_t dictGenHashFunction(const void *key, int len) {
    return siphash(key,len,dict_hash_function_seed);
}
uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len) {
    return siphash_nocase(buf,len,dict_hash_function_seed);
}

/* Get stats of the specified dict */
void dictGetStats(char *buf, size_t bufsize, dict *d);

dictCreate

依据 typeprivDataPtr 创建一个 dict 并初始化。

1
2
/* Create a new hash table */
dict *dictCreate(dictType *type, void *privDataPtr);

dictExpand

1
2
/* Expand or create the hash table */
int dictExpand(dict *d, unsigned long size);

处于以下情形之一时,返回 DICT_ERR:

  • dict 正在 Rehash
  • 传入的 size 小于 dictht.used
  • 新的 dictht.size(大于 size 的最小 2 的幂次方) 和现有哈希表 size 相同

如果当前 ht[0].table == NULL,表示该操作仅为初始化 ht[0];否则,该操作初始化 ht[1] 以用于 Rehash。

dictResize

1
2
3
/* Resize the table to the minimal size that contains all the elements,
 * but with the invariant of a USED/BUCKETS ratio near to <= 1 */
int dictResize(dict *d);

dictRehash

1
int dictRehash(dict *d, int n);

在每次调用 dictRehash 时,最多迁移 n 个非空 bucket,返回 1 时表示 Rehash 操作还未完成,未处于 Rehash 状态或 Rehash 完成时返回 0.

dictRehashMilliseconds

1
2
/* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds */
int dictRehashMilliseconds(dict *d, int ms);

dictAddRaw

1
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing);

dict 中添加一个键值为 keydictEntry,并返回该 dictEntry 的指针,便于事后设置 value

  • 每次添加前会检查 dict 是否处于 Rehash 状态,如果处于 Rehash 状态,且 dict.iterators = 0 时,迁移一个非空 bucket
  • 如果 key 已存在,返回 NULL,此时如果传入的 existing 不为空,则 *existing 会设置为指向与 key 对应的 dictEntry 的指针
  • 如果正处于 Rehash 状态,欲添加的 dictEntry 会添加至 ht[1]

dictAdd

1
2
/* Add an element to the target hash table */
int dictAdd(dict *d, void *key, void *val);

先调用 dictAddRaw 插入 key,再调用 dictSetVal 关联 keyval。如果 key 已存在则返回 DICT_ERR,添加成功返回 DICT_OK

dictAddOrFind

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
/* Add or Find:
 * dictAddOrFind() is simply a version of dictAddRaw() that always
 * returns the hash entry of the specified key, even if the key already
 * exists and can't be added (in that case the entry of the already
 * existing key is returned.) */
dictEntry *dictAddOrFind(dict *d, void *key) {
    dictEntry *entry, *existing;
    entry = dictAddRaw(d,key,&existing);
    return entry ? entry : existing;
}

dictReplace

1
2
3
4
5
6
/* Add or Overwrite:
 * Add an element, discarding the old value if the key already exists.
 * Return 1 if the key was added from scratch, 0 if there was already an
 * element with such key and dictReplace() just performed a value update
 * operation. */
int dictReplace(dict *d, void *key, void *val);

dictDelete

1
2
3
/* Remove an element, returning DICT_OK on success or DICT_ERR if the
 * element was not found. */
int dictDelete(dict *d, const void *key);
1
2
3
4
5
6
/* Remove an element from the table, but without actually releasing
 * the key, value and dictionary entry. The dictionary entry is returned
 * if the element was found (and unlinked from the table), and the user
 * should later call `dictFreeUnlinkedEntry()` with it in order to release it.
 * Otherwise if the key is not found, NULL is returned. */
dictEntry *dictUnlink(dict *ht, const void *key);

dictFreeUnlinkedEntry

1
2
3
/* You need to call this function to really free the entry after a call
 * to dictUnlink(). It's safe to call this function with 'he' = NULL. */
void dictFreeUnlinkedEntry(dict *d, dictEntry *he);

dictRelease

1
2
/* Clear & Release the hash table */
void dictRelease(dict *d);

dictEmpty

1
2
/* Clear & Reset the hash table */
void dictEmpty(dict *d, void(callback)(void*));

dictFind

1
2
/* Returns the hash entry of the specified key, or NULL */
dictEntry * dictFind(dict *d, const void *key);

dictFetchValue

1
2
/* Returns the value of the specified key,  or NULL */
void *dictFetchValue(dict *d, const void *key);

dictGetIterator

1
2
/* Return the dict iterator of the specified dict */
dictIterator *dictGetIterator(dict *d);

在调用 dictGetIterator 后,并不会修改 d->iterators

dictGetSafeIterator

1
2
3
4
5
6
dictIterator *dictGetSafeIterator(dict *d) {
    dictIterator *i = dictGetIterator(d);

    i->safe = 1;
    return i;
}

dictNext

获取下一个 dictEntry

1
dictEntry *dictNext(dictIterator *iter);

dictReleaseIterator – 释放迭代器

1
void dictReleaseIterator(dictIterator *iter);

dictGetRandomKey

1
2
3
/* Return a random entry from the hash table. Useful to
 * implement randomized algorithms */
dictEntry *dictGetRandomKey(dict *d);

首先随机选择一个非空的 bucket,然后从该 bucket 中随机选择一个 dictEntry 作为返回值。可以看出,长度越短的 bucket 中的 dictEntry 被选中的概率越大。

dictGetSomeKeys

1
2
3
/* This function samples the dictionary to return a few keys from random
 * locations. */
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
  • 函数返回时,在 des 中存储采样所得的 dictEntry 数组结果,不保证元素不会重复,des 所指的内存空闲需能容纳 countdictEntry 指针
  • 函数返回值为 des 中包含的 dictEntry 个数,其可能小于 count
  • 比调用 countdictGetRandomKey 高效

dictGetFairRandomKey

1
2
3
/* This is like dictGetRandomKey() from the POV of the API, but will do more
 * work to ensure a better distribution of the returned element. */
dictEntry *dictGetFairRandomKey(dict *d);
  • 会先尝试调用 dictGetSomeKeys 获取 GETFAIR_NUM_ENTRIESdictEntry,再随机挑选一个返回
  • 调用 dictGetSomeKeys 返回值为 0 时,直接调用 dictGetRandomKey,返回其结果

dictFindEntryRefByPtrAndHash

1
2
3
4
5
6
/* Finds the dictEntry reference by using pointer and pre-calculated hash.
 * oldkey is a dead pointer and should not be accessed.
 * the hash value should be provided using dictGetHash.
 * no string / key comparison is performed.
 * return value is the reference to the dictEntry if found, or NULL if not found. */
dictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, uint64_t hash);

dictScan

1
2
3
4
5
6
/* dictScan() is used to iterate over the elements of a dictionary. */
unsigned long dictScan(dict *d,
                       unsigned long cursor,
                       dictScanFunction *fn,
                       dictScanBucketFunction *bucketfn,
                       void *privdata);
  • Scan 开始时,设置 cursor = 0 调用该函数

  • 该函数每次迭代后,返回 cursor 用于下次调用

  • 当返回的 cursor 等于 0 时,表示 Scan 结束

  • 保证返回所有 dictEntry,但不保证不重复

  • 如下所示,cursor 并不是按序递增,而是按位反转之后递增,这保证了在 Rehash 时不会漏掉 bucket

    1
    2
    3
    
    cursor = rev(cursor);
    cursor++;
    cursor = rev(cursor);