ziplist.c
Redis中的ziplist
,又名压缩列表
,是一种经过特殊编码的双向链表
,也是一种极度节约内存
的数据结构。
可以存储字符串
和整数值
,其中整数被编码为实际整数,而不是一系列字符。
它允许在 O(1)
时间内在列表的任一侧执行push
和pop
操作。
但是,由于每个操作都需要重新分配
ziplist使用的内存
,因此实际的复杂度与ziplist使用的内存量有关。
ziplist的数据结构如下:
zlbytes
是一个uint32_t
的无符号整数,存储着整个ziplist
所占的字节数,包含zlbytes
自身。zltail
是一个uint32_t
的无符号整数,存储着ziplist
中最后一个entry
的偏移量,方便O(1)
操作队尾,而不需要遍历整个链表。zllen
是一个16位的无符号整数,表示entry
的个数。当其值小于2^16-1
时,它就是实际的节点个数;当其值等于2^16-1时,需要遍历整个列表才能得到真实的节点个数
。zlend
是一个比较特殊的节点,单字节,值为255,标志着列表的结尾。entry
由三部分组成:prevlen
存储着前一个节点的长度,这么做是为了方便从后往前遍历
;encoding
表示数据的格式,取值为string
或者integer
;entry-data
是节点的数据。如果encoding
本身已经包含了数据(例如4位立即数的小整数),那么entry-data
就不需要了。
关于prevlen
,不同的数据长度会有不同的表示方法。
- 当前置节点的长度小于254字节时,只需要1个字节的无符号整数即可。
- 当前置节点的长度大于等于254字节时,则需要5个字节的空间来存储:第1个字节保存的值为0xFE,用来标识这是一个大值,后4个字节用来保存真实的节点长度值。
ziplist的entry
定义如下:
/* We use this structure to receive information about a ziplist entry.
* Note that this is not how the data is actually encoded, is just what we
* get filled by a function in order to operate more easily. */
typedef struct zlentry {
unsigned int prevrawlensize; /* Bytes used to encode the previous entry len*/
unsigned int prevrawlen; /* Previous entry len. */
unsigned int lensize; /* Bytes used to encode this entry type/len.
For example strings have a 1, 2 or 5 bytes
header. Integers always use a single byte.*/
/* Size of the current entry's payload. */
unsigned int len; /* Bytes used to represent the actual entry.
For strings this is just the string length
while for integers it is 1, 2, 3, 4, 8 or
0 (for 4 bit immediate) depending on the
number range. */
/* Total header size of the current entry. */
unsigned int headersize; /* prevrawlensize + lensize. */
/* Encoding of the current entry: string or integer. */
unsigned char encoding; /* Set to ZIP_STR_* or ZIP_INT_* depending on
the entry encoding. However for 4 bits
immediate integers this can assume a range
of values and must be range-checked. */
/* Pointer to the current entry. */
unsigned char *p; /* Pointer to the very start of the entry, that
is, this points to prev-entry-len field. */
} zlentry;
1. 新建压缩列表
/* Create a new, empty ziplist.
 *
 * The allocation holds only the fixed header (ZIPLIST_HEADER_SIZE bytes)
 * followed by the single ZIP_END terminator byte. Returns the pointer
 * obtained from zmalloc(). */
unsigned char *ziplistNew(void) {
    /* Header plus one byte for the end marker. */
    const unsigned int total = ZIPLIST_HEADER_SIZE + 1;
    unsigned char *list = zmalloc(total);

    /* Total number of bytes occupied by the list, the terminator included. */
    ZIPLIST_BYTES(list) = intrev32ifbe(total);
    /* With no entries, the tail offset points right after the header. */
    ZIPLIST_TAIL_OFFSET(list) = intrev32ifbe(ZIPLIST_HEADER_SIZE);
    /* The entry count starts at zero. */
    ZIPLIST_LENGTH(list) = 0;
    /* The last byte is the end-of-list marker. */
    list[total - 1] = ZIP_END;

    return list;
}
初始的时候,ziplist是个空的双端列表,每次插入元素的时候再分配内存。
2. 添加节点
/* Push the slen-byte string s onto one end of the ziplist.
 *
 * 'where' selects the end: ZIPLIST_HEAD inserts at the position given by
 * ZIPLIST_ENTRY_HEAD(zl); any other value inserts at the position given by
 * ZIPLIST_ENTRY_END(zl). Returns whatever __ziplistInsert() returns, which
 * callers must use as the new list pointer. */
unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where) {
    unsigned char *pos;

    if (where == ZIPLIST_HEAD) {
        pos = ZIPLIST_ENTRY_HEAD(zl);
    } else {
        pos = ZIPLIST_ENTRY_END(zl);
    }

    return __ziplistInsert(zl, pos, s, slen);
}
/* Insert item at "p". */
/* Inserts the slen-byte string s into zl at the position pointed to by p and
 * returns the ziplist pointer after the insertion. The list is resized, so the
 * returned pointer may differ from the zl that was passed in. */
unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
/* curlen: current total size of the ziplist in bytes, read from its header.
 * reqlen: number of bytes the new entry needs (computed below). */
size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen;
/* Encoded size and value of the previous-entry length. */
unsigned int prevlensize, prevlen = 0;
/* Offset of p from the start of zl, used to restore p after a resize. */
size_t offset;
int nextdiff = 0;
unsigned char encoding = 0;
long long value = 123456789; /* initialized to avoid warning. Using a value
that is easy to see if for some reason
we use it uninitialized. */
zlentry tail;
/* Find out prevlen for the entry that is inserted. */
if (p[0] != ZIP_END) {
/* Inserting before an existing entry: decode the previous-entry length
 * stored at p; it becomes the prevlen of the new entry. */
ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
} else {
/* Inserting at the end: if the list is not empty, the previous entry is
 * the current tail and prevlen is its raw length. Otherwise prevlen
 * stays 0. */
unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);
if (ptail[0] != ZIP_END) {
prevlen = zipRawEntryLength(ptail);
}
}
/* See if the entry can be encoded */
/* Step 1 of reqlen: the payload size. If the string can be encoded as an
 * integer, use the size of that integer encoding; otherwise use slen. */
if (zipTryEncoding(s,slen,&value,&encoding)) {
/* 'encoding' is set to the appropriate integer encoding */
reqlen = zipIntSize(encoding);
} else {
/* 'encoding' is untouched, however zipStoreEntryEncoding will use the
* string length to figure out how to encode it. */
reqlen = slen;
}
/* We need space for both the length of the previous entry and
* the length of the payload. */
/* Step 2 of reqlen: bytes needed for the prevlen field. */
reqlen += zipStorePrevEntryLength(NULL,prevlen);
/* Step 3 of reqlen: bytes needed for the encoding field. */
reqlen += zipStoreEntryEncoding(NULL,encoding,slen);
/* When the insert position is not equal to the tail, we need to
* make sure that the next entry can hold this entry's length in
* its prevlen field. */
/* forcelarge is set when the next entry's prevlen field is deliberately not
 * shrunk (see the check below). */
int forcelarge = 0;
/* nextdiff is the change in size of the next entry's prevlen field caused by
 * storing reqlen in it: positive means more bytes are needed, negative means
 * fewer. It only applies when there is a next entry, i.e. p is not ZIP_END. */
nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;
/* With nextdiff == -4 and reqlen < 4 the new total size
 * curlen+reqlen+nextdiff would be smaller than curlen. In that case the
 * shrink is skipped (nextdiff = 0) and forcelarge is set instead.
 * NOTE(review): presumably zipStorePrevEntryLengthLarge then writes the
 * length in the wide form - confirm against its definition. */
if (nextdiff == -4 && reqlen < 4) {
nextdiff = 0;
forcelarge = 1;
}
/* Store offset because a realloc may change the address of zl. */
offset = p-zl;
/* Resize the ziplist to its new total size. */
zl = ziplistResize(zl,curlen+reqlen+nextdiff);
/* Recompute the insert position inside the resized buffer. */
p = zl+offset;
/* Apply memory move when necessary and update tail offset. */
if (p[0] != ZIP_END) {
/* Subtract one because of the ZIP_END bytes */
memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);
/* Encode this entry's raw length in the next entry. */
if (forcelarge)
zipStorePrevEntryLengthLarge(p+reqlen,reqlen);
else
zipStorePrevEntryLength(p+reqlen,reqlen);
/* Update offset for tail */
/* The tail moved forward by the size of the new entry. */
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+reqlen);
/* When the tail contains more than one entry, we need to take
* "nextdiff" in account as well. Otherwise, a change in the
* size of prevlen doesn't have an effect on the *tail* offset. */
/* Load the entry that now follows the new one into 'tail'. */
zipEntry(p+reqlen, &tail);
/* If that entry is not the last one, the tail offset also shifts by
 * nextdiff. */
if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
}
} else {
/* This element will be the new tail. */
ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p-zl);
}
/* When nextdiff != 0, the raw length of the next entry has changed, so
* we need to cascade the update throughout the ziplist */
if (nextdiff != 0) {
/* Save and restore p around the call: the cascade update returns a
 * possibly different zl. */
offset = p-zl;
zl = __ziplistCascadeUpdate(zl,p+reqlen);
p = zl+offset;
}
/* Write the entry */
/* Write the prevlen field and advance p past it. */
p += zipStorePrevEntryLength(p,prevlen);
/* Write the encoding field and advance p past it. */
p += zipStoreEntryEncoding(p,encoding,slen);
/* Write the payload: raw bytes for a string encoding, otherwise the integer
 * value in the chosen encoding. */
if (ZIP_IS_STR(encoding)) {
memcpy(p,s,slen);
} else {
zipSaveInteger(p,value,encoding);
}
/* One more entry in the list: bump the entry count in the header. */
ZIPLIST_INCR_LENGTH(zl,1);
return zl;
}
节点可以在head
插入,可以在tail
插入,也可以在index
位置插入。由于新节点的总长度可能大于等于254
字节,此时后一个节点的prevlen字段需要从1个字节扩充为5个字节
,从而可能引起后续entry的级联更新。
插入完节点后,分别修改zllen、prevlen、encoding、zltail
属性。
3. 删除节点
/* Remove the single entry that *p points to.
 *
 * On return *p is rewritten to point at the same byte offset inside the
 * returned list, so a caller can keep iterating while deleting. The
 * offset is captured before the deletion because __ziplistDelete()
 * resizes the list and may hand back a different base pointer. When the
 * last entry is removed, *p ends up pointing at ZIP_END, so callers
 * should check for that. */
unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p) {
    /* Position of the victim relative to the start of the list. */
    const size_t pos = *p - zl;
    unsigned char *updated = __ziplistDelete(zl, *p, 1);

    /* Re-anchor the caller's cursor in the (possibly moved) buffer. */
    *p = updated + pos;
    return updated;
}
/* Remove up to 'num' consecutive entries, starting with the entry at
 * position 'index'.
 *
 * If ziplistIndex() cannot locate the starting entry (it returns NULL),
 * the list is returned untouched; otherwise the result of
 * __ziplistDelete() is returned. */
unsigned char *ziplistDeleteRange(unsigned char *zl, int index, unsigned int num) {
    unsigned char *start = ziplistIndex(zl, index);

    if (start == NULL) {
        return zl;
    }
    return __ziplistDelete(zl, start, num);
}
/* Delete "num" entries, starting at "p". Returns pointer to the ziplist. */
/* Fewer than num entries are removed if the end of the list is reached first.
 * The list is resized, so the returned pointer may differ from zl. */
unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int num) {
unsigned int i, totlen, deleted = 0;
size_t offset;
int nextdiff = 0;
zlentry first, tail;
/* Remember the first entry to delete: its start address and the length of
 * the entry in front of it are needed below. */
zipEntry(p, &first);
/* Advance p past the entries to delete, stopping at ZIP_END or after num
 * entries. 'deleted' counts how many entries were skipped. */
for (i = 0; p[0] != ZIP_END && i < num; i++) {
p += zipRawEntryLength(p);
deleted++;
}
totlen = p-first.p; /* Bytes taken by the element(s) to delete. */
if (totlen > 0) {
/* Entries remain after the deleted range, so memory has to be moved. */
if (p[0] != ZIP_END) {
/* Storing `prevrawlen` in this entry may increase or decrease the
* number of bytes required compare to the current `prevrawlen`.
* There always is room to store this, because it was previously
* stored by an entry that is now being deleted. */
/* Size change of the surviving entry's prevlen field. */
nextdiff = zipPrevLenByteDiff(p,first.prevrawlen);
/* Note that there is always space when p jumps backward: if
* the new previous entry is large, one of the deleted elements
* had a 5 bytes prevlen header, so there is for sure at least
* 5 bytes free and we need just 4. */
p -= nextdiff;
zipStorePrevEntryLength(p,first.prevrawlen);
/* Update offset for tail */
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))-totlen);
/* When the tail contains more than one entry, we need to take
* "nextdiff" in account as well. Otherwise, a change in the
* size of prevlen doesn't have an effect on the *tail* offset. */
zipEntry(p, &tail);
if (p[tail.headersize+tail.len] != ZIP_END) {
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
}
/* Move tail to the front of the ziplist */
memmove(first.p,p,
intrev32ifbe(ZIPLIST_BYTES(zl))-(p-zl)-1);
} else {
/* Everything up to the end of the list was deleted, so no memory
 * move is needed. The new tail is the entry in front of the first
 * deleted one. */
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe((first.p-zl)-first.prevrawlen);
}
/* Resize and update length */
offset = first.p-zl;
zl = ziplistResize(zl, intrev32ifbe(ZIPLIST_BYTES(zl))-totlen+nextdiff);
/* Decrease the entry count in the header by the number of deleted entries. */
ZIPLIST_INCR_LENGTH(zl,-deleted);
p = zl+offset;
/* When nextdiff != 0, the raw length of the next entry has changed, so
* we need to cascade the update throughout the ziplist */
if (nextdiff != 0)
/* Propagate the size change to the entries that follow. */
zl = __ziplistCascadeUpdate(zl,p);
}
return zl;
}
4. 计算节点个数
/* Return the number of entries stored in the ziplist.
 *
 * The 16-bit count kept in the header is returned directly when it is
 * below UINT16_MAX. Otherwise the entries are counted by walking the list
 * from the first entry up to ZIP_END, and the header is refreshed when the
 * counted value is below UINT16_MAX. */
unsigned int ziplistLen(unsigned char *zl) {
    unsigned int stored = intrev16ifbe(ZIPLIST_LENGTH(zl));
    unsigned int count = 0;
    unsigned char *cursor;

    /* Fast path: the header value is authoritative. */
    if (stored < UINT16_MAX) {
        return stored;
    }

    /* Slow path: step over every entry until the end marker. */
    for (cursor = zl + ZIPLIST_HEADER_SIZE; *cursor != ZIP_END;
         cursor += zipRawEntryLength(cursor)) {
        count++;
    }

    /* Re-store length if small enough */
    if (count < UINT16_MAX) {
        ZIPLIST_LENGTH(zl) = intrev16ifbe(count);
    }
    return count;
}