

下面两张图即为其源码中给出的普通前缀树与 Redis 实现的压缩前缀树(radix tree,即 rax)的比较,可以看到它把只有单个子节点的一串连续节点合并成了一个节点,如[foo]、[er]、[ar]这些节点。



从下面的代码和图可以看到这个前缀树节点的数据结构:单个节点的结构体里只有iskey、isnull、iscompr、size、data这5个字段,但实际上data后面还会跟着两种指针数据ptr->child和ptr->da,下面分别介绍这些数据。

  • iskey: 表示这个节点是否为key节点,例如上面图中存入footer这个数据,这个footer在前缀树中的r指向的节点即为key节点。
  • isnull: 表示这个key节点关联的值是否为NULL。isnull为1时节点后面不会存储ptr->da这个数据指针;这个标志只有在iskey为1的情况下才有意义。
  • iscompr: 这个表示这个节点是否被压缩,例如上面图中[foo]节点即为被压缩的节点
  • size: 表示的是后面的data的长度。
  • data: data中存储了字符数组,其含义有两种:当这个节点被压缩时,其存储的是被压缩进该节点的字符串,例如上面图中[foo]节点的data里存储的即为f、o、o这三个字母;当这个节点没有被压缩时,其存储的是该节点所有子节点所对应的字母,如上面图中[tb]节点里存储的即为t和b这两个子节点对应的字母。


  • ptr->child: 这个节点存储的是当前节点子节点的指针,当当前节点是压缩节点时,只会有一个指针,当其不是压缩节点时,子节点指针的数量即为size的大小
  • ptr->da: 这个是指向这个节点数据的指针。
/* Header of a single radix tree node.
 *
 * The four bit-fields share one 32-bit word and are followed by 'size'
 * bytes of character data. Per the walking code in this file, the child
 * pointers are stored after the (padded) character data:
 *
 *   |raxNode|padding|ptr->ch1|ptr->ch2|...|
 */
typedef struct raxNode {
    /* Set when this node terminates a key stored in the tree. */
    uint32_t iskey : 1;
    /* Set when the value associated with the key is NULL, in which case
     * no value pointer is stored. */
    uint32_t isnull : 1;
    /* Set when the node is a compressed node. */
    uint32_t iscompr : 1;
    /* Number of children for a normal node, or the length of the
     * compressed string for a compressed node. */
    uint32_t size : 29;
    /* 'size' bytes: one character per child for a normal node, or the
     * characters of the compressed string for a compressed node. */
    unsigned char data[];
} raxNode;





/* Walk the tree following the string 's' of length 'len' for as long as
 * the tree contains matching characters.
 *
 * Returns the number of characters of 's' that were consumed.
 *
 * Optional outputs (each is skipped when the pointer is NULL):
 *   stopnode - receives the node where the walk stopped.
 *   plink    - receives the address of the pointer that links to the
 *              stop node (initially the address of rax->head).
 *   splitpos - written only when the stop node is compressed: the index
 *              inside the compressed string where the walk stopped.
 *   ts       - when given, every node that is fully traversed is pushed
 *              on this stack before descending into its child. */
static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode **stopnode, raxNode ***plink, int *splitpos, raxStack *ts) {
    raxNode *node = rax->head;
    raxNode **link = &rax->head;

    size_t consumed = 0; /* Position in the string. */
    size_t pos = 0;      /* Position in the node children (or bytes if
                            the node is compressed). */

    while (node->size && consumed < len) {
        debugnode("Lookup current node",node);
        unsigned char *bytes = node->data;

        if (node->iscompr) {
            /* Match as many characters of the compressed string as
             * possible. */
            pos = 0;
            while (pos < node->size && consumed < len &&
                   bytes[pos] == s[consumed])
            {
                pos++;
                consumed++;
            }
            /* The string matched only part of this compressed node:
             * the walk stops in the middle of it. */
            if (pos != node->size) break;
        } else {
            /* Linear scan of the children characters looking for the
             * current character of the string. */
            pos = 0;
            while (pos < node->size && bytes[pos] != s[consumed]) pos++;
            /* No child is labeled with the current character. */
            if (pos == node->size) break;
            consumed++;
        }

        if (ts) raxStackPush(ts,node); /* Save stack of parent nodes. */

        /* Node layout is |raxNode|padding|ptr->ch1|ptr->ch2|...|. A
         * compressed node has its only child at index 0. */
        raxNode **children = raxNodeFirstChildPtr(node);
        raxNode **slot = node->iscompr ? children : children + pos;

        /* Load the selected child pointer into 'node' and remember where
         * that pointer is stored. */
        memcpy(&node,slot,sizeof(node));
        link = slot;
        pos = 0; /* Reset: if the loop does not iterate again, position 0
                    is what gets reported for the new node. */
    }

    debugnode("Lookup stop node is",node);
    if (stopnode) *stopnode = node;
    if (plink) *plink = link;
    if (splitpos && node->iscompr) *splitpos = pos;
    return consumed;
}



  1. 通过raxLowWalk这个方法找到当前的字符串匹配的最终的节点
  2. 查看当前节点是否完全匹配,如果在当前的前缀树上已经找到了对应的节点即可以对当前的节点进行赋值
  3. 对当前的节点进行分裂
  4. 根据传入的字符串继续创建新的节点


  • 一种是当前的字符串已经完全匹配完,但停在了压缩节点的中间,如在上面的前缀树[a]->[bcd]中插入一个ab字符串,这种情况下只需要将当前的[bcd]节点分成两块就可以了,即分为[a]->[b]->[cd]
  • 另外一种情况则是当前的字符串没有匹配完成,则需要再分裂出一个单独的节点来存储不匹配的字符,例如在上面的前缀树上加入abh字符串,则原来的节点需要分裂成[a]->[b]->[c]->[d],之后再在[c]这个节点上为新字符h添加子节点。下面的图具体展示了这两种方式。





/* Insert the key 's' of length 'len' with associated value 'data'.
 *
 * Returns 1 when a new element is inserted. Returns 0 with errno set to 0
 * when the key already exists: in that case '*old' (if 'old' is not NULL)
 * receives the previous value, and the value is replaced only when
 * 'overwrite' is non-zero. Returns 0 with errno set to ENOMEM on out of
 * memory.
 *
 * The steps are:
 *   1. raxLowWalk() finds the node where the existing tree stops matching.
 *   2. If the whole key matched and we are not in the middle of a
 *      compressed node, the value is stored in that node.
 *   3. Otherwise, if we stopped inside a compressed node, it is split
 *      (ALGO 1 when characters are left to insert, ALGO 2 when the key
 *      ends in the middle of the compressed node).
 *   4. The remaining characters, if any, are added as new nodes. */
int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old, int overwrite) {
    size_t i;
    int j = 0; /* Split position. If raxLowWalk() stops in a compressed
                  node, the index 'j' represents the char we stopped within
                  the compressed node, that is, the position where to split
                  the node for insertion. */
    raxNode *h, **parentlink;

    debugf("### Insert %.*s with value %p\n", (int)len, s, data);
    i = raxLowWalk(rax,s,len,&h,&parentlink,&j,NULL);

    /* i == len means the whole string was followed in the tree. If the
     * stop node is not compressed, or we are not in the middle of it,
     * this node can directly hold the value: just store it there. */
    if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) {
        debugf("### Insert: node representing key exists\n");
        /* Make space for the value pointer if needed. */
        if (!h->iskey || (h->isnull && overwrite)) {
            /* Reallocate the node with room for the value pointer. */
            h = raxReallocForData(h,data);
            /* The node may have moved: refresh the pointer stored in
             * the parent. */
            if (h) memcpy(parentlink,&h,sizeof(h));
        }
        if (h == NULL) {
            errno = ENOMEM;
            return 0;
        }

        /* Update the existing key if there is already one. */
        if (h->iskey) {
            if (old) *old = raxGetData(h);
            if (overwrite) raxSetData(h,data);
            errno = 0;
            return 0; /* Element already exists. */
        }

        /* Otherwise set the node as a key. Note that raxSetData()
         * will set h->iskey. */
        raxSetData(h,data);
        rax->numele++;
        return 1; /* Element inserted. */
    }

    /* The stop node is compressed and must be split. */
    /* ------------------------- ALGORITHM 1 --------------------------- */
    if (h->iscompr && i != len) {
        debugf("ALGO 1: Stopped at compressed node %.*s (%p)\n",
            h->size, h->data, (void*)h);
        debugf("Still to insert: %.*s\n", (int)(len-i), s+i);
        debugf("Splitting at %d: '%c'\n", j, ((char*)h->data)[j]);
        debugf("Other (key) letter is '%c'\n", s[i]);

        /* 1: Save next pointer (the last child pointer of the node). */
        raxNode **childfield = raxNodeLastChildPtr(h);
        raxNode *next;
        memcpy(&next,childfield,sizeof(next));
        debugf("Next is %p\n", (void*)next);
        debugf("iskey %d\n", h->iskey);
        if (h->iskey) {
            debugf("key value is %p\n", raxGetData(h));
        }

        /* Set the length of the additional nodes we will need. 'j' is
         * the split position: characters before it form the trimmed node,
         * characters after it form the postfix node. */
        size_t trimmedlen = j;
        size_t postfixlen = h->size - j - 1;
        int split_node_is_key = !trimmedlen && h->iskey && !h->isnull;
        size_t nodesize;

        /* 2: Create the split node. Also allocate the other nodes we'll need
         *    ASAP, so that it will be simpler to handle OOM. The resulting
         *    chain is trimmed -> splitnode -> postfix. */
        raxNode *splitnode = raxNewNode(1, split_node_is_key);
        raxNode *trimmed = NULL;
        raxNode *postfix = NULL;

        if (trimmedlen) {
            nodesize = sizeof(raxNode)+trimmedlen+raxPadding(trimmedlen)+
                       sizeof(raxNode*);
            if (h->iskey && !h->isnull) nodesize += sizeof(void*);
            trimmed = rax_malloc(nodesize);
        }

        if (postfixlen) {
            nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)+
                       sizeof(raxNode*);
            postfix = rax_malloc(nodesize);
        }

        /* OOM? Abort now that the tree is untouched. */
        if (splitnode == NULL ||
            (trimmedlen && trimmed == NULL) ||
            (postfixlen && postfix == NULL))
        {
            rax_free(splitnode);
            rax_free(trimmed);
            rax_free(postfix);
            errno = ENOMEM;
            return 0;
        }

        /* The split node has a single child, labeled with the character
         * at the split position. */
        splitnode->data[0] = h->data[j];

        if (j == 0) {
            /* 3a: Replace the old node with the split node: move the
             * value (if any) to it and make the parent point to it. */
            if (h->iskey) {
                void *ndata = raxGetData(h);
                raxSetData(splitnode,ndata);
            }
            memcpy(parentlink,&splitnode,sizeof(splitnode));
        } else {
            /* 3b: Trim the compressed node. */
            trimmed->size = j;
            /* Copy the first 'j' characters into the trimmed node. */
            memcpy(trimmed->data,h->data,j);
            trimmed->iscompr = j > 1 ? 1 : 0;
            trimmed->iskey = h->iskey;
            trimmed->isnull = h->isnull;
            if (h->iskey && !h->isnull) {
                void *ndata = raxGetData(h);
                raxSetData(trimmed,ndata);
            }
            raxNode **cp = raxNodeLastChildPtr(trimmed);
            memcpy(cp,&splitnode,sizeof(splitnode));
            /* Make the parent point to the trimmed node. */
            memcpy(parentlink,&trimmed,sizeof(trimmed));
            parentlink = cp; /* Set parentlink to splitnode parent. */
            rax->numnodes++;
        }

        /* 4: Create the postfix node: what remains of the original
         * compressed node after the split. If there are characters left,
         * they go in a new node that inherits the original child. */
        if (postfixlen) {
            /* 4a: create a postfix node. */
            postfix->iskey = 0;
            postfix->isnull = 0;
            postfix->size = postfixlen;
            postfix->iscompr = postfixlen > 1;
            memcpy(postfix->data,h->data+j+1,postfixlen);
            raxNode **cp = raxNodeLastChildPtr(postfix);
            memcpy(cp,&next,sizeof(next));
            rax->numnodes++;
        } else {
            /* 4b: just use next as postfix node. */
            postfix = next;
        }

        /* 5: Set splitnode first child as the postfix node. */
        raxNode **splitchild = raxNodeLastChildPtr(splitnode);
        memcpy(splitchild,&postfix,sizeof(postfix));

        /* 6. Continue insertion: this will cause the splitnode to
         * get a new child (the non common character at the currently
         * inserted key). */
        rax_free(h);
        h = splitnode;
    } else if (h->iscompr && i == len) {
    /* ------------------------- ALGORITHM 2 --------------------------- */
        /* Here only two nodes are needed: trimmed -> postfix. */
        debugf("ALGO 2: Stopped at compressed node %.*s (%p) j = %d\n",
            h->size, h->data, (void*)h, j);

        /* Allocate postfix & trimmed nodes ASAP to fail for OOM gracefully. */
        size_t postfixlen = h->size - j;
        size_t nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)+
                          sizeof(raxNode*);
        if (data != NULL) nodesize += sizeof(void*);
        raxNode *postfix = rax_malloc(nodesize);

        nodesize = sizeof(raxNode)+j+raxPadding(j)+sizeof(raxNode*);
        if (h->iskey && !h->isnull) nodesize += sizeof(void*);
        raxNode *trimmed = rax_malloc(nodesize);

        if (postfix == NULL || trimmed == NULL) {
            rax_free(postfix);
            rax_free(trimmed);
            errno = ENOMEM;
            return 0;
        }

        /* 1: Save next pointer. */
        raxNode **childfield = raxNodeLastChildPtr(h);
        raxNode *next;
        memcpy(&next,childfield,sizeof(next));

        /* 2: Create the postfix node. */
        postfix->size = postfixlen;
        postfix->iscompr = postfixlen > 1;
        postfix->iskey = 1;
        postfix->isnull = 0;
        memcpy(postfix->data,h->data+j,postfixlen);
        raxSetData(postfix,data);
        raxNode **cp = raxNodeLastChildPtr(postfix);
        memcpy(cp,&next,sizeof(next));
        rax->numnodes++;

        /* 3: Trim the compressed node. */
        trimmed->size = j;
        trimmed->iscompr = j > 1;
        trimmed->iskey = 0;
        trimmed->isnull = 0;
        memcpy(trimmed->data,h->data,j);
        memcpy(parentlink,&trimmed,sizeof(trimmed));
        if (h->iskey) {
            void *aux = raxGetData(h);
            raxSetData(trimmed,aux);
        }

        /* Fix the trimmed node child pointer to point to
         * the postfix node. */
        cp = raxNodeLastChildPtr(trimmed);
        memcpy(cp,&postfix,sizeof(postfix));

        /* Finish! We don't need to continue with the insertion
         * algorithm for ALGO 2. The key is already inserted. */
        rax->numele++;
        rax_free(h);
        return 1; /* Key inserted. */
    }

    /* We walked the radix tree as far as we could, but still there are left
     * chars in our string. We need to insert the missing nodes. */
    while(i < len) {
        raxNode *child;

        /* If this node is going to have a single child, and there
         * are other characters, so that that would result in a chain
         * of single-childed nodes, turn it into a compressed node.
         * The current node is empty, so the remaining characters can be
         * stored in it. */
        if (h->size == 0 && len-i > 1) {
            debugf("Inserting compressed node\n");
            size_t comprsize = len-i;
            if (comprsize > RAX_NODE_MAX_SIZE)
                comprsize = RAX_NODE_MAX_SIZE;
            raxNode *newh = raxCompressNode(h,s+i,comprsize,&child);
            if (newh == NULL) goto oom;
            h = newh;
            memcpy(parentlink,&h,sizeof(h));
            parentlink = raxNodeLastChildPtr(h);
            i += comprsize;
        } else {
            debugf("Inserting normal node\n");
            raxNode **new_parentlink;
            raxNode *newh = raxAddChild(h,s[i],&child,&new_parentlink);
            if (newh == NULL) goto oom;
            h = newh;
            memcpy(parentlink,&h,sizeof(h));
            parentlink = new_parentlink;
            i++;
        }
        rax->numnodes++;
        h = child;
    }
    raxNode *newh = raxReallocForData(h,data);
    if (newh == NULL) goto oom;
    h = newh;
    if (!h->iskey) rax->numele++;
    raxSetData(h,data);
    memcpy(parentlink,&h,sizeof(h));
    return 1; /* Element inserted. */

oom:
    /* This code path handles out of memory after part of the sub-tree was
     * already modified. Set the node as a key, and then remove it. However we
     * do that only if the node is a terminal node, otherwise if the OOM
     * happened reallocating a node in the middle, we don't need to free
     * anything. */
    if (h->size == 0) {
        h->isnull = 1;
        h->iskey = 1;
        rax->numele++; /* Compensate the next remove. */
        /* The removal must run unconditionally: calling it inside
         * assert() would drop the rollback when NDEBUG is defined. */
        int removed = raxRemove(rax,s,i,NULL);
        assert(removed != 0);
        (void)removed;
    }
    errno = ENOMEM;
    return 0;
}




1. 找到这个字符串所对应的最后一个节点。
2. 将当前节点的iskey设置为0;如果该节点没有子节点,则释放该节点,并沿着这个字符串的路径向上释放那些只有这一个子节点且本身不是key的节点
3. 如果处理之后当前节点只剩下一个子节点且不是key,则尝试将它与上下相邻的节点进行压缩。


1. 从当前节点向上找到最上面最后一个可以压缩的节点
2. 从这个节点开始向下遍历,看有多少个节点符合压缩条件
3. 如果可以压缩的节点大于1则创建新的压缩节点
4. 再次向下遍历,将符合条件的节点释放掉,并压缩到新创建的节点中
5. 将最后一个被压缩节点的子节点指针存放到新创建的压缩节点上,并且将其父节点中指向原起始节点的指针改为指向新创建的压缩节点

/* Remove the key 's' of length 'len' from the tree.
 *
 * Returns 1 if the key was found and removed, 0 if it was not present.
 * When 'old' is not NULL it receives the value that was associated with
 * the removed key.
 *
 * The steps are:
 *   1. Find the node representing the key and clear its iskey flag.
 *   2. If that node has no children, free it and walk up freeing every
 *      ancestor that is not a key and has that single child only.
 *   3. If the resulting node is left with one child and is not a key,
 *      try to merge it with adjacent nodes into one compressed node. */
int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) {
    raxNode *h;
    raxStack ts;

    debugf("### Delete: %.*s\n", (int)len, s);
    raxStackInit(&ts);
    int splitpos = 0;
    size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,&ts);
    if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) {
        raxStackFree(&ts);
        return 0;
    }
    if (old) *old = raxGetData(h);
    h->iskey = 0;
    rax->numele--;

    int trycompress = 0; /* Will be set to 1 if we should try to optimize the
                            tree resulting from the deletion. */

    if (h->size == 0) {
        debugf("Key deleted in node without children. Cleanup needed.\n");
        raxNode *child = NULL;
        /* Walk up freeing the nodes that only led to the removed key. */
        while(h != rax->head) {
            child = h;
            debugf("Freeing child %p [%.*s] key:%d\n", (void*)child,
                (int)child->size, (char*)child->data, child->iskey);
            rax_free(child);
            rax->numnodes--;
            h = raxStackPop(&ts);
            /* If this node has more then one child, or actually holds
             * a key, stop here. */
            if (h->iskey || (!h->iscompr && h->size != 1)) break;
        }
        if (child) {
            debugf("Unlinking child %p from parent %p\n",
                (void*)child, (void*)h);
            raxNode *new = raxRemoveChild(h,child);
            if (new != h) {
                /* The node was reallocated: fix the pointer to it that is
                 * stored in its parent (or in the tree head). */
                raxNode *parent = raxStackPeek(&ts);
                raxNode **parentlink;
                if (parent == NULL) {
                    parentlink = &rax->head;
                } else {
                    parentlink = raxFindParentLink(parent,h);
                }
                memcpy(parentlink,&new,sizeof(new));
            }

            /* If after the removal the node has just a single child
             * and is not a key, we need to try to compress it. */
            if (new->size == 1 && new->iskey == 0) {
                trycompress = 1;
                h = new;
            }
        }
    } else if (h->size == 1) {
        /* If the node had just one child, after the removal of the key
         * further compression with adjacent nodes is pontentially possible. */
        trycompress = 1;
    }

    /* Don't try node compression if our nodes pointers stack is not
     * complete because of OOM while executing raxLowWalk() */
    if (trycompress && ts.oom) trycompress = 0;

    if (trycompress) {
        debugf("After removing %.*s:\n", (int)len, s);
        debugnode("Compression may be needed",h);
        debugf("Seek start node\n");

        /* Try to reach the upper node that is compressible.
         * At the end of the loop 'h' will point to the first node we
         * can try to compress and 'parent' to its parent. */
        raxNode *parent;
        while(1) {
            parent = raxStackPop(&ts);
            if (!parent || parent->iskey ||
                (!parent->iscompr && parent->size != 1)) break;
            h = parent;
            debugnode("Going up to",h);
        }
        raxNode *start = h; /* Compression starting node. */

        /* Scan chain of nodes we can compress, counting them and the
         * total number of characters they hold. */
        size_t comprsize = h->size;
        int nodes = 1;
        while(h->size != 0) {
            raxNode **cp = raxNodeLastChildPtr(h);
            memcpy(&h,cp,sizeof(h));
            if (h->iskey || (!h->iscompr && h->size != 1)) break;
            /* Stop here if going to the next node would result into
             * a compressed node larger than h->size can hold. */
            if (comprsize + h->size > RAX_NODE_MAX_SIZE) break;
            nodes++;
            comprsize += h->size;
        }

        /* More than one node in the chain: build a single compressed
         * node holding the characters of all of them. */
        if (nodes > 1) {
            /* If we can compress, create the new node and populate it. */
            size_t nodesize =
                sizeof(raxNode)+comprsize+raxPadding(comprsize)+sizeof(raxNode*);
            raxNode *new = rax_malloc(nodesize);
            /* An out of memory here just means we cannot optimize this
             * node, but the tree is left in a consistent state. */
            if (new == NULL) {
                raxStackFree(&ts);
                return 1;
            }
            new->iskey = 0;
            new->isnull = 0;
            new->iscompr = 1;
            new->size = comprsize;
            rax->numnodes++;

            /* Scan again, this time to populate the new node content and
             * to fix the new node child pointer. At the same time we free
             * all the nodes that we'll no longer use.
             *
             * Merge exactly the 'nodes' nodes counted above. The counting
             * scan may have stopped early because of RAX_NODE_MAX_SIZE,
             * and 'new' was sized for the counted nodes only, so walking
             * further here would write past the end of new->data. */
            comprsize = 0;
            h = start;
            for (int merged = 0; merged < nodes; merged++) {
                memcpy(new->data+comprsize,h->data,h->size);
                comprsize += h->size;
                raxNode **cp = raxNodeLastChildPtr(h);
                raxNode *tofree = h;
                memcpy(&h,cp,sizeof(h));
                rax_free(tofree); rax->numnodes--;
            }
            debugnode("New node",new);

            /* Now 'h' points to the first node that we still need to use,
             * so our new node child pointer will point to it. */
            raxNode **cp = raxNodeLastChildPtr(new);
            memcpy(cp,&h,sizeof(h));

            /* Fix parent link. */
            if (parent) {
                raxNode **parentlink = raxFindParentLink(parent,start);
                memcpy(parentlink,&new,sizeof(new));
            } else {
                rax->head = new;
            }

            debugf("Compressed %d nodes, %d total bytes\n",
                nodes, (int)comprsize);
        }
    }
    raxStackFree(&ts);
    return 1;
}


