The Perl Toolchain Summit 2025 Needs You: You can help 🙏 Learn more

## param $namespace = 'rbhash';
## param $min_bits = 8;
## param $max_bits = 64;
## param @default_search_args= ('int (*cmp_callback)(void *, size_t)', 'void *userdata');
## param $default_search_cmp = "cmp_callback(userdata, node_id)";
## param @treeprint_args;
## param $debug= 0;
## param $feature_print= 1;
## param $feature_demo= 0;
## param $run_with_scissors= 0;
## param @public_includes = (
## qw( <stdint.h> <stdlib.h> <stdbool.h> <assert.h> ),
## qw( <stdio.h> <string.h> )x!!($feature_print || $feature_demo)
## );
## param @private_includes = qw( "rbhash.h" );
## my $NAMESPACE= uc($namespace);
## my @bits= map +(1<<$_), (log($min_bits)/log(2)) .. (log($max_bits)/log(2));
## sub log2($x) { log($x)/log(2) }
## sub word_type($bits) { 'uint'.$bits.'_t' }
## section PUBLIC;
## use CodeGen::Cpppp::Template "format_commandline";
/*
Generated by rbhash.cpppp using command
${{ format_commandline }}
*/
#include $_ ## for @public_includes
## section PRIVATE;
/*
Generated by rbhash.cpppp using command
${{ format_commandline }}
*/
#include $_ ## for @private_includes
// A setting that disables all the runtime sanity checks and safeguards
#ifndef ${NAMESPACE}_RUN_WITH_SCISSORS
#define ${NAMESPACE}_RUN_WITH_SCISSORS $run_with_scissors
#endif
#ifndef ${NAMESPACE}_ASSERT
/* The assertions of this library are fairly important since so much of the
* implementation is exposed to the rest of the program, so only actually
* remove the checks if RUN_WITH_SCISSORS is set.
*/
#if ${NAMESPACE}_RUN_WITH_SCISSORS
#define ${NAMESPACE}_ASSERT(x) (void)0
#elif defined(NDEBUG)
#define ${NAMESPACE}_ASSERT(x) if (!(x)) return 0
#else
#define ${NAMESPACE}_ASSERT(x) assert(x)
#endif
#endif
## my $assert= "${NAMESPACE}_ASSERT";
## section PUBLIC;
/* MAX_TREE_HEIGHT is the maximum number of nodes from root to leaf in any
* correctly balanced tree. The exact formula for the maximum height (including
* root node) is floor(2*log2(N/2+1)) for a tree of N nodes.
*/
## for my $bits (@bits) {
#define ${NAMESPACE}_MAX_ELEMENTS_$bits 0x${{ sprintf "%X", (1<<($bits-1))-1 }}
#define ${NAMESPACE}_MAX_TREE_HEIGHT_$bits ${{ int(2*log2((2**($bits-1)-1)/2+1)) }}
## }
/* This macro tells you the word offset (treating rbhash as an array of words)
* of the first hash bucket.
*/
#define ${NAMESPACE}_TABLE_WORD_OFS(capacity) ( (capacity)*2 + 2 )
/* This macro selects the word size needed to index 'capacity' number of
* user elements.
*/
#define ${NAMESPACE}_SIZEOF_WORD(capacity) ( \
## for my $bits (@bits) {
## if ($bits < $max_bits) {
(capacity) <= ${NAMESPACE}_MAX_ELEMENTS_$bits? ${{ $bits/8 }} : \
## } else {
${{ $bits/8 }} \
## }
## }
)
/* This macro defines the total size (in bytes) of the rbhash storage
* for a given number of elements and buckets. This does not include
* the user's elements themselves, since those are whatever size the
* user wants them to be, and rbhash doesn't need to know.
*/
#define ${NAMESPACE}_SIZEOF(capacity, buckets) ( \
${NAMESPACE}_SIZEOF_WORD(capacity) \
* ( ${NAMESPACE}_TABLE_WORD_OFS(capacity) + buckets ) \
)
/* Several functions can operate on a "path", which is a list of
* references starting at the bucket and ending at a tree node.
* The path is allocated to the maximum depth that a tree of that
* word-bits-size could reach. Since this drastically affects the
* amount of stack used, a struct is declared for each word-bit size.
*
* The structs each record their length so that they can be passed
* interchangably to the functions. You could even allocate custom
* lengths with alloca, but that seems overcomplicated.
*/
## for my $bits (@bits) {
struct ${namespace}_path_${bits} {
uint8_t len, lim;
size_t refs[${NAMESPACE}_MAX_TREE_HEIGHT_${bits}];
};
## section PRIVATE;
void ${namespace}_path_${bits}_init(struct ${namespace}_path_${bits} *p);
## section PUBLIC;
inline void ${namespace}_path_${bits}_init(struct ${namespace}_path_${bits} *p) {
p->len= 0;
p->lim= ${NAMESPACE}_MAX_TREE_HEIGHT_${bits};
}
## }
// Different template output may end up with different structs claiming
// the name of ${namespace}_path, but that should be OK.
typedef struct ${namespace}_path_${max_bits} ${namespace}_path;
#define ${namespace}_path_init(p) ${namespace}_path_${max_bits}_init(p)
// Iterate one or more places through the R/B tree of a bucket, updating 'path'
extern size_t ${namespace}_path_step(void *rbhash, size_t capacity, ${namespace}_path *path, int ofs);
// Exchange the tree node of one node_id (at the end of 'path') for another node_id
extern size_t ${namespace}_path_swap(void *rbhash, size_t capacity, ${namespace}_path *path, size_t new_node_id);
// implementation detail used to reduce size of inline functions
extern size_t ${namespace}_capacity_bounds_assertion(size_t capacity);
// Add a node_id at the end of 'path', and balance the tree if needed
## for my $bits (@bits) {
## my $word_t= word_type($bits);
extern size_t ${namespace}_path_insert_$bits($word_t *rbhash, ${namespace}_path *path, size_t node_id);
## }
inline size_t ${namespace}_path_insert(void *rbhash, size_t capacity, ${namespace}_path *path, size_t node) {
return
## for my $bits (@bits) {
## my $word_t= word_type($bits);
(capacity <= ${NAMESPACE}_MAX_ELEMENTS_$bits)? ${namespace}_path_insert_$bits(($word_t*) rbhash, path, node) :
## }
${namespace}_capacity_bounds_assertion(capacity);
}
// Remove the node_id from the end of 'path', and balance the tree if needed
## for my $bits (@bits) {
## my $word_t= word_type($bits);
extern size_t ${namespace}_path_delete_$bits($word_t *rbhash, ${namespace}_path *path);
## }
inline size_t ${namespace}_path_delete(void *rbhash, size_t capacity, ${namespace}_path *path) {
## for my $bits (@bits) {
## my $word_t= word_type($bits);
(capacity <= ${NAMESPACE}_MAX_ELEMENTS_$bits)? ${namespace}_path_delete_$bits(($word_t*) rbhash, path) :
## }
${namespace}_capacity_bounds_assertion(capacity);
}
## # Define a search API for a custom comparison function
## # Parameters:
## # sub_ns: namespace component to be appended to the rbhash namespace
## # api_args: arrayref of C function parameters (strings) added to each search function
## # cmp: a string of C that calls a function to compare node_id with the search_args
## # header: if set, emits the API declarations into that named section. default is 'public'
## # unit: if set, emits the implementation into that named section. default is the current section.
##
## sub define_search_api(%options) {
## my $search_ns= $namespace;
## $search_ns .= '_' . $options{sub_ns} if length($options{sub_ns}//'');
## my @search_args= $options{api_args} or die "'search_args' are required\n";
## my $search_cmp= $options{cmp} or die "'cmp' is required\n";
##
## my $orig_section= $self->current_output_section;
## section($options{header} || PUBLIC);
// Find a node_id matching the search criteria and fill in the 'path' to it, else return 0
extern size_t ${search_ns}_find_path(void *rbhash, size_t capacity, ${namespace}_path *path, size_t bucket_idx, @search_args);
// Simplified search for node_id, without building a path
extern size_t ${search_ns}_find(void *rbhash, size_t capacity, size_t bucket_idx, @search_args);
// Insert a node_id, unless one already matches the search criteria
extern size_t ${search_ns}_insert(void *rbhash, size_t capacity, size_t node_id, size_t bucket_idx, @search_args);
// Delete a node matching the search criteria
extern size_t ${search_ns}_delete(void *rbhash, size_t capacity, size_t bucket_idx, @search_args);
## section($options{unit} || $orig_section);
/* Find a node in the hash table, or tree. Returns the node_id, or 0 if no
* nodes match.
*
* This is a simplified version of find_path that doesn't keep track of the
* path through the tree, saving time but not facilitating inserts or deletes.
*/
size_t ${search_ns}_find(
void *rbhash, size_t capacity, size_t bucket_idx,
@search_args
) {
size_t node_id= 0;
int cmp;
## for my $bits (@bits) {
## my $word_t= word_type($bits);
## my $else= $bits > $min_bits? ' else':'';
$else if (capacity <= ${NAMESPACE}_MAX_ELEMENTS_$bits) {
node_id= (($word_t *)rbhash)[ ${NAMESPACE}_TABLE_WORD_OFS(capacity) + bucket_idx ] >> 1;
while (node_id && (cmp= $search_cmp))
node_id= (($word_t *)rbhash)[ (node_id<<1) | (cmp < 0? 0 : 1) ] >> 1;
}
## }
else {
$assert(capacity <= ${NAMESPACE}_MAX_ELEMENTS_$max_bits);
}
return node_id;
}
/* Find a node in the hash table, and record the path to arrive at the node
* or the node pointer where it would exist. The path can be used for
* inserting or deleting without re-comparing any elements.
*
* The path should already have been initialized using `${namespace}_path_init`.
*/
size_t ${search_ns}_find_path(
void *rbhash, size_t capacity, ${namespace}_path *path, size_t bucket_idx,
@search_args
) {
size_t ref, node_id= 0;
int cmp, p_i= 0, p_lim= path->lim;
path->len= 0; // in case $assert calls 'return 0'
$assert(p_lim > 0);
## for my $bits (@bits) {
## my $word_t= word_type($bits);
## my $else= $bits > $min_bits? ' else':'';
$else if (capacity <= ${NAMESPACE}_MAX_ELEMENTS_$bits) {
$word_t *rbhash_w= ($word_t*) rbhash;
path->refs[0]= ${NAMESPACE}_TABLE_WORD_OFS(capacity) + bucket_idx;
node_id= rbhash_w[ path->refs[0] ] >> 1;
while (node_id && (cmp= $search_cmp)) {
ref= (node_id<<1) | (cmp < 0? 0 : 1);
++p_i;
$assert(p_i < p_lim);
path->refs[p_i]= ref;
node_id= rbhash_w[ref] >> 1;
}
}
## }
else {
$assert(capacity <= ${NAMESPACE}_MAX_ELEMENTS_$max_bits);
}
path->len= p_i+1;
return node_id;
}
/* Insert a node into the hashtable, storing collisions in a tree.
* If it finds a node with same key, it returns that index and does not insert
* the new node, else it will insert and return your 'new_node' value.
* If it returns node 0, you have a corrupted data structure.
*/
extern size_t ${search_ns}_insert(
void *rbhash, size_t capacity, size_t at_node_id, size_t bucket_idx,
@search_args
) {
size_t node_id= 0, ref= ${NAMESPACE}_TABLE_WORD_OFS(capacity) + bucket_idx;
int cmp, p_i= 0, p_lim;
## for my $bits (@bits) {
## my $word_t= word_type($bits);
## my $else= $bits > $min_bits? ' else':'';
$else if (capacity <= ${NAMESPACE}_MAX_ELEMENTS_$bits) {
$word_t *rbhash_w= ($word_t*) rbhash;
node_id= rbhash_w[ref] >> 1;
if (!node_id) {
rbhash_w[ref]= at_node_id << 1;
return at_node_id;
}
else {
struct ${namespace}_path_${bits} path;
${namespace}_path_${bits}_init(&path);
p_lim= path.lim;
path.refs[0]= ref;
do {
if (!(cmp= $search_cmp))
return node_id;
ref= (node_id<<1) | (cmp < 0? 0 : 1);
++p_i;
$assert(p_i < p_lim);
path.refs[p_i]= ref;
node_id= rbhash_w[ref] >> 1;
} while (node_id);
node_id= at_node_id;
// Handle simple case of adding to black parent without invoking balance.
if (!(rbhash_w[path.refs[p_i-1]] & 1)) {
rbhash_w[ref]= (node_id << 1) | 1;
return node_id;
}
path.len= p_i+1;
return ${namespace}_path_insert_$bits(rbhash_w, (${namespace}_path*) &path, node_id);
}
}
## }
$assert(capacity <= ${NAMESPACE}_MAX_ELEMENTS_$max_bits);
return 0;
}
/* Find and delete a node in the hashtable. If found, this returns the node_id
* that was removed. If not found (or if the data structure is currupt) this
* returns 0. It may also return 0 if an assertion fails and you have disabled
* aborting on assertions.
*/
extern size_t ${search_ns}_delete(
void *rbhash, size_t capacity, size_t bucket_idx,
@search_args
) {
size_t cur= 0, ref= ${NAMESPACE}_TABLE_WORD_OFS(capacity) + bucket_idx;
int cmp, p_i= 0, p_lim;
## for my $bits (@bits) {
## my $word_t= word_type($bits);
## my $else= $bits > $min_bits? ' else':'';
$else if (capacity <= ${NAMESPACE}_MAX_ELEMENTS_$bits) {
$word_t *rbhash_w= ($word_t*) rbhash;
if ((cur= rbhash_w[ref])) {
struct ${namespace}_path_${bits} path;
${namespace}_path_${bits}_init(&path);
p_lim= path.lim;
path.refs[0]= ref;
#define node_id (cur >> 1)
while ((cmp= $search_cmp)) {
#undef node_id
ref= (cur|1) ^ (cmp < 0? 1 : 0);
cur= rbhash_w[ref];
if (!cur)
return 0;
++p_i;
$assert(p_i < p_lim);
path.refs[p_i]= ref;
}
path.len= p_i+1;
return ${namespace}_path_delete_$bits(rbhash_w, (${namespace}_path*) &path);
}
}
## }
$assert(capacity <= ${NAMESPACE}_MAX_ELEMENTS_${max_bits});
return 0;
}
## section $orig_section;
## } # end define_search_api
##
## section PRIVATE;
/* Only called when capacity is out of bounds. Used by the inline bit-selectors. */
extern size_t ${namespace}_capacity_bounds_assertion(size_t capacity) {
$assert(capacity <= ${NAMESPACE}_MAX_ELEMENTS_$max_bits);
return 0;
}
## define_search_api(
## api_args => \@default_search_args,
## cmp => $default_search_cmp,
## );
/* Given a path to a node, make that path point to a new node assuming that
* the new node contains the same search key that the old node used to.
* This is used when moving array elements to a new index where the NodeID
* would be different.
*
* This is a O(1) operation, though building the path probably took O(log N).
*
* Returns the NodeID that the path previously ended with, and the path has
* been modified to end with new_node_id and is still valid. May return
* 0 if an assertion fails and you have disabled assertions.
*/
extern size_t ${namespace}_path_swap(
void *rbhash, size_t capacity, ${namespace}_path *path, size_t new_node_id
) {
size_t ref;
/* path must point to a node, and new_node_id must not be the sentinel */
$assert(path->len > 0 && new_node_id);
## for my $bits (@bits) {
## my $word_t= word_type($bits);
## my $nodeint_t= $bits < 64? 'uint'.($bits*2).'_t' : undef;
## my $else= $bits > $min_bits? ' else':'';
$else if (capacity <= ${NAMESPACE}_MAX_ELEMENTS_$bits) {
$word_t *rbhash_w= ($word_t*) rbhash, prev;
// It is an error if new_node_id is not already zeroed
## if ($nodeint_t) {
$assert((($nodeint_t*) rbhash)[new_node_id] == 0);
## } else {
$assert(rbhash_w[new_node_id << 1] == 0 && rbhash_w[(new_node_id << 1)|1] == 0);
## }
// Swap the references
ref= path->refs[path->len-1];
prev= rbhash_w[ref];
rbhash_w[ref]= (new_node_id << 1) | (prev&1);
## if ($nodeint_t) {
(($nodeint_t*) rbhash)[new_node_id]= (($nodeint_t*) rbhash)[prev>>1];
// and clear out the 'prev' before returning it
(($nodeint_t*) rbhash)[prev>>1]= 0;
## } else {
rbhash_w[new_node_id << 1]= rbhash_w[prev >> 1 << 1];
rbhash_w[(new_node_id << 1) | 1]= rbhash_w[prev|1];
// and clear out the 'prev' before returning it
rbhash_w[prev >> 1 << 1]= 0;
rbhash_w[prev|1]= 0;
## }
return prev >> 1;
}
## }
$assert(capacity <= ${NAMESPACE}_MAX_ELEMENTS_${max_bits});
return 0;
}
/* Insert a node_id at the end of a path which points to the Sentinel.
* The final ref will be updated to point to node_id, and then the tree
* will be balanced according to the Red/Black algorithm. The path is
* destroyed in the process and should not be used after this call.
* (the path could be updated during balance rotations, but would add
* overhead and users are unlikely to need it afterward anyway)
*
* Returns the node_id on success, or 0 on an assertion failure if you
* disabled assertions.
*/
## for my $bits (@bits) {
## my $word_t= word_type($bits);
extern size_t ${namespace}_path_insert_$bits(
$word_t *rbhash, ${namespace}_path *path, size_t node_id
) {
/*
Legend for deciphering crazy bitwise operations below:
X_ref - the index within the rbhash array which holds X
X= rbhash[X_ref] - an integer of ((node_id << 1) | red)
i.e. if pos is like a pointer with embedded color information,
pos_ref is like a pointer to that pointer.
X & 1 - 1 = red, 0 = black
X >> 1 - the node_id X is pointing to
If X_ref is from a tree node (true for all path->refs[i > 0]) then
the location of the ref also indicates which node it belongs to,
by virtue of nodes being located at rbhash[node_id*2].
X_ref >> 1 - the node_id of the parent of X
X_ref & 1 - true if X is the right-subtree of its parent
X_ref ^ 1 - a ref to the sibling of X
X | 1 - the ref to X's node_id's right subtree
X >> 1 << 1 - the ref to X's node_id's left subtree
(X|1) ^ 1 - same, maybe optimized if (X|1) is already in a register
X ^ 1 - a shortcut for one of the above when the color is known
*/
int p_i= path->len - 1;
// Empty paths or paths ending with a non-Sentinel reference are invalid.
$assert(path->len > 0 && rbhash[path->refs[p_i]] == 0);
// Add new_node to the final parent-ref of the path.
// If p_i is 0, this will be altering the hash bucket.
rbhash[path->refs[p_i--]]= (node_id << 1) | 1; // and make it red
// 'pos' will be the parent node of that.
while (p_i > 0) {
$word_t pos_ref= path->refs[p_i--];
$word_t pos= rbhash[pos_ref];
$word_t parent_ref= path->refs[p_i];
// if current is a black node, no rotations needed
if (!(pos & 1))
break;
// pos is red, its new child is red, and parent will be black.
// if the sibling is also red, we can pull down the color black from the parent
// if not, need a rotation.
if (!(rbhash[pos_ref^1]&1)) {
// Sibling is black, need a rotation
// if the imbalanced child (red node) is on the same side as the parent,
// need to rotate those lower nodes to the opposite side in preparation
// for the rotation.
// e.g. if pos_ref is leftward (even) and pos's rightward child (odd) is the red one...
$word_t child_ref= pos ^ (pos_ref&1);
$word_t child= rbhash[child_ref];
if (child&1) {
// rotate pos toward [side] so parent's [side] now points to pos's [otherside]
// set pos's child-ref to child's [otherside] ref
$word_t near_grandchild_ref= child ^ (child_ref&1);
rbhash[child_ref]= rbhash[near_grandchild_ref];
// set child's [side] to pos
rbhash[near_grandchild_ref]= pos;
pos= child; // keep pos as a red node, soon to become black
rbhash[pos_ref]= child;
// parent's [side] has not been updated here, but is about to become 'child'
child_ref= near_grandchild_ref^1;
child= rbhash[child_ref];
}
// Now we can rotate toward parent to balance the tree.
rbhash[pos_ref]= child;
rbhash[child_ref]= pos_ref|1; // = parent, colored red. simplification of ((pos_ref>>1)<<1)|1
rbhash[parent_ref]= pos^1; // also make pos black
// rotation finished, exit.
break;
}
rbhash[pos_ref^1] ^= 1; // toggle color of sibling
rbhash[pos_ref]= pos^1; // toggle color of pos
rbhash[parent_ref] ^= 1; // toggle color of parent
// Now pos is black.
// Jump twice up the tree so that once again, pos has one red child.
p_i--;
}
// Root of tree is always black
if (rbhash[path->refs[0]] & 1)
rbhash[path->refs[0]] ^= 1;
#if !${NAMESPACE}_RUN_WITH_SCISSORS
// Path is no longer valid, because rotations may have destroyed it.
path->len= 0;
#endif
return node_id;
}
## }
/* Delete a node at the end of a path. The path must end with a non-Sentinel
* reference, and must also be allocated to the maximum height of the tree,
* because the node you want to delete might need to be replaced by a node
* deeper in the tree. The tree will be re-balanced using the Red/Black
* algorithm. If this is the last node in the tree, it clears the hash bucket.
*
* The path is destroyed in the process, as rotations and node-replacement
* occur. You may not use the path afterward, even if the function fails.
* (the path could be updated during balance rotations, but would add
* overhead and users are unlikely to need it afterward anyway)
*
* Returns the deleted node_id on success, or 0 on an assertion failure if
* you disabled assertions.
*/
## for my $bits (@bits) {
## my $word_t= word_type($bits);
## my $nodeint_t= $bits < 64? 'uint'.($bits*2).'_t' : undef;
## my sub clear_lsb($expr) { $expr= "($expr)" if $expr =~ /\W/; "($expr >> 1 << 1)" }
extern size_t ${namespace}_path_delete_$bits($word_t *rbhash, ${namespace}_path *path) {
// See ${namespace}_path_insert for the notes on the bitwise operations
$word_t pos, ch1, ch2, sibling;
int p_i= path->len-1, p_lim= path->lim;
size_t *parent_refs= path->refs, ref, pos_ref;
// Path should be at least 1 element (the bucket root ref)
$assert(path->len >= 1);
// Read the final ref to find 'pos_ref' and 'pos'
pos_ref= parent_refs[p_i];
pos= rbhash[pos_ref];
// Path must point to a non-sentinel node
$assert(pos != 0);
// If pos has children, find a leaf to swap with.
// Then delete this node in the leaf's position.
// Note that normal red/black would delete the element first, then swap, but if we do that
// a rotation could change the path->refs putting the node-to-delete somwhere else.
ch1= rbhash[pos], ch2= rbhash[pos ^ 1];
if (ch1 || ch2) {
if (ch1 && ch2) {
int orig_p_i= p_i;
$word_t alt= pos, alt2;
// Descend one level to the left.
// The path should always have room for this additional reference if it
// was allocated to max-tree-height and the tree is actually balanced.
++p_i;
$assert(p_i < p_lim);
parent_refs[p_i]= ref= ${{ clear_lsb('pos') }}; // go left;
alt= rbhash[ref]; // either ch1 or ch2, but now we know it's the left one
// descend as many levels as possible to the right
while ((alt= rbhash[ref= alt | 1])) {
++p_i;
$assert(p_i < p_lim);
parent_refs[p_i]= ref;
}
// 'alt' is the node we swap with.
alt= rbhash[parent_refs[p_i]];
// is there one to the left?
if ((alt2= rbhash[${{ clear_lsb('alt') }}])) {
$assert(alt2 & 1);
// it is required to be a red leaf, so replace alt with it
rbhash[parent_refs[p_i]]= alt2 ^ 1;
## if ($nodeint_t) {
(($nodeint_t *)rbhash)[alt2 >> 1]= 0;
// Now substitute this for pos and we're done.
(($nodeint_t *)rbhash)[alt >> 1]= (($nodeint_t *)rbhash)[pos >> 1];
## } else {
rbhash[alt2]= 0;
rbhash[alt2 ^ 1]= 0;
// Now substitute this for pos and we're done.
rbhash[alt | 1]= rbhash[pos | 1];
rbhash[(alt | 1) ^ 1]= rbhash[(pos | 1) ^ 1];
## }
rbhash[pos_ref]= ${{ clear_lsb('alt') }} | (pos & 1); // preserve color of pos
goto done;
}
else {
// swap colors of alt and pos
alt ^= pos & 1;
pos ^= alt & 1;
alt ^= pos & 1;
## if ($nodeint_t) {
(($nodeint_t *)rbhash)[alt >> 1]= (($nodeint_t *)rbhash)[pos >> 1];
## } else {
rbhash[alt | 1]= rbhash[pos | 1]; // copy right
rbhash[(alt | 1) ^ 1]= rbhash[(pos | 1) ^ 1]; // copy left
## }
rbhash[pos_ref]= alt;
// the parent ref at orig_p_i+1 just changed address, so update that
// (and this affects the next line if alt was a child of pos)
parent_refs[orig_p_i + 1]= ${{ clear_lsb('alt') }}; // was left branch at that point
pos_ref= parent_refs[p_i];
}
}
else {
// Node is black with one child. Swap with it.
rbhash[pos_ref]= ${{ clear_lsb('ch1 | ch2') }}; // and make it black
goto done;
}
}
// Remove it.
rbhash[pos_ref]= 0;
// It was a black node with no children. Now it gets interesting.
if (!(pos & 1)) {
// The tree must have the same number of black nodes along any path from root
// to leaf. We want to remove a black node, disrupting the number of black
// nodes along the path from the root to the current leaf. To correct this,
// we must either reduce all other paths, or add a black node to the current
// path.
// Loop until the current node is red, or until we get to the root node.
sibling= rbhash[pos_ref ^ 1];
--p_i; // p_i is now the index of the ref to the parent
while (p_i >= 0) {
size_t near_nephew_ref;
$word_t near_nephew;
// If the sibling is red, we are unable to reduce the number of black
// nodes in the sibling tree, and we can't increase the number of black
// nodes in our tree.. Thus we must do a rotation from the sibling
// tree to our tree to give us some extra (red) nodes to play with.
// This is Case 1 from the text
if (sibling & 1) {
// Node is black and sibling is red. Get ref to sibling's near subtree
near_nephew_ref= (sibling ^ 1) | (pos_ref & 1);
// sibling is new parent, and now black.
rbhash[parent_refs[p_i]]= sibling ^ 1;
// move sibling's child under parent, becoming new sibling (which is black)
sibling= rbhash[near_nephew_ref];
rbhash[pos_ref ^ 1]= sibling;
rbhash[near_nephew_ref]= pos_ref | 1; // former sibling sameside tree = parent, now red
++p_i;
$assert(p_i < p_lim);
parent_refs[p_i] = near_nephew_ref; // insert new parent into list
}
// sibling will be black here
// If the sibling is black and both children are black, we have to
// reduce the black node count in the sibling's tree to match ours.
// This is Case 2a from the text.
near_nephew_ref= sibling | (pos_ref & 1);
near_nephew= rbhash[near_nephew_ref];
if (!((near_nephew|rbhash[near_nephew_ref ^ 1]) & 1)) {
$assert(sibling > 1);
rbhash[pos_ref ^ 1] |= 1; // change sibling to red
// Now we move one level up the tree to continue fixing the other branches
if (p_i < 1)
break;
pos_ref= parent_refs[p_i--];
if (rbhash[pos_ref] & 1) {
// Now, make the current node black (to fulfill Case 2b)
rbhash[pos_ref] ^= 1;
break;
}
sibling= rbhash[pos_ref ^ 1];
}
else {
// sibling will be black with 1 or 2 red children here
// If one of the sibling's children are red, we again can't make the
// sibling red to balance the tree at the parent, so we have to do a
// rotation. If the "near" nephew is red and the "far" nephew is
// black, we need to rotate that tree away before rotating the
// parent toward.
// After doing a rotation and rearranging a few colors, the effect is
// that we maintain the same number of black nodes per path on the far
// side of the parent, and we gain a black node on the current side,
// so we are done.
if (near_nephew & 1) {
// Case 3 from the text, double rotation
size_t tmp_ref= near_nephew ^ (pos_ref & 1); // near nephew's far child
rbhash[near_nephew_ref]= rbhash[tmp_ref];
rbhash[pos_ref ^ 1]= near_nephew;
rbhash[tmp_ref]= sibling;
sibling= near_nephew ^ 1; // make it black
near_nephew_ref= sibling | (pos_ref & 1);
}
else
rbhash[near_nephew_ref ^ 1] ^= 1; // far nephew becomes black
// now Case 4 from the text
$assert(sibling > 1);
rbhash[pos_ref ^ 1]= rbhash[near_nephew_ref];
// parent becomes black, balancing current path
rbhash[near_nephew_ref]= ${{ clear_lsb('pos_ref') }};
// Sibling assumes parent's color and position
rbhash[parent_refs[p_i]]= sibling | (rbhash[parent_refs[p_i]] & 1);
break;
}
}
}
done:
// Ensure root-ref is black
if (rbhash[parent_refs[0]] & 1)
rbhash[parent_refs[0]] ^= 1;
// clean the 'pos' node for future use
## if ($nodeint_t) {
(($nodeint_t *)rbhash)[pos >> 1]= 0;
## } else {
rbhash[pos]= 0;
rbhash[pos ^ 1]= 0;
## }
#if !${NAMESPACE}_RUN_WITH_SCISSORS
// Path is no longer valid, because rotations may have destroyed it.
path->len= 0;
#endif
return pos >> 1;
}
## }
## if ($feature_print) {
## section PUBLIC;
// Handy for gdb:
// p ${namespace}_print(rbhash, capacity, buckets, NULL, NULL, stdout)
extern void ${namespace}_print(void *rbhash, size_t capacity, size_t n_buckets,
void (*print_node)(void*,size_t,FILE*), void* userdata, FILE *out);
## section PRIVATE;
## for my $bits (@bits) {
## my $word_t= word_type($bits);
// Handy for gdb: "p ${namespace}_treeprint_$bits(rbhash, capacity, i, i, NULL, NULL, stdout)"
static size_t ${namespace}_print_tree_$bits(
$word_t *rbhash, $word_t max_node, $word_t node, $word_t mark_node,
void (*print_node)(void*,size_t,FILE*), void* userdata, FILE * out
) {
$word_t node_path[ 1+${NAMESPACE}_MAX_TREE_HEIGHT_$bits ];
bool cycle;
int i, pos, step= 0;
size_t nodecount= 0;
if (!node) {
fputs("(empty tree)\n", out);
return 0;
}
node_path[0]= 0;
node_path[pos= 1]= node << 1;
while (node && pos) {
switch (step) {
case 0:
// Check for cycles
cycle= false;
for (i= 1; i < pos; i++)
if ((node_path[i]>>1) == (node_path[pos]>>1))
cycle= true;
// Proceed down right subtree if possible
if (!cycle && pos < ${NAMESPACE}_MAX_TREE_HEIGHT_$bits
&& node <= max_node && rbhash[(node<<1)|1]
) {
node= rbhash[(node<<1)|1] >> 1;
node_path[++pos]= node << 1;
continue;
}
case 1:
// Print tree branches for nodes up until this one
for (i= 2; i < pos; i++)
fputs((node_path[i]&1) == (node_path[i+1]&1)? " " : " |", out);
if (pos > 1)
fputs((node_path[pos]&1)? " `" : " ,", out);
// Print content of this node
fprintf(out, "--%c%c%c #%ld%s ",
(node == mark_node? '(' : '-'),
(node > max_node? '!' : (rbhash[ (node_path[pos-1]|1) ^ (node_path[pos]&1) ]&1)? 'R':'B'),
(node == mark_node? ')' : ' '),
(long) node,
cycle? " CYCLE DETECTED"
: pos >= ${NAMESPACE}_MAX_TREE_HEIGHT_$bits? " MAX DEPTH EXCEEDED"
: node > max_node? " VALUE OUT OF BOUNDS"
: ""
);
if (print_node) print_node(userdata, node, out);
fputs("\n", out);
++nodecount;
// Proceed down left subtree if possible
if (!cycle && pos < ${NAMESPACE}_MAX_TREE_HEIGHT_$bits
&& node <= max_node && rbhash[node<<1]
) {
node= rbhash[node<<1] >> 1;
node_path[++pos]= (node << 1) | 1;
step= 0;
continue;
}
case 2:
// Return to parent
step= (node_path[pos]&1) + 1;
node= node_path[--pos] >> 1;
cycle= false;
}
}
return nodecount;
}
## }
void ${namespace}_print(
void *rbhash, size_t capacity, size_t n_buckets,
void (*print_node)(void*,size_t,FILE*), void* userdata, FILE *out
) {
size_t used= 0, collision= 0, empty=0, i;
fprintf(out, "# rbhash for capacity=%ld: %ld hash buckets, %ld bytes\n"
"--------------------\n",
(long) capacity, (long) n_buckets, (long) ${NAMESPACE}_SIZEOF(capacity, n_buckets));
## for my $bits (@bits) {
## my $word_t= word_type($bits);
## my $else= $bits > $min_bits? ' else':'';
$else if (capacity <= ${NAMESPACE}_MAX_ELEMENTS_$bits) {
$word_t *nodes= ($word_t*) rbhash;
$word_t *table= nodes + ${NAMESPACE}_TABLE_WORD_OFS(capacity);
for (i= 0; i < n_buckets; i++) {
if (table[i]) {
if (empty) {
fprintf(out, "(%ld empty buckets)\n", (long) empty);
empty= 0;
}
++used;
collision += ${namespace}_print_tree_$bits(rbhash, capacity, table[i]>>1, 0, print_node, userdata, out) - 1;
} else
++empty;
}
if (empty) {
fprintf(out, "(%ld empty buckets)\n", (long) empty);
empty= 0;
}
}
## }
fprintf(out, "--------------------\n"
"# used %ld/%ld buckets, %ld collisions\n",
(long) used, (long) n_buckets, (long) collision);
}
## }
## if ($feature_demo) {
struct userdata {
int *el;
int el_count, el_alloc;
int key;
};
int hash_function(int x) { return x; }
int cmp_el(void *data_p, size_t node) {
struct userdata *data= (struct userdata *) data_p;
return data->key < data->el[node-1]? -1 : data->key > data->el[node-1]? 1 : 0;
}
void print_node(void *data_p, size_t node, FILE *out) {
struct userdata *data= (struct userdata *) data_p;
fprintf(out, "%ld", (long) data->el[node-1]);
}
int userdata_insert(struct userdata *data, int value) {
int next= data->el_count;
size_t node_id;
data->key= value;
node_id= ${namespace}_insert(data->el + data->el_alloc, data->el_alloc,
next+1, hash_function(value) % data->el_alloc,
cmp_el, data
);
if (node_id == next+1) {
data->el[data->el_count++]= value;
return next;
}
return -1;
}
void userdata_extend(struct userdata *data) {
int i, lim, n= data->el_alloc? data->el_alloc << 1 : 16;
int *el= (int*) malloc(n*sizeof(int) + ${NAMESPACE}_SIZEOF(n,n));
if (!el) { perror("malloc"); abort(); }
memset(el+n, 0, ${NAMESPACE}_SIZEOF(n,n));
if (data->el) {
memcpy(el, data->el, data->el_alloc * sizeof(int));
free(data->el);
}
data->el= el;
data->el_alloc= n;
for (i= 0, lim= data->el_count, data->el_count= 0; i < lim; i++)
if (userdata_insert(data, data->el[i]) < i) { printf("BUG: insert failed\n"); abort(); }
}
int userdata_delete(struct userdata *data, int value) {
size_t node_id, node_id2;
data->key= value;
node_id= ${namespace}_delete(data->el + data->el_alloc, data->el_alloc,
hash_function(value) % data->el_alloc, cmp_el, data);
if (node_id) {
// If it wasn't the final node, swap this node with the final one
// and swap the element to match.
if (node_id != data->el_count) {
${namespace}_path p;
${namespace}_path_init(&p);
data->key= data->el[node_id-1]= data->el[data->el_count-1];
node_id2= ${namespace}_find_path(data->el + data->el_alloc, data->el_alloc, &p,
hash_function(data->key) % data->el_alloc, cmp_el, data);
if (node_id2 != data->el_count)
return -1;
node_id2= ${namespace}_path_swap(data->el + data->el_alloc, data->el_alloc, &p, node_id);
if (node_id2 != data->el_count)
return -1;
}
data->el_count--;
}
return node_id - 1;
}
int main() {
struct userdata data= { NULL, 0, 0, 0 };
userdata_extend(&data);
int value, idx;
fputs("Demo on 16-element array of int.\n"
"Each integer is used as its own hash code.\n"
"Trigger collisions using multiples of the table size.\n",
stdout);
while (!feof(stdin)) {
fputs("\nEnter a number (negative to delete): ", stdout);
fflush(stdout);
if (!scanf("%d", &value)) return 0;
if (value < 0) {
idx= userdata_delete(&data, -value);
if (idx >= 0) {
rbhash_print(data.el + data.el_alloc, data.el_alloc, data.el_alloc, print_node, &data, stdout);
printf("Deleted el[%ld]\n", (long)idx);
}
else printf("Not found, or err\n");
}
else if (value > 0) {
if (data.el_count >= data.el_alloc) {
if (data.el_count >= ${NAMESPACE}_MAX_ELEMENTS_${max_bits})
printf("Array full\n");
else
userdata_extend(&data);
}
idx= userdata_insert(&data, value);
if (idx >= 0) {
rbhash_print(data.el + data.el_alloc, data.el_alloc, data.el_alloc, print_node, &data, stdout);
printf("inserted at el[%d]\n", idx);
}
else if (idx < 0)
printf("insert failed, tree corrupt?\n");
else
printf("already exists at el[%d]\n", idx);
}
}
fputs("\n", stdout);
}
## }