summaryrefslogtreecommitdiff
path: root/st.c
diff options
context:
space:
mode:
-rw-r--r--st.c258
1 files changed, 185 insertions, 73 deletions
@@ -90,6 +90,11 @@
o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
bins depending on the current hash table size.
This implementation speeds up the Ruby hash table benchmarks in
average by more 40% on Intel Haswell CPU.
@@ -174,6 +179,15 @@ static const struct st_hash_type type_strcasehash = {
#define PTR_EQUAL(tab, ptr, hash_val, key_) \
((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
/* Features of a table. */
struct st_features {
/* Power of 2 used for number of allocated entries. */
@@ -380,6 +394,11 @@ set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
#define UNDEFINED_BIN_IND (~(st_index_t) 0)
/* Mark I-th bin of table TAB as corresponding to a deleted table
entry. Update number of entries in the table and number of bins
corresponding to deleted entries. */
@@ -823,17 +842,22 @@ secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb)
/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
search. Return the index of the found entry in array `entries`.
- If it is not found, return UNDEFINED_ENTRY_IND. */
static inline st_index_t
find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
{
st_index_t i, bound;
st_table_entry *entries;
bound = tab->entries_bound;
entries = tab->entries;
for (i = tab->entries_start; i < bound; i++) {
- if (PTR_EQUAL(tab, &entries[i], hash_value, key))
return i;
}
return UNDEFINED_ENTRY_IND;
@@ -845,10 +869,12 @@ find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
/*#define QUADRATIC_PROBE*/
/* Return index of entry with HASH_VALUE and KEY in table TAB. If
- there is no such entry, return UNDEFINED_ENTRY_IND. */
static st_index_t
find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
{
st_index_t ind;
#ifdef QUADRATIC_PROBE
st_index_t d;
@@ -869,10 +895,13 @@ find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
FOUND_BIN;
for (;;) {
bin = get_bin(tab->bins, get_size_ind(tab), ind);
- if (! EMPTY_OR_DELETED_BIN_P(bin)
- && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key))
- break;
- else if (EMPTY_BIN_P(bin))
return UNDEFINED_ENTRY_IND;
#ifdef QUADRATIC_PROBE
ind = hash_bin(ind + d, tab);
@@ -887,10 +916,12 @@ find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
/* Find and return index of table TAB bin corresponding to an entry
with HASH_VALUE and KEY. If there is no such bin, return
- UNDEFINED_BIN_IND. */
static st_index_t
find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
{
st_index_t ind;
#ifdef QUADRATIC_PROBE
st_index_t d;
@@ -911,10 +942,13 @@ find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
FOUND_BIN;
for (;;) {
bin = get_bin(tab->bins, get_size_ind(tab), ind);
- if (! EMPTY_OR_DELETED_BIN_P(bin)
- && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key))
- break;
- else if (EMPTY_BIN_P(bin))
return UNDEFINED_BIN_IND;
#ifdef QUADRATIC_PROBE
ind = hash_bin(ind + d, tab);
@@ -955,7 +989,7 @@ find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
bin = get_bin(tab->bins, get_size_ind(tab), ind);
if (EMPTY_OR_DELETED_BIN_P(bin))
return ind;
- st_assert (! PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key));
#ifdef QUADRATIC_PROBE
ind = hash_bin(ind + d, tab);
d++;
@@ -973,11 +1007,13 @@ find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
bigger entries array. Although we can reuse a deleted bin, the
result bin value is always empty if the table has no entry with
KEY. Return the entries array index of the found entry or
- UNDEFINED_ENTRY_IND if it is not found. */
static st_index_t
find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
st_data_t key, st_index_t *bin_ind)
{
st_index_t ind;
st_hash_t curr_hash_value = *hash_value;
#ifdef QUADRATIC_PROBE
@@ -1015,7 +1051,10 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
break;
}
else if (! DELETED_BIN_P(entry_index)) {
- if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key))
break;
}
else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
@@ -1040,13 +1079,18 @@ st_lookup(st_table *tab, st_data_t key, st_data_t *value)
st_index_t bin;
st_hash_t hash = do_hash(key, tab);
if (tab->bins == NULL) {
bin = find_entry(tab, hash, key);
if (bin == UNDEFINED_ENTRY_IND)
return 0;
}
else {
bin = find_table_entry_ind(tab, hash, key);
if (bin == UNDEFINED_ENTRY_IND)
return 0;
bin -= ENTRY_BASE;
@@ -1064,13 +1108,18 @@ st_get_key(st_table *tab, st_data_t key, st_data_t *result)
st_index_t bin;
st_hash_t hash = do_hash(key, tab);
if (tab->bins == NULL) {
bin = find_entry(tab, hash, key);
if (bin == UNDEFINED_ENTRY_IND)
return 0;
}
else {
bin = find_table_entry_ind(tab, hash, key);
if (bin == UNDEFINED_ENTRY_IND)
return 0;
bin -= ENTRY_BASE;
@@ -1104,10 +1153,13 @@ st_insert(st_table *tab, st_data_t key, st_data_t value)
st_index_t bin_ind;
int new_p;
- rebuild_table_if_necessary(tab);
hash_value = do_hash(key, tab);
if (tab->bins == NULL) {
bin = find_entry(tab, hash_value, key);
new_p = bin == UNDEFINED_ENTRY_IND;
if (new_p)
tab->num_entries++;
@@ -1116,6 +1168,8 @@ st_insert(st_table *tab, st_data_t key, st_data_t value)
else {
bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
key, &bin_ind);
new_p = bin == UNDEFINED_ENTRY_IND;
bin -= ENTRY_BASE;
}
@@ -1192,10 +1246,13 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value,
st_index_t bin_ind;
int new_p;
- rebuild_table_if_necessary (tab);
hash_value = do_hash(key, tab);
if (tab->bins == NULL) {
bin = find_entry(tab, hash_value, key);
new_p = bin == UNDEFINED_ENTRY_IND;
if (new_p)
tab->num_entries++;
@@ -1204,6 +1261,8 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value,
else {
bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
key, &bin_ind);
new_p = bin == UNDEFINED_ENTRY_IND;
bin -= ENTRY_BASE;
}
@@ -1212,7 +1271,6 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value,
check = tab->rebuilds_num;
key = (*func)(key);
st_assert(check == tab->rebuilds_num);
- st_assert(do_hash(key, tab) == hash_value);
ind = tab->entries_bound++;
entry = &tab->entries[ind];
entry->hash = hash_value;
@@ -1220,6 +1278,7 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value,
entry->record = value;
if (bin_ind != UNDEFINED_BIN_IND)
set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
#ifdef ST_DEBUG
st_check(tab);
#endif
@@ -1281,8 +1340,11 @@ st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
st_assert(tab != NULL);
hash = do_hash(*key, tab);
if (tab->bins == NULL) {
bin = find_entry(tab, hash, *key);
if (bin == UNDEFINED_ENTRY_IND) {
if (value != 0) *value = 0;
return 0;
@@ -1290,6 +1352,8 @@ st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
}
else {
bin_ind = find_table_bin_ind(tab, hash, *key);
if (bin_ind == UNDEFINED_BIN_IND) {
if (value != 0) *value = 0;
return 0;
@@ -1344,21 +1408,33 @@ st_shift(st_table *tab, st_data_t *key, st_data_t *value)
for (i = tab->entries_start; i < bound; i++) {
curr_entry_ptr = &entries[i];
if (! DELETED_ENTRY_P(curr_entry_ptr)) {
if (value != 0) *value = curr_entry_ptr->record;
- *key = curr_entry_ptr->key;
if (tab->bins == NULL) {
- bin = find_entry(tab, curr_entry_ptr->hash, curr_entry_ptr->key);
st_assert(bin != UNDEFINED_ENTRY_IND);
- st_assert(&entries[bin] == curr_entry_ptr);
}
else {
- bin_ind = find_table_bin_ind(tab, curr_entry_ptr->hash,
- curr_entry_ptr->key);
st_assert(bin_ind != UNDEFINED_BIN_IND);
- st_assert(&entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
- - ENTRY_BASE] == curr_entry_ptr);
MARK_BIN_DELETED(tab, bin_ind);
}
MARK_ENTRY_DELETED(curr_entry_ptr);
tab->num_entries--;
update_range_for_deleted(tab, i);
@@ -1402,15 +1478,20 @@ st_update(st_table *tab, st_data_t key,
int retval, existing;
st_hash_t hash = do_hash(key, tab);
entries = tab->entries;
if (tab->bins == NULL) {
bin = find_entry(tab, hash, key);
existing = bin != UNDEFINED_ENTRY_IND;
entry = &entries[bin];
bin_ind = UNDEFINED_BIN_IND;
}
else {
bin_ind = find_table_bin_ind(tab, hash, key);
existing = bin_ind != UNDEFINED_BIN_IND;
if (existing) {
bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
@@ -1489,14 +1570,19 @@ st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg,
hash = curr_entry_ptr->hash;
retval = (*func)(key, curr_entry_ptr->record, arg, 0);
if (rebuilds_num != tab->rebuilds_num) {
entries = tab->entries;
packed_p = tab->bins == NULL;
if (packed_p) {
i = find_entry(tab, hash, key);
error_p = i == UNDEFINED_ENTRY_IND;
}
else {
i = find_table_entry_ind(tab, hash, key);
error_p = i == UNDEFINED_ENTRY_IND;
i -= ENTRY_BASE;
}
@@ -1512,36 +1598,44 @@ st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg,
}
switch (retval) {
case ST_CONTINUE:
- break;
case ST_CHECK:
- if (check_p)
- break;
case ST_STOP:
#ifdef ST_DEBUG
- st_check(tab);
-#endif
- return 0;
- case ST_DELETE:
- if (packed_p) {
- bin = find_entry(tab, hash, curr_entry_ptr->key);
- if (bin == UNDEFINED_ENTRY_IND)
- break;
- }
- else {
- bin_ind = find_table_bin_ind(tab, hash, curr_entry_ptr->key);
- if (bin_ind == UNDEFINED_BIN_IND)
- break;
- bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
- MARK_BIN_DELETED(tab, bin_ind);
- }
- st_assert(&entries[bin] == curr_entry_ptr);
- MARK_ENTRY_DELETED(curr_entry_ptr);
- tab->num_entries--;
- update_range_for_deleted(tab, bin);
#ifdef ST_DEBUG
- st_check(tab);
#endif
- break;
}
}
#ifdef ST_DEBUG
@@ -2021,10 +2115,12 @@ st_expand_table(st_table *tab, st_index_t siz)
free(tmp);
}
-/* Rehash using linear search. */
-static void
st_rehash_linear(st_table *tab)
{
st_index_t i, j;
st_table_entry *p, *q;
if (tab->bins) {
@@ -2039,7 +2135,10 @@ st_rehash_linear(st_table *tab)
q = &tab->entries[j];
if (DELETED_ENTRY_P(q))
continue;
- if (PTR_EQUAL(tab, p, q->hash, q->key)) {
st_assert(p < q);
*p = *q;
MARK_ENTRY_DELETED(q);
@@ -2048,12 +2147,15 @@ st_rehash_linear(st_table *tab)
}
}
}
}
-/* Rehash using index */
-static void
st_rehash_indexed(st_table *tab)
{
st_index_t i;
st_index_t const n = bins_size(tab);
unsigned int const size_ind = get_size_ind(tab);
@@ -2082,26 +2184,32 @@ st_rehash_indexed(st_table *tab)
set_bin(bins, size_ind, ind, i + ENTRY_BASE);
break;
}
- else if (PTR_EQUAL(tab, q, p->hash, p->key)) {
- /* duplicated key; delete it */
- st_assert(q < p);
- q->record = p->record;
- MARK_ENTRY_DELETED(p);
- tab->num_entries--;
- update_range_for_deleted(tab, bin);
- break;
- }
else {
- /* hash collision; skip it */
#ifdef QUADRATIC_PROBE
- ind = hash_bin(ind + d, tab);
- d++;
#else
- ind = secondary_hash(ind, tab, &peterb);
#endif
- }
}
}
}
/* Reconstruct TAB's bins according to TAB's entries. This function
@@ -2110,10 +2218,14 @@ st_rehash_indexed(st_table *tab)
static void
st_rehash(st_table *tab)
{
- if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
- st_rehash_linear(tab);
- else
- st_rehash_indexed(tab);
}
#ifdef RUBY