summaryrefslogtreecommitdiff
path: root/st.c
diff options
context:
space:
mode:
authornormal <normal@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2018-02-13 10:02:07 +0000
committernormal <normal@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2018-02-13 10:02:07 +0000
commit4663c224fa6c925ce54af32fd1c1cbac9508f5ec ()
tree149cddd08bdaeb1e21fe8a98e53a3874eba2880d /st.c
parent248ff4f1b19409f92bad9749d1ce202b7c81a754 (diff)
st.c: retry operations if rebuilt
Calling the .eql? and .hash methods during a Hash operation can result in a thread switch or a signal handler to run: allowing one execution context to rebuild the hash table while another is still reading or writing the table. This results in a use-after-free bug affecting the thread_safe-0.3.6 test suite and likely other bugs. This bug did not affect users of commonly keys (String, Symbol, Fixnum) as those are optimized to avoid method dis for .eql? and .hash methods. A separate version of this change needs to be ported to Ruby 2.3.x which had a different implementation of st.c but was affected by the same bug. * st.c: Add comment about table rebuilding during comparison. (DO_PTR_EQUAL_CHECK): New macro. (REBUILT_TABLE_ENTRY_IND, REBUILT_TABLE_BIN_IND): New macros. (find_entry, find_table_entry_ind, find_table_bin_ind): Use new macros. Return the rebuild flag. (find_table_bin_ptr_and_reserve): Ditto. (st_lookup, st_get_key, st_insert, st_insert2): Retry the operation if the table was rebuilt. (st_general_delete, st_shift, st_update, st_general_foreach): Ditto. (st_rehash_linear, st_rehash_indexed): Use DO_PTR_EQUAL_CHECK. Return the rebuild flag. (st_rehash): Retry the operation if the table was rebuilt. [ruby-core:85510] [Ruby trunk Bug#14357] Thanks to Vit Ondruch for reporting the bug. From: Vladimir Makarov <[email protected]> git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62396 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--st.c258
1 files changed, 185 insertions, 73 deletions
@@ -90,6 +90,11 @@
o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
bins depending on the current hash table size.
This implementation speeds up the Ruby hash table benchmarks in
average by more 40% on Intel Haswell CPU.
@@ -174,6 +179,15 @@ static const struct st_hash_type type_strcasehash = {
#define PTR_EQUAL(tab, ptr, hash_val, key_) \
((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
/* Features of a table. */
struct st_features {
/* Power of 2 used for number of allocated entries. */
@@ -380,6 +394,11 @@ set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
#define UNDEFINED_BIN_IND (~(st_index_t) 0)
/* Mark I-th bin of table TAB as corresponding to a deleted table
entry. Update number of entries in the table and number of bins
corresponding to deleted entries. */
@@ -823,17 +842,22 @@ secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb)
/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
search. Return the index of the found entry in array `entries`.
- If it is not found, return UNDEFINED_ENTRY_IND. */
static inline st_index_t
find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
{
st_index_t i, bound;
st_table_entry *entries;
bound = tab->entries_bound;
entries = tab->entries;
for (i = tab->entries_start; i < bound; i++) {
- if (PTR_EQUAL(tab, &entries[i], hash_value, key))
return i;
}
return UNDEFINED_ENTRY_IND;
@@ -845,10 +869,12 @@ find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
/*#define QUADRATIC_PROBE*/
/* Return index of entry with HASH_VALUE and KEY in table TAB. If
- there is no such entry, return UNDEFINED_ENTRY_IND. */
static st_index_t
find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
{
st_index_t ind;
#ifdef QUADRATIC_PROBE
st_index_t d;
@@ -869,10 +895,13 @@ find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
FOUND_BIN;
for (;;) {
bin = get_bin(tab->bins, get_size_ind(tab), ind);
- if (! EMPTY_OR_DELETED_BIN_P(bin)
- && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key))
- break;
- else if (EMPTY_BIN_P(bin))
return UNDEFINED_ENTRY_IND;
#ifdef QUADRATIC_PROBE
ind = hash_bin(ind + d, tab);
@@ -887,10 +916,12 @@ find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
/* Find and return index of table TAB bin corresponding to an entry
with HASH_VALUE and KEY. If there is no such bin, return
- UNDEFINED_BIN_IND. */
static st_index_t
find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
{
st_index_t ind;
#ifdef QUADRATIC_PROBE
st_index_t d;
@@ -911,10 +942,13 @@ find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
FOUND_BIN;
for (;;) {
bin = get_bin(tab->bins, get_size_ind(tab), ind);
- if (! EMPTY_OR_DELETED_BIN_P(bin)
- && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key))
- break;
- else if (EMPTY_BIN_P(bin))
return UNDEFINED_BIN_IND;
#ifdef QUADRATIC_PROBE
ind = hash_bin(ind + d, tab);
@@ -955,7 +989,7 @@ find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
bin = get_bin(tab->bins, get_size_ind(tab), ind);
if (EMPTY_OR_DELETED_BIN_P(bin))
return ind;
- st_assert (! PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key));
#ifdef QUADRATIC_PROBE
ind = hash_bin(ind + d, tab);
d++;
@@ -973,11 +1007,13 @@ find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
bigger entries array. Although we can reuse a deleted bin, the
result bin value is always empty if the table has no entry with
KEY. Return the entries array index of the found entry or
- UNDEFINED_ENTRY_IND if it is not found. */
static st_index_t
find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
st_data_t key, st_index_t *bin_ind)
{
st_index_t ind;
st_hash_t curr_hash_value = *hash_value;
#ifdef QUADRATIC_PROBE
@@ -1015,7 +1051,10 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
break;
}
else if (! DELETED_BIN_P(entry_index)) {
- if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key))
break;
}
else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
@@ -1040,13 +1079,18 @@ st_lookup(st_table *tab, st_data_t key, st_data_t *value)
st_index_t bin;
st_hash_t hash = do_hash(key, tab);
if (tab->bins == NULL) {
bin = find_entry(tab, hash, key);
if (bin == UNDEFINED_ENTRY_IND)
return 0;
}
else {
bin = find_table_entry_ind(tab, hash, key);
if (bin == UNDEFINED_ENTRY_IND)
return 0;
bin -= ENTRY_BASE;
@@ -1064,13 +1108,18 @@ st_get_key(st_table *tab, st_data_t key, st_data_t *result)
st_index_t bin;
st_hash_t hash = do_hash(key, tab);
if (tab->bins == NULL) {
bin = find_entry(tab, hash, key);
if (bin == UNDEFINED_ENTRY_IND)
return 0;
}
else {
bin = find_table_entry_ind(tab, hash, key);
if (bin == UNDEFINED_ENTRY_IND)
return 0;
bin -= ENTRY_BASE;
@@ -1104,10 +1153,13 @@ st_insert(st_table *tab, st_data_t key, st_data_t value)
st_index_t bin_ind;
int new_p;
- rebuild_table_if_necessary(tab);
hash_value = do_hash(key, tab);
if (tab->bins == NULL) {
bin = find_entry(tab, hash_value, key);
new_p = bin == UNDEFINED_ENTRY_IND;
if (new_p)
tab->num_entries++;
@@ -1116,6 +1168,8 @@ st_insert(st_table *tab, st_data_t key, st_data_t value)
else {
bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
key, &bin_ind);
new_p = bin == UNDEFINED_ENTRY_IND;
bin -= ENTRY_BASE;
}
@@ -1192,10 +1246,13 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value,
st_index_t bin_ind;
int new_p;
- rebuild_table_if_necessary (tab);
hash_value = do_hash(key, tab);
if (tab->bins == NULL) {
bin = find_entry(tab, hash_value, key);
new_p = bin == UNDEFINED_ENTRY_IND;
if (new_p)
tab->num_entries++;
@@ -1204,6 +1261,8 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value,
else {
bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
key, &bin_ind);
new_p = bin == UNDEFINED_ENTRY_IND;
bin -= ENTRY_BASE;
}
@@ -1212,7 +1271,6 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value,
check = tab->rebuilds_num;
key = (*func)(key);
st_assert(check == tab->rebuilds_num);
- st_assert(do_hash(key, tab) == hash_value);
ind = tab->entries_bound++;
entry = &tab->entries[ind];
entry->hash = hash_value;
@@ -1220,6 +1278,7 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value,
entry->record = value;
if (bin_ind != UNDEFINED_BIN_IND)
set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
#ifdef ST_DEBUG
st_check(tab);
#endif
@@ -1281,8 +1340,11 @@ st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
st_assert(tab != NULL);
hash = do_hash(*key, tab);
if (tab->bins == NULL) {
bin = find_entry(tab, hash, *key);
if (bin == UNDEFINED_ENTRY_IND) {
if (value != 0) *value = 0;
return 0;
@@ -1290,6 +1352,8 @@ st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
}
else {
bin_ind = find_table_bin_ind(tab, hash, *key);
if (bin_ind == UNDEFINED_BIN_IND) {
if (value != 0) *value = 0;
return 0;
@@ -1344,21 +1408,33 @@ st_shift(st_table *tab, st_data_t *key, st_data_t *value)
for (i = tab->entries_start; i < bound; i++) {
curr_entry_ptr = &entries[i];
if (! DELETED_ENTRY_P(curr_entry_ptr)) {
if (value != 0) *value = curr_entry_ptr->record;
- *key = curr_entry_ptr->key;
if (tab->bins == NULL) {
- bin = find_entry(tab, curr_entry_ptr->hash, curr_entry_ptr->key);
st_assert(bin != UNDEFINED_ENTRY_IND);
- st_assert(&entries[bin] == curr_entry_ptr);
}
else {
- bin_ind = find_table_bin_ind(tab, curr_entry_ptr->hash,
- curr_entry_ptr->key);
st_assert(bin_ind != UNDEFINED_BIN_IND);
- st_assert(&entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
- - ENTRY_BASE] == curr_entry_ptr);
MARK_BIN_DELETED(tab, bin_ind);
}
MARK_ENTRY_DELETED(curr_entry_ptr);
tab->num_entries--;
update_range_for_deleted(tab, i);
@@ -1402,15 +1478,20 @@ st_update(st_table *tab, st_data_t key,
int retval, existing;
st_hash_t hash = do_hash(key, tab);
entries = tab->entries;
if (tab->bins == NULL) {
bin = find_entry(tab, hash, key);
existing = bin != UNDEFINED_ENTRY_IND;
entry = &entries[bin];
bin_ind = UNDEFINED_BIN_IND;
}
else {
bin_ind = find_table_bin_ind(tab, hash, key);
existing = bin_ind != UNDEFINED_BIN_IND;
if (existing) {
bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
@@ -1489,14 +1570,19 @@ st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg,
hash = curr_entry_ptr->hash;
retval = (*func)(key, curr_entry_ptr->record, arg, 0);
if (rebuilds_num != tab->rebuilds_num) {
entries = tab->entries;
packed_p = tab->bins == NULL;
if (packed_p) {
i = find_entry(tab, hash, key);
error_p = i == UNDEFINED_ENTRY_IND;
}
else {
i = find_table_entry_ind(tab, hash, key);
error_p = i == UNDEFINED_ENTRY_IND;
i -= ENTRY_BASE;
}
@@ -1512,36 +1598,44 @@ st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg,
}
switch (retval) {
case ST_CONTINUE:
- break;
case ST_CHECK:
- if (check_p)
- break;
case ST_STOP:
#ifdef ST_DEBUG
- st_check(tab);
-#endif
- return 0;
- case ST_DELETE:
- if (packed_p) {
- bin = find_entry(tab, hash, curr_entry_ptr->key);
- if (bin == UNDEFINED_ENTRY_IND)
- break;
- }
- else {
- bin_ind = find_table_bin_ind(tab, hash, curr_entry_ptr->key);
- if (bin_ind == UNDEFINED_BIN_IND)
- break;
- bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
- MARK_BIN_DELETED(tab, bin_ind);
- }
- st_assert(&entries[bin] == curr_entry_ptr);
- MARK_ENTRY_DELETED(curr_entry_ptr);
- tab->num_entries--;
- update_range_for_deleted(tab, bin);
#ifdef ST_DEBUG
- st_check(tab);
#endif
- break;
}
}
#ifdef ST_DEBUG
@@ -2021,10 +2115,12 @@ st_expand_table(st_table *tab, st_index_t siz)
free(tmp);
}
-/* Rehash using linear search. */
-static void
st_rehash_linear(st_table *tab)
{
st_index_t i, j;
st_table_entry *p, *q;
if (tab->bins) {
@@ -2039,7 +2135,10 @@ st_rehash_linear(st_table *tab)
q = &tab->entries[j];
if (DELETED_ENTRY_P(q))
continue;
- if (PTR_EQUAL(tab, p, q->hash, q->key)) {
st_assert(p < q);
*p = *q;
MARK_ENTRY_DELETED(q);
@@ -2048,12 +2147,15 @@ st_rehash_linear(st_table *tab)
}
}
}
}
-/* Rehash using index */
-static void
st_rehash_indexed(st_table *tab)
{
st_index_t i;
st_index_t const n = bins_size(tab);
unsigned int const size_ind = get_size_ind(tab);
@@ -2082,26 +2184,32 @@ st_rehash_indexed(st_table *tab)
set_bin(bins, size_ind, ind, i + ENTRY_BASE);
break;
}
- else if (PTR_EQUAL(tab, q, p->hash, p->key)) {
- /* duplicated key; delete it */
- st_assert(q < p);
- q->record = p->record;
- MARK_ENTRY_DELETED(p);
- tab->num_entries--;
- update_range_for_deleted(tab, bin);
- break;
- }
else {
- /* hash collision; skip it */
#ifdef QUADRATIC_PROBE
- ind = hash_bin(ind + d, tab);
- d++;
#else
- ind = secondary_hash(ind, tab, &peterb);
#endif
- }
}
}
}
/* Reconstruct TAB's bins according to TAB's entries. This function
@@ -2110,10 +2218,14 @@ st_rehash_indexed(st_table *tab)
static void
st_rehash(st_table *tab)
{
- if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
- st_rehash_linear(tab);
- else
- st_rehash_indexed(tab);
}
#ifdef RUBY