diff options
author | normal <normal@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2018-02-13 10:02:07 +0000 |
---|---|---|
committer | normal <normal@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2018-02-13 10:02:07 +0000 |
commit | 4663c224fa6c925ce54af32fd1c1cbac9508f5ec () | |
tree | 149cddd08bdaeb1e21fe8a98e53a3874eba2880d /st.c | |
parent | 248ff4f1b19409f92bad9749d1ce202b7c81a754 (diff) |
st.c: retry operations if rebuilt
Calling the .eql? and .hash methods during a Hash operation can result in a thread switch or a signal handler to run: allowing one execution context to rebuild the hash table while another is still reading or writing the table. This results in a use-after-free bug affecting the thread_safe-0.3.6 test suite and likely other bugs. This bug did not affect users of commonly keys (String, Symbol, Fixnum) as those are optimized to avoid method dis for .eql? and .hash methods. A separate version of this change needs to be ported to Ruby 2.3.x which had a different implementation of st.c but was affected by the same bug. * st.c: Add comment about table rebuilding during comparison. (DO_PTR_EQUAL_CHECK): New macro. (REBUILT_TABLE_ENTRY_IND, REBUILT_TABLE_BIN_IND): New macros. (find_entry, find_table_entry_ind, find_table_bin_ind): Use new macros. Return the rebuild flag. (find_table_bin_ptr_and_reserve): Ditto. (st_lookup, st_get_key, st_insert, st_insert2): Retry the operation if the table was rebuilt. (st_general_delete, st_shift, st_update, st_general_foreach): Ditto. (st_rehash_linear, st_rehash_indexed): Use DO_PTR_EQUAL_CHECK. Return the rebuild flag. (st_rehash): Retry the operation if the table was rebuilt. [ruby-core:85510] [Ruby trunk Bug#14357] Thanks to Vit Ondruch for reporting the bug. From: Vladimir Makarov <[email protected]> git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62396 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | st.c | 258 |
1 files changed, 185 insertions, 73 deletions
@@ -90,6 +90,11 @@ o To save more memory we use 8-, 16-, 32- and 64- bit indexes in bins depending on the current hash table size. This implementation speeds up the Ruby hash table benchmarks in average by more 40% on Intel Haswell CPU. @@ -174,6 +179,15 @@ static const struct st_hash_type type_strcasehash = { #define PTR_EQUAL(tab, ptr, hash_val, key_) \ ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key)) /* Features of a table. */ struct st_features { /* Power of 2 used for number of allocated entries. */ @@ -380,6 +394,11 @@ set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v) #define UNDEFINED_ENTRY_IND (~(st_index_t) 0) #define UNDEFINED_BIN_IND (~(st_index_t) 0) /* Mark I-th bin of table TAB as corresponding to a deleted table entry. Update number of entries in the table and number of bins corresponding to deleted entries. */ @@ -823,17 +842,22 @@ secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb) /* Find an entry with HASH_VALUE and KEY in TABLE using a linear search. Return the index of the found entry in array `entries`. - If it is not found, return UNDEFINED_ENTRY_IND. */ static inline st_index_t find_entry(st_table *tab, st_hash_t hash_value, st_data_t key) { st_index_t i, bound; st_table_entry *entries; bound = tab->entries_bound; entries = tab->entries; for (i = tab->entries_start; i < bound; i++) { - if (PTR_EQUAL(tab, &entries[i], hash_value, key)) return i; } return UNDEFINED_ENTRY_IND; @@ -845,10 +869,12 @@ find_entry(st_table *tab, st_hash_t hash_value, st_data_t key) /*#define QUADRATIC_PROBE*/ /* Return index of entry with HASH_VALUE and KEY in table TAB. If - there is no such entry, return UNDEFINED_ENTRY_IND. */ static st_index_t find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key) { st_index_t ind; #ifdef QUADRATIC_PROBE st_index_t d; @@ -869,10 +895,13 @@ find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key) FOUND_BIN; for (;;) { bin = get_bin(tab->bins, get_size_ind(tab), ind); - if (! EMPTY_OR_DELETED_BIN_P(bin) - && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key)) - break; - else if (EMPTY_BIN_P(bin)) return UNDEFINED_ENTRY_IND; #ifdef QUADRATIC_PROBE ind = hash_bin(ind + d, tab); @@ -887,10 +916,12 @@ find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key) /* Find and return index of table TAB bin corresponding to an entry with HASH_VALUE and KEY. If there is no such bin, return - UNDEFINED_BIN_IND. */ static st_index_t find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key) { st_index_t ind; #ifdef QUADRATIC_PROBE st_index_t d; @@ -911,10 +942,13 @@ find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key) FOUND_BIN; for (;;) { bin = get_bin(tab->bins, get_size_ind(tab), ind); - if (! EMPTY_OR_DELETED_BIN_P(bin) - && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key)) - break; - else if (EMPTY_BIN_P(bin)) return UNDEFINED_BIN_IND; #ifdef QUADRATIC_PROBE ind = hash_bin(ind + d, tab); @@ -955,7 +989,7 @@ find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key) bin = get_bin(tab->bins, get_size_ind(tab), ind); if (EMPTY_OR_DELETED_BIN_P(bin)) return ind; - st_assert (! PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key)); #ifdef QUADRATIC_PROBE ind = hash_bin(ind + d, tab); d++; @@ -973,11 +1007,13 @@ find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key) bigger entries array. Although we can reuse a deleted bin, the result bin value is always empty if the table has no entry with KEY. Return the entries array index of the found entry or - UNDEFINED_ENTRY_IND if it is not found. */ static st_index_t find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value, st_data_t key, st_index_t *bin_ind) { st_index_t ind; st_hash_t curr_hash_value = *hash_value; #ifdef QUADRATIC_PROBE @@ -1015,7 +1051,10 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value, break; } else if (! DELETED_BIN_P(entry_index)) { - if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key)) break; } else if (first_deleted_bin_ind == UNDEFINED_BIN_IND) @@ -1040,13 +1079,18 @@ st_lookup(st_table *tab, st_data_t key, st_data_t *value) st_index_t bin; st_hash_t hash = do_hash(key, tab); if (tab->bins == NULL) { bin = find_entry(tab, hash, key); if (bin == UNDEFINED_ENTRY_IND) return 0; } else { bin = find_table_entry_ind(tab, hash, key); if (bin == UNDEFINED_ENTRY_IND) return 0; bin -= ENTRY_BASE; @@ -1064,13 +1108,18 @@ st_get_key(st_table *tab, st_data_t key, st_data_t *result) st_index_t bin; st_hash_t hash = do_hash(key, tab); if (tab->bins == NULL) { bin = find_entry(tab, hash, key); if (bin == UNDEFINED_ENTRY_IND) return 0; } else { bin = find_table_entry_ind(tab, hash, key); if (bin == UNDEFINED_ENTRY_IND) return 0; bin -= ENTRY_BASE; @@ -1104,10 +1153,13 @@ st_insert(st_table *tab, st_data_t key, st_data_t value) st_index_t bin_ind; int new_p; - rebuild_table_if_necessary(tab); hash_value = do_hash(key, tab); if (tab->bins == NULL) { bin = find_entry(tab, hash_value, key); new_p = bin == UNDEFINED_ENTRY_IND; if (new_p) tab->num_entries++; @@ -1116,6 +1168,8 @@ st_insert(st_table *tab, st_data_t key, st_data_t value) else { bin = find_table_bin_ptr_and_reserve(tab, &hash_value, key, &bin_ind); new_p = bin == UNDEFINED_ENTRY_IND; bin -= ENTRY_BASE; } @@ -1192,10 +1246,13 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value, st_index_t bin_ind; int new_p; - rebuild_table_if_necessary (tab); hash_value = do_hash(key, tab); if (tab->bins == NULL) { bin = find_entry(tab, hash_value, key); new_p = bin == UNDEFINED_ENTRY_IND; if (new_p) tab->num_entries++; @@ -1204,6 +1261,8 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value, else { bin = find_table_bin_ptr_and_reserve(tab, &hash_value, key, &bin_ind); new_p = bin == UNDEFINED_ENTRY_IND; bin -= ENTRY_BASE; } @@ -1212,7 +1271,6 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value, check = tab->rebuilds_num; key = (*func)(key); st_assert(check == tab->rebuilds_num); - st_assert(do_hash(key, tab) == hash_value); ind = tab->entries_bound++; entry = &tab->entries[ind]; entry->hash = hash_value; @@ -1220,6 +1278,7 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value, entry->record = value; if (bin_ind != UNDEFINED_BIN_IND) set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE); #ifdef ST_DEBUG st_check(tab); #endif @@ -1281,8 +1340,11 @@ st_general_delete(st_table *tab, st_data_t *key, st_data_t *value) st_assert(tab != NULL); hash = do_hash(*key, tab); if (tab->bins == NULL) { bin = find_entry(tab, hash, *key); if (bin == UNDEFINED_ENTRY_IND) { if (value != 0) *value = 0; return 0; @@ -1290,6 +1352,8 @@ st_general_delete(st_table *tab, st_data_t *key, st_data_t *value) } else { bin_ind = find_table_bin_ind(tab, hash, *key); if (bin_ind == UNDEFINED_BIN_IND) { if (value != 0) *value = 0; return 0; @@ -1344,21 +1408,33 @@ st_shift(st_table *tab, st_data_t *key, st_data_t *value) for (i = tab->entries_start; i < bound; i++) { curr_entry_ptr = &entries[i]; if (! DELETED_ENTRY_P(curr_entry_ptr)) { if (value != 0) *value = curr_entry_ptr->record; - *key = curr_entry_ptr->key; if (tab->bins == NULL) { - bin = find_entry(tab, curr_entry_ptr->hash, curr_entry_ptr->key); st_assert(bin != UNDEFINED_ENTRY_IND); - st_assert(&entries[bin] == curr_entry_ptr); } else { - bin_ind = find_table_bin_ind(tab, curr_entry_ptr->hash, - curr_entry_ptr->key); st_assert(bin_ind != UNDEFINED_BIN_IND); - st_assert(&entries[get_bin(tab->bins, get_size_ind(tab), bin_ind) - - ENTRY_BASE] == curr_entry_ptr); MARK_BIN_DELETED(tab, bin_ind); } MARK_ENTRY_DELETED(curr_entry_ptr); tab->num_entries--; update_range_for_deleted(tab, i); @@ -1402,15 +1478,20 @@ st_update(st_table *tab, st_data_t key, int retval, existing; st_hash_t hash = do_hash(key, tab); entries = tab->entries; if (tab->bins == NULL) { bin = find_entry(tab, hash, key); existing = bin != UNDEFINED_ENTRY_IND; entry = &entries[bin]; bin_ind = UNDEFINED_BIN_IND; } else { bin_ind = find_table_bin_ind(tab, hash, key); existing = bin_ind != UNDEFINED_BIN_IND; if (existing) { bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE; @@ -1489,14 +1570,19 @@ st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg, hash = curr_entry_ptr->hash; retval = (*func)(key, curr_entry_ptr->record, arg, 0); if (rebuilds_num != tab->rebuilds_num) { entries = tab->entries; packed_p = tab->bins == NULL; if (packed_p) { i = find_entry(tab, hash, key); error_p = i == UNDEFINED_ENTRY_IND; } else { i = find_table_entry_ind(tab, hash, key); error_p = i == UNDEFINED_ENTRY_IND; i -= ENTRY_BASE; } @@ -1512,36 +1598,44 @@ st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg, } switch (retval) { case ST_CONTINUE: - break; case ST_CHECK: - if (check_p) - break; case ST_STOP: #ifdef ST_DEBUG - st_check(tab); -#endif - return 0; - case ST_DELETE: - if (packed_p) { - bin = find_entry(tab, hash, curr_entry_ptr->key); - if (bin == UNDEFINED_ENTRY_IND) - break; - } - else { - bin_ind = find_table_bin_ind(tab, hash, curr_entry_ptr->key); - if (bin_ind == UNDEFINED_BIN_IND) - break; - bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE; - MARK_BIN_DELETED(tab, bin_ind); - } - st_assert(&entries[bin] == curr_entry_ptr); - MARK_ENTRY_DELETED(curr_entry_ptr); - tab->num_entries--; - update_range_for_deleted(tab, bin); #ifdef ST_DEBUG - st_check(tab); #endif - break; } } #ifdef ST_DEBUG @@ -2021,10 +2115,12 @@ st_expand_table(st_table *tab, st_index_t siz) free(tmp); } -/* Rehash using linear search. */ -static void st_rehash_linear(st_table *tab) { st_index_t i, j; st_table_entry *p, *q; if (tab->bins) { @@ -2039,7 +2135,10 @@ st_rehash_linear(st_table *tab) q = &tab->entries[j]; if (DELETED_ENTRY_P(q)) continue; - if (PTR_EQUAL(tab, p, q->hash, q->key)) { st_assert(p < q); *p = *q; MARK_ENTRY_DELETED(q); @@ -2048,12 +2147,15 @@ st_rehash_linear(st_table *tab) } } } } -/* Rehash using index */ -static void st_rehash_indexed(st_table *tab) { st_index_t i; st_index_t const n = bins_size(tab); unsigned int const size_ind = get_size_ind(tab); @@ -2082,26 +2184,32 @@ st_rehash_indexed(st_table *tab) set_bin(bins, size_ind, ind, i + ENTRY_BASE); break; } - else if (PTR_EQUAL(tab, q, p->hash, p->key)) { - /* duplicated key; delete it */ - st_assert(q < p); - q->record = p->record; - MARK_ENTRY_DELETED(p); - tab->num_entries--; - update_range_for_deleted(tab, bin); - break; - } else { - /* hash collision; skip it */ #ifdef QUADRATIC_PROBE - ind = hash_bin(ind + d, tab); - d++; #else - ind = secondary_hash(ind, tab, &peterb); #endif - } } } } /* Reconstruct TAB's bins according to TAB's entries. This function @@ -2110,10 +2218,14 @@ st_rehash_indexed(st_table *tab) static void st_rehash(st_table *tab) { - if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS) - st_rehash_linear(tab); - else - st_rehash_indexed(tab); } #ifdef RUBY |