diff options
-rw-r--r-- | debug.c | 2 | ||||
-rw-r--r-- | ext/-test-/string/capacity.c | 9 | ||||
-rw-r--r-- | ext/-test-/string/cstr.c | 10 | ||||
-rw-r--r-- | gc.c | 145 | ||||
-rw-r--r-- | gc.rb | 10 | ||||
-rw-r--r-- | include/ruby/internal/config.h | 4 | ||||
-rw-r--r-- | include/ruby/internal/core/rstring.h | 20 | ||||
-rw-r--r-- | internal/gc.h | 27 | ||||
-rwxr-xr-x | misc/lldb_cruby.py | 3 | ||||
-rw-r--r-- | ruby.c | 7 | ||||
-rw-r--r-- | spec/ruby/optional/capi/string_spec.rb | 12 | ||||
-rw-r--r-- | string.c | 323 | ||||
-rw-r--r-- | test/-ext-/string/test_capacity.rb | 37 | ||||
-rw-r--r-- | test/-ext-/string/test_rb_str_dup.rb | 6 | ||||
-rw-r--r-- | test/objspace/test_objspace.rb | 4 | ||||
-rw-r--r-- | transcode.c | 4 |
16 files changed, 452 insertions, 171 deletions
@@ -56,7 +56,9 @@ const union { enum ruby_robject_consts robject_consts; enum ruby_rmodule_flags rmodule_flags; enum ruby_rstring_flags rstring_flags; enum ruby_rstring_consts rstring_consts; enum ruby_rarray_flags rarray_flags; enum ruby_rarray_consts rarray_consts; enum { @@ -4,10 +4,11 @@ static VALUE bug_str_capacity(VALUE klass, VALUE str) { - return - STR_EMBED_P(str) ? INT2FIX(RSTRING_EMBED_LEN_MAX) : \ - STR_SHARED_P(str) ? INT2FIX(0) : \ - LONG2FIX(RSTRING(str)->as.heap.aux.capa); } void @@ -62,9 +62,13 @@ bug_str_unterminated_substring(VALUE str, VALUE vbeg, VALUE vlen) if (RSTRING_LEN(str) < beg + len) rb_raise(rb_eIndexError, "end: %ld", beg + len); str = rb_str_new_shared(str); if (STR_EMBED_P(str)) { RSTRING(str)->basic.flags &= ~RSTRING_EMBED_LEN_MASK; RSTRING(str)->basic.flags |= len << RSTRING_EMBED_LEN_SHIFT; - memmove(RSTRING(str)->as.ary, RSTRING(str)->as.ary + beg, len); } else { RSTRING(str)->as.heap.ptr += beg; @@ -112,7 +116,11 @@ bug_str_s_cstr_noembed(VALUE self, VALUE str) Check_Type(str, T_STRING); FL_SET((str2), STR_NOEMBED); memcpy(buf, RSTRING_PTR(str), capacity); RBASIC(str2)->flags &= ~RSTRING_EMBED_LEN_MASK; RSTRING(str2)->as.heap.aux.capa = capacity; RSTRING(str2)->as.heap.ptr = buf; RSTRING(str2)->as.heap.len = RSTRING_LEN(str); @@ -888,6 +888,7 @@ static const bool USE_MMAP_ALIGNED_ALLOC = false; #endif struct heap_page { short total_slots; short free_slots; short pinned_slots; @@ -1849,7 +1850,7 @@ heap_page_add_freeobj(rb_objspace_t *objspace, struct heap_page *page, VALUE obj if (RGENGC_CHECK_MODE && /* obj should belong to page */ !(&page->start[0] <= (RVALUE *)obj && - (uintptr_t)obj < ((uintptr_t)page->start + (page->total_slots * page->size_pool->slot_size)) && obj % sizeof(RVALUE) == 0)) { rb_bug("heap_page_add_freeobj: %p is not rvalue.", (void *)p); } @@ -1938,7 +1939,7 @@ heap_pages_free_unused_pages(rb_objspace_t *objspace) } struct heap_page *hipage = heap_pages_sorted[heap_allocated_pages - 1]; - uintptr_t himem = (uintptr_t)hipage->start + (hipage->total_slots * hipage->size_pool->slot_size); GC_ASSERT(himem <= (uintptr_t)heap_pages_himem); heap_pages_himem = (RVALUE *)himem; @@ -2034,6 +2035,7 @@ heap_page_allocate(rb_objspace_t *objspace, rb_size_pool_t *size_pool) page->start = (RVALUE *)start; page->total_slots = limit; page->size_pool = size_pool; page_body->header.page = page; @@ -2091,7 +2093,6 @@ heap_add_page(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *hea { /* Adding to eden heap during incremental sweeping is forbidden */ GC_ASSERT(!(heap == SIZE_POOL_EDEN_HEAP(size_pool) && heap->sweeping_page)); - GC_ASSERT(page->size_pool == size_pool); page->flags.in_tomb = (heap == SIZE_POOL_TOMB_HEAP(size_pool)); list_add_tail(&heap->pages, &page->page_node); heap->total_pages++; @@ -2324,18 +2325,37 @@ static inline void heap_add_freepage(rb_heap_t *heap, struct heap_page *page); static struct heap_page *heap_next_freepage(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap); static inline void ractor_set_cache(rb_ractor_t *cr, struct heap_page *page); -#if USE_RVARGC -void * -rb_gc_rvargc_object_data(VALUE obj) { - return (void *)(obj + sizeof(RVALUE)); } #endif static inline VALUE ractor_cached_free_region(rb_objspace_t *objspace, rb_ractor_t *cr, size_t size) { - if (size != sizeof(RVALUE)) { return Qfalse; } @@ -2409,6 +2429,25 @@ newobj_fill(VALUE obj, VALUE v1, VALUE v2, VALUE v3) } #if USE_RVARGC static inline VALUE heap_get_freeobj(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap) { @@ -2430,25 +2469,6 @@ heap_get_freeobj(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t * return (VALUE)p; } - -static inline rb_size_pool_t * -size_pool_for_size(rb_objspace_t *objspace, size_t size) -{ - size_t slot_count = CEILDIV(size, sizeof(RVALUE)); - - /* size_pool_idx is ceil(log2(slot_count)) */ - size_t size_pool_idx = 64 - nlz_int64(slot_count - 1); - GC_ASSERT(size_pool_idx > 0); - if (size_pool_idx >= SIZE_POOL_COUNT) { - rb_bug("size_pool_for_size: allocation size too large"); - } - - rb_size_pool_t *size_pool = &size_pools[size_pool_idx]; - GC_ASSERT(size_pool->slot_size >= (short)size); - GC_ASSERT(size_pools[size_pool_idx - 1].slot_size < (short)size); - - return size_pool; -} #endif ALWAYS_INLINE(static VALUE newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, int wb_protected, size_t alloc_size)); @@ -2574,7 +2594,6 @@ VALUE rb_wb_unprotected_newobj_of(VALUE klass, VALUE flags, size_t size) { GC_ASSERT((flags & FL_WB_PROTECTED) == 0); - size = size + sizeof(RVALUE); return newobj_of(klass, flags, 0, 0, 0, FALSE, size); } @@ -2582,7 +2601,6 @@ VALUE rb_wb_protected_newobj_of(VALUE klass, VALUE flags, size_t size) { GC_ASSERT((flags & FL_WB_PROTECTED) == 0); - size = size + sizeof(RVALUE); return newobj_of(klass, flags, 0, 0, 0, TRUE, size); } @@ -2590,7 +2608,6 @@ VALUE rb_ec_wb_protected_newobj_of(rb_execution_context_t *ec, VALUE klass, VALUE flags, size_t size) { GC_ASSERT((flags & FL_WB_PROTECTED) == 0); - size = size + sizeof(RVALUE); return newobj_of_cr(rb_ec_ractor_ptr(ec), klass, flags, 0, 0, 0, TRUE, size); } @@ -2830,14 +2847,14 @@ is_pointer_to_heap(rb_objspace_t *objspace, void *ptr) mid = (lo + hi) / 2; page = heap_pages_sorted[mid]; if (page->start <= p) { - if ((uintptr_t)p < ((uintptr_t)page->start + (page->total_slots * page->size_pool->slot_size))) { RB_DEBUG_COUNTER_INC(gc_isptr_maybe); if (page->flags.in_tomb) { return FALSE; } else { - if ((NUM_IN_PAGE(p) * sizeof(RVALUE)) % page->size_pool->slot_size != 0) return FALSE; return TRUE; } @@ -4183,7 +4200,7 @@ rb_objspace_call_finalizer(rb_objspace_t *objspace) /* run data/file object's finalizers */ for (i = 0; i < heap_allocated_pages; i++) { struct heap_page *page = heap_pages_sorted[i]; - short stride = page->size_pool->slot_size; uintptr_t p = (uintptr_t)page->start; uintptr_t pend = p + page->total_slots * stride; @@ -4780,13 +4797,13 @@ count_objects(int argc, VALUE *argv, VALUE os) for (i = 0; i < heap_allocated_pages; i++) { struct heap_page *page = heap_pages_sorted[i]; - short stride = page->size_pool->slot_size; uintptr_t p = (uintptr_t)page->start; uintptr_t pend = p + page->total_slots * stride; for (;p < pend; p += stride) { VALUE vp = (VALUE)p; - GC_ASSERT((NUM_IN_PAGE(vp) * sizeof(RVALUE)) % page->size_pool->slot_size == 0); void *poisoned = asan_poisoned_object_p(vp); asan_unpoison_object(vp, false); @@ -4916,7 +4933,7 @@ try_move_in_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *pa from_freelist = true; } - gc_move(objspace, (VALUE)p, dest, page->size_pool->slot_size); gc_pin(objspace, (VALUE)p); heap->compact_cursor_index = (RVALUE *)p; if (from_freelist) { @@ -5216,7 +5233,7 @@ gc_fill_swept_page_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, struct heap_page * sweep_page = ctx->page; if (bitset) { - short slot_size = sweep_page->size_pool->slot_size; short slot_bits = slot_size / sizeof(RVALUE); do { @@ -5307,7 +5324,7 @@ static inline void gc_plane_sweep(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct gc_sweep_context *ctx) { struct heap_page * sweep_page = ctx->page; - short slot_size = sweep_page->size_pool->slot_size; short slot_bits = slot_size / sizeof(RVALUE); GC_ASSERT(slot_bits > 0); @@ -5385,7 +5402,6 @@ static inline void gc_page_sweep(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap, struct gc_sweep_context *ctx) { struct heap_page *sweep_page = ctx->page; - GC_ASSERT(sweep_page->size_pool == size_pool); int i; @@ -5603,27 +5619,31 @@ gc_sweep_finish_size_pool(rb_objspace_t *objspace, rb_size_pool_t *size_pool) size_t min_free_slots = (size_t)(total_slots * gc_params.heap_free_slots_min_ratio); if (swept_slots < min_free_slots) { - if (is_full_marking(objspace)) { - size_t extend_page_count = heap_extend_pages(objspace, swept_slots, total_slots, total_pages); - if (extend_page_count > size_pool->allocatable_pages) { - size_pool_allocatable_pages_set(objspace, size_pool, extend_page_count); - } - - heap_increment(objspace, size_pool, SIZE_POOL_EDEN_HEAP(size_pool)); - } - else { /* The heap is a growth heap if it freed more slots than had empty slots. */ bool is_growth_heap = size_pool->empty_slots == 0 || size_pool->freed_slots > size_pool->empty_slots; - /* Only growth heaps are allowed to start a major GC. */ - if (is_growth_heap && - objspace->profile.count - objspace->rgengc.last_major_gc >= RVALUE_OLD_AGE) { objspace->rgengc.need_major_gc |= GPR_FLAG_MAJOR_BY_NOFREE; size_pool->force_major_gc_count++; } } } } #endif @@ -5660,6 +5680,7 @@ gc_sweep_finish(rb_objspace_t *objspace) else { eden_heap->free_pages = eden_heap->pooled_pages; } objspace->rincgc.pooled_slots = 0; } #endif @@ -5701,8 +5722,6 @@ gc_sweep_step(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *hea #endif do { - GC_ASSERT(sweep_page->size_pool == size_pool); - RUBY_DEBUG_LOG("sweep_page:%p", (void *)sweep_page); struct gc_sweep_context ctx = { @@ -5831,7 +5850,7 @@ invalidate_moved_plane(rb_objspace_t *objspace, struct heap_page *page, uintptr_ bool from_freelist = FL_TEST_RAW(forwarding_object, FL_FROM_FREELIST); object = rb_gc_location(forwarding_object); - gc_move(objspace, object, forwarding_object, page->size_pool->slot_size); /* forwarding_object is now our actual object, and "object" * is the free slot for the original page */ struct heap_page *orig_page = GET_HEAP_PAGE(object); @@ -7654,7 +7673,7 @@ gc_verify_heap_page(rb_objspace_t *objspace, struct heap_page *page, VALUE obj) int remembered_old_objects = 0; int free_objects = 0; int zombie_objects = 0; - int stride = page->size_pool->slot_size / sizeof(RVALUE); for (i=0; i<page->total_slots; i+=stride) { VALUE val = (VALUE)&page->start[i]; @@ -7776,7 +7795,7 @@ gc_verify_internal_consistency_(rb_objspace_t *objspace) /* check relations */ for (size_t i = 0; i < heap_allocated_pages; i++) { struct heap_page *page = heap_pages_sorted[i]; - short slot_size = page->size_pool->slot_size; uintptr_t start = (uintptr_t)page->start; uintptr_t end = start + page->total_slots * slot_size; @@ -10019,7 +10038,19 @@ gc_update_object_references(rb_objspace_t *objspace, VALUE obj) case T_STRING: if (STR_SHARED_P(obj)) { UPDATE_IF_MOVED(objspace, any->as.string.as.heap.aux.shared); } break; @@ -13561,6 +13592,8 @@ Init_GC(void) rb_hash_aset(gc_constants, ID2SYM(rb_intern("HEAP_PAGE_BITMAP_SIZE")), SIZET2NUM(HEAP_PAGE_BITMAP_SIZE)); rb_hash_aset(gc_constants, ID2SYM(rb_intern("HEAP_PAGE_BITMAP_PLANES")), SIZET2NUM(HEAP_PAGE_BITMAP_PLANES)); rb_hash_aset(gc_constants, ID2SYM(rb_intern("HEAP_PAGE_SIZE")), SIZET2NUM(HEAP_PAGE_SIZE)); OBJ_FREEZE(gc_constants); /* internal constants */ rb_define_const(rb_mGC, "INTERNAL_CONSTANTS", gc_constants); @@ -256,6 +256,16 @@ module GC def self.verify_compaction_references(toward: nil, double_heap: false) Primitive.gc_verify_compaction_references(double_heap, toward == :empty) end end module ObjectSpace @@ -146,4 +146,8 @@ # undef RBIMPL_TEST3 #endif /* HAVE_VA_ARGS_MACRO */ #endif /* RBIMPL_CONFIG_H */ @@ -42,9 +42,11 @@ /** @cond INTERNAL_MACRO */ #define RSTRING_NOEMBED RSTRING_NOEMBED #define RSTRING_EMBED_LEN_MASK RSTRING_EMBED_LEN_MASK #define RSTRING_EMBED_LEN_SHIFT RSTRING_EMBED_LEN_SHIFT #define RSTRING_EMBED_LEN_MAX RSTRING_EMBED_LEN_MAX #define RSTRING_FSTR RSTRING_FSTR #define RSTRING_EMBED_LEN RSTRING_EMBED_LEN #define RSTRING_LEN RSTRING_LEN @@ -160,6 +162,7 @@ enum ruby_rstring_flags { */ RSTRING_NOEMBED = RUBY_FL_USER1, /** * When a string employs embedded strategy (see ::RSTRING_NOEMBED), these * bits are used to store the number of bytes actually filled into @@ -172,6 +175,7 @@ enum ruby_rstring_flags { */ RSTRING_EMBED_LEN_MASK = RUBY_FL_USER2 | RUBY_FL_USER3 | RUBY_FL_USER4 | RUBY_FL_USER5 | RUBY_FL_USER6, /* Actually, string encodings are also encoded into the flags, using * remaining bits.*/ @@ -198,6 +202,7 @@ enum ruby_rstring_flags { RSTRING_FSTR = RUBY_FL_USER17 }; /** * This is an enum because GDB wants it (rather than a macro). People need not * bother. @@ -209,6 +214,7 @@ enum ruby_rstring_consts { /** Max possible number of characters that can be embedded. */ RSTRING_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(char) - 1 }; /** * Ruby's String. A string in ruby conceptually has these information: @@ -278,7 +284,17 @@ struct RString { * here. Could be sufficiently large. In this case the length is * encoded into the flags. */ char ary[RSTRING_EMBED_LEN_MAX + 1]; } embed; } as; }; @@ -407,9 +423,13 @@ RSTRING_EMBED_LEN(VALUE str) RBIMPL_ASSERT_TYPE(str, RUBY_T_STRING); RBIMPL_ASSERT_OR_ASSUME(! RB_FL_ANY_RAW(str, RSTRING_NOEMBED)); VALUE f = RBASIC(str)->flags; f &= RSTRING_EMBED_LEN_MASK; f >>= RSTRING_EMBED_LEN_SHIFT; return RBIMPL_CAST((long)f); } @@ -18,10 +18,6 @@ struct rb_execution_context_struct; /* in vm_core.h */ struct rb_objspace; /* in vm_core.h */ -#ifndef USE_RVARGC -#define USE_RVARGC 0 -#endif - #ifdef NEWOBJ_OF # undef NEWOBJ_OF # undef RB_NEWOBJ_OF @@ -30,22 +26,21 @@ struct rb_objspace; /* in vm_core.h */ #define RVALUE_SIZE (sizeof(struct RBasic) + sizeof(VALUE[RBIMPL_RVALUE_EMBED_LEN_MAX])) -/* optimized version of NEWOBJ() */ -#define RB_NEWOBJ_OF(var, T, c, f) \ - T *(var) = (T *)(((f) & FL_WB_PROTECTED) ? \ - rb_wb_protected_newobj_of((c), (f) & ~FL_WB_PROTECTED, RVALUE_SIZE) : \ - rb_wb_unprotected_newobj_of((c), (f), RVALUE_SIZE)) - -#define RB_EC_NEWOBJ_OF(ec, var, T, c, f) \ - T *(var) = (T *)(((f) & FL_WB_PROTECTED) ? \ - rb_ec_wb_protected_newobj_of((ec), (c), (f) & ~FL_WB_PROTECTED, RVALUE_SIZE) : \ - rb_wb_unprotected_newobj_of((c), (f), RVALUE_SIZE)) - #define RB_RVARGC_NEWOBJ_OF(var, T, c, f, s) \ T *(var) = (T *)(((f) & FL_WB_PROTECTED) ? \ rb_wb_protected_newobj_of((c), (f) & ~FL_WB_PROTECTED, s) : \ rb_wb_unprotected_newobj_of((c), (f), s)) #define NEWOBJ_OF(var, T, c, f) RB_NEWOBJ_OF((var), T, (c), (f)) #define RVARGC_NEWOBJ_OF(var, T, c, f, s) RB_RVARGC_NEWOBJ_OF((var), T, (c), (f), (s)) #define RB_OBJ_GC_FLAGS_MAX 6 /* used in ext/objspace */ @@ -102,6 +97,8 @@ static inline void *ruby_sized_xrealloc2_inlined(void *ptr, size_t new_count, si static inline void ruby_sized_xfree_inlined(void *ptr, size_t size); VALUE rb_class_allocate_instance(VALUE klass); void rb_gc_ractor_newobj_cache_clear(rb_ractor_newobj_cache_t *newobj_cache); RUBY_SYMBOL_EXPORT_BEGIN /* gc.c (export) */ @@ -190,6 +190,8 @@ def string2cstr(rstring): cptr = int(rstring.GetValueForExpressionPath(".as.heap.ptr").value, 0) clen = int(rstring.GetValueForExpressionPath(".as.heap.len").value, 0) else: cptr = int(rstring.GetValueForExpressionPath(".as.ary").location, 0) clen = (flags & RSTRING_EMBED_LEN_MASK) >> RSTRING_EMBED_LEN_SHIFT return cptr, clen @@ -315,7 +317,6 @@ def lldb_inspect(debugger, target, result, val): else: len = val.GetValueForExpressionPath("->as.heap.len").GetValueAsSigned() ptr = val.GetValueForExpressionPath("->as.heap.ptr") - #print(val.GetValueForExpressionPath("->as.heap"), file=result) result.write("T_ARRAY: %slen=%d" % (flaginfo, len)) if flags & RUBY_FL_USER1: result.write(" (embed)") @@ -566,7 +566,12 @@ static VALUE runtime_libruby_path(void) { #if defined _WIN32 || defined __CYGWIN__ - DWORD len = RSTRING_EMBED_LEN_MAX, ret; VALUE path; VALUE wsopath = rb_str_new(0, len*sizeof(WCHAR)); WCHAR *wlibpath; @@ -108,7 +108,7 @@ describe "C-API String function" do it "returns a string with the given capacity" do buf = @s.rb_str_buf_new(256, nil) - @s.rb_str_capacity(buf).should == 256 end it "returns a string that can be appended to" do @@ -682,27 +682,27 @@ describe "C-API String function" do describe "rb_str_modify_expand" do it "grows the capacity to bytesize + expand, not changing the bytesize" do str = @s.rb_str_buf_new(256, "abcd") - @s.rb_str_capacity(str).should == 256 @s.rb_str_set_len(str, 3) str.bytesize.should == 3 @s.RSTRING_LEN(str).should == 3 - @s.rb_str_capacity(str).should == 256 @s.rb_str_modify_expand(str, 4) str.bytesize.should == 3 @s.RSTRING_LEN(str).should == 3 - @s.rb_str_capacity(str).should == 7 @s.rb_str_modify_expand(str, 1024) str.bytesize.should == 3 @s.RSTRING_LEN(str).should == 3 - @s.rb_str_capacity(str).should == 1027 @s.rb_str_modify_expand(str, 1) str.bytesize.should == 3 @s.RSTRING_LEN(str).should == 3 - @s.rb_str_capacity(str).should == 4 end it "raises an error if the string is frozen" do @@ -106,14 +106,26 @@ VALUE rb_cSymbol; #define STR_SET_NOEMBED(str) do {\ FL_SET((str), STR_NOEMBED);\ - STR_SET_EMBED_LEN((str), 0);\ } while (0) #define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE)) -#define STR_SET_EMBED_LEN(str, n) do { \ long tmp_n = (n);\ RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\ RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\ } while (0) #define STR_SET_LEN(str, n) do { \ if (STR_EMBED_P(str)) {\ @@ -150,7 +162,7 @@ VALUE rb_cSymbol; } while (0) #define RESIZE_CAPA_TERM(str,capacity,termlen) do {\ if (STR_EMBED_P(str)) {\ - if (!STR_EMBEDDABLE_P(capacity, termlen)) {\ char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\ const long tlen = RSTRING_LEN(str);\ memcpy(tmp, RSTRING_PTR(str), tlen);\ @@ -170,6 +182,8 @@ VALUE rb_cSymbol; #define STR_SET_SHARED(str, shared_str) do { \ if (!FL_TEST(str, STR_FAKESTR)) { \ RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \ FL_SET((str), STR_SHARED); \ FL_SET((shared_str), STR_SHARED_ROOT); \ @@ -193,8 +207,32 @@ VALUE rb_cSymbol; #define SHARABLE_SUBSTRING_P(beg, len, end) 1 #endif -#define STR_EMBEDDABLE_P(len, termlen) \ - ((len) <= RSTRING_EMBED_LEN_MAX + 1 - (termlen)) static VALUE str_replace_shared_without_enc(VALUE str2, VALUE str); static VALUE str_new_frozen(VALUE klass, VALUE orig); @@ -768,7 +806,11 @@ static size_t str_capacity(VALUE str, const int termlen) { if (STR_EMBED_P(str)) { return (RSTRING_EMBED_LEN_MAX + 1 - termlen); } else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) { return RSTRING(str)->as.heap.len; @@ -793,17 +835,36 @@ must_not_null(const char *ptr) } static inline VALUE -str_alloc(VALUE klass) { - NEWOBJ_OF(str, struct RString, klass, T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0)); return (VALUE)str; } static inline VALUE empty_str_alloc(VALUE klass) { RUBY_DTRACE_CREATE_HOOK(STRING, 0); - return str_alloc(klass); } static VALUE @@ -817,8 +878,14 @@ str_new0(VALUE klass, const char *ptr, long len, int termlen) RUBY_DTRACE_CREATE_HOOK(STRING, len); - str = str_alloc(klass); - if (!STR_EMBEDDABLE_P(len, termlen)) { RSTRING(str)->as.heap.aux.capa = len; /* :FIXME: @shyouhei guesses `len + termlen` is guaranteed to never * integer overflow. If we can STATIC_ASSERT that, the following @@ -827,9 +894,6 @@ str_new0(VALUE klass, const char *ptr, long len, int termlen) rb_xmalloc_mul_add_mul(sizeof(char), len, sizeof(char), termlen); STR_SET_NOEMBED(str); } - else if (len == 0) { - ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT); - } if (ptr) { memcpy(RSTRING_PTR(str), ptr, len); } @@ -931,7 +995,7 @@ str_new_static(VALUE klass, const char *ptr, long len, int encindex) } else { RUBY_DTRACE_CREATE_HOOK(STRING, len); - str = str_alloc(klass); RSTRING(str)->as.heap.len = len; RSTRING(str)->as.heap.ptr = (char *)ptr; RSTRING(str)->as.heap.aux.capa = len; @@ -1228,8 +1292,8 @@ str_replace_shared_without_enc(VALUE str2, VALUE str) long len; RSTRING_GETMEM(str, ptr, len); - if (STR_EMBEDDABLE_P(len, termlen)) { - char *ptr2 = RSTRING(str2)->as.embed.ary; STR_SET_EMBED(str2); memcpy(ptr2, RSTRING_PTR(str), len); STR_SET_EMBED_LEN(str2, len); @@ -1245,6 +1309,7 @@ str_replace_shared_without_enc(VALUE str2, VALUE str) root = rb_str_new_frozen(str); RSTRING_GETMEM(root, ptr, len); } if (!STR_EMBED_P(str2) && !FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) { if (FL_TEST_RAW(str2, STR_SHARED_ROOT)) { rb_fatal("about to free a possible shared root"); @@ -1273,7 +1338,7 @@ str_replace_shared(VALUE str2, VALUE str) static VALUE str_new_shared(VALUE klass, VALUE str) { - return str_replace_shared(str_alloc(klass), str); } VALUE @@ -1336,25 +1401,53 @@ str_new_frozen(VALUE klass, VALUE orig) } static VALUE str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding) { VALUE str; - if (STR_EMBED_P(orig)) { - str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig)); } else { if (FL_TEST_RAW(orig, STR_SHARED)) { VALUE shared = RSTRING(orig)->as.heap.aux.shared; - long ofs = RSTRING(orig)->as.heap.ptr - RSTRING(shared)->as.heap.ptr; - long rest = RSTRING(shared)->as.heap.len - ofs - RSTRING(orig)->as.heap.len; assert(!STR_EMBED_P(shared)); assert(OBJ_FROZEN(shared)); if ((ofs > 0) || (rest > 0) || (klass != RBASIC(shared)->klass) || ENCODING_GET(shared) != ENCODING_GET(orig)) { str = str_new_shared(klass, shared); RSTRING(str)->as.heap.ptr += ofs; RSTRING(str)->as.heap.len -= ofs + rest; } @@ -1364,24 +1457,15 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding) return shared; } } - else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) { - str = str_alloc(klass); STR_SET_EMBED(str); memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig)); STR_SET_EMBED_LEN(str, RSTRING_LEN(orig)); TERM_FILL(RSTRING_END(str), TERM_LEN(orig)); } else { - str = str_alloc(klass); - STR_SET_NOEMBED(str); - RSTRING(str)->as.heap.len = RSTRING_LEN(orig); - RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig); - RSTRING(str)->as.heap.aux.capa = RSTRING(orig)->as.heap.aux.capa; - RBASIC(str)->flags |= RBASIC(orig)->flags & STR_NOFREE; - RBASIC(orig)->flags &= ~STR_NOFREE; - STR_SET_SHARED(orig, str); - if (klass == 0) - FL_UNSET_RAW(str, STR_BORROWED); } } @@ -1405,17 +1489,24 @@ str_new_empty_String(VALUE str) } #define STR_BUF_MIN_SIZE 63 STATIC_ASSERT(STR_BUF_MIN_SIZE, STR_BUF_MIN_SIZE > RSTRING_EMBED_LEN_MAX); VALUE rb_str_buf_new(long capa) { - VALUE str = str_alloc(rb_cString); - if (capa <= RSTRING_EMBED_LEN_MAX) return str; if (capa < STR_BUF_MIN_SIZE) { capa = STR_BUF_MIN_SIZE; } FL_SET(str, STR_NOEMBED); RSTRING(str)->as.heap.aux.capa = capa; RSTRING(str)->as.heap.ptr = ALLOC_N(char, (size_t)capa + 1); @@ -1508,7 +1599,7 @@ str_shared_replace(VALUE str, VALUE str2) str_discard(str); termlen = rb_enc_mbminlen(enc); - if (STR_EMBEDDABLE_P(RSTRING_LEN(str2), termlen)) { STR_SET_EMBED(str); memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (size_t)RSTRING_LEN(str2) + termlen); STR_SET_EMBED_LEN(str, RSTRING_LEN(str2)); @@ -1516,6 +1607,21 @@ str_shared_replace(VALUE str, VALUE str2) ENC_CODERANGE_SET(str, cr); } else { STR_SET_NOEMBED(str); FL_UNSET(str, STR_SHARED); RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2); @@ -1581,42 +1687,77 @@ str_replace(VALUE str, VALUE str2) } static inline VALUE -ec_str_alloc(struct rb_execution_context_struct *ec, VALUE klass) { - RB_EC_NEWOBJ_OF(ec, str, struct RString, klass, T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0)); return (VALUE)str; } static inline VALUE str_duplicate_setup(VALUE klass, VALUE str, VALUE dup) { - enum {embed_size = RSTRING_EMBED_LEN_MAX + 1}; const VALUE flag_mask = RSTRING_NOEMBED | RSTRING_EMBED_LEN_MASK | - ENC_CODERANGE_MASK | ENCODING_MASK | FL_FREEZE ; VALUE flags = FL_TEST_RAW(str, flag_mask); int encidx = 0; - MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary, - char, embed_size); - if (flags & STR_NOEMBED) { if (FL_TEST_RAW(str, STR_SHARED)) { - str = RSTRING(str)->as.heap.aux.shared; } else if (UNLIKELY(!(flags & FL_FREEZE))) { - str = str_new_frozen(klass, str); flags = FL_TEST_RAW(str, flag_mask); - } - if (flags & STR_NOEMBED) { - RB_OBJ_WRITE(dup, &RSTRING(dup)->as.heap.aux.shared, str); - flags |= STR_SHARED; - } - else { - MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary, - char, embed_size); - } } if ((flags & ENCODING_MASK) == (ENCODING_INLINE_MAX<<ENCODING_SHIFT)) { encidx = rb_enc_get_index(str); flags &= ~ENCODING_MASK; @@ -1629,14 +1770,28 @@ str_duplicate_setup(VALUE klass, VALUE str, VALUE dup) static inline VALUE ec_str_duplicate(struct rb_execution_context_struct *ec, VALUE klass, VALUE str) { - VALUE dup = ec_str_alloc(ec, klass); return str_duplicate_setup(klass, str, dup); } static inline VALUE str_duplicate(VALUE klass, VALUE str) { - VALUE dup = str_alloc(klass); return str_duplicate_setup(klass, str, dup); } @@ -1745,7 +1900,12 @@ rb_str_init(int argc, VALUE *argv, VALUE str) str_modifiable(str); if (STR_EMBED_P(str)) { /* make noembed always */ char *new_ptr = ALLOC_N(char, (size_t)capa + termlen); memcpy(new_ptr, RSTRING(str)->as.embed.ary, RSTRING_EMBED_LEN_MAX + 1); RSTRING(str)->as.heap.ptr = new_ptr; } else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) { @@ -2133,7 +2293,7 @@ rb_str_times(VALUE str, VALUE times) return str_duplicate(rb_cString, str); } if (times == INT2FIX(0)) { - str2 = str_alloc(rb_cString); rb_enc_copy(str2, str); return str2; } @@ -2142,8 +2302,11 @@ rb_str_times(VALUE str, VALUE times) rb_raise(rb_eArgError, "negative argument"); } if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) { - str2 = str_alloc(rb_cString); - if (!STR_EMBEDDABLE_P(len, 1)) { RSTRING(str2)->as.heap.aux.capa = len; RSTRING(str2)->as.heap.ptr = ZALLOC_N(char, (size_t)len + 1); STR_SET_NOEMBED(str2); @@ -2244,11 +2407,11 @@ str_make_independent_expand(VALUE str, long len, long expand, const int termlen) if (len > capa) len = capa; - if (!STR_EMBED_P(str) && STR_EMBEDDABLE_P(capa, termlen)) { ptr = RSTRING(str)->as.heap.ptr; STR_SET_EMBED(str); - memcpy(RSTRING(str)->as.embed.ary, ptr, len); - TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); STR_SET_EMBED_LEN(str, len); return; } @@ -2646,7 +2809,7 @@ rb_str_subseq(VALUE str, long beg, long len) } else { str2 = rb_str_new(RSTRING_PTR(str)+beg, len); - RB_GC_GUARD(str); } rb_enc_cr_str_copy_for_substr(str2, str); @@ -2885,19 +3048,19 @@ rb_str_resize(VALUE str, long len) const int termlen = TERM_LEN(str); if (STR_EMBED_P(str)) { if (len == slen) return str; - if (STR_EMBEDDABLE_P(len, termlen)) { STR_SET_EMBED_LEN(str, len); - TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); return str; } str_make_independent_expand(str, slen, len - slen, termlen); } - else if (STR_EMBEDDABLE_P(len, termlen)) { char *ptr = STR_HEAP_PTR(str); STR_SET_EMBED(str); if (slen > len) slen = len; - if (slen > 0) MEMCPY(RSTRING(str)->as.embed.ary, ptr, char, slen); - TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); STR_SET_EMBED_LEN(str, len); if (independent) ruby_xfree(ptr); return str; @@ -2925,7 +3088,9 @@ str_buf_cat(VALUE str, const char *ptr, long len) long capa, total, olen, off = -1; char *sptr; const int termlen = TERM_LEN(str); assert(termlen < RSTRING_EMBED_LEN_MAX + 1); /* < (LONG_MAX/2) */ RSTRING_GETMEM(str, sptr, olen); if (ptr >= sptr && ptr <= sptr + olen) { @@ -2934,8 +3099,8 @@ str_buf_cat(VALUE str, const char *ptr, long len) rb_str_modify(str); if (len == 0) return 0; if (STR_EMBED_P(str)) { - capa = RSTRING_EMBED_LEN_MAX + 1 - termlen; - sptr = RSTRING(str)->as.embed.ary; olen = RSTRING_EMBED_LEN(str); } else { @@ -4797,17 +4962,21 @@ rb_str_drop_bytes(VALUE str, long len) str_modifiable(str); if (len > olen) len = olen; nlen = olen - len; - if (STR_EMBEDDABLE_P(nlen, TERM_LEN(str))) { char *oldptr = ptr; int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE)); STR_SET_EMBED(str); STR_SET_EMBED_LEN(str, nlen); - ptr = RSTRING(str)->as.embed.ary; memmove(ptr, oldptr + len, nlen); if (fl == STR_NOEMBED) xfree(oldptr); } else { - if (!STR_SHARED_P(str)) rb_str_new_frozen(str); ptr = RSTRING(str)->as.heap.ptr += len; RSTRING(str)->as.heap.len = nlen; } @@ -10465,7 +10634,13 @@ rb_str_force_encoding(VALUE str, VALUE enc) static VALUE rb_str_b(VALUE str) { - VALUE str2 = str_alloc(rb_cString); str_replace_shared_without_enc(str2, str); ENC_CODERANGE_CLEAR(str2); return str2; @@ -4,13 +4,10 @@ require '-test-/string' require 'rbconfig/sizeof' class Test_StringCapacity < Test::Unit::TestCase - def capa(str) - Bug::String.capacity(str) - end - def test_capacity_embedded - size = RbConfig::SIZEOF['void*'] * 3 - 1 - assert_equal size, capa('foo') end def test_capacity_shared @@ -18,7 +15,8 @@ class Test_StringCapacity < Test::Unit::TestCase end def test_capacity_normal - assert_equal 128, capa('1'*128) end def test_s_new_capacity @@ -39,7 +37,10 @@ class Test_StringCapacity < Test::Unit::TestCase end def test_literal_capacity - s = "I am testing string literal capacity" assert_equal(s.length, capa(s)) end @@ -51,9 +52,27 @@ class Test_StringCapacity < Test::Unit::TestCase end def test_capacity_fstring - s = String.new("I am testing", capacity: 1000) s << "fstring capacity" s = -s assert_equal(s.length, capa(s)) end end @@ -3,13 +3,15 @@ require '-test-/string' class Test_RbStrDup < Test::Unit::TestCase def test_nested_shared_non_frozen - str = Bug::String.rb_str_dup(Bug::String.rb_str_dup("a" * 50)) assert_send([Bug::String, :shared_string?, str]) assert_not_send([Bug::String, :sharing_with_shared?, str], '[Bug #15792]') end def test_nested_shared_frozen - str = Bug::String.rb_str_dup(Bug::String.rb_str_dup("a" * 50).freeze) assert_send([Bug::String, :shared_string?, str]) assert_not_send([Bug::String, :sharing_with_shared?, str], '[Bug #15792]') end @@ -29,12 +29,12 @@ class TestObjSpace < Test::Unit::TestCase end def test_memsize_of_root_shared_string - a = "hello" * 5 b = a.dup c = nil ObjectSpace.each_object(String) {|x| break c = x if x == a and x.frozen?} rv_size = GC::INTERNAL_CONSTANTS[:RVALUE_SIZE] - assert_equal([rv_size, rv_size, 26 + rv_size], [a, b, c].map {|x| ObjectSpace.memsize_of(x)}) end def test_argf_memsize @@ -3769,7 +3769,11 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self) rb_str_modify(output); if (NIL_P(output_bytesize_v)) { output_bytesize = RSTRING_EMBED_LEN_MAX; if (!NIL_P(input) && output_bytesize < RSTRING_LEN(input)) output_bytesize = RSTRING_LEN(input); } |