diff options
author | Peter Zhu <[email protected]> | 2024-07-25 15:28:25 -0400 |
---|---|---|
committer | Peter Zhu <[email protected]> | 2024-07-26 08:42:38 -0400 |
commit | 7464514ca5e825ada31522b63a33e02d62a0a248 () | |
tree | e27e1fe961c8cfc3b03ec103f30fb41af0c3a7c5 /re.c | |
parent | 67e1ea002857b5c2115cd362f9230f3f4b432223 (diff) |
Fix memory leak in String#start_with? when regexp times out
[Bug #20653] This commit refactors how Onigmo handles timeout. Instead of raising a timeout error, onig_search will return ONIGERR_TIMEOUT, so that the caller can free memory and then raise a timeout error. This fixes a memory leak in String#start_with? when the regexp times out. For example: regex = Regexp.new("^#{"(a*)" * 10_000}x$", timeout: 0.000001) str = "a" * 1000000 + "x" 10.times do 100.times do str.start_with?(regex) rescue end puts `ps -o rss= -p #{$$}` end Before: 33216 51936 71152 81728 97152 103248 120384 133392 133520 133616 After: 14912 15376 15824 15824 16128 16128 16144 16144 16160 16160
Notes: Merged: https://github.com/ruby/ruby/pull/11247
-rw-r--r-- | re.c | 63 |
1 files changed, 15 insertions, 48 deletions
@@ -1719,10 +1719,16 @@ rb_reg_onig_match(VALUE re, VALUE str, if (result < 0) { onig_region_free(regs, 0); - if (result != ONIG_MISMATCH) { onig_errmsg_buffer err = ""; onig_error_code_to_str((UChar*)err, (int)result); rb_reg_raise(err, re); } } @@ -1783,23 +1789,6 @@ reg_onig_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_p ONIG_OPTION_NONE); } -struct rb_reg_onig_match_args { - VALUE re; - VALUE str; - struct reg_onig_search_args args; - struct re_registers regs; - - OnigPosition result; -}; - -static VALUE -rb_reg_onig_match_try(VALUE value_args) -{ - struct rb_reg_onig_match_args *args = (struct rb_reg_onig_match_args *)value_args; - args->result = rb_reg_onig_match(args->re, args->str, reg_onig_search, &args->args, &args->regs); - return Qnil; -} - /* returns byte offset */ static long rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_backref_str, VALUE *set_match) @@ -1810,38 +1799,22 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back return -1; } - struct rb_reg_onig_match_args args = { - .re = re, - .str = str, - .args = { - .pos = pos, - .range = reverse ? 0 : len, - }, - .regs = {0} }; - /* If there is a timeout set, then rb_reg_onig_match could raise a - * Regexp::TimeoutError so we want to protect it from leaking memory. 
*/ - if (rb_reg_match_time_limit) { - int state; - rb_protect(rb_reg_onig_match_try, (VALUE)&args, &state); - if (state) { - onig_region_free(&args.regs, false); - rb_jump_tag(state); - } - } - else { - rb_reg_onig_match_try((VALUE)&args); - } - if (args.result == ONIG_MISMATCH) { rb_backref_set(Qnil); return ONIG_MISMATCH; } VALUE match = match_alloc(rb_cMatch); rb_matchext_t *rm = RMATCH_EXT(match); - rm->regs = args.regs; if (set_backref_str) { RB_OBJ_WRITE(match, &RMATCH(match)->str, rb_str_new4(str)); @@ -1858,7 +1831,7 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back rb_backref_set(match); if (set_match) *set_match = match; - return args.result; } long @@ -4720,12 +4693,6 @@ rb_reg_timeout_p(regex_t *reg, void *end_time_) return false; } -void -rb_reg_raise_timeout(void) -{ - rb_raise(rb_eRegexpTimeoutError, "regexp match timeout"); -} - /* * call-seq: * Regexp.timeout -> float or nil |