diff options
-rw-r--r-- | regexec.c | 223 | ||||
-rw-r--r-- | test/ruby/test_regexp.rb | 12 |
2 files changed, 150 insertions, 85 deletions
@@ -258,18 +258,19 @@ We encode a match cache point to an integer value by the following equation: A bit-array for memoizing (recording) match cache points once backtracked. */ -/* count the total number of cache opcodes for allocating a match cache buffer. */ -static OnigPosition -count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr) { - UChar* p = reg->p; - UChar* pend = p + reg->used; LengthType len; MemNumType repeat_mem; OnigEncoding enc = reg->enc; - MemNumType current_repeat_mem = -1; - long num_cache_opcodes = 0; - int lookaround_nesting = 0; while (p < pend) { switch (*p++) { @@ -402,7 +403,16 @@ count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr) if (reg->repeat_range[repeat_mem].lower == 0) { num_cache_opcodes++; } - current_repeat_mem = repeat_mem; break; case OP_REPEAT_INC: case OP_REPEAT_INC_NG: @@ -411,14 +421,7 @@ count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr) // A lone or invalid OP_REPEAT_INC is found. goto impossible; } - { - OnigRepeatRange *repeat_range = ®->repeat_range[repeat_mem]; - if (repeat_range->lower < repeat_range->upper) { - num_cache_opcodes++; - } - current_repeat_mem = -1; - } - break; case OP_REPEAT_INC_SG: case OP_REPEAT_INC_NG_SG: goto impossible; @@ -438,7 +441,10 @@ count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr) // A look-around nested in a atomic grouping is found. goto impossible; } - lookaround_nesting++; break; case OP_PUSH_POS_NOT: if (lookaround_nesting < 0) { @@ -446,7 +452,10 @@ count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr) goto impossible; } p += SIZE_RELADDR; - lookaround_nesting++; break; case OP_PUSH_LOOK_BEHIND_NOT: if (lookaround_nesting < 0) { @@ -455,25 +464,26 @@ count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr) } p += SIZE_RELADDR; p += SIZE_LENGTH; - lookaround_nesting++; - break; - case OP_POP_POS: - case OP_FAIL_POS: - case OP_FAIL_LOOK_BEHIND_NOT: - lookaround_nesting--; break; case OP_PUSH_STOP_BT: if (lookaround_nesting != 0) { // A nested atomic grouping is found. goto impossible; } - lookaround_nesting++; - lookaround_nesting *= -1; break; case OP_POP_STOP_BT: - lookaround_nesting *= -1; - lookaround_nesting--; - break; case OP_LOOK_BEHIND: p += SIZE_LENGTH; break; @@ -507,9 +517,15 @@ count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr) } } *num_cache_opcodes_ptr = num_cache_opcodes; return 0; impossible: *num_cache_opcodes_ptr = NUM_CACHE_OPCODES_IMPOSSIBLE; return 0; @@ -518,48 +534,49 @@ bytecode_error: return ONIGERR_UNDEFINED_BYTECODE; } -/* collect cache opcodes from the given regex program, and compute the total number of cache points. */ static OnigPosition -init_cache_opcodes(const regex_t* reg, OnigCacheOpcode* cache_opcodes, long* num_cache_points_ptr) { - UChar* pbegin; UChar* p = reg->p; - UChar* pend = p + reg->used; LengthType len; MemNumType repeat_mem; OnigEncoding enc = reg->enc; - MemNumType current_repeat_mem = -1; - long cache_point = 0; - long num_cache_points_at_repeat = 0; - int lookaround_nesting = 0; - const OnigCacheOpcode* cache_opcodes_begin = cache_opcodes; - OnigCacheOpcode* cache_opcodes_at_repeat = NULL; # define INC_CACHE_OPCODES do {\ cache_opcodes->addr = pbegin;\ - cache_opcodes->cache_point = cache_point - num_cache_points_at_repeat;\ cache_opcodes->outer_repeat_mem = current_repeat_mem;\ - cache_opcodes->num_cache_points_at_outer_repeat = num_cache_points_at_repeat;\ cache_opcodes->num_cache_points_in_outer_repeat = 0;\ cache_opcodes->lookaround_nesting = lookaround_nesting;\ cache_point += lookaround_nesting != 0 ? 2 : 1;\ cache_opcodes++;\ } while (0) -# define INC_LOOKAROUND_NESTING lookaround_nesting++ -# define DEC_LOOKAROUND_NESTING do {\ - UChar* match_addr = p - 1;\ - OnigCacheOpcode* cache_opcodes_in_lookaround = cache_opcodes - 1;\ - while (\ - cache_opcodes_in_lookaround >= cache_opcodes_begin &&\ - cache_opcodes_in_lookaround->lookaround_nesting == lookaround_nesting\ - ) {\ - cache_opcodes_in_lookaround->match_addr = match_addr;\ - cache_opcodes_in_lookaround--;\ - }\ - lookaround_nesting--;\ - } while (0) - while (p < pend) { pbegin = p; switch (*p++) { @@ -692,33 +709,31 @@ init_cache_opcodes(const regex_t* reg, OnigCacheOpcode* cache_opcodes, long* num if (reg->repeat_range[repeat_mem].lower == 0) { INC_CACHE_OPCODES; } - current_repeat_mem = repeat_mem; - num_cache_points_at_repeat = cache_point; - cache_opcodes_at_repeat = cache_opcodes; - break; - case OP_REPEAT_INC: - case OP_REPEAT_INC_NG: - GET_MEMNUM_INC(repeat_mem, p); { - long num_cache_points_in_repeat = cache_point - num_cache_points_at_repeat; OnigRepeatRange *repeat_range = ®->repeat_range[repeat_mem]; if (repeat_range->lower < repeat_range->upper) { INC_CACHE_OPCODES; cache_point -= lookaround_nesting != 0 ? 2 : 1; } - cache_point -= num_cache_points_in_repeat; int repeat_bounds = repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower; cache_point += num_cache_points_in_repeat * repeat_range->lower + (num_cache_points_in_repeat + (lookaround_nesting != 0 ? 2 : 1)) * repeat_bounds; - OnigCacheOpcode* cache_opcodes_in_repeat = cache_opcodes - 1; - while (cache_opcodes_at_repeat <= cache_opcodes_in_repeat) { cache_opcodes_in_repeat->num_cache_points_in_outer_repeat = num_cache_points_in_repeat; - cache_opcodes_in_repeat--; } - current_repeat_mem = -1; - num_cache_points_at_repeat = 0; - cache_opcodes_at_repeat = NULL; } break; case OP_REPEAT_INC_SG: case OP_REPEAT_INC_NG_SG: goto unexpected_bytecode_error; @@ -734,33 +749,51 @@ init_cache_opcodes(const regex_t* reg, OnigCacheOpcode* cache_opcodes, long* num break; case OP_PUSH_POS: - INC_LOOKAROUND_NESTING; break; case OP_PUSH_POS_NOT: p += SIZE_RELADDR; - INC_LOOKAROUND_NESTING; - break; case OP_PUSH_LOOK_BEHIND_NOT: p += SIZE_RELADDR; p += SIZE_LENGTH; - INC_LOOKAROUND_NESTING; break; case OP_POP_POS: case OP_FAIL_POS: case OP_FAIL_LOOK_BEHIND_NOT: - DEC_LOOKAROUND_NESTING; - break; - case OP_PUSH_STOP_BT: - INC_LOOKAROUND_NESTING; - lookaround_nesting *= -1; - break; case OP_POP_STOP_BT: - lookaround_nesting *= -1; - DEC_LOOKAROUND_NESTING; - break; case OP_LOOK_BEHIND: p += SIZE_LENGTH; - break; case OP_ABSENT_END: case OP_ABSENT: @@ -790,15 +823,35 @@ init_cache_opcodes(const regex_t* reg, OnigCacheOpcode* cache_opcodes, long* num } } *num_cache_points_ptr = cache_point; return 0; unexpected_bytecode_error: return ONIGERR_UNEXPECTED_BYTECODE; bytecode_error: return ONIGERR_UNDEFINED_BYTECODE; } #else static OnigPosition count_num_cache_opcodes(regex_t* reg, long* num_cache_opcodes) @@ -2016,6 +2016,18 @@ class TestRegexp < Test::Unit::TestCase assert(/^(?:.+){2,4}?b|b/.match? "aaaabaa") end def test_linear_time_p assert_send [Regexp, :linear_time?, /a/] assert_send [Regexp, :linear_time?, 'a'] |