summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--regexec.c223
-rw-r--r--test/ruby/test_regexp.rb12
2 files changed, 150 insertions, 85 deletions
@@ -258,18 +258,19 @@ We encode a match cache point to an integer value by the following equation:
A bit-array for memoizing (recording) match cache points once backtracked.
*/
-/* count the total number of cache opcodes for allocating a match cache buffer. */
-static OnigPosition
-count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr)
{
- UChar* p = reg->p;
- UChar* pend = p + reg->used;
LengthType len;
MemNumType repeat_mem;
OnigEncoding enc = reg->enc;
- MemNumType current_repeat_mem = -1;
- long num_cache_opcodes = 0;
- int lookaround_nesting = 0;
while (p < pend) {
switch (*p++) {
@@ -402,7 +403,16 @@ count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr)
if (reg->repeat_range[repeat_mem].lower == 0) {
num_cache_opcodes++;
}
- current_repeat_mem = repeat_mem;
break;
case OP_REPEAT_INC:
case OP_REPEAT_INC_NG:
@@ -411,14 +421,7 @@ count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr)
// A lone or invalid OP_REPEAT_INC is found.
goto impossible;
}
- {
- OnigRepeatRange *repeat_range = &reg->repeat_range[repeat_mem];
- if (repeat_range->lower < repeat_range->upper) {
- num_cache_opcodes++;
- }
- current_repeat_mem = -1;
- }
- break;
case OP_REPEAT_INC_SG:
case OP_REPEAT_INC_NG_SG:
goto impossible;
@@ -438,7 +441,10 @@ count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr)
// A look-around nested in a atomic grouping is found.
goto impossible;
}
- lookaround_nesting++;
break;
case OP_PUSH_POS_NOT:
if (lookaround_nesting < 0) {
@@ -446,7 +452,10 @@ count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr)
goto impossible;
}
p += SIZE_RELADDR;
- lookaround_nesting++;
break;
case OP_PUSH_LOOK_BEHIND_NOT:
if (lookaround_nesting < 0) {
@@ -455,25 +464,26 @@ count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr)
}
p += SIZE_RELADDR;
p += SIZE_LENGTH;
- lookaround_nesting++;
- break;
- case OP_POP_POS:
- case OP_FAIL_POS:
- case OP_FAIL_LOOK_BEHIND_NOT:
- lookaround_nesting--;
break;
case OP_PUSH_STOP_BT:
if (lookaround_nesting != 0) {
// A nested atomic grouping is found.
goto impossible;
}
- lookaround_nesting++;
- lookaround_nesting *= -1;
break;
case OP_POP_STOP_BT:
- lookaround_nesting *= -1;
- lookaround_nesting--;
- break;
case OP_LOOK_BEHIND:
p += SIZE_LENGTH;
break;
@@ -507,9 +517,15 @@ count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr)
}
}
*num_cache_opcodes_ptr = num_cache_opcodes;
return 0;
impossible:
*num_cache_opcodes_ptr = NUM_CACHE_OPCODES_IMPOSSIBLE;
return 0;
@@ -518,48 +534,49 @@ bytecode_error:
return ONIGERR_UNDEFINED_BYTECODE;
}
-/* collect cache opcodes from the given regex program, and compute the total number of cache points. */
static OnigPosition
-init_cache_opcodes(const regex_t* reg, OnigCacheOpcode* cache_opcodes, long* num_cache_points_ptr)
{
- UChar* pbegin;
UChar* p = reg->p;
- UChar* pend = p + reg->used;
LengthType len;
MemNumType repeat_mem;
OnigEncoding enc = reg->enc;
- MemNumType current_repeat_mem = -1;
- long cache_point = 0;
- long num_cache_points_at_repeat = 0;
- int lookaround_nesting = 0;
- const OnigCacheOpcode* cache_opcodes_begin = cache_opcodes;
- OnigCacheOpcode* cache_opcodes_at_repeat = NULL;
# define INC_CACHE_OPCODES do {\
cache_opcodes->addr = pbegin;\
- cache_opcodes->cache_point = cache_point - num_cache_points_at_repeat;\
cache_opcodes->outer_repeat_mem = current_repeat_mem;\
- cache_opcodes->num_cache_points_at_outer_repeat = num_cache_points_at_repeat;\
cache_opcodes->num_cache_points_in_outer_repeat = 0;\
cache_opcodes->lookaround_nesting = lookaround_nesting;\
cache_point += lookaround_nesting != 0 ? 2 : 1;\
cache_opcodes++;\
} while (0)
-# define INC_LOOKAROUND_NESTING lookaround_nesting++
-# define DEC_LOOKAROUND_NESTING do {\
- UChar* match_addr = p - 1;\
- OnigCacheOpcode* cache_opcodes_in_lookaround = cache_opcodes - 1;\
- while (\
- cache_opcodes_in_lookaround >= cache_opcodes_begin &&\
- cache_opcodes_in_lookaround->lookaround_nesting == lookaround_nesting\
- ) {\
- cache_opcodes_in_lookaround->match_addr = match_addr;\
- cache_opcodes_in_lookaround--;\
- }\
- lookaround_nesting--;\
- } while (0)
-
while (p < pend) {
pbegin = p;
switch (*p++) {
@@ -692,33 +709,31 @@ init_cache_opcodes(const regex_t* reg, OnigCacheOpcode* cache_opcodes, long* num
if (reg->repeat_range[repeat_mem].lower == 0) {
INC_CACHE_OPCODES;
}
- current_repeat_mem = repeat_mem;
- num_cache_points_at_repeat = cache_point;
- cache_opcodes_at_repeat = cache_opcodes;
- break;
- case OP_REPEAT_INC:
- case OP_REPEAT_INC_NG:
- GET_MEMNUM_INC(repeat_mem, p);
{
- long num_cache_points_in_repeat = cache_point - num_cache_points_at_repeat;
OnigRepeatRange *repeat_range = &reg->repeat_range[repeat_mem];
if (repeat_range->lower < repeat_range->upper) {
INC_CACHE_OPCODES;
cache_point -= lookaround_nesting != 0 ? 2 : 1;
}
- cache_point -= num_cache_points_in_repeat;
int repeat_bounds = repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower;
cache_point += num_cache_points_in_repeat * repeat_range->lower + (num_cache_points_in_repeat + (lookaround_nesting != 0 ? 2 : 1)) * repeat_bounds;
- OnigCacheOpcode* cache_opcodes_in_repeat = cache_opcodes - 1;
- while (cache_opcodes_at_repeat <= cache_opcodes_in_repeat) {
cache_opcodes_in_repeat->num_cache_points_in_outer_repeat = num_cache_points_in_repeat;
- cache_opcodes_in_repeat--;
}
- current_repeat_mem = -1;
- num_cache_points_at_repeat = 0;
- cache_opcodes_at_repeat = NULL;
}
break;
case OP_REPEAT_INC_SG:
case OP_REPEAT_INC_NG_SG:
goto unexpected_bytecode_error;
@@ -734,33 +749,51 @@ init_cache_opcodes(const regex_t* reg, OnigCacheOpcode* cache_opcodes, long* num
break;
case OP_PUSH_POS:
- INC_LOOKAROUND_NESTING;
break;
case OP_PUSH_POS_NOT:
p += SIZE_RELADDR;
- INC_LOOKAROUND_NESTING;
- break;
case OP_PUSH_LOOK_BEHIND_NOT:
p += SIZE_RELADDR;
p += SIZE_LENGTH;
- INC_LOOKAROUND_NESTING;
break;
case OP_POP_POS:
case OP_FAIL_POS:
case OP_FAIL_LOOK_BEHIND_NOT:
- DEC_LOOKAROUND_NESTING;
- break;
- case OP_PUSH_STOP_BT:
- INC_LOOKAROUND_NESTING;
- lookaround_nesting *= -1;
- break;
case OP_POP_STOP_BT:
- lookaround_nesting *= -1;
- DEC_LOOKAROUND_NESTING;
- break;
case OP_LOOK_BEHIND:
p += SIZE_LENGTH;
- break;
case OP_ABSENT_END:
case OP_ABSENT:
@@ -790,15 +823,35 @@ init_cache_opcodes(const regex_t* reg, OnigCacheOpcode* cache_opcodes, long* num
}
}
*num_cache_points_ptr = cache_point;
return 0;
unexpected_bytecode_error:
return ONIGERR_UNEXPECTED_BYTECODE;
bytecode_error:
return ONIGERR_UNDEFINED_BYTECODE;
}
#else
static OnigPosition
count_num_cache_opcodes(regex_t* reg, long* num_cache_opcodes)
@@ -2016,6 +2016,18 @@ class TestRegexp < Test::Unit::TestCase
assert(/^(?:.+){2,4}?b|b/.match? "aaaabaa")
end
def test_linear_time_p
assert_send [Regexp, :linear_time?, /a/]
assert_send [Regexp, :linear_time?, 'a']