summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
authorTSUYUSATO Kitsune <[email protected]>2022-10-03 22:21:56 +0900
committerYusuke Endoh <[email protected]>2022-11-09 23:21:26 +0900
commit881bf9a0b8b52c05a5917b95d988ae4b9a391a47 ()
treef5b724175cd10ee5dab64496e3f12112b900054e /regexec.c
parent230267d1a8f2b8245e911513926c06299ddeebc8 (diff)
Implement cache optimization for regexp matching
-rw-r--r--regexec.c469
1 files changed, 468 insertions, 1 deletions
@@ -231,6 +231,404 @@ onig_get_capture_tree(OnigRegion* region)
}
#endif /* USE_CAPTURE_HISTORY */
extern void
onig_region_clear(OnigRegion* region)
{
@@ -686,6 +1084,22 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
#define STACK_PUSH_REPEAT(id, pat) do {\
STACK_ENSURE(1);\
stk->type = STK_REPEAT;\
@@ -1448,6 +1862,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
UChar *s, *q, *sbegin;
UChar *p = reg->p;
UChar *pkeep;
char *alloca_base;
char *xmalloc_base = NULL;
@@ -1461,6 +1876,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
unsigned char* state_check_buff = msa->state_check_buff;
int num_comb_exp_check = reg->num_comb_exp_check;
#endif
#if USE_TOKEN_THREADED_VM
# define OP_OFFSET 1
@@ -1469,7 +1892,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
# define DEFAULT L_DEFAULT:
# define NEXT sprev = sbegin; JUMP
-# define JUMP RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
RB_GNUC_EXTENSION static const void *oplabels[] = {
&&L_OP_FINISH, /* matching process terminator (no more alternative) */
@@ -1645,6 +2068,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
# define VM_LOOP \
while (1) { \
OPCODE_EXEC_HOOK; \
sbegin = s; \
switch (*p++) {
# define VM_LOOP_END } sprev = sbegin; }
@@ -2843,6 +3267,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_PUSH) MOP_IN(OP_PUSH);
GET_RELADDR_INC(addr, p);
STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
MOP_OUT;
JUMP;
@@ -3173,6 +3598,40 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
sprev = stk->u.state.pstr_prev;
pkeep = stk->u.state.pkeep;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
if (stk->u.state.state_check != 0) {
stk->type = STK_STATE_CHECK_MARK;
@@ -3191,23 +3650,31 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
finish:
STACK_SAVE;
if (xmalloc_base) xfree(xmalloc_base);
return best_len;
#ifdef ONIG_DEBUG
stack_error:
STACK_SAVE;
if (xmalloc_base) xfree(xmalloc_base);
return ONIGERR_STACK_BUG;
#endif
bytecode_error:
STACK_SAVE;
if (xmalloc_base) xfree(xmalloc_base);
return ONIGERR_UNDEFINED_BYTECODE;
unexpected_bytecode_error:
STACK_SAVE;
if (xmalloc_base) xfree(xmalloc_base);
return ONIGERR_UNEXPECTED_BYTECODE;
}