diff options
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | enc/big5.c | 6 | ||||
-rw-r--r-- | enc/euc_jp.c | 7 | ||||
-rw-r--r-- | enc/iso_8859_1.c | 2 | ||||
-rw-r--r-- | enc/iso_8859_10.c | 4 | ||||
-rw-r--r-- | enc/iso_8859_13.c | 4 | ||||
-rw-r--r-- | enc/iso_8859_14.c | 4 | ||||
-rw-r--r-- | enc/iso_8859_15.c | 4 | ||||
-rw-r--r-- | enc/iso_8859_16.c | 4 | ||||
-rw-r--r-- | enc/iso_8859_2.c | 2 | ||||
-rw-r--r-- | enc/iso_8859_3.c | 4 | ||||
-rw-r--r-- | enc/iso_8859_4.c | 4 | ||||
-rw-r--r-- | enc/iso_8859_5.c | 4 | ||||
-rw-r--r-- | enc/iso_8859_7.c | 4 | ||||
-rw-r--r-- | enc/iso_8859_9.c | 4 | ||||
-rw-r--r-- | enc/koi8_r.c | 4 | ||||
-rw-r--r-- | enc/koi8_u.c | 4 | ||||
-rw-r--r-- | enc/shift_jis.c | 7 | ||||
-rw-r--r-- | enc/unicode.c | 1 | ||||
-rw-r--r-- | enc/us_ascii.c | 6 | ||||
-rw-r--r-- | enc/utf_16_32.h | 2 | ||||
-rw-r--r-- | enc/utf_16be.c | 4 | ||||
-rw-r--r-- | enc/utf_16le.c | 4 | ||||
-rw-r--r-- | enc/utf_8.c | 4 | ||||
-rw-r--r-- | enc/windows_1251.c | 4 | ||||
-rw-r--r-- | include/ruby/oniguruma.h | 4 | ||||
-rw-r--r-- | regcomp.c | 127 | ||||
-rw-r--r-- | regenc.c | 64 | ||||
-rw-r--r-- | regenc.h | 14 | ||||
-rw-r--r-- | regexec.c | 7 | ||||
-rw-r--r-- | regint.h | 6 | ||||
-rw-r--r-- | regparse.c | 299 | ||||
-rw-r--r-- | regparse.h | 4 |
33 files changed, 390 insertions, 236 deletions
@@ -1,3 +1,7 @@ Mon Sep 15 16:21:10 2014 Eric Wong <[email protected]> * io.c (struct io_advise_struct): 32 => 24 bytes on 64-bit @@ -167,19 +167,19 @@ big5_mbc_enc_len0(const UChar* p, const UChar* e, int tridx, const int tbl[]) static int big5_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) { - return big5_mbc_enc_len0(p, e, 0, EncLen_BIG5); } static int big5_hkscs_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) { - return big5_mbc_enc_len0(p, e, 2, EncLen_BIG5_HKSCS); } static int big5_uao_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) { - return big5_mbc_enc_len0(p, e, 2, EncLen_BIG5_UAO); } static OnigCodePoint @@ -293,7 +293,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, flag, f, arg); } @@ -504,13 +504,14 @@ static int property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) { UChar *s = p, *e = end; - const struct enc_property *prop = onig_jis_property((const char *)s, (unsigned int)(e-s)); if (!prop) { return onigenc_minimum_property_name_to_ctype(enc, s, e); } - return (int)prop->ctype; } static int @@ -29,8 +29,6 @@ #include "regenc.h" -#define numberof(array) (int)(sizeof(array) / sizeof((array)[0])) - #define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \ ((EncISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) @@ -208,7 +208,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, flag, f, arg); } @@ -219,7 +219,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, flag, p, end, items); } @@ -197,7 +197,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, flag, f, arg); } @@ -208,7 +208,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, flag, p, end, items); } @@ -210,7 +210,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, flag, f, arg); } @@ -221,7 +221,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, flag, p, end, items); } @@ -204,7 +204,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, flag, f, arg); } @@ -215,7 +215,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, flag, p, end, items); } @@ -206,7 +206,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, flag, f, arg); } @@ -217,7 +217,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, flag, p, end, items); } @@ -29,8 +29,6 @@ #include "regenc.h" -#define numberof(array) (int)(sizeof(array) / sizeof((array)[0])) - #define ENC_ISO_8859_2_TO_LOWER_CASE(c) EncISO_8859_2_ToLowerCaseTable[c] #define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \ ((EncISO_8859_2_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) @@ -204,7 +204,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, flag, f, arg); } @@ -215,7 +215,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, flag, p, end, items); } @@ -206,7 +206,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, flag, f, arg); } @@ -217,7 +217,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, flag, p, end, items); } @@ -194,7 +194,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, flag, f, arg); } @@ -205,7 +205,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, flag, p, end, items); } @@ -190,7 +190,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, flag, f, arg); } @@ -201,7 +201,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, flag, p, end, items); } @@ -197,7 +197,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, flag, f, arg); } @@ -208,7 +208,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, flag, p, end, items); } @@ -183,7 +183,7 @@ koi8_r_apply_all_case_fold(OnigCaseFoldType flag, void* arg, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, flag, f, arg); } @@ -193,7 +193,7 @@ koi8_r_get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, flag, p, end, items); } @@ -187,7 +187,7 @@ koi8_u_apply_all_case_fold(OnigCaseFoldType flag, void* arg, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, flag, f, arg); } @@ -197,7 +197,7 @@ koi8_u_get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, flag, p, end, items); } @@ -278,7 +278,7 @@ apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, flag, f, arg); } @@ -493,13 +493,14 @@ static int property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) { UChar *s = p, *e = end; - const struct enc_property *prop = onig_jis_property((const char *)s, (unsigned int)(e-s)); if (!prop) { return onigenc_minimum_property_name_to_ctype(enc, s, e); } - return (int)prop->ctype; } static int @@ -141,7 +141,6 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y) #include "enc/unicode/name2ctype.h" -#define numberof(array) (int)(sizeof(array) / sizeof((array)[0])) #define CODE_RANGES_NUM numberof(CodeRanges) extern int @@ -3,9 +3,9 @@ static int us_ascii_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc) { - if (*p & 0x80) - return ONIGENC_CONSTRUCT_MBCLEN_INVALID(); - return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1); } OnigEncodingDefine(us_ascii, US_ASCII) = { @@ -1,5 +1,5 @@ #include "regenc.h" /* dummy for unsupported, statefull encoding */ -#define ENC_DUMMY_UNICODE(name) ENC_REPLICATE(name, name "BE") ENC_DUMMY_UNICODE("UTF-16"); ENC_DUMMY_UNICODE("UTF-32"); @@ -29,10 +29,6 @@ #include "regenc.h" -#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8) -#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc) -#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8) - #if 0 static const int EncLen_UTF16[] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -29,10 +29,6 @@ #include "regenc.h" -#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8) -#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc) -#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8) - #if 0 static const int EncLen_UTF16[] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -367,7 +367,7 @@ code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED) static int mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, - const UChar* end, UChar* fold, OnigEncoding enc) { const UChar* p = *pp; @@ -395,7 +395,7 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, static int get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, - const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED) { *sb_out = 0x80; return onigenc_unicode_ctype_code_range(ctype, ranges); @@ -167,7 +167,7 @@ cp1251_apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, flag, f, arg); } @@ -176,7 +176,7 @@ cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( - sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, flag, p, end, items); } @@ -39,8 +39,8 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 5 -#define ONIGURUMA_VERSION_MINOR 14 -#define ONIGURUMA_VERSION_TEENY 1 #ifdef __cplusplus # ifndef HAVE_S @@ -330,9 +330,10 @@ static int compile_tree(Node* node, regex_t* reg); (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC) static int -select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case) { int op; if (ignore_case) { switch (str_len) { @@ -434,11 +435,11 @@ compile_tree_n_times(Node* node, int n, regex_t* reg) } static int -add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len, regex_t* reg ARG_UNUSED, int ignore_case) { int len; - int op = select_str_opcode(mb_len, str_len, ignore_case); len = SIZE_OPCODE; @@ -446,15 +447,15 @@ add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len, if (IS_NEED_STR_LEN_OP_EXACT(op)) len += SIZE_LENGTH; - len += mb_len * (int )str_len; return len; } static int -add_compile_string(UChar* s, int mb_len, OnigDistance str_len, regex_t* reg, int ignore_case) { - int op = select_str_opcode(mb_len, str_len, ignore_case); add_opcode(reg, op); if (op == OP_EXACTMBN) @@ -462,12 +463,12 @@ add_compile_string(UChar* s, int mb_len, OnigDistance str_len, if (IS_NEED_STR_LEN_OP_EXACT(op)) { if (op == OP_EXACTN_IC) - add_length(reg, mb_len * str_len); else - add_length(reg, str_len); } - add_bytes(reg, s, mb_len * str_len); return 0; } @@ -475,7 +476,7 @@ add_compile_string(UChar* s, int mb_len, OnigDistance str_len, static int compile_length_string_node(Node* node, regex_t* reg) { - int rlen, r, len, prev_len, slen, ambig; OnigEncoding enc = reg->enc; UChar *p, *prev; StrNode* sn; @@ -489,24 +490,24 @@ compile_length_string_node(Node* node, regex_t* reg) p = prev = sn->s; prev_len = enclen(enc, p, sn->end); p += prev_len; - slen = 1; rlen = 0; for (; p < sn->end; ) { len = enclen(enc, p, sn->end); - if (len == prev_len) { - slen++; } else { - r = add_compile_string_length(prev, prev_len, slen, reg, ambig); rlen += r; prev = p; - slen = 1; prev_len = len; } p += len; } - r = add_compile_string_length(prev, prev_len, slen, reg, ambig); rlen += r; return rlen; } @@ -523,7 +524,7 @@ compile_length_string_raw_node(StrNode* sn, regex_t* reg) static int compile_string_node(Node* node, regex_t* reg) { - int r, len, prev_len, slen, ambig; OnigEncoding enc = reg->enc; UChar *p, *prev, *end; StrNode* sn; @@ -538,25 +539,25 @@ compile_string_node(Node* node, regex_t* reg) p = prev = sn->s; prev_len = enclen(enc, p, end); p += prev_len; - slen = 1; for (; p < end; ) { len = enclen(enc, p, end); - if (len == prev_len) { - slen++; } else { - r = add_compile_string(prev, prev_len, slen, reg, ambig); if (r) return r; prev = p; - slen = 1; prev_len = len; } p += len; } - return add_compile_string(prev, prev_len, slen, reg, ambig); } static int @@ -2591,6 +2592,7 @@ is_not_included(Node* x, Node* y, regex_t* reg) return 0; } else { for (i = 0; i < SINGLE_BYTE_SIZE; i++) { int is_word; if (NCTYPE(y)->ascii_range) @@ -3311,7 +3313,7 @@ next_setup(Node* node, Node* next_node, int in_root, regex_t* reg) qn->next_head_exact = n; } #endif - /* automatic possessivation a*b ==> (?>a*)b */ if (qn->lower <= 1) { int ttype = NTYPE(qn->target); if (IS_NODE_TYPE_SIMPLE(ttype)) { @@ -3433,26 +3435,39 @@ expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, } static int expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p, int slen, UChar *end, regex_t* reg, Node **rnode) { - int r, i, j, len, varlen, varclen; Node *anode, *var_anode, *snode, *xnode, *an; UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; *rnode = var_anode = NULL_NODE; varlen = 0; - varclen = 0; for (i = 0; i < item_num; i++) { if (items[i].byte_len != slen) { varlen = 1; break; } - if (items[i].code_len != 1) { - varclen |= 1; - } } if (varlen != 0) { @@ -3537,8 +3552,6 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], } } - if (varclen && !varlen) - return 2; return varlen; mem_err2: @@ -3582,7 +3595,8 @@ expand_case_fold_string(Node* node, regex_t* reg) len = enclen(reg->enc, p, end); - if (n == 0) { if (IS_NULL(snode)) { if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { top_root = root = onig_node_list_add(NULL_NODE, prev_node); @@ -3607,11 +3621,14 @@ expand_case_fold_string(Node* node, regex_t* reg) } else { alt_num *= (n + 1); - if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) { - varlen = 1; /* Assume that expanded strings are variable length. */ - break; - } if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { top_root = root = onig_node_list_add(NULL_NODE, prev_node); if (IS_NULL(root)) { @@ -3622,7 +3639,6 @@ expand_case_fold_string(Node* node, regex_t* reg) r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node); if (r < 0) goto mem_err; - if (r > 0) varlen = 1; if (r == 1) { if (IS_NULL(root)) { top_root = prev_node; @@ -3636,7 +3652,7 @@ expand_case_fold_string(Node* node, regex_t* reg) root = NCAR(prev_node); } - else { /* r == 0 || r == 2 */ if (IS_NOT_NULL(root)) { if (IS_NULL(onig_node_list_add(root, prev_node))) { onig_node_free(prev_node); @@ -3650,6 +3666,12 @@ expand_case_fold_string(Node* node, regex_t* reg) p += len; } if (p < end) { Node *srem; @@ -3679,20 +3701,9 @@ expand_case_fold_string(Node* node, regex_t* reg) /* ending */ top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node); - if (!varlen) { - /* When all expanded strings are same length, case-insensitive - BM search will be used. */ - r = update_string_node_case_fold(reg, node); - if (r == 0) { - NSTRING_SET_AMBIG(node); - } - } - else { - swap_node(node, top_root); - r = 0; - } onig_node_free(top_root); - return r; mem_err: r = ONIGERR_MEMORY; @@ -4367,7 +4378,7 @@ map_position_value(OnigEncoding enc, int i) 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 }; - if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) return 20; else @@ -4399,7 +4410,7 @@ distance_value(MinMaxLen* mm) if (mm->max == ONIG_INFINITE_DISTANCE) return 0; d = mm->max - mm->min; - if (d < sizeof(dist_vals)/sizeof(dist_vals[0])) /* return dist_vals[d] * 16 / (mm->min + 12); */ return (int )dist_vals[d]; else @@ -4507,6 +4518,9 @@ concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, if (right_len == 0) { to->right_anchor |= left->right_anchor; } } static int @@ -5080,7 +5094,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case ANCHOR_END_BUF: case ANCHOR_SEMI_END_BUF: case ANCHOR_END_LINE: - case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */ add_opt_anc_info(&opt->anc, NANCHOR(node)->type); break; @@ -5103,7 +5118,6 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case ANCHOR_PREC_READ_NOT: case ANCHOR_LOOK_BEHIND_NOT: break; } @@ -5369,7 +5383,8 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML | ANCHOR_LOOK_BEHIND); - reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { reg->anchor_dmin = opt.len.min; @@ -414,9 +414,7 @@ onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, OnigCodePoint code; int i, r; - for (i = 0; - i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes)); - i++) { code = OnigAsciiLowerMap[i].to; r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg); if (r != 0) return r; @@ -431,8 +429,8 @@ onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, extern int onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, - const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[], - OnigEncoding enc ARG_UNUSED) { if (0x41 <= *p && *p <= 0x5a) { items[0].byte_len = 1; @@ -570,9 +568,10 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size, extern int -onigenc_not_support_get_ctype_code_range(OnigCtype ctype, - OnigCodePoint* sb_out, const OnigCodePoint* ranges[], - OnigEncoding enc) { return ONIG_NO_SUPPORT_CONFIG; } @@ -589,7 +588,7 @@ onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc A /* for single byte encodings */ extern int onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p, - const UChar*end, UChar* lower, OnigEncoding enc ARG_UNUSED) { *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p); @@ -633,28 +632,31 @@ extern int onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED) { if (code > 0xff) - rb_raise(rb_eRangeError, "%u out of char range", code); *buf = (UChar )(code & 0xff); return 1; } extern UChar* -onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s, - const UChar* end, OnigEncoding enc ARG_UNUSED) { return (UChar* )s; } extern int -onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED) { return TRUE; } extern int -onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED) { return FALSE; @@ -716,7 +718,7 @@ onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, #if 0 extern int onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag, - const UChar** pp ARG_UNUSED, const UChar* end ARG_UNUSED) { const UChar* p = *pp; @@ -791,27 +793,27 @@ extern int onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) { static const PosixBracketEntryType PBS[] = { - PosixBracketEntryInit("Alnum", ONIGENC_CTYPE_ALNUM), - PosixBracketEntryInit("Alpha", ONIGENC_CTYPE_ALPHA), - PosixBracketEntryInit("Blank", ONIGENC_CTYPE_BLANK), - PosixBracketEntryInit("Cntrl", ONIGENC_CTYPE_CNTRL), - PosixBracketEntryInit("Digit", ONIGENC_CTYPE_DIGIT), - PosixBracketEntryInit("Graph", ONIGENC_CTYPE_GRAPH), - PosixBracketEntryInit("Lower", ONIGENC_CTYPE_LOWER), - PosixBracketEntryInit("Print", ONIGENC_CTYPE_PRINT), - PosixBracketEntryInit("Punct", ONIGENC_CTYPE_PUNCT), - PosixBracketEntryInit("Space", ONIGENC_CTYPE_SPACE), - PosixBracketEntryInit("Upper", ONIGENC_CTYPE_UPPER), - PosixBracketEntryInit("XDigit", ONIGENC_CTYPE_XDIGIT), - PosixBracketEntryInit("ASCII", ONIGENC_CTYPE_ASCII), - PosixBracketEntryInit("Word", ONIGENC_CTYPE_WORD), }; - const PosixBracketEntryType *pb, *pbe; int len; len = onigenc_strlen(enc, p, end); - for (pbe = (pb = PBS) + sizeof(PBS)/sizeof(PBS[0]); pb < pbe; ++pb) { if (len == pb->len && onigenc_with_ascii_strnicmp(enc, p, end, pb->name, pb->len) == 0) return pb->ctype; @@ -29,15 +29,18 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef REGINT_H #ifndef RUBY_EXTERN #include "ruby/config.h" #include "ruby/defines.h" #endif #ifdef ONIG_ESCAPE_UCHAR_COLLISION #undef ONIG_ESCAPE_UCHAR_COLLISION #endif -#endif #include "ruby/oniguruma.h" RUBY_SYMBOL_EXPORT_BEGIN @@ -104,7 +107,13 @@ typedef struct { short int len; } PosixBracketEntryType; -#define PosixBracketEntryInit(name, ctype) {(const UChar *)name, ctype, (short int)(sizeof(name) - 1)} #define USE_CRNL_AS_LINE_TERMINATOR #define USE_UNICODE_PROPERTIES @@ -159,6 +168,7 @@ ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, O #define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8) #define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc) #define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \ OnigEncISO_8859_1_ToLowerCaseTable[c] @@ -1397,7 +1397,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, (int )(end - str), (int )(sstart - str)); #endif - STACK_PUSH_ENSURED(STK_ALT, (UChar *)FinishCode); /* bottom stack */ best_len = ONIG_MISMATCH; s = (UChar* )sstart; pkeep = (UChar* )sstart; @@ -1406,7 +1406,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (s) { UChar *q, *bp, buf[50]; int len; - fprintf(stderr, "%4d> \"", (*p == OP_FINISH) ? -1 : (int )(s - str)); bp = buf; if (*p != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ for (i = 0, q = s; i < 7 && q < end; i++) { @@ -1419,6 +1419,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, *bp = 0; fputs((char* )buf, stderr); for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); onig_print_compiled_byte_code(stderr, p, p + strlen((char *)p), NULL, encode); fprintf(stderr, "\n"); } @@ -4183,7 +4184,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, prev = s; s += enclen(reg->enc, s, end); - if ((reg->anchor & ANCHOR_LOOK_BEHIND) == 0) { while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0) && s < range) { prev = s; @@ -193,6 +193,8 @@ extern pthread_mutex_t gOnigMutex; #define USE_UPPER_CASE_TABLE #else #define st_init_table onig_st_init_table #define st_init_table_with_size onig_st_init_table_with_size #define st_init_numtable onig_st_init_numtable @@ -213,8 +215,6 @@ extern pthread_mutex_t gOnigMutex; /* */ #define onig_st_is_member st_is_member -#define CHECK_INTERRUPT_IN_MATCH_AT - #endif #define STATE_CHECK_STRING_THRESHOLD_LEN 7 @@ -913,9 +913,7 @@ typedef struct { extern OnigOpInfoType OnigOpInfo[]; -#ifdef ONIG_DEBUG extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc)); -#endif #ifdef ONIG_DEBUG_STATISTICS extern void onig_statistics_init P_((void)); @@ -4153,17 +4153,15 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, } static int -add_ctype_to_cc(CClassNode* cc, int ctype, int not, int char_prop, ScanEnv* env) { - int maxcode, ascii_range; int c, r; const OnigCodePoint *ranges; OnigCodePoint sb_out; OnigEncoding enc = env->enc; OnigOptionType option = env->option; - ascii_range = IS_ASCII_RANGE(option) && (char_prop == 0); - r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges); if (r == 0) { if (ascii_range) { @@ -4280,31 +4278,32 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, int char_prop, ScanEnv* env) } static int -parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) { #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20 #define POSIX_BRACKET_NAME_MIN_LEN 4 static const PosixBracketEntryType PBS[] = { - { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 }, - { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 }, - { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 }, - { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 }, - { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 }, - { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 }, - { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 }, - { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 }, - { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 }, - { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 }, - { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 }, - { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 }, - { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 }, - { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 }, - { (UChar* )NULL, -1, 0 } }; const PosixBracketEntryType *pb; int not, i, r; OnigCodePoint c; OnigEncoding enc = env->enc; UChar *p = *src; @@ -4319,17 +4318,25 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3) goto not_posix_bracket; - for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) { p = (UChar* )onigenc_step(enc, p, end, pb->len); if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0) return ONIGERR_INVALID_POSIX_BRACKET_TYPE; - r = add_ctype_to_cc(cc, pb->ctype, not, - IS_POSIX_BRACKET_ALL_RANGE(env->option), - env); if (r != 0) return r; PINC_S; PINC_S; *src = p; return 0; @@ -4386,6 +4393,8 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env) return r; } static int parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) @@ -4399,11 +4408,15 @@ parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, *np = node_new_cclass(); CHECK_NULL_RETURN_MEMERR(*np); cc = NCCLASS(*np); - r = add_ctype_to_cc(cc, ctype, 0, 1, env); if (r != 0) return r; if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); - return 0; } @@ -4421,7 +4434,8 @@ enum CCVALTYPE { }; static int -next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, enum CCSTATE* state, ScanEnv* env) { int r; @@ -4430,11 +4444,18 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE; if (*state == CCS_VALUE && *type != CCV_CLASS) { - if (*type == CCV_SB) BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs)); else if (*type == CCV_CODE_POINT) { r = add_code_range(&(cc->mbuf), env, *vs, *vs); if (r < 0) return r; } } @@ -4444,7 +4465,8 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, } static int -next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, int* vs_israw, int v_israw, enum CCVALTYPE intype, enum CCVALTYPE* type, enum CCSTATE* state, ScanEnv* env) @@ -4453,11 +4475,18 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, switch (*state) { case CCS_VALUE: - if (*type == CCV_SB) BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs)); else if (*type == CCV_CODE_POINT) { r = add_code_range(&(cc->mbuf), env, *vs, *vs); if (r < 0) return r; } break; @@ -4474,10 +4503,16 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; } bitset_set_range(env, cc->bs, (int )*vs, (int )v); } else { r = add_code_range(&(cc->mbuf), env, *vs, v); if (r < 0) return r; } } else { @@ -4493,6 +4528,11 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff)); r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v); if (r < 0) return r; #if 0 } else @@ -4542,22 +4582,24 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped, } static int -parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) { int r, neg, len, fetched, and_start; OnigCodePoint v, vs; UChar *p; Node* node; CClassNode *cc, *prev_cc; - CClassNode work_cc; enum CCSTATE state; enum CCVALTYPE val_type, in_type; int val_israw, in_israw; - prev_cc = (CClassNode* )NULL; - *np = NULL_NODE; r = fetch_token_in_cc(tok, src, end, env); if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) { neg = 1; @@ -4581,6 +4623,16 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, CHECK_NULL_RETURN_MEMERR(node); cc = NCCLASS(node); and_start = 0; state = CCS_START; p = *src; @@ -4671,13 +4723,13 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, } in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT); val_entry2: - r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type, &state, env); if (r != 0) goto err; break; case TK_POSIX_BRACKET_OPEN: - r = parse_posix_bracket(cc, &p, end, env); if (r < 0) goto err; if (r == 1) { /* is not POSIX bracket */ CC_ESC_WARN(env, (UChar* )"["); @@ -4690,11 +4742,18 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, break; case TK_CHAR_TYPE: - r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, 0, env); if (r != 0) return r; next_class: - r = next_state_class(cc, &vs, &val_type, &state, env); if (r != 0) goto err; break; @@ -4704,8 +4763,13 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ctype = fetch_char_property_to_ctype(&p, end, env); if (ctype < 0) return ctype; - r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 1, env); if (r != 0) return r; goto next_class; } break; @@ -4766,15 +4830,20 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, case TK_CC_CC_OPEN: /* [ */ { - Node *anode; CClassNode* acc; - r = parse_char_class(&anode, tok, &p, end, env); if (r == 0) { acc = NCCLASS(anode); r = or_cclass(cc, acc, env); } onig_node_free(anode); if (r != 0) goto err; } break; @@ -4782,7 +4851,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, case TK_CC_AND: /* && */ { if (state == CCS_VALUE) { - r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, &val_type, &state, env); if (r != 0) goto err; } @@ -4794,12 +4863,23 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, r = and_cclass(prev_cc, cc, env); if (r != 0) goto err; bbuf_free(cc->mbuf); } else { prev_cc = cc; cc = &work_cc; } initialize_cclass(cc); } break; @@ -4822,7 +4902,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, } if (state == CCS_VALUE) { - r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, &val_type, &state, env); if (r != 0) goto err; } @@ -4832,12 +4912,24 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, if (r != 0) goto err; bbuf_free(cc->mbuf); cc = prev_cc; } - if (neg != 0) NCCLASS_SET_NOT(cc); - else NCCLASS_CLEAR_NOT(cc); if (IS_NCCLASS_NOT(cc) && IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) { int is_empty; @@ -4865,6 +4957,8 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, err: if (cc != NCCLASS(*np)) bbuf_free(cc->mbuf); return r; } @@ -5489,6 +5583,7 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc) typedef struct { ScanEnv* env; CClassNode* cc; Node* alt_root; Node** ptail; } IApplyCaseFoldArg; @@ -5500,37 +5595,57 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], IApplyCaseFoldArg* iarg; ScanEnv* env; CClassNode* cc; BitSetRef bs; iarg = (IApplyCaseFoldArg* )arg; env = iarg->env; cc = iarg->cc; bs = cc->bs; if (to_len == 1) { int is_in = onig_is_code_in_cc(env->enc, from, cc); #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) || (is_in == 0 && IS_NCCLASS_NOT(cc))) { - if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { - add_code_range0(&(cc->mbuf), env, *to, *to, 0); - } - else { - BITSET_SET_BIT(bs, *to); } } #else if (is_in != 0) { - if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { - if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc); - add_code_range0(&(cc->mbuf), env, *to, *to, 0); - } - else { - if (IS_NCCLASS_NOT(cc)) { - BITSET_CLEAR_BIT(bs, *to); } - else - BITSET_SET_BIT(bs, *to); } } #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ @@ -5574,6 +5689,35 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], } static int node_linebreak(Node** np, ScanEnv* env) { /* same as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */ @@ -5658,7 +5802,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env) np1 = node_new_cclass(); if (IS_NULL(np1)) goto err; cc1 = NCCLASS(np1); - r = add_ctype_to_cc(cc1, ctype, 0, 1, env); if (r != 0) goto err; NCCLASS_SET_NOT(cc1); @@ -5666,7 +5810,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env) np2 = node_new_cclass(); if (IS_NULL(np2)) goto err; cc2 = NCCLASS(np2); - r = add_ctype_to_cc(cc2, ctype, 0, 1, env); if (r != 0) goto err; qn = node_new_quantifier(0, REPEAT_INFINITE, 0); @@ -6013,7 +6157,8 @@ parse_exp(Node** np, OnigToken* tok, int term, *np = node_new_cclass(); CHECK_NULL_RETURN_MEMERR(*np); cc = NCCLASS(*np); - r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0, 0, env); if (r != 0) return r; if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); #ifdef USE_SHARED_CCLASS_TABLE @@ -6036,15 +6181,20 @@ parse_exp(Node** np, OnigToken* tok, int term, case TK_CC_OPEN: { CClassNode* cc; OnigCodePoint code; - r = parse_char_class(np, tok, src, end, env); - if (r != 0) return r; cc = NCCLASS(*np); if (is_onechar_cclass(cc, &code)) { onig_node_free(*np); *np = node_new_empty(); CHECK_NULL_RETURN_MEMERR(*np); r = node_str_cat_codepoint(*np, env->enc, code); @@ -6052,28 +6202,13 @@ parse_exp(Node** np, OnigToken* tok, int term, goto string_loop; } if (IS_IGNORECASE(env->option)) { - IApplyCaseFoldArg iarg; - - iarg.env = env; - iarg.cc = cc; - iarg.alt_root = NULL_NODE; - iarg.ptail = &(iarg.alt_root); - - r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag, - i_apply_case_fold, &iarg); if (r != 0) { - onig_node_free(iarg.alt_root); return r; } - if (IS_NOT_NULL(iarg.alt_root)) { - Node* work = onig_node_new_alt(*np, iarg.alt_root); - if (IS_NULL(work)) { - onig_node_free(iarg.alt_root); - return ONIGERR_MEMORY; - } - *np = work; - } } } break; @@ -193,8 +193,8 @@ typedef struct { int type; int regnum; OnigOptionType option; - struct _Node* target; AbsAddrType call_addr; /* for multiple call reference */ OnigDistance min_len; /* min length (byte) */ OnigDistance max_len; /* max length (byte) */ @@ -296,10 +296,10 @@ typedef struct { UChar* error; UChar* error_end; regex_t* reg; /* for reg->names only */ - int num_call; #ifdef USE_SUBEXP_CALL UnsetAddrList* unset_addr_list; #endif int num_mem; #ifdef USE_NAMED_GROUP int num_named; |