summary | refs | log | tree | commit | diff
path: root/utf8.c
diff options
context:
space:
mode:
author: matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2006-02-04 12:31:19 +0000
committer: matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2006-02-04 12:31:19 +0000
commit: 086e235f035d44c3554cab364cd70644e537a919
tree: 377844e954f01e7747b22c3acaabb23c23034baa /utf8.c
parent: f0432871fd3f9489c87199700efa32a7597f135c (diff)
* oniguruma.h: merge Oniguruma 4.0.0 [ruby-dev:28290]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9885 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- utf8.c 90
1 files changed, 58 insertions, 32 deletions
@@ -2,7 +2,7 @@
utf8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -40,7 +40,7 @@
#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
-static int EncLen_UTF8[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -65,6 +65,29 @@ utf8_mbc_enc_len(const UChar* p)
return EncLen_UTF8[*p];
}
static OnigCodePoint
utf8_mbc_to_code(const UChar* p, const UChar* end)
{
@@ -307,16 +330,16 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
}
-static OnigCodePoint EmptyRange[] = { 0 };
-static OnigCodePoint SBAlnum[] = {
3,
0x0030, 0x0039,
0x0041, 0x005a,
0x0061, 0x007a
};
-static OnigCodePoint MBAlnum[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
411,
#else
@@ -738,13 +761,13 @@ static OnigCodePoint MBAlnum[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBAlnum */
-static OnigCodePoint SBAlpha[] = {
2,
0x0041, 0x005a,
0x0061, 0x007a
};
-static OnigCodePoint MBAlpha[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
394,
#else
@@ -1149,13 +1172,13 @@ static OnigCodePoint MBAlpha[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBAlpha */
-static OnigCodePoint SBBlank[] = {
2,
0x0009, 0x0009,
0x0020, 0x0020
};
-static OnigCodePoint MBBlank[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
7,
#else
@@ -1173,13 +1196,13 @@ static OnigCodePoint MBBlank[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBBlank */
-static OnigCodePoint SBCntrl[] = {
2,
0x0000, 0x001f,
0x007f, 0x007f
};
-static OnigCodePoint MBCntrl[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
18,
#else
@@ -1208,12 +1231,12 @@ static OnigCodePoint MBCntrl[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBCntrl */
-static OnigCodePoint SBDigit[] = {
1,
0x0030, 0x0039
};
-static OnigCodePoint MBDigit[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
22,
#else
@@ -1245,12 +1268,12 @@ static OnigCodePoint MBDigit[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBDigit */
-static OnigCodePoint SBGraph[] = {
1,
0x0021, 0x007e
};
-static OnigCodePoint MBGraph[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
404,
#else
@@ -1665,12 +1688,12 @@ static OnigCodePoint MBGraph[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBGraph */
-static OnigCodePoint SBLower[] = {
1,
0x0061, 0x007a
};
-static OnigCodePoint MBLower[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
423,
#else
@@ -2104,13 +2127,13 @@ static OnigCodePoint MBLower[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBLower */
-static OnigCodePoint SBPrint[] = {
2,
0x0009, 0x000d,
0x0020, 0x007e
};
-static OnigCodePoint MBPrint[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
403,
#else
@@ -2524,7 +2547,7 @@ static OnigCodePoint MBPrint[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBPrint */
-static OnigCodePoint SBPunct[] = {
9,
0x0021, 0x0023,
0x0025, 0x002a,
@@ -2537,7 +2560,7 @@ static OnigCodePoint SBPunct[] = {
0x007d, 0x007d
}; /* end of SBPunct */
-static OnigCodePoint MBPunct[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
77,
#else
@@ -2625,13 +2648,13 @@ static OnigCodePoint MBPunct[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBPunct */
-static OnigCodePoint SBSpace[] = {
2,
0x0009, 0x000d,
0x0020, 0x0020
};
-static OnigCodePoint MBSpace[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
9,
#else
@@ -2651,12 +2674,12 @@ static OnigCodePoint MBSpace[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBSpace */
-static OnigCodePoint SBUpper[] = {
1,
0x0041, 0x005a
};
-static OnigCodePoint MBUpper[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
420,
#else
@@ -3087,19 +3110,19 @@ static OnigCodePoint MBUpper[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBUpper */
-static OnigCodePoint SBXDigit[] = {
3,
0x0030, 0x0039,
0x0041, 0x0046,
0x0061, 0x0066
};
-static OnigCodePoint SBASCII[] = {
1,
0x0000, 0x007f
};
-static OnigCodePoint SBWord[] = {
4,
0x0030, 0x0039,
0x0041, 0x005a,
@@ -3107,7 +3130,7 @@ static OnigCodePoint SBWord[] = {
0x0061, 0x007a
};
-static OnigCodePoint MBWord[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
432,
#else
@@ -3554,7 +3577,7 @@ static OnigCodePoint MBWord[] = {
static int
utf8_get_ctype_code_range(int ctype,
- OnigCodePoint* sbr[], OnigCodePoint* mbr[])
{
#define CR_SET(sbl,mbl) do { \
*sbr = sbl; \
@@ -3622,7 +3645,7 @@ static int
utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- OnigCodePoint *range;
#endif
if (code < 256) {
@@ -3674,6 +3697,9 @@ utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
case ONIGENC_CTYPE_ALNUM:
range = MBAlnum;
break;
default:
return ONIGENCERR_TYPE_BUG;
@@ -3723,7 +3749,7 @@ OnigEncodingType OnigEncodingUTF8 = {
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
- onigenc_is_mbc_newline_0x0a,
utf8_mbc_to_code,
utf8_code_to_mbclen,
utf8_code_to_mbc,