summary refs log tree commit diff
path: root/enc/euc_jp.c
diff options
context:
space:
mode:
author naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2012-02-17 07:42:23 +0000
committer naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2012-02-17 07:42:23 +0000
commit 0424e152c684a85f4b0691f1e84aec203115333d ()
tree ccb4af1c7fa12a09934f315edba0b9d839e57ceb /enc/euc_jp.c
parent 5362e7fcddf70d16fc4261383f99175c90881353 (diff)
* Merge Onigmo-5.13.1. [ruby-dev:45057] [Feature #5820]
  https://github.com/k-takata/Onigmo
  cp reg{comp,enc,error,exec,parse,syntax}.c reg{enc,int,parse}.h
  cp oniguruma.h
  cp tool/enc-unicode.rb
  cp -r enc/

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@34663 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- enc/euc_jp.c 285
1 files changed, 262 insertions, 23 deletions
@@ -1,8 +1,9 @@
/**********************************************************************
- euc_jp.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,7 +30,6 @@
#include "regint.h"
-
#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
static const int EncLen_EUCJP[] = {
@@ -114,6 +114,97 @@ static const signed char trans[][0x100] = {
#undef A
#undef F
static int
mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
{
@@ -138,7 +229,7 @@ mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
int c, i, len;
OnigCodePoint n;
- len = enclen(enc, p, end);
n = (OnigCodePoint )*p++;
if (len == 1) return n;
@@ -154,10 +245,10 @@ static int
code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
{
if (ONIGENC_IS_CODE_ASCII(code)) return 1;
- else if (code > 0xffffff)
- return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
- else if (code & 0x800000) return 3;
- else if (code & 0x8000) return 2;
else
return ONIGERR_INVALID_CODE_POINT_VALUE;
}
@@ -191,10 +282,87 @@ code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc)
*p++ = (UChar )(code & 0xff);
#if 1
- if (enclen(enc, buf, p) != (p - buf))
return ONIGERR_INVALID_CODE_POINT_VALUE;
#endif
- return (int)(p - buf);
}
static int
@@ -202,7 +370,6 @@ mbc_case_fold(OnigCaseFoldType flag,
const UChar** pp, const UChar* end, UChar* lower,
OnigEncoding enc)
{
- int len;
const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
@@ -211,12 +378,11 @@ mbc_case_fold(OnigCaseFoldType flag,
return 1;
}
else {
- int i;
- len = enclen(enc, p, end);
- for (i = 0; i < len; i++) {
- *lower++ = *p++;
- }
(*pp) += len;
return len; /* return byte length of converted char to lower */
}
@@ -235,7 +401,7 @@ left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, Onig
p = s;
while (!eucjp_islead(*p) && p > start) p--;
- len = enclen(enc, p, end);
if (p + len > s) return (UChar* )p;
p += len;
return (UChar* )(p + ((s - p) & ~1));
@@ -260,15 +426,83 @@ static hash_table_type* PropertyNameTable;
static const OnigCodePoint CR_Hiragana[] = {
1,
0xa4a1, 0xa4f3
}; /* CR_Hiragana */
static const OnigCodePoint CR_Katakana[] = {
3,
0xa5a1, 0xa5f6,
- 0xaaa6, 0xaaaf,
- 0xaab1, 0xaadd
}; /* CR_Katakana */
static int
init_property_list(void)
@@ -277,6 +511,10 @@ init_property_list(void)
PROPERTY_LIST_ADD_PROP("hiragana", CR_Hiragana);
PROPERTY_LIST_ADD_PROP("katakana", CR_Katakana);
PropertyInited = 1;
end:
@@ -300,7 +538,7 @@ property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
return onigenc_minimum_property_name_to_ctype(enc, s, e);
}
- return (int)ctype;
}
static int
@@ -360,14 +598,15 @@ OnigEncodingDefine(euc_jp, EUC_JP) = {
code_to_mbclen,
code_to_mbc,
mbc_case_fold,
- onigenc_ascii_apply_all_case_fold,
- onigenc_ascii_get_case_fold_codes_by_str,
property_name_to_ctype,
is_code_ctype,
get_ctype_code_range,
left_adjust_char_head,
is_allowed_reverse_match,
- 0
};
/*
* Name: EUC-JP