diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2012-02-17 07:42:23 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2012-02-17 07:42:23 +0000 |
commit | 0424e152c684a85f4b0691f1e84aec203115333d () | |
tree | ccb4af1c7fa12a09934f315edba0b9d839e57ceb /enc/euc_jp.c | |
parent | 5362e7fcddf70d16fc4261383f99175c90881353 (diff) |
* Merge Onigmo-5.13.1. [ruby-dev:45057] [Feature #5820]
https://github.com/k-takata/Onigmo cp reg{comp,enc,error,exec,parse,syntax}.c reg{enc,int,parse}.h cp oniguruma.h cp tool/enc-unicode.rb cp -r enc/ git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@34663 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | enc/euc_jp.c | 285 |
1 files changed, 262 insertions, 23 deletions
@@ -1,8 +1,9 @@ /********************************************************************** - euc_jp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,7 +30,6 @@ #include "regint.h" - #define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) static const int EncLen_EUCJP[] = { @@ -114,6 +114,97 @@ static const signed char trans[][0x100] = { #undef A #undef F static int mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) { @@ -138,7 +229,7 @@ mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc) int c, i, len; OnigCodePoint n; - len = enclen(enc, p, end); n = (OnigCodePoint )*p++; if (len == 1) return n; @@ -154,10 +245,10 @@ static int code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) { if (ONIGENC_IS_CODE_ASCII(code)) return 1; - else if (code > 0xffffff) - return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; - else if (code & 0x800000) return 3; - else if (code & 0x8000) return 2; else return ONIGERR_INVALID_CODE_POINT_VALUE; } @@ -191,10 +282,87 @@ code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) *p++ = (UChar )(code & 0xff); #if 1 - if (enclen(enc, buf, p) != (p - buf)) return ONIGERR_INVALID_CODE_POINT_VALUE; #endif - return (int)(p - buf); } static int @@ -202,7 +370,6 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* lower, OnigEncoding enc) { - int len; const UChar* p = *pp; if (ONIGENC_IS_MBC_ASCII(p)) { @@ -211,12 +378,11 @@ mbc_case_fold(OnigCaseFoldType flag, return 1; } else { - int i; - len = enclen(enc, p, end); - for (i = 0; i < len; i++) { - *lower++ = *p++; - } (*pp) += len; return len; /* return byte length of converted char to lower */ } @@ -235,7 +401,7 @@ left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, 
Onig p = s; while (!eucjp_islead(*p) && p > start) p--; - len = enclen(enc, p, end); if (p + len > s) return (UChar* )p; p += len; return (UChar* )(p + ((s - p) & ~1)); @@ -260,15 +426,83 @@ static hash_table_type* PropertyNameTable; static const OnigCodePoint CR_Hiragana[] = { 1, 0xa4a1, 0xa4f3 }; /* CR_Hiragana */ static const OnigCodePoint CR_Katakana[] = { 3, 0xa5a1, 0xa5f6, - 0xaaa6, 0xaaaf, - 0xaab1, 0xaadd }; /* CR_Katakana */ static int init_property_list(void) @@ -277,6 +511,10 @@ init_property_list(void) PROPERTY_LIST_ADD_PROP("hiragana", CR_Hiragana); PROPERTY_LIST_ADD_PROP("katakana", CR_Katakana); PropertyInited = 1; end: @@ -300,7 +538,7 @@ property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) return onigenc_minimum_property_name_to_ctype(enc, s, e); } - return (int)ctype; } static int @@ -360,14 +598,15 @@ OnigEncodingDefine(euc_jp, EUC_JP) = { code_to_mbclen, code_to_mbc, mbc_case_fold, - onigenc_ascii_apply_all_case_fold, - onigenc_ascii_get_case_fold_codes_by_str, property_name_to_ctype, is_code_ctype, get_ctype_code_range, left_adjust_char_head, is_allowed_reverse_match, - 0 }; /* * Name: EUC-JP |