diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2013-04-19 17:50:38 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2013-04-19 17:50:38 +0000 |
commit | 394d5dfa9ba625c99a1e6a411f81b628bfebd60a () | |
tree | 22a788ad3df769fa9c3b4f5f81efde5b1cad87a4 | |
parent | 57ffc79c4561b9249ef6b81101f1887f16f29e19 (diff) |
* string.c (str_scrub): add Ruby method String#scrub, which verifies and
fixes invalid byte sequences. * string.c (str_compat_and_valid): check that the given string is compatible and valid with the given encoding. * transcode.c (str_transcode0): If invalid: :replace is specified for String#encode, replace invalid byte sequences even if the destination encoding equals the source encoding. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@40390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 12 | ||||
-rw-r--r-- | NEWS | 8 | ||||
-rw-r--r-- | string.c | 267 | ||||
-rw-r--r-- | test/ruby/test_m17n.rb | 34 | ||||
-rw-r--r-- | transcode.c | 17 |
5 files changed, 334 insertions, 4 deletions
@@ -1,3 +1,15 @@ Fri Apr 19 21:55:40 2013 Kouhei Sutou <[email protected]> * README.EXT.ja (Data_Wrap_Struct): Remove a description about @@ -26,6 +26,14 @@ with all sufficient information, see the ChangeLog file. * misc * Mutex#owned? is no longer experimental. * pack/unpack (Array/String) * Q! and q! directives for long long type if platform has the type. @@ -7741,6 +7741,272 @@ rb_str_ellipsize(VALUE str, long len) return ret; } /********************************************************************** * Document-class: Symbol * @@ -8226,6 +8492,7 @@ Init_String(void) rb_define_method(rb_cString, "getbyte", rb_str_getbyte, 1); rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2); rb_define_method(rb_cString, "byteslice", rb_str_byteslice, -1); rb_define_method(rb_cString, "to_i", rb_str_to_i, -1); rb_define_method(rb_cString, "to_f", rb_str_to_f, 0); @@ -1489,4 +1489,38 @@ class TestM17N < Test::Unit::TestCase s.untrust assert_equal(true, s.b.untrusted?) end end @@ -2652,6 +2652,8 @@ str_transcode_enc_args(VALUE str, volatile VALUE *arg1, volatile VALUE *arg2, return dencidx; } static int str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts) { @@ -2686,6 +2688,17 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts) ECONV_XML_ATTR_CONTENT_DECORATOR| ECONV_XML_ATTR_QUOTE_DECORATOR)) == 0) { if (senc && senc == denc) { return NIL_P(arg2) ? -1 : dencidx; } if (senc && denc && rb_enc_asciicompat(senc) && rb_enc_asciicompat(denc)) { @@ -2815,10 +2828,6 @@ static VALUE encoded_dup(VALUE newstr, VALUE str, int encidx); * in the source encoding. The last form by default does not raise * exceptions but uses replacement strings. * - * Please note that conversion from an encoding +enc+ to the - * same encoding +enc+ is a no-op, i.e. the receiver is returned without - * any changes, and no exceptions are raised, even if there are invalid bytes. 
- * * The +options+ Hash gives details for conversion and can have the following * keys: * |