summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2013-04-19 17:50:38 +0000
committer: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2013-04-19 17:50:38 +0000
commit: 394d5dfa9ba625c99a1e6a411f81b628bfebd60a
tree: 22a788ad3df769fa9c3b4f5f81efde5b1cad87a4
parent: 57ffc79c4561b9249ef6b81101f1887f16f29e19 (diff)
* string.c (str_scrub): add Ruby method String#scrub, which verifies and
fixes invalid byte sequences.
* string.c (str_compat_and_valid): check that the given string is compatible and valid with the given encoding.
* transcode.c (str_transcode0): if invalid: :replace is specified for String#encode, replace invalid byte sequences even if the destination encoding equals the source encoding.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@40390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 12
-rw-r--r-- NEWS 8
-rw-r--r-- string.c 267
-rw-r--r-- test/ruby/test_m17n.rb 34
-rw-r--r-- transcode.c 17
5 files changed, 334 insertions, 4 deletions
@@ -1,3 +1,15 @@
Fri Apr 19 21:55:40 2013 Kouhei Sutou <[email protected]>
* README.EXT.ja (Data_Wrap_Struct): Remove a description about
@@ -26,6 +26,14 @@ with all sufficient information, see the ChangeLog file.
* misc
* Mutex#owned? is no longer experimental.
* pack/unpack (Array/String)
* Q! and q! directives for long long type if platform has the type.
@@ -7741,6 +7741,272 @@ rb_str_ellipsize(VALUE str, long len)
return ret;
}
/**********************************************************************
* Document-class: Symbol
*
@@ -8226,6 +8492,7 @@ Init_String(void)
rb_define_method(rb_cString, "getbyte", rb_str_getbyte, 1);
rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2);
rb_define_method(rb_cString, "byteslice", rb_str_byteslice, -1);
rb_define_method(rb_cString, "to_i", rb_str_to_i, -1);
rb_define_method(rb_cString, "to_f", rb_str_to_f, 0);
@@ -1489,4 +1489,38 @@ class TestM17N < Test::Unit::TestCase
s.untrust
assert_equal(true, s.b.untrusted?)
end
end
@@ -2652,6 +2652,8 @@ str_transcode_enc_args(VALUE str, volatile VALUE *arg1, volatile VALUE *arg2,
return dencidx;
}
static int
str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
{
@@ -2686,6 +2688,17 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
ECONV_XML_ATTR_CONTENT_DECORATOR|
ECONV_XML_ATTR_QUOTE_DECORATOR)) == 0) {
if (senc && senc == denc) {
return NIL_P(arg2) ? -1 : dencidx;
}
if (senc && denc && rb_enc_asciicompat(senc) && rb_enc_asciicompat(denc)) {
@@ -2815,10 +2828,6 @@ static VALUE encoded_dup(VALUE newstr, VALUE str, int encidx);
* in the source encoding. The last form by default does not raise
* exceptions but uses replacement strings.
*
- * Please note that conversion from an encoding +enc+ to the
- * same encoding +enc+ is a no-op, i.e. the receiver is returned without
- * any changes, and no exceptions are raised, even if there are invalid bytes.
- *
* The +options+ Hash gives details for conversion and can have the following
* keys:
*