diff options
author | Hiroshi SHIBATA <[email protected]> | 2025-05-08 19:08:11 +0900 |
---|---|---|
committer | Hiroshi SHIBATA <[email protected]> | 2025-05-09 14:27:28 +0900 |
commit | 8a1d45144bc30aaca67d953521c19c7ee0b378c0 () | |
tree | aa7928bb3f680528b933eb896e8f0760276cd12d | |
parent | c66768376815878e9d103e80135b37cb4058fa8b (diff) |
Support `require 'cgi/escape'` with extracting CGI::Escape from CGI::Util
Notes: Merged: https://github.com/ruby/ruby/pull/13275
-rw-r--r-- | ext/cgi/escape/escape.c | 28 | ||||
-rw-r--r-- | lib/cgi.rb | 1 | ||||
-rw-r--r-- | lib/cgi/core.rb | 8 | ||||
-rw-r--r-- | lib/cgi/escape.rb | 224 | ||||
-rw-r--r-- | lib/cgi/util.rb | 216 | ||||
-rw-r--r-- | test/cgi/test_cgi_util.rb | 14 |
6 files changed, 251 insertions, 240 deletions
@@ -8,7 +8,7 @@ RUBY_EXTERN const signed char ruby_digit36_to_number_table[]; #define upper_hexdigits (ruby_hexdigits+16) #define char_to_number(c) ruby_digit36_to_number_table[(unsigned char)(c)] -static VALUE rb_cCGI, rb_mUtil, rb_mEscape; static ID id_accept_charset; #define HTML_ESCAPE_MAX_LEN 6 @@ -471,17 +471,17 @@ Init_escape(void) void InitVM_escape(void) { - rb_cCGI = rb_define_class("CGI", rb_cObject); - rb_mEscape = rb_define_module_under(rb_cCGI, "Escape"); - rb_mUtil = rb_define_module_under(rb_cCGI, "Util"); - rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1); - rb_define_method(rb_mEscape, "unescapeHTML", cgiesc_unescape_html, 1); - rb_define_method(rb_mEscape, "escapeURIComponent", cgiesc_escape_uri_component, 1); - rb_define_alias(rb_mEscape, "escape_uri_component", "escapeURIComponent"); - rb_define_method(rb_mEscape, "unescapeURIComponent", cgiesc_unescape_uri_component, -1); - rb_define_alias(rb_mEscape, "unescape_uri_component", "unescapeURIComponent"); - rb_define_method(rb_mEscape, "escape", cgiesc_escape, 1); - rb_define_method(rb_mEscape, "unescape", cgiesc_unescape, -1); - rb_prepend_module(rb_mUtil, rb_mEscape); - rb_extend_object(rb_cCGI, rb_mEscape); } @@ -294,4 +294,5 @@ end require 'cgi/core' require 'cgi/cookie' require 'cgi/util' CGI.autoload(:HtmlExtension, 'cgi/html') @@ -4,12 +4,12 @@ # generating HTTP responses. #++ class CGI - unless const_defined?(:Util) - module Util @@accept_charset = "UTF-8" # :nodoc: end - include Util - extend Util end $CGI_ENV = ENV # for FCGI support @@ -0,0 +1,224 @@ @@ -4,220 +4,8 @@ class CGI include Util extend Util end -module CGI::Util - @@accept_charset = Encoding::UTF_8 unless defined?(@@accept_charset) - - # URL-encode a string into application/x-www-form-urlencoded. - # Space characters (+" "+) are encoded with plus signs (+"+"+) - # url_encoded_string = CGI.escape("'Stop!' 
said Fred") - # # => "%27Stop%21%27+said+Fred" - def escape(string) - encoding = string.encoding - buffer = string.b - buffer.gsub!(/([^ a-zA-Z0-9_.\-~]+)/) do |m| - '%' + m.unpack('H2' * m.bytesize).join('%').upcase - end - buffer.tr!(' ', '+') - buffer.force_encoding(encoding) - end - - # URL-decode an application/x-www-form-urlencoded string with encoding(optional). - # string = CGI.unescape("%27Stop%21%27+said+Fred") - # # => "'Stop!' said Fred" - def unescape(string, encoding = @@accept_charset) - str = string.tr('+', ' ') - str = str.b - str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m| - [m.delete('%')].pack('H*') - end - str.force_encoding(encoding) - str.valid_encoding? ? str : str.force_encoding(string.encoding) - end - - # URL-encode a string following RFC 3986 - # Space characters (+" "+) are encoded with (+"%20"+) - # url_encoded_string = CGI.escapeURIComponent("'Stop!' said Fred") - # # => "%27Stop%21%27%20said%20Fred" - def escapeURIComponent(string) - encoding = string.encoding - buffer = string.b - buffer.gsub!(/([^a-zA-Z0-9_.\-~]+)/) do |m| - '%' + m.unpack('H2' * m.bytesize).join('%').upcase - end - buffer.force_encoding(encoding) - end - alias escape_uri_component escapeURIComponent - - # URL-decode a string following RFC 3986 with encoding(optional). - # string = CGI.unescapeURIComponent("%27Stop%21%27+said%20Fred") - # # => "'Stop!'+said Fred" - def unescapeURIComponent(string, encoding = @@accept_charset) - str = string.b - str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m| - [m.delete('%')].pack('H*') - end - str.force_encoding(encoding) - str.valid_encoding? ? 
str : str.force_encoding(string.encoding) - end - - alias unescape_uri_component unescapeURIComponent - - # The set of special characters and their escaped values - TABLE_FOR_ESCAPE_HTML__ = { - "'" => ''', - '&' => '&', - '"' => '"', - '<' => '<', - '>' => '>', - } - - # Escape special characters in HTML, namely '&\"<> - # CGI.escapeHTML('Usage: foo "bar" <baz>') - # # => "Usage: foo "bar" <baz>" - def escapeHTML(string) - enc = string.encoding - unless enc.ascii_compatible? - if enc.dummy? - origenc = enc - enc = Encoding::Converter.asciicompat_encoding(enc) - string = enc ? string.encode(enc) : string.b - end - table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}] - string = string.gsub(/#{"['&\"<>]".encode(enc)}/, table) - string.encode!(origenc) if origenc - string - else - string = string.b - string.gsub!(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__) - string.force_encoding(enc) - end - end - - # TruffleRuby runs the pure-Ruby variant faster, do not use the C extension there - unless RUBY_ENGINE == 'truffleruby' - begin - require 'cgi/escape' - rescue LoadError - end - end - - # Unescape a string that has been HTML-escaped - # CGI.unescapeHTML("Usage: foo "bar" <baz>") - # # => "Usage: foo \"bar\" <baz>" - def unescapeHTML(string) - enc = string.encoding - unless enc.ascii_compatible? - if enc.dummy? - origenc = enc - enc = Encoding::Converter.asciicompat_encoding(enc) - string = enc ? string.encode(enc) : string.b - end - string = string.gsub(Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do - case $1.encode(Encoding::US_ASCII) - when 'apos' then "'".encode(enc) - when 'amp' then '&'.encode(enc) - when 'quot' then '"'.encode(enc) - when 'gt' then '>'.encode(enc) - when 'lt' then '<'.encode(enc) - when /\A#0*(\d+)\z/ then $1.to_i.chr(enc) - when /\A#x([0-9a-f]+)\z/i then $1.hex.chr(enc) - end - end - string.encode!(origenc) if origenc - return string - end - return string unless string.include? 
'&' - charlimit = case enc - when Encoding::UTF_8; 0x10ffff - when Encoding::ISO_8859_1; 256 - else 128 - end - string = string.b - string.gsub!(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do - match = $1.dup - case match - when 'apos' then "'" - when 'amp' then '&' - when 'quot' then '"' - when 'gt' then '>' - when 'lt' then '<' - when /\A#0*(\d+)\z/ - n = $1.to_i - if n < charlimit - n.chr(enc) - else - "&##{$1};" - end - when /\A#x([0-9a-f]+)\z/i - n = $1.hex - if n < charlimit - n.chr(enc) - else - "&#x#{$1};" - end - else - "&#{match};" - end - end - string.force_encoding enc - end - - # Synonym for CGI.escapeHTML(str) - alias escape_html escapeHTML - - # Synonym for CGI.unescapeHTML(str) - alias unescape_html unescapeHTML - - # Escape only the tags of certain HTML elements in +string+. - # - # Takes an element or elements or array of elements. Each element - # is specified by the name of the element, without angle brackets. - # This matches both the start and the end tag of that element. - # The attribute list of the open tag will also be escaped (for - # instance, the double-quotes surrounding attribute values). - # - # print CGI.escapeElement('<BR><A HREF="url"></A>', "A", "IMG") - # # "<BR><A HREF="url"></A>" - # - # print CGI.escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"]) - # # "<BR><A HREF="url"></A>" - def escapeElement(string, *elements) - elements = elements[0] if elements[0].kind_of?(Array) - unless elements.empty? 
- string.gsub(/<\/?(?:#{elements.join("|")})\b[^<>]*+>?/im) do - CGI.escapeHTML($&) - end - else - string - end - end - - # Undo escaping such as that done by CGI.escapeElement() - # - # print CGI.unescapeElement( - # CGI.escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG") - # # "<BR><A HREF="url"></A>" - # - # print CGI.unescapeElement( - # CGI.escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"]) - # # "<BR><A HREF="url"></A>" - def unescapeElement(string, *elements) - elements = elements[0] if elements[0].kind_of?(Array) - unless elements.empty? - string.gsub(/<\/?(?:#{elements.join("|")})\b(?>[^&]+|&(?![gl]t;)\w+;)*(?:>)?/im) do - unescapeHTML($&) - end - else - string - end - end - - # Synonym for CGI.escapeElement(str) - alias escape_element escapeElement - - # Synonym for CGI.unescapeElement(str) - alias unescape_element unescapeElement # Format a +Time+ object as a String using the format specified by RFC 1123. # # CGI.rfc1123_date(Time.now) @@ -253,6 +41,4 @@ module CGI::Util end lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/, '\1') end - - alias h escapeHTML end @@ -6,7 +6,7 @@ require_relative 'update_env' class CGIUtilTest < Test::Unit::TestCase - include CGI::Util include UpdateEnv def setup @@ -63,7 +63,7 @@ class CGIUtilTest < Test::Unit::TestCase return unless defined?(::Encoding) assert_raise(TypeError) {CGI.unescape('', nil)} - assert_separately(%w[-rcgi/util], "#{<<-"begin;"}\n#{<<-"end;"}") begin; assert_equal("", CGI.unescape('')) end; @@ -120,7 +120,7 @@ class CGIUtilTest < Test::Unit::TestCase return unless defined?(::Encoding) assert_raise(TypeError) {CGI.unescapeURIComponent('', nil)} - assert_separately(%w[-rcgi/util], "#{<<-"begin;"}\n#{<<-"end;"}") begin; assert_equal("", CGI.unescapeURIComponent('')) end; @@ -300,21 +300,21 @@ end class CGIUtilPureRubyTest < Test::Unit::TestCase def setup - CGI::Escape.module_eval do alias _escapeHTML escapeHTML remove_method :escapeHTML alias _unescapeHTML unescapeHTML remove_method :unescapeHTML 
- end if defined?(CGI::Escape) end def teardown - CGI::Escape.module_eval do alias escapeHTML _escapeHTML remove_method :_escapeHTML alias unescapeHTML _unescapeHTML remove_method :_unescapeHTML - end if defined?(CGI::Escape) end include CGIUtilTest::UnescapeHTMLTests |