Support `require 'cgi/escape'` with extracting CGI::Escape from CGI::Util

author: Hiroshi SHIBATA <[email protected]> 2025-05-08 19:08:11 +0900
committer: Hiroshi SHIBATA <[email protected]> 2025-05-09 14:27:28 +0900
commit: 8a1d45144bc30aaca67d953521c19c7ee0b378c0 ()
tree: aa7928bb3f680528b933eb896e8f0760276cd12d
parent: c66768376815878e9d103e80135b37cb4058fa8b (diff)
6 files changed, 251 insertions, 240 deletions
@@ -8,7 +8,7 @@ RUBY_EXTERN const signed char ruby_digit36_to_number_table[];
 #define upper_hexdigits (ruby_hexdigits+16)
 #define char_to_number(c) ruby_digit36_to_number_table[(unsigned char)(c)]
-static VALUE rb_cCGI, rb_mUtil, rb_mEscape;
 static ID id_accept_charset;
 #define HTML_ESCAPE_MAX_LEN 6
@@ -471,17 +471,17 @@ Init_escape(void)
 void
 InitVM_escape(void)
 {
- rb_cCGI = rb_define_class("CGI", rb_cObject);
- rb_mEscape = rb_define_module_under(rb_cCGI, "Escape");
- rb_mUtil = rb_define_module_under(rb_cCGI, "Util");
- rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1);
- rb_define_method(rb_mEscape, "unescapeHTML", cgiesc_unescape_html, 1);
- rb_define_method(rb_mEscape, "escapeURIComponent", cgiesc_escape_uri_component, 1);
- rb_define_alias(rb_mEscape, "escape_uri_component", "escapeURIComponent");
- rb_define_method(rb_mEscape, "unescapeURIComponent", cgiesc_unescape_uri_component, -1);
- rb_define_alias(rb_mEscape, "unescape_uri_component", "unescapeURIComponent");
- rb_define_method(rb_mEscape, "escape", cgiesc_escape, 1);
- rb_define_method(rb_mEscape, "unescape", cgiesc_unescape, -1);
- rb_prepend_module(rb_mUtil, rb_mEscape);
- rb_extend_object(rb_cCGI, rb_mEscape);
 }
@@ -294,4 +294,5 @@ end
 require 'cgi/core'
 require 'cgi/cookie'
 require 'cgi/util'
 CGI.autoload(:HtmlExtension, 'cgi/html')
@@ -4,12 +4,12 @@
 # generating HTTP responses.
 #++
 class CGI
- unless const_defined?(:Util)
- module Util
 @@accept_charset = "UTF-8" # :nodoc:
 end
- include Util
- extend Util
 end
 $CGI_ENV = ENV # for FCGI support
@@ -0,0 +1,224 @@
@@ -4,220 +4,8 @@ class CGI
 include Util
 extend Util
 end
-module CGI::Util
- @@accept_charset = Encoding::UTF_8 unless defined?(@@accept_charset)
-
- # URL-encode a string into application/x-www-form-urlencoded.
- # Space characters (+" "+) are encoded with plus signs (+"+"+)
- # url_encoded_string = CGI.escape("'Stop!' said Fred")
- # # => "%27Stop%21%27+said+Fred"
- def escape(string)
- encoding = string.encoding
- buffer = string.b
- buffer.gsub!(/([^ a-zA-Z0-9_.\-~]+)/) do |m|
- '%' + m.unpack('H2' * m.bytesize).join('%').upcase
- end
- buffer.tr!(' ', '+')
- buffer.force_encoding(encoding)
- end
-
- # URL-decode an application/x-www-form-urlencoded string, optionally using the given encoding.
- # string = CGI.unescape("%27Stop%21%27+said+Fred")
- # # => "'Stop!' said Fred"
- def unescape(string, encoding = @@accept_charset)
- str = string.tr('+', ' ')
- str = str.b
- str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m|
- [m.delete('%')].pack('H*')
- end
- str.force_encoding(encoding)
- str.valid_encoding? ? str : str.force_encoding(string.encoding)
- end
-
- # URL-encode a string following RFC 3986
- # Space characters (+" "+) are encoded with (+"%20"+)
- # url_encoded_string = CGI.escapeURIComponent("'Stop!' said Fred")
- # # => "%27Stop%21%27%20said%20Fred"
- def escapeURIComponent(string)
- encoding = string.encoding
- buffer = string.b
- buffer.gsub!(/([^a-zA-Z0-9_.\-~]+)/) do |m|
- '%' + m.unpack('H2' * m.bytesize).join('%').upcase
- end
- buffer.force_encoding(encoding)
- end
- alias escape_uri_component escapeURIComponent
-
- # URL-decode a string following RFC 3986, optionally using the given encoding.
- # string = CGI.unescapeURIComponent("%27Stop%21%27+said%20Fred")
- # # => "'Stop!'+said Fred"
- def unescapeURIComponent(string, encoding = @@accept_charset)
- str = string.b
- str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m|
- [m.delete('%')].pack('H*')
- end
- str.force_encoding(encoding)
- str.valid_encoding? ? str : str.force_encoding(string.encoding)
- end
-
- alias unescape_uri_component unescapeURIComponent
-
- # The set of special characters and their escaped values
- TABLE_FOR_ESCAPE_HTML__ = {
- "'" => '&#39;',
- '&' => '&amp;',
- '"' => '&quot;',
- '<' => '&lt;',
- '>' => '&gt;',
- }
-
- # Escape special characters in HTML, namely '&\"<>
- # CGI.escapeHTML('Usage: foo "bar" <baz>')
- # # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"
- def escapeHTML(string)
- enc = string.encoding
- unless enc.ascii_compatible?
- if enc.dummy?
- origenc = enc
- enc = Encoding::Converter.asciicompat_encoding(enc)
- string = enc ? string.encode(enc) : string.b
- end
- table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
- string = string.gsub(/#{"['&\"<>]".encode(enc)}/, table)
- string.encode!(origenc) if origenc
- string
- else
- string = string.b
- string.gsub!(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__)
- string.force_encoding(enc)
- end
- end
-
- # TruffleRuby runs the pure-Ruby variant faster, so do not use the C extension there.
- unless RUBY_ENGINE == 'truffleruby'
- begin
- require 'cgi/escape'
- rescue LoadError
- end
- end
-
- # Unescape a string that has been HTML-escaped
- # CGI.unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
- # # => "Usage: foo \"bar\" <baz>"
- def unescapeHTML(string)
- enc = string.encoding
- unless enc.ascii_compatible?
- if enc.dummy?
- origenc = enc
- enc = Encoding::Converter.asciicompat_encoding(enc)
- string = enc ? string.encode(enc) : string.b
- end
- string = string.gsub(Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do
- case $1.encode(Encoding::US_ASCII)
- when 'apos' then "'".encode(enc)
- when 'amp' then '&'.encode(enc)
- when 'quot' then '"'.encode(enc)
- when 'gt' then '>'.encode(enc)
- when 'lt' then '<'.encode(enc)
- when /\A#0*(\d+)\z/ then $1.to_i.chr(enc)
- when /\A#x([0-9a-f]+)\z/i then $1.hex.chr(enc)
- end
- end
- string.encode!(origenc) if origenc
- return string
- end
- return string unless string.include? '&'
- charlimit = case enc
- when Encoding::UTF_8; 0x10ffff
- when Encoding::ISO_8859_1; 256
- else 128
- end
- string = string.b
- string.gsub!(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do
- match = $1.dup
- case match
- when 'apos' then "'"
- when 'amp' then '&'
- when 'quot' then '"'
- when 'gt' then '>'
- when 'lt' then '<'
- when /\A#0*(\d+)\z/
- n = $1.to_i
- if n < charlimit
- n.chr(enc)
- else
- "&##{$1};"
- end
- when /\A#x([0-9a-f]+)\z/i
- n = $1.hex
- if n < charlimit
- n.chr(enc)
- else
- "&#x#{$1};"
- end
- else
- "&#{match};"
- end
- end
- string.force_encoding enc
- end
-
- # Synonym for CGI.escapeHTML(str)
- alias escape_html escapeHTML
-
- # Synonym for CGI.unescapeHTML(str)
- alias unescape_html unescapeHTML
-
- # Escape only the tags of certain HTML elements in +string+.
- #
- # Takes an element or elements or array of elements. Each element
- # is specified by the name of the element, without angle brackets.
- # This matches both the start and the end tag of that element.
- # The attribute list of the open tag will also be escaped (for
- # instance, the double-quotes surrounding attribute values).
- #
- # print CGI.escapeElement('<BR><A HREF="url"></A>', "A", "IMG")
- # # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
- #
- # print CGI.escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"])
- # # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
- def escapeElement(string, *elements)
- elements = elements[0] if elements[0].kind_of?(Array)
- unless elements.empty?
- string.gsub(/<\/?(?:#{elements.join("|")})\b[^<>]*+>?/im) do
- CGI.escapeHTML($&)
- end
- else
- string
- end
- end
-
- # Undo escaping such as that done by CGI.escapeElement()
- #
- # print CGI.unescapeElement(
- # CGI.escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG")
- # # "&lt;BR&gt;<A HREF="url"></A>"
- #
- # print CGI.unescapeElement(
- # CGI.escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"])
- # # "&lt;BR&gt;<A HREF="url"></A>"
- def unescapeElement(string, *elements)
- elements = elements[0] if elements[0].kind_of?(Array)
- unless elements.empty?
- string.gsub(/&lt;\/?(?:#{elements.join("|")})\b(?>[^&]+|&(?![gl]t;)\w+;)*(?:&gt;)?/im) do
- unescapeHTML($&)
- end
- else
- string
- end
- end
-
- # Synonym for CGI.escapeElement(str)
- alias escape_element escapeElement
-
- # Synonym for CGI.unescapeElement(str)
- alias unescape_element unescapeElement
 # Format a +Time+ object as a String using the format specified by RFC 1123.
 #
 # CGI.rfc1123_date(Time.now)
@@ -253,6 +41,4 @@ module CGI::Util
 end
 lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/, '\1')
 end
-
- alias h escapeHTML
 end
@@ -6,7 +6,7 @@ require_relative 'update_env'
 class CGIUtilTest < Test::Unit::TestCase
- include CGI::Util
 include UpdateEnv
 def setup
@@ -63,7 +63,7 @@ class CGIUtilTest < Test::Unit::TestCase
 return unless defined?(::Encoding)
 assert_raise(TypeError) {CGI.unescape('', nil)}
- assert_separately(%w[-rcgi/util], "#{<<-"begin;"}\n#{<<-"end;"}")
 begin;
 assert_equal("", CGI.unescape(''))
 end;
@@ -120,7 +120,7 @@ class CGIUtilTest < Test::Unit::TestCase
 return unless defined?(::Encoding)
 assert_raise(TypeError) {CGI.unescapeURIComponent('', nil)}
- assert_separately(%w[-rcgi/util], "#{<<-"begin;"}\n#{<<-"end;"}")
 begin;
 assert_equal("", CGI.unescapeURIComponent(''))
 end;
@@ -300,21 +300,21 @@ end
 class CGIUtilPureRubyTest < Test::Unit::TestCase
 def setup
- CGI::Escape.module_eval do
 alias _escapeHTML escapeHTML
 remove_method :escapeHTML
 alias _unescapeHTML unescapeHTML
 remove_method :unescapeHTML
- end if defined?(CGI::Escape)
 end
 def teardown
- CGI::Escape.module_eval do
 alias escapeHTML _escapeHTML
 remove_method :_escapeHTML
 alias unescapeHTML _unescapeHTML
 remove_method :_unescapeHTML
- end if defined?(CGI::Escape)
 end
 include CGIUtilTest::UnescapeHTMLTests
author	Hiroshi SHIBATA <[email protected]>	2025-05-08 19:08:11 +0900
committer	Hiroshi SHIBATA <[email protected]>	2025-05-09 14:27:28 +0900
commit	8a1d45144bc30aaca67d953521c19c7ee0b378c0 ()
tree	aa7928bb3f680528b933eb896e8f0760276cd12d
parent	c66768376815878e9d103e80135b37cb4058fa8b (diff)