diff options
author | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2010-03-14 17:01:06 +0000 |
---|---|---|
committer | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2010-03-14 17:01:06 +0000 |
commit | 1db141ed13454411acbf0e77cb91408514eacbe7 () | |
tree | 862eb4e54c33382b7b13c5178fab1cd668e984c2 /tool/transcode-tblgen.rb | |
parent | f5ce5551c8404d608815e95241dd3d91c74002f2 (diff) |
* tool/transcode-tblgen.rb: refactored.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@26923 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rwxr-xr-x | tool/transcode-tblgen.rb | 470 |
1 files changed, 245 insertions, 225 deletions
@@ -1,6 +1,23 @@ require 'optparse' require 'erb' require 'fileutils' NUM_ELEM_BYTELOOKUP = 2 @@ -18,275 +35,277 @@ def c_esc(str) '"' + str.gsub(C_ESC_PAT) { C_ESC[$&] } + '"' end -HEX2 = /[0-9A-Fa-f]{2}/ - -class StrSet - attr_reader :pat - SINGLE_BYTE_RANGES = (0..255).map {|i| [i..i] } - def self.parse(pattern) - if /\A\s*((#{HEX2}|\{(#{HEX2}|#{HEX2}-#{HEX2})(,(#{HEX2}|#{HEX2}-#{HEX2}))*\})+(\s+|\z))*\z/o !~ pattern - raise ArgumentError, "invalid pattern: #{pattern.inspect}" - end - result = [] - pattern.scan(/\S+/) {|seq| - seq_result = [] - while !seq.empty? - if /\A(#{HEX2})/o =~ seq - byte = $1.to_i(16) - seq_result << SINGLE_BYTE_RANGES[byte] - seq = $' - elsif /\A\{([^\}]+)\}/ =~ seq - set = $1 - seq = $' - set_result = [] - set.scan(/[^,]+/) {|range| - if /\A(#{HEX2})-(#{HEX2})\z/o =~ range - b = $1.to_i(16) - e = $2.to_i(16) - set_result << (b..e) - elsif /\A(#{HEX2})\z/o =~ range - byte = $1.to_i(16) - set_result << (byte..byte) - else - raise "invalid range: #{range.inspect}" - end - } - seq_result << set_result - else - raise "invalid sequence: #{seq.inspect}" - end - end - result << seq_result - } - self.new(result) end - def initialize(pat) - @pat = pat end - def hash - return @hash if defined? @hash - @hash = @pat.hash end - def eql?(other) - self.class == other.class && - @pat == other.pat end - alias == eql? - def to_s - if @pat.empty? - "(empset)" - else - @pat.map {|seq| - if seq.empty? - "(empstr)" - else - seq.map {|byteset| - if byteset.length == 1 && byteset[0].begin == byteset[0].end - "%02x" % byteset[0].begin else - "{" + - byteset.map {|range| - if range.begin == range.end - "%02x" % range.begin else - "%02x-%02x" % [range.begin, range.end] end - }.join(',') + - "}" end - }.join('') - end - }.join(' ') - end - end - - def inspect - "\#<#{self.class}: #{self.to_s}>" end - def min_length - if @pat.empty? - nil else - @pat.map {|seq| seq.length }.min end end - def max_length - if @pat.empty? - nil - else - @pat.map {|seq| seq.length }.max - end end - def emptyable? - @pat.any? {|seq| - seq.empty? - } end - def has_nonempty? - @pat.any? {|seq| - !seq.empty? } - end - def first_bytes - result = {} - @pat.each {|seq| - next if seq.empty? - seq.first.each {|range| - range.each {|byte| - result[byte] = true - } } } - result.keys.sort end - def each_firstbyte - h = {} - @pat.each {|seq| - next if seq.empty? - seq.first.each {|range| - range.each {|byte| - (h[byte] ||= []) << seq[1..-1] - } } } - h.keys.sort.each {|byte| - yield byte, StrSet.new(h[byte]) } - end -end - -class ArrayCode - def initialize(type, name) - @type = type - @name = name - @len = 0; - @content = '' - end - - def length - @len - end - - def insert_at_last(num, str) - newnum = self.length + num - @content << str - @len += num - end - - def to_s - <<"End" -static const #{@type} -#{@name}[#{@len}] = { -#{@content}}; -End - end -end - -class ActionMap - def self.parse(hash) - h = {} - hash.each {|pat, action| - h[StrSet.parse(pat)] = action } - self.new(h) end - def initialize(h) - @map = h end def hash return @hash if defined? @hash - hash = 0 - @map.each {|k,v| - hash ^= k.hash ^ v.hash - } - @hash = hash end def eql?(other) self.class == other.class && - @map == other.instance_eval { @map } end alias == eql? def inspect "\#<#{self.class}:" + - @map.map {|k, v| " [" + k.to_s + "]=>" + v.inspect }.join('') + ">" end - def max_input_length - @map.keys.map {|k| k.max_length }.max end - def check_conflict - has_empty = false - has_nonempty = false - @map.each {|ss, action| - has_empty = true if ss.emptyable? - has_nonempty = true if ss.has_nonempty? - } - if has_empty && has_nonempty - raise "conflict between empty and nonempty sequence" - end end def empty_action - @map.each {|ss, action| - return action if ss.emptyable? - } - nil end - def each_firstbyte(valid_encoding=nil) - h = {} - @map.each {|ss, action| - if ss.emptyable? - raise "emptyable pattern" - else - ss.each_firstbyte {|byte, rest| - h[byte] ||= {} - if h[byte][rest].nil? - elsif action == :nomap0 - next - elsif h[byte][rest] != :nomap0 - raise "ambiguous %s or %s (%02X/%s)" % [h[byte][rest], action, byte, rest] - end - h[byte][rest] = action - } - end - } - if valid_encoding - valid_encoding.each_firstbyte {|byte, rest| - if h[byte] - am = ActionMap.new(h[byte]) - yield byte, am, rest - else - am = ActionMap.new(rest => :undef) - yield byte, am, nil - end - } - else - h.keys.sort.each {|byte| - am = ActionMap.new(h[byte]) - yield byte, am, nil } - end end OffsetsMemo = {} @@ -451,25 +470,24 @@ End PostMemo = {} NextName = "a" - def generate_node(bytes_code, words_code, name_hint=nil, valid_encoding=nil) - if n = PreMemo[[self,valid_encoding]] return n end table = Array.new(0x100, :invalid) - each_firstbyte(valid_encoding) {|byte, rest, rest_valid_encoding| - rest.check_conflict if a = rest.empty_action table[byte] = a else name_hint2 = nil name_hint2 = "#{name_hint}_#{'%02X' % byte}" if name_hint - table[byte] = "/*BYTE_LOOKUP*/" + rest.gennode(bytes_code, words_code, name_hint2, rest_valid_encoding) end } if n = PostMemo[table] - return PreMemo[[self,valid_encoding]] = n end if !name_hint @@ -477,16 +495,16 @@ End NextName.succ! end - PreMemo[[self,valid_encoding]] = PostMemo[table] = name_hint generate_lookup_node(bytes_code, words_code, name_hint, table) name_hint end - def gennode(bytes_code, words_code, name_hint=nil, valid_encoding=nil) @bytes_code = bytes_code @words_code = words_code - name = generate_node(bytes_code, words_code, name_hint, valid_encoding) @bytes_code = nil @words_code = nil return name @@ -627,18 +645,20 @@ def transcode_compile_tree(name, from, map) map.each {|k, v| h[k] = v unless h[k] # use first mapping } - am = ActionMap.parse(h) - - max_input = am.max_input_length - - if ValidEncoding[from] - valid_encoding = StrSet.parse(ValidEncoding[from]) - max_input = [max_input, valid_encoding.max_length].max else - valid_encoding = nil end - defined_name = am.gennode(TRANSCODE_GENERATED_BYTES_CODE, TRANSCODE_GENERATED_WORDS_CODE, name, valid_encoding) return defined_name, max_input end |