diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2012-02-17 07:42:23 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2012-02-17 07:42:23 +0000 |
commit | 0424e152c684a85f4b0691f1e84aec203115333d () | |
tree | ccb4af1c7fa12a09934f315edba0b9d839e57ceb /tool/enc-unicode.rb | |
parent | 5362e7fcddf70d16fc4261383f99175c90881353 (diff) |
* Merge Onigmo-5.13.1. [ruby-dev:45057] [Feature #5820]
https://github.com/k-takata/Onigmo
cp reg{comp,enc,error,exec,parse,syntax}.c reg{enc,int,parse}.h
cp oniguruma.h
cp tool/enc-unicode.rb
cp -r enc/

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@34663 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rwxr-xr-x | tool/enc-unicode.rb | 75 |
1 files changed, 57 insertions, 18 deletions
@@ -5,7 +5,7 @@ # # To use this, get UnicodeData.txt, Scripts.txt, PropList.txt, # PropertyAliases.txt, PropertyValueAliases.txt, DerivedCoreProperties.txt, -# and DerivedAge.txt from unicode.org. # (http://unicode.org/Public/UNIDATA/) And run following command. # ruby1.9 tool/enc-unicode.rb data_dir > enc/unicode/name2ctype.kwd # You can get source file for gperf. After this, simply make ruby. @@ -90,7 +90,10 @@ def parse_unicode_data(file) # codepoints to Cn and C cn_remainder = (last_cp.next..0x10ffff).to_a data['Cn'] += cn_remainder - data['C'] += cn_remainder # Define General Category properties gcps = data.keys.sort - POSIX_NAMES @@ -112,16 +115,15 @@ def define_posix_props(data) (0x0061..0x0066).to_a data['Alnum'] = data['Alpha'] + data['Digit'] data['Space'] = data['White_Space'] - data['Blank'] = data['White_Space'] - [0x0A, 0x0B, 0x0C, 0x0D, 0x85] - - data['Line_Separator'] - data['Paragraph_Separator'] data['Cntrl'] = data['Cc'] data['Word'] = data['Alpha'] + data['Mark'] + data['Digit'] + data['Connector_Punctuation'] data['Graph'] = data['Any'] - data['Space'] - data['Cntrl'] - data['Surrogate'] - data['Unassigned'] - data['Print'] = data['Graph'] + data['Blank'] - data['Cntrl'] end -def parse_scripts(data) files = [ {fn: 'DerivedCoreProperties.txt', title: 'Derived Property'}, {fn: 'Scripts.txt', title: 'Script'}, @@ -134,7 +136,7 @@ def parse_scripts(data) IO.foreach(get_file(file[:fn])) do |line| if /^# Total code points: / =~ line data[current] = cps - make_const(current, cps, file[:title]) (names[file[:title]] ||= []) << current cps = [] elsif /^([0-9a-fA-F]+)(?:..([0-9a-fA-F]+))?\s*;\s*(\w+)/ =~ line @@ -146,7 +148,7 @@ def parse_scripts(data) # All code points not explicitly listed for Script # have the value Unknown (Zzzz). 
data['Unknown'] = (0..0x10ffff).to_a - data.values_at(*names['Script']).flatten - make_const('Unknown', data['Unknown'], 'Script') names.values.flatten << 'Unknown' end @@ -200,6 +202,29 @@ def parse_age(data) ages end $const_cache = {} # make_const(property, pairs, name): Prints a 'static const' structure for a # given property, group of paired codepoints, and a human-friendly name for @@ -232,6 +257,10 @@ def constantize_agename(name) "Age_#{name.sub(/\./, '_')}" end def get_file(name) File.join(ARGV[0], name) end @@ -241,9 +270,16 @@ end puts '%{' puts '#define long size_t' props, data = parse_unicode_data(get_file('UnicodeData.txt')) print "\n#ifdef USE_UNICODE_PROPERTIES" props.each do |name| - category = case name.size when 1 then 'Major Category' when 2 then 'General Category' @@ -251,22 +287,18 @@ props.each do |name| end make_const(name, data[name], category) end -props.concat parse_scripts(data) -puts '#endif /* USE_UNICODE_PROPERTIES */' -aliases = parse_aliases(data) ages = parse_age(data) -define_posix_props(data) -POSIX_NAMES.each do |name| - make_const(name, data[name], "[[:#{name}:]]") -end puts(<<'__HEREDOC') static const OnigCodePoint* const CodeRanges[] = { __HEREDOC POSIX_NAMES.each{|name|puts" CR_#{name},"} puts "#ifdef USE_UNICODE_PROPERTIES" -props.each{|name|puts" CR_#{name},"} -ages.each{|name| puts" CR_#{constantize_agename(name)},"} puts(<<'__HEREDOC') #endif /* USE_UNICODE_PROPERTIES */ @@ -284,6 +316,7 @@ i = -1 name_to_index = {} POSIX_NAMES.each do |name| i += 1 name = normalize_propname(name) name_to_index[name] = i puts"%-40s %3d" % [name + ',', i] @@ -306,6 +339,12 @@ ages.each do |name| name_to_index[name] = i puts "%-40s %3d" % [name + ',', i] end puts(<<'__HEREDOC') #endif /* USE_UNICODE_PROPERTIES */ %% |