summaryrefslogtreecommitdiff
path: root/tool/enc-unicode.rb
diff options
context:
space:
mode:
authornaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2012-02-17 07:42:23 +0000
committernaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2012-02-17 07:42:23 +0000
commit0424e152c684a85f4b0691f1e84aec203115333d ()
treeccb4af1c7fa12a09934f315edba0b9d839e57ceb /tool/enc-unicode.rb
parent5362e7fcddf70d16fc4261383f99175c90881353 (diff)
* Merge Onigmo-5.13.1. [ruby-dev:45057] [Feature #5820]
https://github.com/k-takata/Onigmo cp reg{comp,enc,error,exec,parse,syntax}.c reg{enc,int,parse}.h cp oniguruma.h cp tool/enc-unicode.rb cp -r enc/ git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@34663 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rwxr-xr-xtool/enc-unicode.rb75
1 files changed, 57 insertions, 18 deletions
@@ -5,7 +5,7 @@
#
# To use this, get UnicodeData.txt, Scripts.txt, PropList.txt,
# PropertyAliases.txt, PropertyValueAliases.txt, DerivedCoreProperties.txt,
-# and DerivedAge.txt from unicode.org.
# (http://unicode.org/Public/UNIDATA/) And run following command.
# ruby1.9 tool/enc-unicode.rb data_dir > enc/unicode/name2ctype.kwd
# You can get source file for gperf. After this, simply make ruby.
@@ -90,7 +90,10 @@ def parse_unicode_data(file)
# codepoints to Cn and C
cn_remainder = (last_cp.next..0x10ffff).to_a
data['Cn'] += cn_remainder
- data['C'] += cn_remainder
# Define General Category properties
gcps = data.keys.sort - POSIX_NAMES
@@ -112,16 +115,15 @@ def define_posix_props(data)
(0x0061..0x0066).to_a
data['Alnum'] = data['Alpha'] + data['Digit']
data['Space'] = data['White_Space']
- data['Blank'] = data['White_Space'] - [0x0A, 0x0B, 0x0C, 0x0D, 0x85] -
- data['Line_Separator'] - data['Paragraph_Separator']
data['Cntrl'] = data['Cc']
data['Word'] = data['Alpha'] + data['Mark'] + data['Digit'] + data['Connector_Punctuation']
data['Graph'] = data['Any'] - data['Space'] - data['Cntrl'] -
data['Surrogate'] - data['Unassigned']
- data['Print'] = data['Graph'] + data['Blank'] - data['Cntrl']
end
-def parse_scripts(data)
files = [
{fn: 'DerivedCoreProperties.txt', title: 'Derived Property'},
{fn: 'Scripts.txt', title: 'Script'},
@@ -134,7 +136,7 @@ def parse_scripts(data)
IO.foreach(get_file(file[:fn])) do |line|
if /^# Total code points: / =~ line
data[current] = cps
- make_const(current, cps, file[:title])
(names[file[:title]] ||= []) << current
cps = []
elsif /^([0-9a-fA-F]+)(?:..([0-9a-fA-F]+))?\s*;\s*(\w+)/ =~ line
@@ -146,7 +148,7 @@ def parse_scripts(data)
# All code points not explicitly listed for Script
# have the value Unknown (Zzzz).
data['Unknown'] = (0..0x10ffff).to_a - data.values_at(*names['Script']).flatten
- make_const('Unknown', data['Unknown'], 'Script')
names.values.flatten << 'Unknown'
end
@@ -200,6 +202,29 @@ def parse_age(data)
ages
end
$const_cache = {}
# make_const(property, pairs, name): Prints a 'static const' structure for a
# given property, group of paired codepoints, and a human-friendly name for
@@ -232,6 +257,10 @@ def constantize_agename(name)
"Age_#{name.sub(/\./, '_')}"
end
# Builds the path to +name+ under the directory passed as the first
# command-line argument (the data_dir of the usage line).
def get_file(name)
  data_dir = ARGV[0]
  File.join(data_dir, name)
end
@@ -241,9 +270,16 @@ end
puts '%{'
puts '#define long size_t'
props, data = parse_unicode_data(get_file('UnicodeData.txt'))
print "\n#ifdef USE_UNICODE_PROPERTIES"
props.each do |name|
- category =
case name.size
when 1 then 'Major Category'
when 2 then 'General Category'
@@ -251,22 +287,18 @@ props.each do |name|
end
make_const(name, data[name], category)
end
-props.concat parse_scripts(data)
-puts '#endif /* USE_UNICODE_PROPERTIES */'
-aliases = parse_aliases(data)
ages = parse_age(data)
-define_posix_props(data)
-POSIX_NAMES.each do |name|
- make_const(name, data[name], "[[:#{name}:]]")
-end
puts(<<'__HEREDOC')
static const OnigCodePoint* const CodeRanges[] = {
__HEREDOC
POSIX_NAMES.each{|name|puts" CR_#{name},"}
puts "#ifdef USE_UNICODE_PROPERTIES"
-props.each{|name|puts" CR_#{name},"}
-ages.each{|name| puts" CR_#{constantize_agename(name)},"}
puts(<<'__HEREDOC')
#endif /* USE_UNICODE_PROPERTIES */
@@ -284,6 +316,7 @@ i = -1
name_to_index = {}
POSIX_NAMES.each do |name|
i += 1
name = normalize_propname(name)
name_to_index[name] = i
puts"%-40s %3d" % [name + ',', i]
@@ -306,6 +339,12 @@ ages.each do |name|
name_to_index[name] = i
puts "%-40s %3d" % [name + ',', i]
end
puts(<<'__HEREDOC')
#endif /* USE_UNICODE_PROPERTIES */
%%