summaryrefslogtreecommitdiff
path: root/lib/rss/parser.rb
diff options
context:
space:
mode:
authorkou <kou@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2007-03-17 10:13:25 +0000
committerkou <kou@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2007-03-17 10:13:25 +0000
commit966a25465aab5c2972e6c453f631a15fc2223256 ()
tree847090e856c9901ab2cc19251c179b9b0985e65b /lib/rss/parser.rb
parent53cbab048452742b537ac8bccf494630d1c184c8 (diff)
* lib/rss, test/rss:
- supported Atom. - bumped version 0.1.6 to 0.1.7. * sample/rss/convert.rb: added new sample. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12087 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--lib/rss/parser.rb172
1 files changed, 117 insertions, 55 deletions
@@ -2,6 +2,7 @@ require "forwardable"
require "open-uri"
require "rss/rss"
module RSS
@@ -63,7 +64,8 @@ module RSS
end
end
- def parse(rss, do_validate=true, ignore_unknown_element=true, parser_class=default_parser)
parser = new(rss, parser_class)
parser.do_validate = do_validate
parser.ignore_unknown_element = ignore_unknown_element
@@ -103,7 +105,7 @@ module RSS
return rss if rss.is_a?(::URI::Generic)
begin
- URI(rss)
rescue ::URI::Error
rss
end
@@ -158,26 +160,26 @@ module RSS
class << self
- @@setters = {}
@@registered_uris = {}
@@class_names = {}
- def install_setter(uri, tag_name, setter)
- @@setters[uri] ||= {}
- @@setters[uri][tag_name] = setter
- end
-
def setter(uri, tag_name)
- begin
- @@setters[uri][tag_name]
- rescue NameError
nil
end
end
def available_tags(uri)
begin
- @@setters[uri].keys
rescue NameError
[]
end
@@ -205,8 +207,8 @@ module RSS
end
end
- def install_get_text_element(uri, name, setter)
- install_setter(uri, name, setter)
def_get_text_element(uri, name, *get_file_and_line_from_caller(1))
end
@@ -215,20 +217,18 @@ module RSS
end
private
def def_get_text_element(uri, name, file, line)
register_uri(uri, name)
unless private_instance_methods(false).include?("start_#{name}".to_sym)
module_eval(<<-EOT, file, line)
def start_#{name}(name, prefix, attrs, ns)
- uri = ns[prefix]
if self.class.uri_registered?(uri, #{name.inspect})
- if @do_validate
- tags = self.class.available_tags(uri)
- unless tags.include?(name)
- raise UnknownTagError.new(name, uri)
- end
- end
start_get_text_element(name, prefix, ns, uri)
else
start_else_element(name, prefix, attrs, ns)
@@ -244,7 +244,6 @@ module RSS
end
module ListenerMixin
-
attr_reader :rss
attr_accessor :ignore_unknown_element
@@ -254,13 +253,16 @@ module RSS
@rss = nil
@ignore_unknown_element = true
@do_validate = true
- @ns_stack = [{}]
@tag_stack = [[]]
@text_stack = ['']
@proc_stack = []
@last_element = nil
@version = @encoding = @standalone = nil
@xml_stylesheets = []
end
def xmldecl(version, encoding, standalone)
@@ -271,7 +273,7 @@ module RSS
if name == "xml-stylesheet"
params = parse_pi_content(content)
if params.has_key?("href")
- @xml_stylesheets << XMLStyleSheet.new(*params)
end
end
end
@@ -291,12 +293,41 @@ module RSS
@ns_stack.push(ns)
prefix, local = split_name(name)
- @tag_stack.last.push([ns[prefix], local])
@tag_stack.push([])
- if respond_to?("start_#{local}", true)
- __send__("start_#{local}", local, prefix, attrs, ns.dup)
else
- start_else_element(local, prefix, attrs, ns.dup)
end
end
@@ -313,10 +344,17 @@ module RSS
end
def text(data)
- @text_stack.last << data
end
private
CONTENT_PATTERN = /\s*([^=]+)=(["'])([^\2]+?)\2/
def parse_pi_content(content)
@@ -328,20 +366,20 @@ module RSS
end
def start_else_element(local, prefix, attrs, ns)
- class_name = self.class.class_name(ns[prefix], local)
current_class = @last_element.class
if current_class.const_defined?(class_name)
next_class = current_class.const_get(class_name)
start_have_something_element(local, prefix, attrs, ns, next_class)
else
- if @ignore_unknown_element
@proc_stack.push(nil)
else
parent = "ROOT ELEMENT???"
if current_class.tag_name
parent = current_class.tag_name
end
- raise NotExceptedTagError.new(local, parent)
end
end
end
@@ -353,41 +391,48 @@ module RSS
end
def check_ns(tag_name, prefix, ns, require_uri)
- if @do_validate
- if ns[prefix] == require_uri
- #ns.delete(prefix)
- else
raise NSError.new(tag_name, prefix, require_uri)
end
end
end
def start_get_text_element(tag_name, prefix, ns, required_uri)
- @proc_stack.push Proc.new {|text, tags|
setter = self.class.setter(required_uri, tag_name)
- setter ||= "#{tag_name}="
if @last_element.respond_to?(setter)
@last_element.__send__(setter, text.to_s)
else
- if @do_validate and not @ignore_unknown_element
- raise NotExceptedTagError.new(tag_name, @last_element.tag_name)
end
end
- }
end
def start_have_something_element(tag_name, prefix, attrs, ns, klass)
check_ns(tag_name, prefix, ns, klass.required_uri)
- args = []
-
- klass.get_attributes.each do |a_name, a_uri, required|
if a_uri.is_a?(String) or !a_uri.respond_to?(:include?)
a_uri = [a_uri]
end
- unless a_uri == [nil]
for prefix, uri in ns
if a_uri.include?(uri)
val = attrs["#{prefix}:#{a_name}"]
@@ -395,12 +440,12 @@ module RSS
end
end
end
- if val.nil? and a_uri.include?(nil)
val = attrs[a_name]
end
if @do_validate and required and val.nil?
- unless a_uri.include?(nil)
for prefix, uri in ns
if a_uri.include?(uri)
a_name = "#{prefix}:#{a_name}"
@@ -410,20 +455,37 @@ module RSS
raise MissingAttributeError.new(tag_name, a_name)
end
- args << val
end
previous = @last_element
- next_element = klass.new(*args)
- next_element.do_validate = @do_validate
- previous.funcall(:set_next_element, tag_name, next_element)
@last_element = next_element
- @proc_stack.push Proc.new { |text, tags|
p(@last_element.class) if DEBUG
- @last_element.content = text if klass.have_content?
- @last_element.validate_for_stream(tags) if @do_validate
@last_element = previous
- }
end
end