A => compare.rb +21 -0
@@ 0,0 1,21 @@
+require 'rexml/document'
+
+puts 1
+d = REXML::Document.new( File.new( "docs/documentation.xml" ) )
+puts 2
+REXML_NO_USE_LIBXML2 = true
+puts 3
+d = REXML::Document.new( File.new( "docs/documentation.xml" ) )
+REXML_NO_USE_LIBXML2 = false
+puts 4
+puts REXML::Parsers::get_parser
+t=Time.now
+100.times { REXML::Document.new( File.new( "docs/documentation.xml" ) ) }
+puts Time.now - t
+REXML_NO_USE_LIBXML2 = true
+puts 5
+puts REXML::Parsers::get_parser
+t=Time.now
+100.times { REXML::Document.new( File.new( "docs/documentation.xml" ) ) }
+puts Time.now - t
+
M src/rexml/parsers/baseparser.rb +16 -0
@@ 1,8 1,22 @@
require 'rexml/parseexception'
require 'rexml/source'
+require 'dl/import'
+require 'rexml/parsers/libxml2parser'
module REXML
module Parsers
+
+ def Parsers::get_parser
+ return REXML::Parsers::BaseParser if defined?(REXML_NO_USE_LIBXML2) && REXML_NO_USE_LIBXML2
+
+ begin
+ DL.dlopen( 'libxml2.so' )
+ REXML::Parsers::LibXML2Parser
+ rescue
+ REXML::Parsers::BaseParser
+ end
+ end
+
# = Using the Pull Parser
# <em>This API is experimental, and subject to change.</em>
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
@@ 446,6 460,8 @@ module REXML
end
end
+
+
=begin
case event[0]
when :start_element
M src/rexml/parsers/libxml2parser.rb +62 -23
@@ 4,19 4,31 @@ require 'dl/import'
module REXML
module Parsers
class LibXML2Parser
+ LIBXML = DL.dlopen( 'libxml2.so' ) # loaded once, when this class body is evaluated
+ NEW_READER = LIBXML[ 'xmlNewTextReaderFilename', 'PS' ] # signature string: return type, then argument types
+ FREE_READER = LIBXML[ 'xmlFreeTextReader', '0P' ] # void; takes the reader as a pointer like the functions below
+ PULL = LIBXML[ 'xmlTextReaderRead', 'IP' ]
+ NODE_TYPE = LIBXML[ 'xmlTextReaderNodeType', 'IP' ]
+ PREFIX = LIBXML[ 'xmlTextReaderPrefix', 'SP' ]
+ NAME = LIBXML[ 'xmlTextReaderName', 'SP' ]
+ VALUE = LIBXML[ 'xmlTextReaderValue', 'SP' ]
+ HAS_ATTRIBUTES = LIBXML[ 'xmlTextReaderHasAttributes', 'IP' ]
+ ATTRIBUTE_COUNT = LIBXML[ 'xmlTextReaderAttributeCount', 'IP' ]
+ MOVE_TO_NEXT_ATTRIBUTE = LIBXML[ 'xmlTextReaderMoveToNextAttribute', 'IP' ]
+ READ_STRING = LIBXML[ 'xmlTextReaderReadString', 'SP' ]
+ IS_EMPTY_ELEMENT = LIBXML[ 'xmlTextReaderIsEmptyElement', 'IP' ]
+
def initialize( source )
- libxml = DL.dlopen( 'libxml2.so' )
- @new_reader = libxml[ 'xmlNewTextReaderFilename', 'IS' ]
- @free_reader = libxml[ 'xmlFreeTextReader', '0I' ]
- @pull = libxml[ 'xmlTextReaderRead', 'II' ]
- @node_type = libxml[ 'xmlTextReaderNodeType', 'II' ]
- @prefix = libxml[ 'xmlTextReaderPrefix', 'SI' ]
- @name = libxml[ 'xmlTextReaderName', 'SI' ]
- @value = libxml[ 'xmlTextReaderValue', 'SI' ]
- @reader = @new_reader.call( source )[0]
+ if source.kind_of? File
+ @reader = NEW_READER.call( source.path )[0]
+ end
@done = false
+ @stack = []
end
+ def source
+ return @reader
+ end
def add_listener( listener )
if !defined?(@listeners) or !@listeners
@@ 37,7 49,7 @@ module REXML
def stream=( source )
- @reader = @new_reader.call( source )
+ @reader = NEW_READER.call( source )[0] # keep only the result element, as every other call site here does
end
@@ 71,42 83,69 @@ module REXML
def pull
- @stack.shift if @stack.size > 0
+ return @stack.shift if @stack.size > 0
current_node = nil
- rv = @pull.call( @reader )[0]
- (@done = true) && (return nil) if rv != 1
- case @node_type.call( @reader )[0]
+ rv = PULL.call( @reader )[0]
+ (@done = true) && (return [:end_document]) if rv != 1
+ k = NODE_TYPE.call( @reader )
+ case k[0]
when 1 # Element
- prefix = @prefix.call( @reader )[0]
- name = @name.call( @reader )[0]
- current_node = [ :start_element, "#{prefix}:#{name}" ]
- if @node_has_attributes.call( @reader )[0]
+ prefix = PREFIX.call( @reader )[0]
+ prefix = prefix + ":" if prefix
+ name = NAME.call( @reader )[0]
+ current_node = [ :start_element, "#{prefix}#{name}" ]
+ if HAS_ATTRIBUTES.call( @reader )
current_node << attrs = {}
- @node_attribute_count.times {
- rv = @pull.call( @reader )[0]
- prefix = @prefix.call( @reader )[0]
- name = @name.call( @reader )[0]
- value = @value.call( @reader )[0]
+ count = ATTRIBUTE_COUNT.call(@reader)[0] || 0
+ count.times {
+ prefix = PREFIX.call( @reader )[0]
+ name = NAME.call( @reader )[0]
+ value = VALUE.call( @reader )[0]
attrs[ "#{prefix}:#{name}" ] = value
}
end
+ if IS_EMPTY_ELEMENT.call(@reader)[0] == 1
+ @stack.push( [ :end_element, current_node[1] ] )
+ end
return current_node
when 15 # EndElement
+ prefix = PREFIX.call( @reader )[0]
+ prefix = prefix + ":" if prefix
+ name = NAME.call(@reader)[0]
+ return [ :end_element, "#{prefix}#{name}" ]
when 3 # Text
+ return [ :text, VALUE.call(@reader)[0], false ]
when 14 # SignificantWhitespace
+ return [ :text, VALUE.call(@reader)[0] ]
when 13 # Whitespace
+ return [ :text, VALUE.call(@reader)[0] ]
when 8 # Comment
+ return [ :comment, VALUE.call( @reader )[0] ]
when 4 # CDATA
+ return [ :cdata, VALUE.call( @reader )[0] ]
when 9 # Document
+ puts 9
when 11 # DocumentFragment
+ puts 11
when 10 # DocumentType
+ @stack.push [ :end_doctype ]
+ return [ :start_doctype, NAME.call(@reader)[0] ]
when 16 # EndEntity
+ puts 16
when 6 # Entity
+ puts 6
when 5 # EntityReference
+ puts 5
when 0 # None
+ puts 0
when 12 # Notation
+ puts 12
when 7 # ProcessingInstruction
+ return [ :processing_instruction, NAME.call(@reader)[0], VALUE.call(@reader)[0] ]
when 17 # XmlDeclaration
+ puts 17
+ else
+ puts "UNKNOWN: #{k[0]}"
end
end
M src/rexml/parsers/lightparser.rb +1 -1
@@ 7,7 7,7 @@ module REXML
class LightParser
def initialize stream
@stream = stream
- @parser = REXML::Parsers::BaseParser.new( stream )
+ @parser = REXML::Parsers::get_parser.new( stream ) # get_parser returns LibXML2Parser or BaseParser
end
def add_listener( listener )
M src/rexml/parsers/pullparser.rb +1 -1
@@ 37,7 37,7 @@ module REXML
def initialize stream
@entities = {}
@listeners = nil
- @parser = BaseParser.new( stream )
+ @parser = REXML::Parsers::get_parser.new( stream ) # backend class is chosen by get_parser at construction time
@my_stack = []
end
M src/rexml/parsers/sax2parser.rb +1 -1
@@ 8,7 8,7 @@ module REXML
# SAX2Parser
class SAX2Parser
def initialize source
- @parser = BaseParser.new(source)
+ @parser = REXML::Parsers::get_parser.new(source)
@listeners = []
@procs = []
@namespace_stack = []
M src/rexml/parsers/streamparser.rb +1 -1
@@ 3,7 3,7 @@ module REXML
class StreamParser
def initialize source, listener
@listener = listener
- @parser = BaseParser.new( source )
+ @parser = REXML::Parsers::get_parser.new(source) # backend class is chosen by get_parser at construction time
end
def add_listener( listener )
M src/rexml/parsers/treeparser.rb +4 -2
@@ 5,7 5,7 @@ module REXML
class TreeParser
def initialize( source, build_context = Document.new )
@build_context = build_context
- @parser = Parsers::BaseParser.new( source )
+ @parser = REXML::Parsers::get_parser.new( source ) # get_parser returns LibXML2Parser or BaseParser
end
def add_listener( listener )
@@ 39,7 39,7 @@ module REXML
@build_context[-1] << event[1]
else
@build_context.add(
- Text.new(event[1], @build_context.whitespace, nil, true)
+ Text.new(event[1], @build_context.whitespace, nil, event[2].nil?)
) unless (
@build_context.ignore_whitespace_nodes and
event[1].strip.size==0
@@ 86,6 86,8 @@ module REXML
rescue REXML::Validation::ValidationException
raise
rescue
+ puts $!
+ puts $!.backtrace.join("\n")
raise ParseException.new( $!.message, @parser.source, @parser, $! )
end
end
M src/rexml/parsers/ultralightparser.rb +1 -1
@@ 6,7 6,7 @@ module REXML
class UltraLightParser
def initialize stream
@stream = stream
- @parser = REXML::Parsers::BaseParser.new( stream )
+ @parser = REXML::Parsers::get_parser.new( stream ) # backend class is chosen by get_parser at construction time
end
def add_listener( listener )