First working libxml2-backed parser.  Some pieces are still missing (several
node types are not handled yet).  It is slower than pure-Ruby REXML, so I'm
shelving it.
A => compare.rb +21 -0
@@ 0,0 1,21 @@ 
+require 'rexml/document'
+
+# Ad-hoc benchmark: parses the same document with the libxml2-backed parser
+# and with pure-Ruby REXML, printing the parser class and the elapsed seconds
+# for 100 parses of each.  REXML::Parsers::get_parser checks the top-level
+# constant REXML_NO_USE_LIBXML2 on every call, so redefining it switches
+# parsers between runs.
+DOC_PATH = "docs/documentation.xml"
+
+# Parses DOC_PATH once.  The block form of File.open closes the handle when
+# the parse is done instead of leaving one open File per iteration.
+def parse_doc
+  File.open( DOC_PATH ) { |f| REXML::Document.new( f ) }
+end
+
+# Defines REXML_NO_USE_LIBXML2 as !enabled.  Any previous definition is
+# removed first so Ruby does not warn "already initialized constant".
+def use_libxml2( enabled )
+  if Object.const_defined?( :REXML_NO_USE_LIBXML2 )
+    Object.send( :remove_const, :REXML_NO_USE_LIBXML2 )
+  end
+  Object.const_set( :REXML_NO_USE_LIBXML2, !enabled )
+end
+
+# Prints the parser class currently selected, then the wall-clock seconds
+# taken by 100 full parses.
+def benchmark
+  puts REXML::Parsers::get_parser
+  t = Time.now
+  100.times { parse_doc }
+  puts Time.now - t
+end
+
+# The numbered puts are progress markers.
+puts 1
+parse_doc            # flag not defined yet: get_parser tries libxml2 first
+puts 2
+use_libxml2( false )
+puts 3
+parse_doc            # pure-Ruby BaseParser
+use_libxml2( true )
+puts 4
+benchmark
+use_libxml2( false )
+puts 5
+benchmark
+

          
M src/rexml/parsers/baseparser.rb +16 -0
@@ 1,8 1,22 @@ 
 require 'rexml/parseexception'
 require 'rexml/source'
+require 'dl/import'
+require 'rexml/parsers/libxml2parser'
 
 module REXML
   module Parsers
+
+    # Returns the parser class the front-end parsers should instantiate.
+    #
+    # BaseParser is returned whenever the constant REXML_NO_USE_LIBXML2 is
+    # defined and true.  Otherwise libxml2 is probed with DL.dlopen and
+    # LibXML2Parser is returned if that succeeds, BaseParser if it raises.
+    # The probe result is memoized so the library is not dlopen'ed again
+    # for every parser that gets constructed; the opt-out flag is still
+    # consulted on every call.
+    #
+    # NOTE(review): this file requires rexml/parsers/libxml2parser
+    # unconditionally and that class dlopens libxml2 in its class body, so a
+    # missing library may already fail at require time -- confirm.
+    def Parsers::get_parser
+      return REXML::Parsers::BaseParser if defined?(REXML_NO_USE_LIBXML2) && REXML_NO_USE_LIBXML2
+
+      @libxml2_parser ||= begin
+        DL.dlopen( 'libxml2.so' )
+        REXML::Parsers::LibXML2Parser
+      rescue
+        REXML::Parsers::BaseParser
+      end
+    end
+
     # = Using the Pull Parser
     # <em>This API is experimental, and subject to change.</em>
     #  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )

          
@@ 446,6 460,8 @@ module REXML
   end
 end
 
+
+
 =begin
   case event[0]
   when :start_element

          
M src/rexml/parsers/libxml2parser.rb +62 -23
@@ 4,19 4,31 @@ require 'dl/import'
 module REXML
   module Parsers
     class LibXML2Parser
+      # libxml2 handle and xmlTextReader entry points, resolved once when the
+      # class body is evaluated and shared by every parser instance.
+      # NOTE(review): in the DL signature strings the first character looks
+      # like the return type and the rest the argument types (0 = void,
+      # I = int, P = pointer, S = string) -- confirm against the DL docs.
+      LIBXML = DL.dlopen( 'libxml2.so' )
+      NEW_READER = LIBXML[ 'xmlNewTextReaderFilename', 'PS' ]
+      # The reader handle is passed as a pointer (P) like in every other
+      # binding below, rather than as an int.
+      FREE_READER = LIBXML[ 'xmlFreeTextReader', '0P' ]
+      PULL = LIBXML[ 'xmlTextReaderRead', 'IP' ]
+      NODE_TYPE = LIBXML[ 'xmlTextReaderNodeType', 'IP' ]
+      PREFIX = LIBXML[ 'xmlTextReaderPrefix', 'SP' ]
+      NAME = LIBXML[ 'xmlTextReaderName', 'SP' ]
+      VALUE = LIBXML[ 'xmlTextReaderValue', 'SP' ]
+      HAS_ATTRIBUTES = LIBXML[ 'xmlTextReaderHasAttributes', 'IP' ]
+      ATTRIBUTE_COUNT = LIBXML[ 'xmlTextReaderAttributeCount', 'IP' ]
+      MOVE_TO_NEXT_ATTRIBUTE = LIBXML[ 'xmlTextReaderMoveToNextAttribute', 'IP' ]
+      READ_STRING = LIBXML[ 'xmlTextReaderReadString', 'SP' ]
+      IS_EMPTY_ELEMENT = LIBXML[ 'xmlTextReaderIsEmptyElement', 'IP' ]
+
       def initialize( source )
-        libxml = DL.dlopen( 'libxml2.so' )
-        @new_reader = libxml[ 'xmlNewTextReaderFilename', 'IS' ]
-        @free_reader = libxml[ 'xmlFreeTextReader', '0I' ]
-        @pull = libxml[ 'xmlTextReaderRead', 'II' ]
-        @node_type = libxml[ 'xmlTextReaderNodeType', 'II' ]
-        @prefix = libxml[ 'xmlTextReaderPrefix', 'SI' ]
-        @name = libxml[ 'xmlTextReaderName', 'SI' ]
-        @value = libxml[ 'xmlTextReaderValue', 'SI' ]
-        @reader = @new_reader.call( source )[0]
+        if source.kind_of? File
+          @reader = NEW_READER.call( source.path )[0]
+        end
         @done = false
+        @stack = []
       end
 
+      # Returns the value held in @reader, i.e. the libxml2 reader handle
+      # set up by initialize or stream=.
+      def source
+        @reader
+      end
 
       def add_listener( listener )
         if !defined?(@listeners) or !@listeners

          
@@ 37,7 49,7 @@ module REXML
       
 
       def stream=( source )
-        @reader = @new_reader.call( source )
+        # Opens a new reader on +source+, which is passed to
+        # xmlNewTextReaderFilename as-is.  Only element 0 of the DL call
+        # result is kept, the same value initialize stores in @reader.
+        @reader = NEW_READER.call( source )[0]
       end
 
 

          
@@ 71,42 83,69 @@ module REXML
 
 
       def pull
-        @stack.shift if @stack.size > 0
+        return @stack.shift if @stack.size > 0
         current_node = nil
-        rv = @pull.call( @reader )[0]
-        (@done = true) && (return nil) if rv != 1
-        case @node_type.call( @reader )[0]
+        rv = PULL.call( @reader )[0]
+        (@done = true) && (return [:end_document]) if rv != 1
+        k = NODE_TYPE.call( @reader )
+        case k[0]
         when 1    # Element
-          prefix = @prefix.call( @reader )[0]
-          name = @name.call( @reader )[0]
-          current_node = [ :start_element, "#{prefix}:#{name}" ]
-          if @node_has_attributes.call( @reader )[0]
+          prefix = PREFIX.call( @reader )[0]
+          prefix = prefix + ":" if prefix
+          name = NAME.call( @reader )[0]
+          current_node = [ :start_element, "#{prefix}#{name}" ]
+          if HAS_ATTRIBUTES.call( @reader )
             current_node << attrs = {}
-            @node_attribute_count.times { 
-              rv = @pull.call( @reader )[0]
-              prefix = @prefix.call( @reader )[0]
-              name = @name.call( @reader )[0]
-              value = @value.call( @reader )[0]
+            count = ATTRIBUTE_COUNT.call(@reader)[0] || 0
+            count.times { 
+              prefix = PREFIX.call( @reader )[0]
+              name = NAME.call( @reader )[0]
+              value = VALUE.call( @reader )[0]
               attrs[ "#{prefix}:#{name}" ] = value
             }
           end
+          if IS_EMPTY_ELEMENT.call(@reader)[0] == 1
+            @stack.push( [ :end_element, current_node[1] ] )
+          end
           return current_node
         when 15   # EndElement
+          prefix = PREFIX.call( @reader )[0]
+          prefix = prefix + ":" if prefix
+          name = NAME.call(@reader)[0] 
+          return [ :end_element, "#{prefix}#{name}" ]
         when 3    # Text
+          return [ :text, VALUE.call(@reader)[0], false ]
         when 14   # SignificantWhitespace
+          return [ :text, VALUE.call(@reader)[0] ]
         when 13   # Whitespace
+          return [ :text, VALUE.call(@reader)[0] ]
         when 8    # Comment
+          return [ :comment, VALUE.call( @reader )[0] ]
         when 4    # CDATA
+          return [ :cdata, VALUE.call( @reader )[0] ]
         when 9    # Document
+          puts 9
         when 11   # DocumentFragment
+          puts 11
         when 10   # DocumentType
+          @stack.push [ :end_doctype ]
+          return [ :start_doctype, NAME.call(@reader)[0] ]
         when 16   # EndEntity
+          puts 16
         when 6    # Entity
+          puts 6
         when 5    # EntityReference
+          puts 5
         when 0    # None
+          puts 0
         when 12   # Notation
+          puts 12
         when 7    # ProcessingInstruction
+          return [ :processing_instruction, NAME.call(@reader)[0], VALUE.call(@reader)[0] ]
         when 17   # XmlDeclaration
+          puts 17
+        else
+          puts "UNKNOWN: #{k[0]}"
         end
       end
 

          
M src/rexml/parsers/lightparser.rb +1 -1
@@ 7,7 7,7 @@ module REXML
 		class LightParser
 			def initialize stream
 				@stream = stream
-				@parser = REXML::Parsers::BaseParser.new( stream )
+				# get_parser returns LibXML2Parser when libxml2 can be opened and
+				# REXML_NO_USE_LIBXML2 is not set to true, BaseParser otherwise.
+				@parser = REXML::Parsers::get_parser.new( stream )
 			end
 
       def add_listener( listener )

          
M src/rexml/parsers/pullparser.rb +1 -1
@@ 37,7 37,7 @@ module REXML
       def initialize stream
         @entities = {}
         @listeners = nil
-        @parser = BaseParser.new( stream )
+        # get_parser returns LibXML2Parser when libxml2 can be opened and
+        # REXML_NO_USE_LIBXML2 is not set to true, BaseParser otherwise.
+        @parser = REXML::Parsers::get_parser.new( stream )
         @my_stack = []
       end
 

          
M src/rexml/parsers/sax2parser.rb +1 -1
@@ 8,7 8,7 @@ module REXML
     # SAX2Parser
 		class SAX2Parser
 			def initialize source
-				@parser = BaseParser.new(source)
+				@parser = REXML::Parsers::get_parser.new(source)
 				@listeners = []
 				@procs = []
 				@namespace_stack = []

          
M src/rexml/parsers/streamparser.rb +1 -1
@@ 3,7 3,7 @@ module REXML
     class StreamParser
       def initialize source, listener
         @listener = listener
-        @parser = BaseParser.new( source )
+        # get_parser returns LibXML2Parser when libxml2 can be opened and
+        # REXML_NO_USE_LIBXML2 is not set to true, BaseParser otherwise.
+        @parser = REXML::Parsers::get_parser.new( source )
       end
       
       def add_listener( listener )

          
M src/rexml/parsers/treeparser.rb +4 -2
@@ 5,7 5,7 @@ module REXML
     class TreeParser
       def initialize( source, build_context = Document.new )
         @build_context = build_context
-        @parser = Parsers::BaseParser.new( source )
+        # get_parser returns LibXML2Parser when libxml2 can be opened and
+        # REXML_NO_USE_LIBXML2 is not set to true, BaseParser otherwise.
+        @parser = REXML::Parsers::get_parser.new( source )
       end
 
       def add_listener( listener )

          
@@ 39,7 39,7 @@ module REXML
                   @build_context[-1] << event[1]
                 else
                   @build_context.add( 
-                    Text.new(event[1], @build_context.whitespace, nil, true) 
+                    Text.new(event[1], @build_context.whitespace, nil, event[2].nil?) 
                   ) unless (
                     @build_context.ignore_whitespace_nodes and
                     event[1].strip.size==0

          
@@ 86,6 86,8 @@ module REXML
         rescue REXML::Validation::ValidationException
           raise
         rescue
+          puts $!
+          puts $!.backtrace.join("\n")
           raise ParseException.new( $!.message, @parser.source, @parser, $! )
         end
       end

          
M src/rexml/parsers/ultralightparser.rb +1 -1
@@ 6,7 6,7 @@ module REXML
 		class UltraLightParser
 			def initialize stream
 				@stream = stream
-				@parser = REXML::Parsers::BaseParser.new( stream )
+				# get_parser returns LibXML2Parser or BaseParser; it must be handed
+				# this method's own argument, which is named stream.
+				@parser = REXML::Parsers::get_parser.new( stream )
 			end
 
       def add_listener( listener )