Being impatient and in need of this functionality, I went ahead and
implemented it in a limited fashion using UserDataElements, as James
suggested. Unfortunately, I still had to patch dom4j itself in order to
make it work. Specifically, I had to make a few private fields in
SAXReader and SAXContentHandler protected, and make DispatchHandler
public. Attached are my PositionalSAXReader and
PositionalSAXContentHandler classes, as well as the patch for dom4j.

I'd appreciate any comments from the dom4j team on the efficacy of this
patch and on the possibility of a more general solution that would
record position information for all nodes (those that SAX reports
location information for, anyway). I don't particularly like using the
UserDataDocumentFactory stuff for this, because I don't think I should
have to choose between, say, IndexedDocumentFactory and being able to
store position information. But, still, it works.

- donald
package com.webslingerZ.webwalker.dom4j;

import org.dom4j.DocumentFactory;
import org.dom4j.io.SAXContentHandler;
import org.dom4j.io.SAXReader;
import org.dom4j.util.UserDataDocumentFactory;
import org.xml.sax.XMLReader;

/**
 * Extends the dom4j SAXReader class to use our PositionalSAXContentHandler,
 * which stores element position information in UserDataElements.
 *
 * @author Donald Ball
 * @version $Revision$
 */
public class PositionalSAXReader extends SAXReader {

    /**
     * Creates a reader that parses with the given SAX reader.
     *
     * @param reader the XMLReader handed to the SAXReader superclass
     */
    public PositionalSAXReader(XMLReader reader) {
        super(reader);
    }

    /**
     * Returns the document factory in use, installing a
     * UserDataDocumentFactory the first time it is requested when no
     * factory has been set yet.
     *
     * FIXME - a factory that is already set is returned without checking
     * that it is a UserDataDocumentFactory; undecided whether this method
     * should perform that check. PositionalSAXContentHandler rejects any
     * other factory type in its constructor.
     *
     * @return the current factory, never null
     */
    public DocumentFactory getDocumentFactory() {
        if (factory != null) {
            return factory;
        }
        factory = new UserDataDocumentFactory();
        return factory;
    }

    /**
     * Builds the position-recording content handler used for a parse.
     *
     * @param reader not used by this implementation
     * @return a new PositionalSAXContentHandler wired to this reader's
     *         document factory and dispatch handler
     */
    protected SAXContentHandler createContentHandler(XMLReader reader) {
        DocumentFactory documentFactory = getDocumentFactory();
        return new PositionalSAXContentHandler(documentFactory, dispatchHandler);
    }

}
package com.webslingerZ.webwalker.dom4j;

import org.dom4j.DocumentFactory;
import org.dom4j.io.DispatchHandler;
import org.dom4j.io.SAXContentHandler;
import org.dom4j.util.UserDataElement;
import org.dom4j.util.UserDataDocumentFactory;
import org.xml.sax.Attributes;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

/**
 * Extends the dom4j SAXContentHandler class to store element position
 * information in UserDataElements.
 *
 * @author Donald Ball
 * @version $Revision$
 */
public class PositionalSAXContentHandler extends SAXContentHandler {

    /**
     * Locator most recently passed to {@link #setDocumentLocator(Locator)};
     * stays null if the parser never supplies one.
     */
    Locator locator;

    /**
     * Creates a handler that builds documents with the given factory.
     *
     * @param factory         factory used to create nodes; must be a
     *                        UserDataDocumentFactory
     * @param dispatchHandler handler passed through to the superclass
     * @throws IllegalArgumentException if factory is not a
     *                                  UserDataDocumentFactory
     */
    public PositionalSAXContentHandler(DocumentFactory factory, DispatchHandler dispatchHandler) {
        super(factory, dispatchHandler);
        if (!(factory instanceof UserDataDocumentFactory)) {
            throw new IllegalArgumentException("PositionalSAXContentHandler requires use of UserDataDocumentFactory: "+factory);
        }
    }

    /**
     * Remembers the parser's locator so that element positions can be
     * recorded in {@link #startElement}.
     *
     * @param locator the locator supplied by the SAX parser
     */
    public void setDocumentLocator(Locator locator) {
        super.setDocumentLocator(locator);
        this.locator = locator;
    }

    /**
     * Lets the superclass create the element, then stores the current
     * locator position on it as an {@code int[2]} of
     * {@code {lineNumber, columnNumber}} via
     * {@code UserDataElement.setData}, replacing any data already set.
     *
     * Nothing is recorded when no locator was supplied or when the new
     * element is not a UserDataElement. Per the SAX Locator contract the
     * reported position is where the start-tag event ends, and either
     * value may be -1 when the parser cannot provide it.
     *
     * @throws SAXException if the superclass fails to process the event
     */
    public void startElement(String namespaceURI, String localName, String qualifiedName, Attributes attributes) throws SAXException {
        // The superclass must run first: it is what updates currentElement.
        super.startElement(namespaceURI, localName, qualifiedName, attributes);
        if (locator != null && currentElement instanceof UserDataElement) {
            int[] pos = new int[] {
                locator.getLineNumber(),
                locator.getColumnNumber()
            };
            ((UserDataElement) currentElement).setData(pos);
        }
    }

}
? patch
? patch2
? src/doc/images
Index: src/java/org/dom4j/io/DispatchHandler.java
===================================================================
RCS file: /cvsroot/dom4j/dom4j/src/java/org/dom4j/io/DispatchHandler.java,v
retrieving revision 1.3
diff -u -3 -p -c -r1.3 DispatchHandler.java
*** src/java/org/dom4j/io/DispatchHandler.java  20 May 2002 08:14:16 -0000      1.3
--- src/java/org/dom4j/io/DispatchHandler.java  6 Jun 2002 20:20:58 -0000
*************** import org.dom4j.ElementPath;
*** 28,34 ****
    * @version $Revision: 1.3 $
    */
  
! class DispatchHandler implements ElementHandler
  {
      /** Whether the parser is at the root element or not */
      private boolean        atRoot;
--- 28,34 ----
    * @version $Revision: 1.3 $
    */
  
! public class DispatchHandler implements ElementHandler
  {
      /** Whether the parser is at the root element or not */
      private boolean        atRoot;
Index: src/java/org/dom4j/io/SAXContentHandler.java
===================================================================
RCS file: /cvsroot/dom4j/dom4j/src/java/org/dom4j/io/SAXContentHandler.java,v
retrieving revision 1.44
diff -u -3 -p -c -r1.44 SAXContentHandler.java
*** src/java/org/dom4j/io/SAXContentHandler.java        25 Apr 2002 10:31:50 -0000      1.44
--- src/java/org/dom4j/io/SAXContentHandler.java        6 Jun 2002 20:20:58 -0000
*************** public class SAXContentHandler extends D
*** 102,108 ****
      private InputSource inputSource;
  
      /** The current element we are on */
!     private Element currentElement;
  
      /** Should internal DTD declarations be expanded into a List in the DTD */
      private boolean includeInternalDTDDeclarations = false;
--- 102,108 ----
      private InputSource inputSource;
  
      /** The current element we are on */
!     protected Element currentElement;
  
      /** Should internal DTD declarations be expanded into a List in the DTD */
      private boolean includeInternalDTDDeclarations = false;
*************** public class SAXContentHandler extends D
*** 128,134 ****
      /** Holds value of property stripWhitespaceText. */
      private boolean stripWhitespaceText = false;
  
- 
      public SAXContentHandler() {
          this( DocumentFactory.getInstance() );
      }
--- 128,133 ----
*************** public class SAXContentHandler extends D
*** 162,168 ****
  
      // ContentHandler interface
      //-------------------------------------------------------------------------
! 
      public void processingInstruction(String target, String data) throws SAXException {
          if ( mergeAdjacentText && textInTextBuffer ) {
              completeCurrentTextNode();
--- 161,167 ----
  
      // ContentHandler interface
      //-------------------------------------------------------------------------
!     
      public void processingInstruction(String target, String data) throws SAXException {
          if ( mergeAdjacentText && textInTextBuffer ) {
              completeCurrentTextNode();
Index: src/java/org/dom4j/io/SAXReader.java
===================================================================
RCS file: /cvsroot/dom4j/dom4j/src/java/org/dom4j/io/SAXReader.java,v
retrieving revision 1.41
diff -u -3 -p -c -r1.41 SAXReader.java
*** src/java/org/dom4j/io/SAXReader.java        20 May 2002 08:14:16 -0000      1.41
--- src/java/org/dom4j/io/SAXReader.java        6 Jun 2002 20:20:59 -0000
*************** import org.xml.sax.helpers.XMLReaderFact
*** 81,87 ****
  public class SAXReader {
  
      /** <code>DocumentFactory</code> used to create new document objects */
!     private DocumentFactory factory;
      
      /** <code>XMLReader</code> used to parse the SAX events */
      private XMLReader xmlReader;
--- 81,87 ----
  public class SAXReader {
  
      /** <code>DocumentFactory</code> used to create new document objects */
!     protected DocumentFactory factory;
      
      /** <code>XMLReader</code> used to parse the SAX events */
      private XMLReader xmlReader;
*************** public class SAXReader {
*** 90,96 ****
      private boolean validating;
      
      /** DispatchHandler to call when each <code>Element</code> is encountered */
!     private DispatchHandler dispatchHandler;
   
      /** ErrorHandler class to use */
      private ErrorHandler errorHandler;
--- 90,96 ----
      private boolean validating;
      
      /** DispatchHandler to call when each <code>Element</code> is encountered */
!     protected DispatchHandler dispatchHandler;
   
      /** ErrorHandler class to use */
      private ErrorHandler errorHandler;

Reply via email to