luetzkendorf    2005/04/04 06:43:17

  Modified:    src/share/org/apache/slide/extractor SimpleXmlExtractor.java
  Log:
  added support for XML namespaces
  
  Revision  Changes    Path
  1.12      +81 -19    
jakarta-slide/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java
  
  Index: SimpleXmlExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java,v
  retrieving revision 1.11
  retrieving revision 1.12
  diff -u -r1.11 -r1.12
  --- SimpleXmlExtractor.java   14 Jan 2005 18:34:13 -0000      1.11
  +++ SimpleXmlExtractor.java   4 Apr 2005 13:43:17 -0000       1.12
  @@ -37,14 +37,46 @@
   import org.apache.slide.util.conf.ConfigurationException;
   import org.jdom.Attribute;
   import org.jdom.Document;
  +import org.jdom.Element;
   import org.jdom.JDOMException;
  +import org.jdom.Namespace;
   import org.jdom.Text;
   import org.jdom.input.SAXBuilder;
   import org.jdom.xpath.XPath;
   
   /**
  - * The SimpleXmlExtractor class
  + * Property extractor for XML files that maps XPath expressions to WebDAV properties.
  + *
  + * <p>For each property to be extracted an XPath expression must be specified. The XPath
  + * is evaluated against the XML document during extraction. The first node of the resulting
  + * node set is used to determine the property value. For <code>text</code> nodes the text
  + * becomes the property's value, for <code>attribute</code> nodes the attribute value and for
  + * <code>element</code> nodes the concatenation of all descendant <code>text</code> nodes.
  + *
  + * <p><b>Sample</b>: 
  + * <pre>
  + *   &lt;extractor classname="org.apache.slide.extractor.SimpleXmlExtractor" 
  + *              content-type="text/xml" uri="/files/xdocs">
  + *      &lt;configuration>
  + *         &lt;instruction property="id" namespace="http://mycomp.com/namepsaces/webdav" xpath="/xdoc/@id"/>
  + *         &lt;instruction property="title" namespace="http://mycomp.com/namepsaces/webdav" xpath="/xdoc/head/title"/>
  + *         &lt;instruction property="date" namespace="http://mycomp.com/namepsaces/webdav" xpath="/xdoc/date"/>
  + *      &lt;/configuration>
  + *   &lt;/extractor>
  + * </pre>
    * 
  + * <p><b>Sample with XML namespaces</b>: 
  + * <pre>
  + *   &lt;extractor classname="org.apache.slide.extractor.SimpleXmlExtractor" 
  + *              content-type="text/xml" uri="/files/xdocs">
  + *      &lt;configuration>
  + *         &lt;xmlnamespace prefix="pre" uri="http://mycomp.com/namepsaces/xmldocs"/>
  + *         &lt;instruction property="id" namespace="http://mycomp.com/namepsaces/webdav" xpath="/pre:xdoc/@id"/>
  + *         &lt;instruction property="title" namespace="http://mycomp.com/namepsaces/webdav" xpath="/pre:xdoc/pre:head/pre:title"/>
  + *         &lt;instruction property="date" namespace="http://mycomp.com/namepsaces/webdav" xpath="/pre:xdoc/pre:date"/>
  + *      &lt;/configuration>
  + *   &lt;/extractor>
  + * </pre>
    */
   public class SimpleXmlExtractor extends AbstractPropertyExtractor implements 
Configurable {
        
  @@ -55,6 +87,7 @@
        static final String CONTENT_TYPE_XML_ALL_CSV = 
CONTENT_TYPE_XML+","+CONTENT_TYPE_XHTML+","+CONTENT_TYPE_HTML;
        
       protected List instructions = new ArrayList();
  +    protected List namespaces = new ArrayList();
   
       public SimpleXmlExtractor(String uri, String contentType, String 
namespace) {
           super(uri, contentType, namespace);
  @@ -82,10 +115,14 @@
           return properties;
       }
   
  -    public void configure(Configuration configuration) throws 
ConfigurationException {
  -        Enumeration instructions = 
configuration.getConfigurations("instruction");
  -        while (instructions.hasMoreElements()) {
  -            Configuration instruction = (Configuration) 
instructions.nextElement();
  +    public void configure(Configuration conf) throws ConfigurationException {
  +     for(Enumeration e = conf.getConfigurations("xmlnamespace"); 
e.hasMoreElements();) {
  +             Configuration xpathNamespace = (Configuration)e.nextElement();
  +             
this.namespaces.add(Namespace.getNamespace(xpathNamespace.getAttribute("prefix"),
 
  +                                                                             
                   xpathNamespace.getAttribute("uri")));
  +        }
  +        for(Enumeration e = 
conf.getConfigurations("instruction");e.hasMoreElements();) {
  +            Configuration instruction = (Configuration) e.nextElement();
               addInstruction(createInstruction(instruction));
           }
       }
  @@ -95,33 +132,58 @@
        * Returning null signals that the extractor ignors this value.
        * 
        * @param text  the Node List identified by the xpath instruction.
  -     * @return  the property value to be set, <code>null</codee> if to be 
ignored.
  +     * @return  the property value to be set, <code>null</code> if to be 
ignored.
        */
       protected Object filter(List nodeList, Instruction instruction) throws 
ExtractorException {
           if (nodeList.size() > 0) {
  -            if (nodeList.get(0) instanceof Text) {
  -                return ((Text) nodeList.get(0)).getText();
  -            } else if (nodeList.get(0) instanceof Attribute) {
  -                return ((Attribute) nodeList.get(0)).getValue();
  -            } else if (nodeList.get(0) instanceof String) {
  -                return nodeList.get(0);
  +            Object node = nodeList.get(0);
  +                     if (node instanceof Text) {
  +                return ((Text) node).getText();
  +            } else if (node instanceof Attribute) {
  +                return ((Attribute) node).getValue();
  +            } else if (node instanceof String) {
  +                return node;
  +            } else if (node instanceof Element) {
  +             StringBuffer text = new StringBuffer();
  +             getElementText(node, text);
  +                return text.toString();
               }
           }
           return null;
       }
  +    
  +    private void getElementText(Object o, StringBuffer buffer) {
  +     if (o instanceof Element) {
  +             List list = ((Element)o).getContent();
  +             for (int i = 0, l = list.size(); i < l; i++) {
  +                     Object subNode = list.get(i);
  +                     getElementText(subNode, buffer);
  +             }
  +     } else if (o instanceof Text) {
  +             buffer.append(((Text)o).getText());
  +     } else {
  +             // ignore other stuff
  +     }
  +    }
   
       protected void addInstruction(Instruction instruction) {
           instructions.add(instruction);
       }
   
  -    protected Instruction createInstruction(Configuration instruction) 
throws ConfigurationException {
  +    protected Instruction createInstruction(Configuration instruction) 
  +             throws ConfigurationException {
           try {
               String property = instruction.getAttribute("property");
  -            String namespace = instruction.getAttribute("namespace", "DAV:");
  +            String propertyNamespace = instruction.getAttribute("namespace", 
"DAV:");
               XPath xPath = 
XPath.newInstance(instruction.getAttribute("xpath"));
  -            return new Instruction(xPath, 
PropertyName.getPropertyName(property, namespace));
  +            for(Iterator i = namespaces.iterator(); i.hasNext();) {
  +             xPath.addNamespace((Namespace)i.next());
  +            }
  +            return new Instruction(xPath, 
PropertyName.getPropertyName(property, 
  +                     propertyNamespace));
           } catch (JDOMException e) {
  -            throw new ConfigurationException("Could not create xPath from 
given attribute", instruction);
  +            throw new ConfigurationException("Could not create xPath from 
given attribute", 
  +                     instruction);
           }
       }
       
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to