luetzkendorf 2005/04/04 06:43:17 Modified: src/share/org/apache/slide/extractor SimpleXmlExtractor.java Log: added support for XML namespaces Revision Changes Path 1.12 +81 -19 jakarta-slide/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java Index: SimpleXmlExtractor.java =================================================================== RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java,v retrieving revision 1.11 retrieving revision 1.12 diff -u -r1.11 -r1.12 --- SimpleXmlExtractor.java 14 Jan 2005 18:34:13 -0000 1.11 +++ SimpleXmlExtractor.java 4 Apr 2005 13:43:17 -0000 1.12 @@ -37,14 +37,46 @@ import org.apache.slide.util.conf.ConfigurationException; import org.jdom.Attribute; import org.jdom.Document; +import org.jdom.Element; import org.jdom.JDOMException; +import org.jdom.Namespace; import org.jdom.Text; import org.jdom.input.SAXBuilder; import org.jdom.xpath.XPath; /** - * The SimpleXmlExtractor class + * Property extractor for XML files that maps XPath expressions to WebDAV properties. + * + * <p>For each property to be extracted an XPath expression must be specified. The XPath + * is evaluated against the XML document during extraction. The first node of the resulting + * node set is used to determine the property value. For <code>text</code> nodes the text + * becomes the property's value, for <code>attribute</code> nodes the attribute value and for + * <code>element</code> nodes the concatenation of all descendant <code>text</code> nodes. 
+ * + * <p><b>Sample</b>: + * <pre> + * <extractor classname="org.apache.slide.extractor.SimpleXmlExtractor" + * content-type="text/xml" uri="/files/xdocs"> + * <configuration> + * <instruction property="id" namespace="http://mycomp.com/namespaces/webdav" xpath="/xdoc/@id"/> + * <instruction property="title" namespace="http://mycomp.com/namespaces/webdav" xpath="/xdoc/head/title"/> + * <instruction property="date" namespace="http://mycomp.com/namespaces/webdav" xpath="/xdoc/date"/> + * </configuration> + * </extractor> + * </pre> * + * <p><b>Sample with XML namespaces</b>: + * <pre> + * <extractor classname="org.apache.slide.extractor.SimpleXmlExtractor" + * content-type="text/xml" uri="/files/xdocs"> + * <configuration> + * <xmlnamespace prefix="pre" uri="http://mycomp.com/namespaces/xmldocs"/> + * <instruction property="id" namespace="http://mycomp.com/namespaces/webdav" xpath="/pre:xdoc/@id"/> + * <instruction property="title" namespace="http://mycomp.com/namespaces/webdav" xpath="/pre:xdoc/pre:head/pre:title"/> + * <instruction property="date" namespace="http://mycomp.com/namespaces/webdav" xpath="/pre:xdoc/pre:date"/> + * </configuration> + * </extractor> + * </pre> */ public class SimpleXmlExtractor extends AbstractPropertyExtractor implements Configurable { @@ -55,6 +87,7 @@ static final String CONTENT_TYPE_XML_ALL_CSV = CONTENT_TYPE_XML+","+CONTENT_TYPE_XHTML+","+CONTENT_TYPE_HTML; protected List instructions = new ArrayList(); + protected List namespaces = new ArrayList(); public SimpleXmlExtractor(String uri, String contentType, String namespace) { super(uri, contentType, namespace); @@ -82,10 +115,14 @@ return properties; } - public void configure(Configuration configuration) throws ConfigurationException { - Enumeration instructions = configuration.getConfigurations("instruction"); - while (instructions.hasMoreElements()) { - Configuration instruction = (Configuration) instructions.nextElement(); + public void configure(Configuration conf) throws 
ConfigurationException { + for(Enumeration e = conf.getConfigurations("xmlnamespace"); e.hasMoreElements();) { + Configuration xpathNamespace = (Configuration)e.nextElement(); + this.namespaces.add(Namespace.getNamespace(xpathNamespace.getAttribute("prefix"), + xpathNamespace.getAttribute("uri"))); + } + for(Enumeration e = conf.getConfigurations("instruction");e.hasMoreElements();) { + Configuration instruction = (Configuration) e.nextElement(); addInstruction(createInstruction(instruction)); } } @@ -95,33 +132,58 @@ * Returning null signals that the extractor ignors this value. * * @param text the Node List identified by the xpath instruction. - * @return the property value to be set, <code>null</codee> if to be ignored. + * @return the property value to be set, <code>null</code> if to be ignored. */ protected Object filter(List nodeList, Instruction instruction) throws ExtractorException { if (nodeList.size() > 0) { - if (nodeList.get(0) instanceof Text) { - return ((Text) nodeList.get(0)).getText(); - } else if (nodeList.get(0) instanceof Attribute) { - return ((Attribute) nodeList.get(0)).getValue(); - } else if (nodeList.get(0) instanceof String) { - return nodeList.get(0); + Object node = nodeList.get(0); + if (node instanceof Text) { + return ((Text) node).getText(); + } else if (node instanceof Attribute) { + return ((Attribute) node).getValue(); + } else if (node instanceof String) { + return node; + } else if (node instanceof Element) { + StringBuffer text = new StringBuffer(); + getElementText(node, text); + return text.toString(); } } return null; } + + private void getElementText(Object o, StringBuffer buffer) { + if (o instanceof Element) { + List list = ((Element)o).getContent(); + for (int i = 0, l = list.size(); i < l; i++) { + Object subNode = list.get(i); + getElementText(subNode, buffer); + } + } else if (o instanceof Text) { + buffer.append(((Text)o).getText()); + } else { + // ignore other stuff + } + } protected void 
addInstruction(Instruction instruction) { instructions.add(instruction); } - protected Instruction createInstruction(Configuration instruction) throws ConfigurationException { + protected Instruction createInstruction(Configuration instruction) + throws ConfigurationException { try { String property = instruction.getAttribute("property"); - String namespace = instruction.getAttribute("namespace", "DAV:"); + String propertyNamespace = instruction.getAttribute("namespace", "DAV:"); XPath xPath = XPath.newInstance(instruction.getAttribute("xpath")); - return new Instruction(xPath, PropertyName.getPropertyName(property, namespace)); + for(Iterator i = namespaces.iterator(); i.hasNext();) { + xPath.addNamespace((Namespace)i.next()); + } + return new Instruction(xPath, PropertyName.getPropertyName(property, + propertyNamespace)); } catch (JDOMException e) { - throw new ConfigurationException("Could not create xPath from given attribute", instruction); + throw new ConfigurationException("Could not create xPath from given attribute", + instruction); } }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]
