Author: mattmann
Date: Mon Jun 29 05:26:52 2015
New Revision: 1688087
URL: http://svn.apache.org/r1688087
Log:
Fix for TIKA-1669: xpath node test ./node() should match all contained nodes
contributed by WulfB <[email protected]> this closes #52
Modified:
tika/trunk/CHANGES.txt
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/NodeMatcher.java
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/XPathParser.java
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/xpath/XPathParserTest.java
Modified: tika/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1688087&r1=1688086&r2=1688087&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Mon Jun 29 05:26:52 2015
@@ -1,5 +1,9 @@
Release 1.10 - Current Development
+ * The XPath content handler now correctly handles the
+ node() function which shouldn't match attributes
+ per http://www.w3.org/TR/xpath/ (TIKA-1669).
+
* GDALParser now correctly sets "nitf" as a supported
MediaType (TIKA-1664).
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/NodeMatcher.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/NodeMatcher.java?rev=1688087&r1=1688086&r2=1688087&view=diff
==============================================================================
---
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/NodeMatcher.java
(original)
+++
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/NodeMatcher.java
Mon Jun 29 05:26:52 2015
@@ -30,11 +30,6 @@ public class NodeMatcher extends Matcher
}
@Override
- public boolean matchesAttribute(String namespace, String name) {
- return true;
- }
-
- @Override
public boolean matchesText() {
return true;
}
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/XPathParser.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/XPathParser.java?rev=1688087&r1=1688086&r2=1688087&view=diff
==============================================================================
---
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/XPathParser.java
(original)
+++
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/XPathParser.java
Mon Jun 29 05:26:52 2015
@@ -23,98 +23,104 @@ import java.util.Map;
* Parser for a very simple XPath subset. Only the following XPath constructs
* (with namespaces) are supported:
* <ul>
- * <li><code>.../node()</code></li>
- * <li><code>.../text()</code></li>
- * <li><code>.../@*</code></li>
- * <li><code>.../@name</code></li>
- * <li><code>.../*...</code></li>
- * <li><code>.../name...</code></li>
- * <li><code>...//*...</code></li>
- * <li><code>...//name...</code></li>
+ * <li><code>.../node()</code></li>
+ * <li><code>.../text()</code></li>
+ * <li><code>.../@*</code></li>
+ * <li><code>.../@name</code></li>
+ * <li><code>.../*...</code></li>
+ * <li><code>.../name...</code></li>
+ * <li><code>...//*...</code></li>
+ * <li><code>...//name...</code></li>
* </ul>
* <p>
- * In addition the non-abbreviated <code>.../descendant::node()</code>
- * construct can be used for cases where the descendant-or-self axis
- * used by the <code>...//node()</code> construct is not appropriate.
+ * In addition the non-abbreviated <code>.../descendant::node()</code>
construct
+ * can be used for cases where the descendant-or-self axis used by the
+ * <code>...//node()</code> construct is not appropriate.
*/
public class XPathParser {
- private final Map<String, String> prefixes = new HashMap<String, String>();
+ private final Map<String, String> prefixes = new HashMap<String,
String>();
- public XPathParser() {
- }
+ public XPathParser() {
+ }
- public XPathParser(String prefix, String namespace) {
- addPrefix(prefix, namespace);
- }
-
- public void addPrefix(String prefix, String namespace) {
- prefixes.put(prefix, namespace);
- }
-
- /**
- * Parses the given simple XPath expression to an evaluation state
- * initialized at the document node. Invalid expressions are not flagged
- * as errors, they just result in a failing evaluation state.
- *
- * @param xpath simple XPath expression
- * @return XPath evaluation state
- */
- public Matcher parse(String xpath) {
- if (xpath.equals("/text()")) {
- return TextMatcher.INSTANCE;
- } else if (xpath.equals("/node()")) {
- return NodeMatcher.INSTANCE;
- } else if (xpath.equals("/descendant::node()")
- || xpath.equals("/descendant:node()")) { // for compatibility
- return new CompositeMatcher(
- TextMatcher.INSTANCE,
- new ChildMatcher(new
SubtreeMatcher(NodeMatcher.INSTANCE)));
- } else if (xpath.equals("/@*")) {
- return AttributeMatcher.INSTANCE;
- } else if (xpath.length() == 0) {
- return ElementMatcher.INSTANCE;
- } else if (xpath.startsWith("/@")) {
- String name = xpath.substring(2);
- String prefix = null;
- int colon = name.indexOf(':');
- if (colon != -1) {
- prefix = name.substring(0, colon);
- name = name.substring(colon + 1);
- }
- if (prefixes.containsKey(prefix)) {
- return new NamedAttributeMatcher(prefixes.get(prefix), name);
- } else {
- return Matcher.FAIL;
- }
- } else if (xpath.startsWith("/*")) {
- return new ChildMatcher(parse(xpath.substring(2)));
- } else if (xpath.startsWith("///")) {
- return Matcher.FAIL;
- } else if (xpath.startsWith("//")) {
- return new SubtreeMatcher(parse(xpath.substring(1)));
- } else if (xpath.startsWith("/")) {
- int slash = xpath.indexOf('/', 1);
- if (slash == -1) {
- slash = xpath.length();
- }
- String name = xpath.substring(1, slash);
- String prefix = null;
- int colon = name.indexOf(':');
- if (colon != -1) {
- prefix = name.substring(0, colon);
- name = name.substring(colon + 1);
- }
- if (prefixes.containsKey(prefix)) {
- return new NamedElementMatcher(
- prefixes.get(prefix), name,
- parse(xpath.substring(slash)));
- } else {
- return Matcher.FAIL;
- }
- } else {
- return Matcher.FAIL;
- }
- }
+ public XPathParser(String prefix, String namespace) {
+ addPrefix(prefix, namespace);
+ }
+
+ public void addPrefix(String prefix, String namespace) {
+ prefixes.put(prefix, namespace);
+ }
+
+ /**
+ * Parses the given simple XPath expression to an evaluation state
+ * initialized at the document node. Invalid expressions are not
flagged as
+ * errors, they just result in a failing evaluation state.
+ *
+ * @param xpath
+ * simple XPath expression
+ * @return XPath evaluation state
+ */
+ public Matcher parse(String xpath) {
+ if (xpath.equals("/text()")) {
+ return TextMatcher.INSTANCE;
+ } else if (xpath.equals("/node()")) {
+ return new CompositeMatcher(TextMatcher.INSTANCE, new
ChildMatcher(
+ ElementMatcher.INSTANCE));
+ } else if (xpath.equals("/descendant::node()")
+ || xpath.equals("/descendant:node()")) { // for
compatibility
+ return new CompositeMatcher(TextMatcher.INSTANCE, new
ChildMatcher(
+ new
SubtreeMatcher(NodeMatcher.INSTANCE)));
+ } else if (xpath.equals("/descendant-or-self::node()")) {
+ // equiv. to "//node()"
+ return new SubtreeMatcher(new CompositeMatcher(
+ TextMatcher.INSTANCE,
ElementMatcher.INSTANCE));
+ } else if (xpath.equals("/@*")) {
+ return AttributeMatcher.INSTANCE;
+ } else if (xpath.length() == 0) {
+ return ElementMatcher.INSTANCE;
+ } else if (xpath.startsWith("/@")) {
+ String name = xpath.substring(2);
+ String prefix = null;
+ int colon = name.indexOf(':');
+ if (colon != -1) {
+ prefix = name.substring(0, colon);
+ name = name.substring(colon + 1);
+ }
+ if (prefixes.containsKey(prefix)) {
+ return new
NamedAttributeMatcher(prefixes.get(prefix), name);
+ } else {
+ return Matcher.FAIL;
+ }
+ } else if (xpath.startsWith("/*")) {
+ return new ChildMatcher(parse(xpath.substring(2)));
+ } else if (xpath.startsWith("///")) {
+ return Matcher.FAIL;
+ } else if (xpath.startsWith("//")) {
+ return new SubtreeMatcher(parse(xpath.substring(1)));
+ } else if (xpath.startsWith("/descendant-or-self::node()/")) {
+ return new SubtreeMatcher(parse(xpath.substring(27)));
+ } else if (xpath.startsWith("/")) {
+ int slash = xpath.indexOf('/', 1);
+ if (slash == -1) {
+ slash = xpath.length();
+ }
+ String name = xpath.substring(1, slash);
+ String prefix = null;
+ int colon = name.indexOf(':');
+ if (colon != -1) {
+ prefix = name.substring(0, colon);
+ name = name.substring(colon + 1);
+ }
+ if (prefixes.containsKey(prefix)) {
+ return new
NamedElementMatcher(prefixes.get(prefix), name,
+ parse(xpath.substring(slash)));
+ } else {
+ return Matcher.FAIL;
+ }
+ } else {
+ return Matcher.FAIL;
+ }
+ }
}
Modified:
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/xpath/XPathParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/xpath/XPathParserTest.java?rev=1688087&r1=1688086&r2=1688087&view=diff
==============================================================================
---
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/xpath/XPathParserTest.java
(original)
+++
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/xpath/XPathParserTest.java
Mon Jun 29 05:26:52 2015
@@ -79,6 +79,99 @@ public class XPathParserTest {
}
@Test
+ public void testNode() {
+
+ // node() = child::node() selects all the children of the context
node. Note that no attribute nodes are returned,
+ // because attributes are not children.
+
+ Matcher matcher = parser.parse("/node()");
+ assertTrue(matcher.matchesText());
+ assertFalse(matcher.matchesElement());
+ assertFalse(matcher.matchesAttribute(null, "name"));
+ assertFalse(matcher.matchesAttribute(NS, "name"));
+ assertFalse(matcher.matchesAttribute(NS, "eman"));
+
+ matcher = matcher.descend(NS, "name");
+ assertFalse(matcher.matchesText());
+ assertTrue(matcher.matchesElement());
+ assertFalse(matcher.matchesAttribute(null, "name"));
+ assertFalse(matcher.matchesAttribute(NS, "name"));
+ assertFalse(matcher.matchesAttribute(NS, "eman"));
+
+ assertEquals(Matcher.FAIL, matcher.descend(NS, "name"));
+ }
+
+ @Test
+ public void testDescendantNode() {
+
+ // descendant::node() selects every descendant of the context node (its children, their children, ...).
+ // No attribute nodes are returned, because the descendant axis never contains attributes.
+
+ Matcher matcher = parser.parse("/descendant::node()");
+ assertTrue(matcher.matchesText());
+ assertFalse(matcher.matchesElement());
+ assertFalse(matcher.matchesAttribute(null, "name"));
+
+ matcher = matcher.descend(NS, "name");
+ assertTrue(matcher.matchesText());
+ assertTrue(matcher.matchesElement());
+ assertFalse(matcher.matchesAttribute(null, "name"));
+
+ matcher = matcher.descend(NS, "name");
+ assertTrue(matcher.matchesText());
+ assertTrue(matcher.matchesElement());
+ assertFalse(matcher.matchesAttribute(null, "name"));
+ }
+
+ @Test
+ public void testDescendantOrSelfNode() {
+
+ Matcher matcher = parser.parse("/descendant-or-self::node()");
+ assertTrue(matcher.matchesText());
+ assertTrue(matcher.matchesElement());
+ assertFalse(matcher.matchesAttribute(NS, "name"));
+
+ matcher = matcher.descend(NS, "name");
+ assertTrue(matcher.matchesText());
+ assertTrue(matcher.matchesElement());
+ assertFalse(matcher.matchesAttribute(NS, "name"));
+
+ matcher = matcher.descend(NS, "name");
+ assertTrue(matcher.matchesText());
+ assertTrue(matcher.matchesElement());
+ assertFalse(matcher.matchesAttribute(NS, "name"));
+
+ }
+
+ @Test
+ public void testDescendantOrSelfElement() {
+
+ Matcher matcher = parser.parse("/descendant-or-self::node()/*");
+ // Matcher matcher = parser.parse("//*");
+ assertFalse(matcher.matchesText());
+ assertFalse(matcher.matchesElement());
+ assertFalse(matcher.matchesAttribute(null, "name"));
+ assertFalse(matcher.matchesAttribute(NS, "name"));
+ assertFalse(matcher.matchesAttribute(NS, "eman"));
+
+ matcher = matcher.descend(NS, "name");
+ assertFalse(matcher.matchesText());
+ assertTrue(matcher.matchesElement());
+ assertFalse(matcher.matchesAttribute(null, "name"));
+ assertFalse(matcher.matchesAttribute(NS, "name"));
+ assertFalse(matcher.matchesAttribute(NS, "eman"));
+
+ matcher = matcher.descend(NS, "name");
+ assertFalse(matcher.matchesText());
+ assertTrue(matcher.matchesElement());
+ assertFalse(matcher.matchesAttribute(null, "name"));
+ assertFalse(matcher.matchesAttribute(NS, "name"));
+ assertFalse(matcher.matchesAttribute(NS, "eman"));
+
+ }
+
+
+ @Test
public void testAnyElement() {
Matcher matcher = parser.parse("/*");
assertFalse(matcher.matchesText());