Author: mattmann
Date: Mon Jun 29 14:34:29 2015
New Revision: 1688239
URL: http://svn.apache.org/r1688239
Log:
Rollback r1688087 as it seems to cause some tests to fail.
Modified:
tika/trunk/CHANGES.txt
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/NodeMatcher.java
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/XPathParser.java
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/xpath/XPathParserTest.java
Modified: tika/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1688239&r1=1688238&r2=1688239&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Mon Jun 29 14:34:29 2015
@@ -1,9 +1,5 @@
Release 1.10 - Current Development
- * The XPath content handler now correclty handles the
- node() function which shouldn't match attributes
- per http://www.w3.org/TR/xpath/ (TIKA-1669).
-
* GDALParser now correctly sets "nitf" as a supported
MediaType (TIKA-1664).
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/NodeMatcher.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/NodeMatcher.java?rev=1688239&r1=1688238&r2=1688239&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/NodeMatcher.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/NodeMatcher.java Mon Jun 29 14:34:29 2015
@@ -30,6 +30,11 @@ public class NodeMatcher extends Matcher
}
@Override
+ public boolean matchesAttribute(String namespace, String name) {
+ return true;
+ }
+
+ @Override
public boolean matchesText() {
return true;
}
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/XPathParser.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/XPathParser.java?rev=1688239&r1=1688238&r2=1688239&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/XPathParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/XPathParser.java Mon Jun 29 14:34:29 2015
@@ -23,104 +23,98 @@ import java.util.Map;
* Parser for a very simple XPath subset. Only the following XPath constructs
* (with namespaces) are supported:
* <ul>
- * <li><code>.../node()</code></li>
- * <li><code>.../text()</code></li>
- * <li><code>.../@*</code></li>
- * <li><code>.../@name</code></li>
- * <li><code>.../*...</code></li>
- * <li><code>.../name...</code></li>
- * <li><code>...//*...</code></li>
- * <li><code>...//name...</code></li>
+ * <li><code>.../node()</code></li>
+ * <li><code>.../text()</code></li>
+ * <li><code>.../@*</code></li>
+ * <li><code>.../@name</code></li>
+ * <li><code>.../*...</code></li>
+ * <li><code>.../name...</code></li>
+ * <li><code>...//*...</code></li>
+ * <li><code>...//name...</code></li>
* </ul>
* <p>
- * In addition the non-abbreviated <code>.../descendant::node()</code> construct
- * can be used for cases where the descendant-or-self axis used by the
- * <code>...//node()</code> construct is not appropriate.
+ * In addition the non-abbreviated <code>.../descendant::node()</code>
+ * construct can be used for cases where the descendant-or-self axis
+ * used by the <code>...//node()</code> construct is not appropriate.
*/
public class XPathParser {
- private final Map<String, String> prefixes = new HashMap<String, String>();
+ private final Map<String, String> prefixes = new HashMap<String, String>();
- public XPathParser() {
- }
+ public XPathParser() {
+ }
- public XPathParser(String prefix, String namespace) {
- addPrefix(prefix, namespace);
- }
-
- public void addPrefix(String prefix, String namespace) {
- prefixes.put(prefix, namespace);
- }
-
- /**
- * Parses the given simple XPath expression to an evaluation state
- * initialized at the document node. Invalid expressions are not flagged as
- * errors, they just result in a failing evaluation state.
- *
- * @param xpath
- * simple XPath expression
- * @return XPath evaluation state
- */
- public Matcher parse(String xpath) {
- if (xpath.equals("/text()")) {
- return TextMatcher.INSTANCE;
- } else if (xpath.equals("/node()")) {
- return new CompositeMatcher(TextMatcher.INSTANCE, new ChildMatcher(
- ElementMatcher.INSTANCE));
- } else if (xpath.equals("/descendant::node()")
- || xpath.equals("/descendant:node()")) { // for compatibility
- return new CompositeMatcher(TextMatcher.INSTANCE, new ChildMatcher(
- new SubtreeMatcher(NodeMatcher.INSTANCE)));
- } else if (xpath.equals("/descendant-or-self::node()")) {
- // equiv. to "//node()"
- return new SubtreeMatcher(new CompositeMatcher(
- TextMatcher.INSTANCE, ElementMatcher.INSTANCE));
- } else if (xpath.equals("/@*")) {
- return AttributeMatcher.INSTANCE;
- } else if (xpath.length() == 0) {
- return ElementMatcher.INSTANCE;
- } else if (xpath.startsWith("/@")) {
- String name = xpath.substring(2);
- String prefix = null;
- int colon = name.indexOf(':');
- if (colon != -1) {
- prefix = name.substring(0, colon);
- name = name.substring(colon + 1);
- }
- if (prefixes.containsKey(prefix)) {
- return new NamedAttributeMatcher(prefixes.get(prefix), name);
- } else {
- return Matcher.FAIL;
- }
- } else if (xpath.startsWith("/*")) {
- return new ChildMatcher(parse(xpath.substring(2)));
- } else if (xpath.startsWith("///")) {
- return Matcher.FAIL;
- } else if (xpath.startsWith("//")) {
- return new SubtreeMatcher(parse(xpath.substring(1)));
- } else if (xpath.startsWith("/descendant-or-self::node()/")) {
- return new SubtreeMatcher(parse(xpath.substring(27)));
- } else if (xpath.startsWith("/")) {
- int slash = xpath.indexOf('/', 1);
- if (slash == -1) {
- slash = xpath.length();
- }
- String name = xpath.substring(1, slash);
- String prefix = null;
- int colon = name.indexOf(':');
- if (colon != -1) {
- prefix = name.substring(0, colon);
- name = name.substring(colon + 1);
- }
- if (prefixes.containsKey(prefix)) {
- return new NamedElementMatcher(prefixes.get(prefix), name,
- parse(xpath.substring(slash)));
- } else {
- return Matcher.FAIL;
- }
- } else {
- return Matcher.FAIL;
- }
- }
+ public XPathParser(String prefix, String namespace) {
+ addPrefix(prefix, namespace);
+ }
+
+ public void addPrefix(String prefix, String namespace) {
+ prefixes.put(prefix, namespace);
+ }
+
+ /**
+ * Parses the given simple XPath expression to an evaluation state
+ * initialized at the document node. Invalid expressions are not flagged
+ * as errors, they just result in a failing evaluation state.
+ *
+ * @param xpath simple XPath expression
+ * @return XPath evaluation state
+ */
+ public Matcher parse(String xpath) {
+ if (xpath.equals("/text()")) {
+ return TextMatcher.INSTANCE;
+ } else if (xpath.equals("/node()")) {
+ return NodeMatcher.INSTANCE;
+ } else if (xpath.equals("/descendant::node()")
+ || xpath.equals("/descendant:node()")) { // for compatibility
+ return new CompositeMatcher(
+ TextMatcher.INSTANCE,
+ new ChildMatcher(new SubtreeMatcher(NodeMatcher.INSTANCE)));
+ } else if (xpath.equals("/@*")) {
+ return AttributeMatcher.INSTANCE;
+ } else if (xpath.length() == 0) {
+ return ElementMatcher.INSTANCE;
+ } else if (xpath.startsWith("/@")) {
+ String name = xpath.substring(2);
+ String prefix = null;
+ int colon = name.indexOf(':');
+ if (colon != -1) {
+ prefix = name.substring(0, colon);
+ name = name.substring(colon + 1);
+ }
+ if (prefixes.containsKey(prefix)) {
+ return new NamedAttributeMatcher(prefixes.get(prefix), name);
+ } else {
+ return Matcher.FAIL;
+ }
+ } else if (xpath.startsWith("/*")) {
+ return new ChildMatcher(parse(xpath.substring(2)));
+ } else if (xpath.startsWith("///")) {
+ return Matcher.FAIL;
+ } else if (xpath.startsWith("//")) {
+ return new SubtreeMatcher(parse(xpath.substring(1)));
+ } else if (xpath.startsWith("/")) {
+ int slash = xpath.indexOf('/', 1);
+ if (slash == -1) {
+ slash = xpath.length();
+ }
+ String name = xpath.substring(1, slash);
+ String prefix = null;
+ int colon = name.indexOf(':');
+ if (colon != -1) {
+ prefix = name.substring(0, colon);
+ name = name.substring(colon + 1);
+ }
+ if (prefixes.containsKey(prefix)) {
+ return new NamedElementMatcher(
+ prefixes.get(prefix), name,
+ parse(xpath.substring(slash)));
+ } else {
+ return Matcher.FAIL;
+ }
+ } else {
+ return Matcher.FAIL;
+ }
+ }
}
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/xpath/XPathParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/xpath/XPathParserTest.java?rev=1688239&r1=1688238&r2=1688239&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/sax/xpath/XPathParserTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/sax/xpath/XPathParserTest.java Mon Jun 29 14:34:29 2015
@@ -79,99 +79,6 @@ public class XPathParserTest {
}
@Test
- public void testNode() {
-
- // node() = child::node() selects all the children of the context node. Note that no attribute nodes are returned,
- // because attributes are not children.
-
- Matcher matcher = parser.parse("/node()");
- assertTrue(matcher.matchesText());
- assertFalse(matcher.matchesElement());
- assertFalse(matcher.matchesAttribute(null, "name"));
- assertFalse(matcher.matchesAttribute(NS, "name"));
- assertFalse(matcher.matchesAttribute(NS, "eman"));
-
- matcher = matcher.descend(NS, "name");
- assertFalse(matcher.matchesText());
- assertTrue(matcher.matchesElement());
- assertFalse(matcher.matchesAttribute(null, "name"));
- assertFalse(matcher.matchesAttribute(NS, "name"));
- assertFalse(matcher.matchesAttribute(NS, "eman"));
-
- assertEquals(Matcher.FAIL, matcher.descend(NS, "name"));
- }
-
- @Test
- public void testDescendantNode() {
-
- // child::node() selects all the children of the context node. Note that no attribute nodes are returned,
- // because attributes are not children.
-
- Matcher matcher = parser.parse("/descendant::node()");
- assertTrue(matcher.matchesText());
- assertFalse(matcher.matchesElement());
- assertFalse(matcher.matchesAttribute(null, "name"));
-
- matcher = matcher.descend(NS, "name");
- assertTrue(matcher.matchesText());
- assertTrue(matcher.matchesElement());
- assertFalse(matcher.matchesAttribute(null, "name"));
-
- matcher = matcher.descend(NS, "name");
- assertTrue(matcher.matchesText());
- assertTrue(matcher.matchesElement());
- assertFalse(matcher.matchesAttribute(null, "name"));
- }
-
- @Test
- public void testDescendantOrSelfNode() {
-
- Matcher matcher = parser.parse("/descendant-or-self::node()");
- assertTrue(matcher.matchesText());
- assertTrue(matcher.matchesElement());
- assertFalse(matcher.matchesAttribute(NS, "name"));
-
- matcher = matcher.descend(NS, "name");
- assertTrue(matcher.matchesText());
- assertTrue(matcher.matchesElement());
- assertFalse(matcher.matchesAttribute(NS, "name"));
-
- matcher = matcher.descend(NS, "name");
- assertTrue(matcher.matchesText());
- assertTrue(matcher.matchesElement());
- assertFalse(matcher.matchesAttribute(NS, "name"));
-
- }
-
- @Test
- public void testDescendantOrSelfElement() {
-
- Matcher matcher = parser.parse("/descendant-or-self::node()/*");
- // Matcher matcher = parser.parse("//*");
- assertFalse(matcher.matchesText());
- assertFalse(matcher.matchesElement());
- assertFalse(matcher.matchesAttribute(null, "name"));
- assertFalse(matcher.matchesAttribute(NS, "name"));
- assertFalse(matcher.matchesAttribute(NS, "eman"));
-
- matcher = matcher.descend(NS, "name");
- assertFalse(matcher.matchesText());
- assertTrue(matcher.matchesElement());
- assertFalse(matcher.matchesAttribute(null, "name"));
- assertFalse(matcher.matchesAttribute(NS, "name"));
- assertFalse(matcher.matchesAttribute(NS, "eman"));
-
- matcher = matcher.descend(NS, "name");
- assertFalse(matcher.matchesText());
- assertTrue(matcher.matchesElement());
- assertFalse(matcher.matchesAttribute(null, "name"));
- assertFalse(matcher.matchesAttribute(NS, "name"));
- assertFalse(matcher.matchesAttribute(NS, "eman"));
-
- }
-
-
- @Test
public void testAnyElement() {
Matcher matcher = parser.parse("/*");
assertFalse(matcher.matchesText());