Author: mattmann
Date: Mon Jun 29 05:26:52 2015
New Revision: 1688087

URL: http://svn.apache.org/r1688087
Log:
Fix for TIKA-1669: xpath node test ./node() should match all contained nodes 
contributed by WulfB <[email protected]> this closes #52

Modified:
    tika/trunk/CHANGES.txt
    
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/NodeMatcher.java
    
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/XPathParser.java
    
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/xpath/XPathParserTest.java

Modified: tika/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1688087&r1=1688086&r2=1688087&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Mon Jun 29 05:26:52 2015
@@ -1,5 +1,9 @@
 Release 1.10 - Current Development
 
+  * The XPath content handler now correctly handles the 
+     node() node test, which shouldn't match attributes 
+     per http://www.w3.org/TR/xpath/ (TIKA-1669).
+
   * GDALParser now correctly sets "nitf" as a supported 
     MediaType (TIKA-1664).
 

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/NodeMatcher.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/NodeMatcher.java?rev=1688087&r1=1688086&r2=1688087&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/NodeMatcher.java 
(original)
+++ 
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/NodeMatcher.java 
Mon Jun 29 05:26:52 2015
@@ -30,11 +30,6 @@ public class NodeMatcher extends Matcher
     }
 
     @Override
-    public boolean matchesAttribute(String namespace, String name) {
-        return true;
-    }
-
-    @Override
     public boolean matchesText() {
         return true;
     }

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/XPathParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/XPathParser.java?rev=1688087&r1=1688086&r2=1688087&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/XPathParser.java 
(original)
+++ 
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/xpath/XPathParser.java 
Mon Jun 29 05:26:52 2015
@@ -23,98 +23,104 @@ import java.util.Map;
  * Parser for a very simple XPath subset. Only the following XPath constructs
  * (with namespaces) are supported:
  * <ul>
- *   <li><code>.../node()</code></li>
- *   <li><code>.../text()</code></li>
- *   <li><code>.../@*</code></li>
- *   <li><code>.../@name</code></li>
- *   <li><code>.../*...</code></li>
- *   <li><code>.../name...</code></li>
- *   <li><code>...//*...</code></li>
- *   <li><code>...//name...</code></li>
+ * <li><code>.../node()</code></li>
+ * <li><code>.../text()</code></li>
+ * <li><code>.../@*</code></li>
+ * <li><code>.../@name</code></li>
+ * <li><code>.../*...</code></li>
+ * <li><code>.../name...</code></li>
+ * <li><code>...//*...</code></li>
+ * <li><code>...//name...</code></li>
  * </ul>
  * <p>
- * In addition the non-abbreviated <code>.../descendant::node()</code>
- * construct can be used for cases where the descendant-or-self axis
- * used by the <code>...//node()</code> construct is not appropriate.
+ * In addition the non-abbreviated <code>.../descendant::node()</code> 
construct
+ * can be used for cases where the descendant-or-self axis used by the
+ * <code>...//node()</code> construct is not appropriate.
  */
 public class XPathParser {
 
-    private final Map<String, String> prefixes = new HashMap<String, String>();
+       private final Map<String, String> prefixes = new HashMap<String, 
String>();
 
-    public XPathParser() {
-    }
+       public XPathParser() {
+       }
 
-    public XPathParser(String prefix, String namespace) {
-        addPrefix(prefix, namespace);
-    }
-
-    public void addPrefix(String prefix, String namespace) {
-        prefixes.put(prefix, namespace);
-    }
-
-    /**
-     * Parses the given simple XPath expression to an evaluation state
-     * initialized at the document node. Invalid expressions are not flagged
-     * as errors, they just result in a failing evaluation state.
-     *
-     * @param xpath simple XPath expression
-     * @return XPath evaluation state
-     */
-    public Matcher parse(String xpath) {
-        if (xpath.equals("/text()")) {
-            return TextMatcher.INSTANCE;
-        } else if (xpath.equals("/node()")) {
-            return NodeMatcher.INSTANCE;
-        } else if (xpath.equals("/descendant::node()")
-                || xpath.equals("/descendant:node()")) { // for compatibility
-            return new CompositeMatcher(
-                    TextMatcher.INSTANCE,
-                    new ChildMatcher(new 
SubtreeMatcher(NodeMatcher.INSTANCE)));
-        } else if (xpath.equals("/@*")) {
-            return AttributeMatcher.INSTANCE;
-        } else if (xpath.length() == 0) {
-            return ElementMatcher.INSTANCE;
-        } else if (xpath.startsWith("/@")) {
-            String name = xpath.substring(2);
-            String prefix = null;
-            int colon = name.indexOf(':');
-            if (colon != -1) {
-                prefix = name.substring(0, colon);
-                name = name.substring(colon + 1);
-            }
-            if (prefixes.containsKey(prefix)) {
-                return new NamedAttributeMatcher(prefixes.get(prefix), name);
-            } else {
-                return Matcher.FAIL;
-            }
-        } else if (xpath.startsWith("/*")) {
-            return new ChildMatcher(parse(xpath.substring(2)));
-        } else if (xpath.startsWith("///")) {
-            return Matcher.FAIL;
-        } else if (xpath.startsWith("//")) {
-            return new SubtreeMatcher(parse(xpath.substring(1)));
-        } else if (xpath.startsWith("/")) {
-            int slash = xpath.indexOf('/', 1);
-            if (slash == -1) {
-                slash = xpath.length();
-            }
-            String name = xpath.substring(1, slash);
-            String prefix = null;
-            int colon = name.indexOf(':');
-            if (colon != -1) {
-                prefix = name.substring(0, colon);
-                name = name.substring(colon + 1);
-            }
-            if (prefixes.containsKey(prefix)) {
-                return new NamedElementMatcher(
-                        prefixes.get(prefix), name,
-                        parse(xpath.substring(slash)));
-            } else {
-                return Matcher.FAIL;
-            }
-        } else {
-            return Matcher.FAIL;
-        }
-    }
+       public XPathParser(String prefix, String namespace) {
+               addPrefix(prefix, namespace);
+       }
+
+       public void addPrefix(String prefix, String namespace) {
+               prefixes.put(prefix, namespace);
+       }
+
+       /**
+        * Parses the given simple XPath expression to an evaluation state
+        * initialized at the document node. Invalid expressions are not 
flagged as
+        * errors, they just result in a failing evaluation state.
+        *
+        * @param xpath
+        *            simple XPath expression
+        * @return XPath evaluation state
+        */
+       public Matcher parse(String xpath) {
+               if (xpath.equals("/text()")) {
+                       return TextMatcher.INSTANCE;
+               } else if (xpath.equals("/node()")) {
+                       return new CompositeMatcher(TextMatcher.INSTANCE, new 
ChildMatcher(
+                                       ElementMatcher.INSTANCE));
+               } else if (xpath.equals("/descendant::node()")
+                               || xpath.equals("/descendant:node()")) { // for 
compatibility
+                       return new CompositeMatcher(TextMatcher.INSTANCE, new 
ChildMatcher(
+                                       new 
SubtreeMatcher(NodeMatcher.INSTANCE)));
+               } else if (xpath.equals("/descendant-or-self::node()")) {
+                       // equiv. to "//node()"
+                       return new SubtreeMatcher(new CompositeMatcher(
+                                       TextMatcher.INSTANCE, 
ElementMatcher.INSTANCE));
+               } else if (xpath.equals("/@*")) {
+                       return AttributeMatcher.INSTANCE;
+               } else if (xpath.length() == 0) {
+                       return ElementMatcher.INSTANCE;
+               } else if (xpath.startsWith("/@")) {
+                       String name = xpath.substring(2);
+                       String prefix = null;
+                       int colon = name.indexOf(':');
+                       if (colon != -1) {
+                               prefix = name.substring(0, colon);
+                               name = name.substring(colon + 1);
+                       }
+                       if (prefixes.containsKey(prefix)) {
+                               return new 
NamedAttributeMatcher(prefixes.get(prefix), name);
+                       } else {
+                               return Matcher.FAIL;
+                       }
+               } else if (xpath.startsWith("/*")) {
+                       return new ChildMatcher(parse(xpath.substring(2)));
+               } else if (xpath.startsWith("///")) {
+                       return Matcher.FAIL;
+               } else if (xpath.startsWith("//")) {
+                       return new SubtreeMatcher(parse(xpath.substring(1)));
+               } else if (xpath.startsWith("/descendant-or-self::node()/")) {
+                       return new SubtreeMatcher(parse(xpath.substring(27)));
+               } else if (xpath.startsWith("/")) {
+                       int slash = xpath.indexOf('/', 1);
+                       if (slash == -1) {
+                               slash = xpath.length();
+                       }
+                       String name = xpath.substring(1, slash);
+                       String prefix = null;
+                       int colon = name.indexOf(':');
+                       if (colon != -1) {
+                               prefix = name.substring(0, colon);
+                               name = name.substring(colon + 1);
+                       }
+                       if (prefixes.containsKey(prefix)) {
+                               return new 
NamedElementMatcher(prefixes.get(prefix), name,
+                                               parse(xpath.substring(slash)));
+                       } else {
+                               return Matcher.FAIL;
+                       }
+               } else {
+                       return Matcher.FAIL;
+               }
+       }
 
 }

Modified: 
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/xpath/XPathParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/xpath/XPathParserTest.java?rev=1688087&r1=1688086&r2=1688087&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/xpath/XPathParserTest.java
 (original)
+++ 
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/xpath/XPathParserTest.java
 Mon Jun 29 05:26:52 2015
@@ -79,6 +79,99 @@ public class XPathParserTest {
     }
 
     @Test
+    public void testNode() {
+
+        // node() = child::node() selects all the children of the context 
node. Note that no attribute nodes are returned,
+        // because attributes are not children.
+
+        Matcher matcher = parser.parse("/node()");
+        assertTrue(matcher.matchesText());
+        assertFalse(matcher.matchesElement());
+        assertFalse(matcher.matchesAttribute(null, "name"));
+        assertFalse(matcher.matchesAttribute(NS, "name"));
+        assertFalse(matcher.matchesAttribute(NS, "eman"));
+        
+        matcher = matcher.descend(NS, "name");
+        assertFalse(matcher.matchesText());
+        assertTrue(matcher.matchesElement());
+        assertFalse(matcher.matchesAttribute(null, "name"));
+        assertFalse(matcher.matchesAttribute(NS, "name"));
+        assertFalse(matcher.matchesAttribute(NS, "eman"));
+      
+        assertEquals(Matcher.FAIL, matcher.descend(NS, "name"));
+    }
+
+    @Test
+    public void testDescendantNode() {
+
+        // descendant::node() selects all the descendants of the context node. Note 
that no attribute nodes are returned,
+        // because attributes are not children.
+
+        Matcher matcher = parser.parse("/descendant::node()");
+        assertTrue(matcher.matchesText());
+        assertFalse(matcher.matchesElement());
+        assertFalse(matcher.matchesAttribute(null, "name"));
+      
+        matcher = matcher.descend(NS, "name");
+        assertTrue(matcher.matchesText());
+        assertTrue(matcher.matchesElement());
+        assertFalse(matcher.matchesAttribute(null, "name"));
+     
+        matcher = matcher.descend(NS, "name");
+        assertTrue(matcher.matchesText());
+        assertTrue(matcher.matchesElement());
+        assertFalse(matcher.matchesAttribute(null, "name"));
+    }
+
+    @Test
+    public void testDescendantOrSelfNode() {
+
+        Matcher matcher = parser.parse("/descendant-or-self::node()");
+        assertTrue(matcher.matchesText());
+        assertTrue(matcher.matchesElement());
+        assertFalse(matcher.matchesAttribute(NS, "name"));
+       
+        matcher = matcher.descend(NS, "name");
+        assertTrue(matcher.matchesText());
+        assertTrue(matcher.matchesElement());
+        assertFalse(matcher.matchesAttribute(NS, "name"));
+    
+        matcher = matcher.descend(NS, "name");
+        assertTrue(matcher.matchesText());
+        assertTrue(matcher.matchesElement());
+        assertFalse(matcher.matchesAttribute(NS, "name"));
+
+    }
+
+    @Test
+    public void testDescendantOrSelfElement() {
+
+        Matcher matcher = parser.parse("/descendant-or-self::node()/*");
+        // Matcher matcher = parser.parse("//*");
+        assertFalse(matcher.matchesText());
+        assertFalse(matcher.matchesElement());
+        assertFalse(matcher.matchesAttribute(null, "name"));
+        assertFalse(matcher.matchesAttribute(NS, "name"));
+        assertFalse(matcher.matchesAttribute(NS, "eman"));
+       
+        matcher = matcher.descend(NS, "name");
+        assertFalse(matcher.matchesText());
+        assertTrue(matcher.matchesElement());
+        assertFalse(matcher.matchesAttribute(null, "name"));
+        assertFalse(matcher.matchesAttribute(NS, "name"));
+        assertFalse(matcher.matchesAttribute(NS, "eman"));
+          
+        matcher = matcher.descend(NS, "name");
+        assertFalse(matcher.matchesText());
+        assertTrue(matcher.matchesElement());
+        assertFalse(matcher.matchesAttribute(null, "name"));
+        assertFalse(matcher.matchesAttribute(NS, "name"));
+        assertFalse(matcher.matchesAttribute(NS, "eman"));
+
+    }
+
+    
+    @Test
     public void testAnyElement() {
         Matcher matcher = parser.parse("/*");
         assertFalse(matcher.matchesText());


Reply via email to