TestXPathRecordReader.java

noble Tue, 06 Oct 2009 00:43:30 -0700

Author: noble
Date: Tue Oct  6 07:42:28 2009
New Revision: 822154

URL: http://svn.apache.org/viewvc?rev=822154&view=rev
Log:
SOLR-1437 DIH: Enhance XPathRecordReader to deal with //tagname and other 
improvments.


Modified:
    lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt
    
lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
    
lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java

Modified: lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt?rev=822154&r1=822153&r2=822154&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt Tue Oct  6 07:42:28 
2009
@@ -178,6 +178,8 @@
 5. SOLR-1465: Replaced string concatenations with StringBuilder append calls 
in XPathRecordReader.
               (Mark Miller, shalin)
 
+6.SOLR-1437 : XPathEntityProcessor can deal with xpath syntaxes such as 
//tagname , /root//tagname (Fergus McMenemie via noble)
+
 Bug Fixes
 ----------------------
 1. SOLR-800:  Deep copy collections to avoid ConcurrentModificationException 
in XPathEntityprocessor while streaming

Modified: 
lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
URL: 
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java?rev=822154&r1=822153&r2=822154&view=diff
==============================================================================
--- 
lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
 (original)
+++ 
lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
 Tue Oct  6 07:42:28 2009
@@ -34,9 +34,12 @@
  * /a/b/subje...@qualifier='fullTitle']
  * /a/b/subje...@qualifier=]/subtag
  * /a/b/subject/@qualifier
+ * //a
+ * //a/b...
+ * /a//b
+ * /a//b...
  * /a/b/c
  * </pre>
- * Keep in mind that the wild-card syntax  '//' is not supported
  * A record is a Map<String,Object> . The key is the provided name
  * and the value is a String or a List<String>
  *
@@ -52,6 +55,7 @@
  */
 public class XPathRecordReader {
   private Node rootNode = new Node("/", null);
+
   /** 
    * The FLATTEN flag indicates that all text and cdata under a specific
    * tag should be recursivly fetched and appended to the current Node's
@@ -61,23 +65,25 @@
 
   /**
    * A constructor called with a '|' seperated list of Xpath expressions
-   * which define sub sections of the XML stream that are to be emitted
+   * which define sub sections of the XML stream that are to be emitted as
    * seperate records.
    * 
    * @param forEachXpath  The XPATH for which a record is emitted. Once the
    * xpath tag is encountered, the Node.parse method starts collecting wanted 
    * fields and at the close of the tag, a record is emitted containing all 
    * fields collected since the tag start. Once 
-   * emitted the collected fields are cleared. Any fields collected in the 
parent tag or above
-   * will also be included in the record, but these are not
-   * cleared after emitting the record.
-
+   * emitted the collected fields are cleared. Any fields collected in the 
+   * parent tag or above will also be included in the record, but these are
+   * not cleared after emitting the record.
+   *
    * It uses the ' | ' syntax of XPATH to pass in multiple xpaths.
    */
   public XPathRecordReader(String forEachXpath) {
     String[] splits = forEachXpath.split("\\|");
     for (String split : splits) {
       split = split.trim();
+      if (split.startsWith("//"))
+         throw new RuntimeException("forEach cannot start with '//': " + 
split);
       if (split.length() == 0)
         continue;
       // The created Node has a name set to the full forEach attribute xpath
@@ -86,9 +92,9 @@
   }
 
   /**
-   * A wrapper around {...@link #addField0 addField0()} to create a series of 
Nodes 
-   * based on the supplied Xpath for the given fieldName. The created nodes 
-   * are inserted into a Node tree.
+   * A wrapper around {...@link #addField0 addField0()} to create a series of  
+   * Nodes based on the supplied Xpath and a given fieldName. The created  
+   * nodes are inserted into a Node tree.
    *
    * @param name The name for this field in the emitted record
    * @param xpath The xpath expression for this field
@@ -96,16 +102,14 @@
    *                    a List<String>
    */
   public synchronized XPathRecordReader addField(String name, String xpath, 
boolean multiValued) {
-    if (!xpath.startsWith("/"))
-      throw new RuntimeException("xpath must start with '/' : " + xpath);
     addField0(xpath, name, multiValued, false, 0);
     return this;
   }
 
   /**
-   * A wrapper around {...@link #addField0 addField0()} to create a series of 
Nodes 
-   * based on the supplied Xpath for the given fieldName. The created nodes 
-   * are inserted into a Node tree.
+   * A wrapper around {...@link #addField0 addField0()} to create a series of  
+   * Nodes based on the supplied Xpath and a given fieldName. The created  
+   * nodes are inserted into a Node tree.
    *
    * @param name The name for this field in the emitted record
    * @param xpath The xpath expression for this field
@@ -114,8 +118,6 @@
    * @param flags FLATTEN: Recursivly combine text from all child XML elements
    */
   public synchronized XPathRecordReader addField(String name, String xpath, 
boolean multiValued, int flags) {
-    if (!xpath.startsWith("/"))
-      throw new RuntimeException("xpath must start with '/' : " + xpath);
     addField0(xpath, name, multiValued, false, flags);
     return this;
   }
@@ -129,24 +131,28 @@
    * @param name The name for this field in the emitted record
    * @param multiValued If 'true' then the emitted record will have values in 
    *                    a List<String>
-   * @param isRecord When 'true' flags that this XPATH is from a forEach 
statement
+   * @param isRecord Flags that this XPATH is from a forEach statement
    * @param flags The only supported flag is 'FLATTEN'
    */
   private void addField0(String xpath, String name, boolean multiValued,
                          boolean isRecord, int flags) {
+    if (!xpath.startsWith("/"))
+      throw new RuntimeException("xpath must start with '/' : " + xpath);
     List<String> paths = splitEscapeQuote(xpath);
+    // deal with how split behaves when seperator starts a string!
     if ("".equals(paths.get(0).trim()))
       paths.remove(0);
     rootNode.build(paths, name, multiValued, isRecord, flags);
+    rootNode.buildOptimise(null);
   }
 
   /** 
-   * Uses {...@link #streamRecords streamRecords} to parse the XML source but 
-   * collects the emitted records into a List which is returned upon 
completion.
+   * Uses {...@link #streamRecords streamRecords} to parse the XML source but 
with
+   * a handler that collects all the emitted records into a single List which 
+   * is returned upon completion.
    *
    * @param r the stream reader
    * @return results a List of emitted records
-   *
    */
   public List<Map<String, Object>> getAllRecords(Reader r) {
     final List<Map<String, Object>> results = new ArrayList<Map<String, 
Object>>();
@@ -181,21 +187,22 @@
    * For each node/leaf in the Node tree there is one object of this class.
    * This tree of objects represents all the XPaths we are interested in.
    * For each Xpath segment of interest we create a node. In most cases the
-   * node (branch) is rather basic , but for the final portion (leaf) of any 
Xpath  we add
-   * more information to the Node. When parsing the XML document we
-   * step though this tree as we stream records from the reader. If the XML
+   * node (branch) is rather basic , but for the final portion (leaf) of any
+   * Xpath we add more information to the Node. When parsing the XML document 
+   * we step though this tree as we stream records from the reader. If the XML
    * document departs from this tree we skip start tags till we are back on 
    * the tree.
-   *
    */
-  private class Node {
+  private static class Node {
     String name;      // genrally: segment of the Xpath represented by this 
Node
     String fieldName; // the fieldname in the emitted record (key of the map)
     String xpathName; // the segment of the Xpath represented by this Node
     String forEachPath; // the full Xpath from the forEach entity attribute
-    List<Node> attributes; // a List of attribute Nodes associated with this 
Node
-    List<Node> childNodes; // a List of child Nodes of this node
+    List<Node> attributes; // List of attribute Nodes associated with this Node
+    List<Node> childNodes; // List of immediate child Nodes of this node
+    List<Node> wildCardNodes; // List of '//' style decendants of this Node
     List<Map.Entry<String, String>> attribAndValues;
+    Node wildAncestor; // ancestor Node containing '//' style decendants 
     Node parent; // parent Node in the tree
     boolean hasText=false; // flag: store/emit streamed text for this node
     boolean multiValued=false; //flag: this fields values are returned as a 
List
@@ -205,7 +212,7 @@
 
     public Node(String name, Node p) {
       // Create a basic Node, suitable for the mid portions of any Xpath.
-      // Node.xpathName and Node.name are set to same value
+      // Node.xpathName and Node.name are set to same value.
       xpathName = this.name = name;
       parent = p;
     }
@@ -222,13 +229,17 @@
      * tag/subtag read from the source, this method is called recursively.
      *
      */
-    private void parse(XMLStreamReader parser, Handler handler,
-                       Map<String, Object> values, Stack<Set<String>> stack,
-                       boolean recordStarted) throws IOException, 
XMLStreamException {
+    private void parse(XMLStreamReader parser, 
+                       Handler handler,
+                       Map<String, Object> values, 
+                       Stack<Set<String>> stack, // lists of values to purge
+                       boolean recordStarted
+                       ) throws IOException, XMLStreamException {
       Set<String> valuesAddedinThisFrame = null;
       if (isRecord) {
         // This Node is a match for an XPATH from a forEach attribute, 
-        // prepare to emit a new record when its END_ELEMENT is matched 
+        // prepare for the clean up that will occurr when the record
+        // is emitted after its END_ELEMENT is matched 
         recordStarted = true;
         valuesAddedinThisFrame = new HashSet<String>();
         stack.push(valuesAddedinThisFrame);
@@ -236,15 +247,16 @@
         // This node is a child of some parent which matched against forEach 
         // attribute. Continue to add values to an existing record.
         valuesAddedinThisFrame = stack.peek();
-      } else {
-        //if this tag has an attribute or text which is a brank/leaf just push 
an item up the stack
-        if (attributes != null || hasText)
-          valuesAddedinThisFrame = new HashSet<String>();
-        stack.push(valuesAddedinThisFrame);
       }
 
       try {
+        /* The input stream has deposited us at this Node in our tree of 
+         * intresting nodes. Depending on how this node is of interest,
+         * process further tokens from the input stream and decide what
+         * we do next
+         */
         if (attributes != null) {
+          // we interested in storing attributes from the input stream
           for (Node node : attributes) {
             String value = parser.getAttributeValue(null, node.name);
             if (value != null || (recordStarted && !isRecord)) {
@@ -255,122 +267,133 @@
         }
 
         Set<Node> childrenFound = new HashSet<Node>();
-        // Internally we have to gobble CDATA | CHARACTERS | SPACE events as we
-        // store text, the gobbling continues till we have fetched some other 
-        // event. We use "isNextEventFetched" to indcate that the gobbling has
-        // already fetched the next event.
-        boolean isNextEventFetched = false;
         int event = -1;
+        int flattenedStarts=0; // our tag depth when flattening elements
+        StringBuilder text = new StringBuilder();
 
-        while (true) {
-          if (!isNextEventFetched) {
-            event = parser.next();
-            isNextEventFetched = false;
-          }
-          if (event == END_DOCUMENT) {
-            return;
-          }
+        while (true) {  
+          event = parser.next();
+   
           if (event == END_ELEMENT) {
-            if (isRecord)
-              handler.handle(getDeepCopy(values), forEachPath);
-            if (recordStarted && !isRecord
-                    && !childrenFound.containsAll(childNodes)) {
-              for (Node n : childNodes) {
-                if (!childrenFound.contains(n))
-                  n.putNulls(values);
+            if (flattenedStarts > 0) flattenedStarts--;
+            else {
+              if (text.length() > 0 && valuesAddedinThisFrame != null) {
+                valuesAddedinThisFrame.add(fieldName);
+                putText(values, text.toString(), fieldName, multiValued);
               }
-            }
-            return;
-          }
-          if ((event == CDATA || event == CHARACTERS || event == SPACE)
-                  && hasText) {
-            valuesAddedinThisFrame.add(fieldName);
-            // becuase we are fetching events here we need to ensure the outer
-            // loop does not end up doing an extra parser.next()
-            isNextEventFetched = true;
-            StringBuilder text = new StringBuilder(parser.getText());
-            event = parser.next();
-
-            while (true) {
-              if(event == CDATA || event == CHARACTERS || event == SPACE) {
-                text.append(parser.getText());
-              } else if(event == START_ELEMENT) {
-                if (flatten) {
-                  int starts = 1;
-                  while (true) {
-                    event = parser.next();
-                    if (event == CDATA || event == CHARACTERS || event == 
SPACE) {
-                      text.append(parser.getText());
-                    } else if (event == START_ELEMENT) {
-                      starts++;
-                    } else if (event == END_ELEMENT) {
-                      starts--;
-                      if (starts == 0) break;
-                    }
-                  }
-                } else {
-                  // We are not flatten-ing, so look to see if any of the child
-                  // elements are wanted, and recurse if any are found.
-                  handleStartElement(parser, childrenFound, handler, values, 
stack, recordStarted);
+              if (isRecord) handler.handle(getDeepCopy(values), forEachPath);
+              if (childNodes != null && recordStarted && !isRecord && 
!childrenFound.containsAll(childNodes)) {
+                // nonReccord nodes where we have not collected text for ALL
+                // the child nodes.
+                for (Node n : childNodes) {
+                  // For the multivalue child nodes where we could have, but
+                  // didnt, collect text. Push a null string into values.
+                  if (!childrenFound.contains(n)) n.putNulls(values);
                 }
-              } else {
-                break;
               }
-              event = parser.next();
+              return;
             }
-            // save the text we have read against the fieldName in the Map 
values
-            putText(values, text.toString(), fieldName, multiValued);
-          } else if (event == START_ELEMENT) {
-            handleStartElement(parser, childrenFound, handler, values, stack, 
recordStarted);
           }
-        }
-      } finally {
-        /*If a record has ended  (tag closed) then clearup all the fields found
-        in this record after this tag started */
-        Set<String> cleanThis = null;
-        if (isRecord || !recordStarted) {
-          cleanThis = stack.pop();
-        } else {
-          return;
-        }
-        if (cleanThis != null) {
-          for (String fld : cleanThis) {
-            values.remove(fld);
+          else if (hasText && (event==CDATA || event==CHARACTERS || 
event==SPACE)) {
+            text.append(parser.getText());
+          } 
+          else if (event == START_ELEMENT) {
+            if ( flatten ) 
+               flattenedStarts++;
+            else 
+               handleStartElement(parser, childrenFound, handler, values, 
stack, recordStarted);
+          }
+          // END_DOCUMENT is least likely to appear and should be 
+          // last in if-then-else skip chain
+          else if (event == END_DOCUMENT) return;
+          }
+        }finally {
+        if ((isRecord || !recordStarted) && !stack.empty()) {
+          Set<String> cleanThis = stack.pop();
+          if (cleanThis != null) {
+            for (String fld : cleanThis) values.remove(fld);
           }
         }
       }
     }
 
-    /**if a new tag is encountered, check if it is of interest of not (if 
there is a matching child Node).
-     * if yes continue parsing else skip
+    /**
+     * If a new tag is encountered, check if it is of interest or not by seeing
+     * if it matches against our node tree. If we have deperted from the node 
+     * tree then walk back though the tree's ancestor nodes checking to see if
+     * any // expressions exist for the node and compare them against the new
+     * tag. If matched then "jump" to that node, otherwise ignore the tag.
+     *
+     * Note, the list of // expressions found while walking back up the tree
+     * is chached in the HashMap decends. Then if the new tag is to be skipped,
+     * any inner chil tags are compared against the cache and jumped to if
+     * matched.
      */
     private void handleStartElement(XMLStreamReader parser, Set<Node> 
childrenFound,
                                     Handler handler, Map<String, Object> 
values,
                                     Stack<Set<String>> stack, boolean 
recordStarted)
             throws IOException, XMLStreamException {
-      Node n = getMatchingChild(parser);
+      Node n = getMatchingNode(parser,childNodes);
+      Map<String, Object> decends=new HashMap<String, Object>();
       if (n != null) {
         childrenFound.add(n);
         n.parse(parser, handler, values, stack, recordStarted);
+        return;
         }
-      else {
-        // skip ELEMENTS till source document is back within the tree
-        int count=1; // we have had our first START_ELEMENT
-        while ( count != 0 ) {
-          int token = parser.next();
-          if (token == START_ELEMENT) count++;
-          else if (token == END_ELEMENT)  count--;
+      // The stream has diverged from the tree of interesting elements, but
+      // are there any wildCardNodes ... anywhere in our path from the root?
+      Node dn = this; // checking our Node first!
+            
+      do {
+        if (dn.wildCardNodes != null) {
+          // Check to see if the streams tag matches one of the "//" all
+          // decendents type expressions for this node.
+          n = getMatchingNode(parser, dn.wildCardNodes);
+          if (n != null) {
+            childrenFound.add(n);
+            n.parse(parser, handler, values, stack, recordStarted);
+            break;
           }
+          // add the list of this nodes wild decendents to the cache
+          for (Node nn : dn.wildCardNodes) decends.put(nn.name, nn);
+        }
+        dn = dn.wildAncestor; // leap back along the tree toward root
+      } while (dn != null) ;
+ 
+      if (n == null) {
+        // we have a START_ELEMENT which is not within the tree of
+        // interesting nodes. Skip over the contents of this element
+        // but recursivly repeat the above for any START_ELEMENTs
+        // found within this element.
+        int count = 1; // we have had our first START_ELEMENT
+        while (count != 0) {
+          int token = parser.next();
+          if (token == START_ELEMENT) {
+            Node nn = (Node) decends.get(parser.getLocalName());
+            if (nn != null) {
+              // We have a //Node which matches the stream's parser.localName
+              childrenFound.add(nn);
+              // Parse the contents of this stream element
+              nn.parse(parser, handler, values, stack, recordStarted);
+            } 
+            else count++;
+          } 
+          else if (token == END_ELEMENT) count--;
+        }
       }
     }
 
-    /**check if the current tag is to be parsed or not. if yes return the Node 
object
+
+    /**
+     * Check if the current tag is to be parsed or not. We step through the
+     * supplied List "searchList" looking for a match. If matched, return the
+     * Node object.
      */
-    private Node getMatchingChild(XMLStreamReader parser) {
-      if (childNodes == null)
+    private Node getMatchingNode(XMLStreamReader parser,List<Node> searchL){
+      if (searchL == null)
         return null;
       String localName = parser.getLocalName();
-      for (Node n : childNodes) {
+      for (Node n : searchL) {
         if (n.name.equals(localName)) {
           if (n.attribAndValues == null)
             return n;
@@ -389,14 +412,14 @@
           return false;
         if (e.getValue() != null && !e.getValue().equals(val))
           return false;
-
       }
       return true;
     }
 
     /**
      * A recursive routine that walks the Node tree from a supplied start
-     * pushing a null string onto every multiValued fieldName's List of values.
+     * pushing a null string onto every multiValued fieldName's List of values
+     * where a value has not been provided from the stream.
      */
     private void putNulls(Map<String, Object> values) {
       if (attributes != null) {
@@ -414,10 +437,10 @@
     }
 
     /**
-     * Add the field name and text into the values Map. If it is a non 
multivalued field, then the text
-     * is simply placed in the object portion of the Map. If it is a
-     * multivalued field then the text is pushed onto a List which is
-     * the object portion of the Map.
+     * Add the field name and text into the values Map. If it is a non
+     * multivalued field, then the text is simply placed in the object
+     * portion of the Map. If it is a multivalued field then the text is
+     * pushed onto a List which is the object portion of the Map.
      */
     @SuppressWarnings("unchecked")
     private void putText(Map<String, Object> values, String value,
@@ -436,13 +459,24 @@
 
 
     /**
+     * Walk the Node tree propagating any wildDescentant information to
+     * child nodes. This allows us to optimise the performance of the
+     * main parse method.
+     */
+    private void buildOptimise(Node wa) {
+     wildAncestor=wa;
+     if ( wildCardNodes != null ) wa = this;
+     if ( childNodes != null )
+       for ( Node n : childNodes ) n.buildOptimise(wa);
+     }
+
+    /**
      * Build a Node tree structure representing all Xpaths of intrest to us.
      * This must be done before parsing of the XML stream starts. Each node 
      * holds one portion of an Xpath. Taking each Xpath segment in turn this
      * method walks the Node tree  and finds where the new segment should be
      * inserted. It creates a Node representing a field's name, XPATH and 
      * some flags and inserts the Node into the Node tree.
-     *
      */
     private void build(
         List<String> paths,   // a List of segments from the split xpaths
@@ -452,27 +486,46 @@
         int flags             // are we to flatten matching xpaths
         ) {
       // recursivly walk the paths Lists adding new Nodes as required
-      String name = paths.remove(0); // shift out next Xpath segment
-      if (paths.isEmpty() && name.startsWith("@")) {
+      String xpseg = paths.remove(0); // shift out next Xpath segment
+
+      if (paths.isEmpty() && xpseg.startsWith("@")) {
         // we have reached end of element portion of Xpath and can now only
         // have an element attribute. Add it to this nodes list of attributes
         if (attributes == null) {
           attributes = new ArrayList<Node>();
         }
-        name = name.substring(1); // strip the '@'
-        attributes.add(new Node(name, fieldName, multiValued));
-
-      } else {
+        xpseg = xpseg.substring(1); // strip the '@'
+        attributes.add(new Node(xpseg, fieldName, multiValued));
+      }
+      else if ( xpseg.length() == 0) {
+        // we have a '//' selector for all decendents of the current nodes
+        xpseg = paths.remove(0); // shift out next Xpath segment
+        if (wildCardNodes == null) wildCardNodes = new ArrayList<Node>();
+        Node n = getOrAddNode(xpseg, wildCardNodes);
+        if (paths.isEmpty()) {
+          // We are current a leaf node.
+          // xpath with content we want to store and return
+          n.hasText = true;        // we have to store text found here
+          n.fieldName = fieldName; // name to store collected text against
+          n.multiValued = multiValued; // true: text be stored in a List
+          n.flatten = flags == FLATTEN; // true: store text from child tags
+        }
+        else {
+          // recurse to handle next paths segment
+          n.build(paths, fieldName, multiValued, record, flags);
+        }
+      }
+      else {
         if (childNodes == null)
           childNodes = new ArrayList<Node>();
         // does this "name" already exist as a child node.
-        Node n = getOrAddChildNode(name);
+        Node n = getOrAddNode(xpseg,childNodes);
         if (paths.isEmpty()) {
-          // We have reached the end of paths. When parsing the actual
-          // input we have traversed to a position where we actutally have to
-          // do something. getOrAddChildNode() will have created and returned
-          // a new minimal Node with name and xpathName already populated. We
-          // need to add more information
+          // We have emptied paths, we are for the moment a leaf of the tree.
+          // When parsing the actual input we have traversed to a position 
+          // where we actutally have to do something. getOrAddNode() will
+          // have created and returned a new minimal Node with name and
+          // xpathName already populated. We need to add more information.
           if (record) {
             // forEach attribute
             n.isRecord = true; // flag: forEach attribute, prepare to emit rec
@@ -491,10 +544,9 @@
       }
     }
 
-    private Node getOrAddChildNode(String xpathName) {
-      for (Node n : childNodes)
-        if (n.xpathName.equals(xpathName))
-          return n;
+    private Node getOrAddNode(String xpathName, List<Node> searchList ) {
+      for (Node n : searchList)
+        if (n.xpathName.equals(xpathName)) return n;
       // new territory! add a new node for this Xpath bitty
       Node n = new Node(xpathName, this); // a minimal Node initalization
       Matcher m = ATTRIB_PRESENT_WITHVAL.matcher(xpathName);
@@ -510,49 +562,54 @@
           if (n.attribAndValues == null)
             n.attribAndValues = new ArrayList<Map.Entry<String, String>>();
           n.attribAndValues.addAll(attribs.entrySet());
-
         }
       }
-      childNodes.add(n);
+      searchList.add(n);
       return n;
     }
-  } // end of class Node
-
 
-  /**
-   * Copies a supplied Map to a new Map which is returned. Used to copy a 
-   * records values. If a fields value is a List then they have to be 
-   * deep-copied for thread safety
-   */
-  private Map<String, Object> getDeepCopy(Map<String, Object> values) {
-    Map<String, Object> result = new HashMap<String, Object>();
-    for (Map.Entry<String, Object> entry : values.entrySet()) {
-      if (entry.getValue() instanceof List) {
-        result.put(entry.getKey(),new ArrayList((List) entry.getValue()));
-      } else {
-        result.put(entry.getKey(),entry.getValue());
+    /**
+     * Copies a supplied Map to a new Map which is returned. Used to copy a
+     * records values. If a fields value is a List then they have to be
+     * deep-copied for thread safety
+     */
+    private static Map<String, Object> getDeepCopy(Map<String, Object> values) 
{
+      Map<String, Object> result = new HashMap<String, Object>();
+      for (Map.Entry<String, Object> entry : values.entrySet()) {
+        if (entry.getValue() instanceof List) {
+          result.put(entry.getKey(), new ArrayList((List) entry.getValue()));
+        } else {
+          result.put(entry.getKey(), entry.getValue());
+        }
       }
+      return result;
     }
-    return result;
-  }
+  } // end of class Node
+
 
   /**
-   * The Xpath is split into segments using the '/' s a seperator. However
+   * The Xpath is split into segments using the '/' as a seperator. However
    * this method deals with special cases where there is a slash '/' character
-   * inside the attribute value e.g. x/@html='text/html'. We need to split
-   * by '/' excluding the '/' which is a part of the attribute's value.
+   * inside the attribute value e.g. x/@html='text/html'. We split by '/' but 
+   * then reassemble things were the '/' appears within a quoted sub-string.
+   *
+   * We have already enforced that the string must begin with a seperator. This
+   * method depends heavily on how split behaves if the string starts with the
+   * seperator or if a sequence of multiple seperator's appear. 
    */
   private static List<String> splitEscapeQuote(String str) {
     List<String> result = new LinkedList<String>();
     String[] ss = str.split("/");
-    for (int i = 0; i < ss.length; i++) {
-      if (ss[i].length() == 0 && result.size() == 0) continue;
+    for (int i=0; i<ss.length; i++) { // i=1: skip seperator at start of string
       StringBuilder sb = new StringBuilder();
       int quoteCount = 0;
       while (true) {
         sb.append(ss[i]);
-        for (int j = 0; j < ss[i].length(); j++) if (ss[i].charAt(j) == '\'') 
quoteCount++;
+        for (int j=0; j<ss[i].length(); j++)
+            if (ss[i].charAt(j) == '\'') quoteCount++;
+        // have we got a split inside quoted sub-string?
         if ((quoteCount % 2) == 0) break;
+        // yes!; replace the '/' and loop to concat next token
         i++;
         sb.append("/");
       }
@@ -576,7 +633,8 @@
      * the addField() methods. The value can be a single String (for single 
      * valued fields) or a List<String> (for multiValued).
      * @param xpath The forEach XPATH for which this record is being emitted
-     * If there is any change all parsing will be aborted and the Exception is 
propogated up
+     * If there is any change all parsing will be aborted and the Exception
+     * is propogated up
      */
     public void handle(Map<String, Object> record, String xpath);
   }

Modified: 
lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java
URL: 
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java?rev=822154&r1=822153&r2=822154&view=diff
==============================================================================
--- 
lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java
 (original)
+++ 
lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java
 Tue Oct  6 07:42:28 2009
@@ -33,9 +33,13 @@
 public class TestXPathRecordReader {
   @Test
   public void basic() {
-    String xml = "<root>\n" + "   <b>\n" + "     <c>Hello C1</c>\n"
-            + "     <c>Hello C1</c>\n" + "   </b>\n" + "   <b>\n"
-            + "     <c>Hello C2</c>\n" + "   </b>\n" + "</root>";
+    String xml="<root>\n"
+             + "   <b><c>Hello C1</c>\n"
+             + "      <c>Hello C1</c>\n"
+             + "      </b>\n"
+             + "   <b><c>Hello C2</c>\n"
+             + "     </b>\n"
+             + "</root>";
     XPathRecordReader rr = new XPathRecordReader("/root/b");
     rr.addField("c", "/root/b/c", true);
     List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
@@ -46,8 +50,10 @@
 
   @Test
   public void attributes() {
-    String xml = "<root>\n" + "   <b a=\"x0\" b=\"y0\" />\n"
-            + "   <b a=\"x1\" b=\"y1\" />\n" + "   <b a=\"x2\" b=\"y2\" />\n"
+    String xml="<root>\n"
+             + "   <b a=\"x0\" b=\"y0\" />\n"
+             + "   <b a=\"x1\" b=\"y1\" />\n"
+             + "   <b a=\"x2\" b=\"y2\" />\n"
             + "</root>";
     XPathRecordReader rr = new XPathRecordReader("/root/b");
     rr.addField("a", "/root/b/@a", false);
@@ -55,22 +61,26 @@
     List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
     Assert.assertEquals(3, l.size());
     Assert.assertEquals("x0", l.get(0).get("a"));
+    Assert.assertEquals("x1", l.get(1).get("a"));
+    Assert.assertEquals("x2", l.get(2).get("a"));
+    Assert.assertEquals("y0", l.get(0).get("b"));
     Assert.assertEquals("y1", l.get(1).get("b"));
+    Assert.assertEquals("y2", l.get(2).get("b"));
   }
   
   @Test
   public void attrInRoot(){
-    String xml = "<r>\n" +
+    String xml="<r>\n" +
             "<merchantProduct id=\"814636051\" mid=\"189973\">\n" +
             "                   <in_stock type=\"stock-4\" />\n" +
             "                   <condition type=\"cond-0\" />\n" +
             "                   <price>301.46</price>\n" +
-            "</merchantProduct>\n" +
+               "   </merchantProduct>\n" +
             "<merchantProduct id=\"814636052\" mid=\"189974\">\n" +
             "                   <in_stock type=\"stock-5\" />\n" +
             "                   <condition type=\"cond-1\" />\n" +
             "                   <price>302.46</price>\n" +
-            "</merchantProduct>\n" +
+               "   </merchantProduct>\n" +
             "\n" +
             "</r>";
      XPathRecordReader rr = new XPathRecordReader("/r/merchantProduct");
@@ -94,9 +104,12 @@
 
   @Test
   public void attributes2Level() {
-    String xml = "<root>\n" + "<a>\n" + "   <b a=\"x0\" b=\"y0\" />\n"
-            + "   <b a=\"x1\" b=\"y1\" />\n" + "   <b a=\"x2\" b=\"y2\" />\n"
-            + "</a>" + "</root>";
+    String xml="<root>\n"
+             + "<a>\n  <b a=\"x0\" b=\"y0\" />\n"
+             + "       <b a=\"x1\" b=\"y1\" />\n"
+             + "       <b a=\"x2\" b=\"y2\" />\n"
+             + "       </a>"
+             + "</root>";
     XPathRecordReader rr = new XPathRecordReader("/root/a/b");
     rr.addField("a", "/root/a/b/@a", false);
     rr.addField("b", "/root/a/b/@b", false);
@@ -108,11 +121,16 @@
 
   @Test
   public void attributes2LevelHetero() {
-    String xml = "<root>\n" + "<a>\n" + "   <b a=\"x0\" b=\"y0\" />\n"
-            + "   <b a=\"x1\" b=\"y1\" />\n" + "   <b a=\"x2\" b=\"y2\" />\n"
-            + "</a>" + "<x>\n" + "   <b a=\"x4\" b=\"y4\" />\n"
-            + "   <b a=\"x5\" b=\"y5\" />\n" + "   <b a=\"x6\" b=\"y6\" />\n"
-            + "</x>" + "</root>";
+    String xml="<root>\n"
+             + "<a>\n   <b a=\"x0\" b=\"y0\" />\n"
+             + "        <b a=\"x1\" b=\"y1\" />\n"
+             + "        <b a=\"x2\" b=\"y2\" />\n"
+             + "        </a>"
+             + "<x>\n   <b a=\"x4\" b=\"y4\" />\n"
+             + "        <b a=\"x5\" b=\"y5\" />\n"
+             + "        <b a=\"x6\" b=\"y6\" />\n"
+             + "        </x>"
+             + "</root>";
     XPathRecordReader rr = new XPathRecordReader("/root/a | /root/x");
     rr.addField("a", "/root/a/b/@a", false);
     rr.addField("b", "/root/a/b/@b", false);
@@ -123,12 +141,9 @@
     final List<Map<String, Object>> x = new ArrayList<Map<String, Object>>();
     rr.streamRecords(new StringReader(xml), new XPathRecordReader.Handler() {
       public void handle(Map<String, Object> record, String xpath) {
-        if (record == null)
-          return;
-        if (xpath.equals("/root/a"))
-          a.add(record);
-        if (xpath.equals("/root/x"))
-          x.add(record);
+        if (record == null) return;
+        if (xpath.equals("/root/a")) a.add(record);
+        if (xpath.equals("/root/x")) x.add(record);
       }
     });
 
@@ -138,9 +153,14 @@
 
   @Test
   public void attributes2LevelMissingAttrVal() {
-    String xml = "<root>\n" + "<a>\n" + "   <b a=\"x0\" b=\"y0\" />\n"
-            + "   <b a=\"x1\" b=\"y1\" />\n" + "</a>" + "<a>\n"
-            + "   <b a=\"x3\"  />\n" + "   <b b=\"y4\" />\n" + "</a>" + 
"</root>";
+    String xml="<root>\n"
+             + "<a>\n  <b a=\"x0\" b=\"y0\" />\n"
+             + "       <b a=\"x1\" b=\"y1\" />\n"
+             + "       </a>"
+             + "<a>\n  <b a=\"x3\"  />\n"
+             + "       <b b=\"y4\" />\n"
+             + "       </a>"
+             + "</root>";
     XPathRecordReader rr = new XPathRecordReader("/root/a");
     rr.addField("a", "/root/a/b/@a", true);
     rr.addField("b", "/root/a/b/@b", true);
@@ -152,12 +172,20 @@
 
   @Test
   public void elems2LevelMissing() {
-    String xml = "<root>\n" + "\t<a>\n" + "\t   <b>\n" + "\t      <x>x0</x>\n"
-            + "\t      <y>y0</y>\n" + "\t   </b>\n" + "\t   <b>\n"
-            + "\t   \t<x>x1</x>\n" + "\t   \t<y>y1</y>\n" + "\t   </b>\n"
-            + "\t</a>\n" + "\t<a>\n" + "\t   <b>\n" + "\t      <x>x3</x>\n"
-            + "\t   </b>\n" + "\t   <b>\n" + "\t   \t<y>y4</y>\n" + "\t   
</b>\n"
-            + "\t</a>\n" + "</root>";
+    String xml="<root>\n"
+             + "\t<a>\n"
+             + "\t   <b>\n\t  <x>x0</x>\n"
+             + "\t            <y>y0</y>\n"
+             + "\t            </b>\n"
+             + "\t   <b>\n\t  <x>x1</x>\n"
+             + "\t            <y>y1</y>\n"
+             + "\t            </b>\n"
+             + "\t   </a>\n"
+             + "\t<a>\n"
+             + "\t   <b>\n\t  <x>x3</x>\n\t   </b>\n"
+             + "\t   <b>\n\t  <y>y4</y>\n\t   </b>\n"
+             + "\t   </a>\n"
+             + "</root>";
     XPathRecordReader rr = new XPathRecordReader("/root/a");
     rr.addField("a", "/root/a/b/x", true);
     rr.addField("b", "/root/a/b/y", true);
@@ -207,12 +235,23 @@
 
   @Test
   public void elems2LevelWithAttrib() {
-    String xml = "<root>\n" + "\t<a>\n" + "\t   <b k=\"x\">\n"
-            + "\t      <x>x0</x>\n" + "\t      <y>y0</y>\n" + "\t   </b>\n"
-            + "\t   <b k=\"y\">\n" + "\t   \t<x>x1</x>\n" + "\t   
\t<y>y1</y>\n"
-            + "\t   </b>\n" + "\t</a>\n" + "\t<a>\n" + "\t   <b>\n"
-            + "\t      <x>x3</x>\n" + "\t   </b>\n" + "\t   <b>\n"
-            + "\t   \t<y>y4</y>\n" + "\t   </b>\n" + "\t</a>\n" + "</root>";
+    String xml = "<root>\n\t<a>\n\t   <b k=\"x\">\n"
+            + "\t                        <x>x0</x>\n"
+            + "\t                        <y>y0</y>\n"
+            + "\t                        </b>\n"
+            + "\t                     <b k=\"y\">\n"
+            + "\t                        <x>x1</x>\n"
+            + "\t                        <y>y1</y>\n"
+            + "\t                        </b>\n"
+            + "\t                </a>\n"
+            + "\t           <a>\n\t   <b>\n"
+            + "\t                        <x>x3</x>\n"
+            + "\t                        </b>\n"
+            + "\t                     <b>\n"
+            + "\t                     <y>y4</y>\n"
+            + "\t                        </b>\n"
+            + "\t               </a>\n"
+            + "</root>";
     XPathRecordReader rr = new XPathRecordReader("/root/a");
     rr.addField("x", "/root/a/b...@k]/x", true);
     rr.addField("y", "/root/a/b...@k]/y", true);
@@ -225,13 +264,24 @@
 
   @Test
   public void elems2LevelWithAttribMultiple() {
-    String xml = "<root>\n" + "\t<a>\n" + "\t   <b k=\"x\" m=\"n\" >\n"
-            + "\t      <x>x0</x>\n" + "\t      <y>y0</y>\n" + "\t   </b>\n"
-            + "\t   <b k=\"y\" m=\"p\">\n" + "\t   \t<x>x1</x>\n"
-            + "\t   \t<y>y1</y>\n" + "\t   </b>\n" + "\t</a>\n" + "\t<a>\n"
-            + "\t   <b k=\"x\">\n" + "\t      <x>x3</x>\n" + "\t   </b>\n"
-            + "\t   <b m=\"n\">\n" + "\t   \t<y>y4</y>\n" + "\t   </b>\n"
-            + "\t</a>\n" + "</root>";
+    String xml="<root>\n"
+             + "\t<a>\n\t   <b k=\"x\" m=\"n\" >\n"
+             + "\t             <x>x0</x>\n"
+             + "\t             <y>y0</y>\n"
+             + "\t             </b>\n"
+             + "\t          <b k=\"y\" m=\"p\">\n"
+             + "\t             <x>x1</x>\n"
+             + "\t             <y>y1</y>\n"
+             + "\t             </b>\n"
+             + "\t   </a>\n"
+             + "\t<a>\n\t   <b k=\"x\">\n"
+             + "\t             <x>x3</x>\n"
+             + "\t             </b>\n"
+             + "\t          <b m=\"n\">\n"
+             + "\t             <y>y4</y>\n"
+             + "\t             </b>\n"
+             + "\t   </a>\n"
+             + "</root>";
     XPathRecordReader rr = new XPathRecordReader("/root/a");
     rr.addField("x", "/root/a/b...@k][@m='n']/x", true);
     rr.addField("y", "/root/a/b...@k][@m='n']/y", true);
@@ -244,12 +294,18 @@
 
   @Test
   public void elems2LevelWithAttribVal() {
-    String xml = "<root>\n" + "\t<a>\n" + "\t   <b k=\"x\">\n"
-            + "\t      <x>x0</x>\n" + "\t      <y>y0</y>\n" + "\t   </b>\n"
-            + "\t   <b k=\"y\">\n" + "\t   \t<x>x1</x>\n" + "\t   
\t<y>y1</y>\n"
-            + "\t   </b>\n" + "\t</a>\n" + "\t<a>\n" + "\t   <b>\n"
-            + "\t      <x>x3</x>\n" + "\t   </b>\n" + "\t   <b>\n"
-            + "\t   \t<y>y4</y>\n" + "\t   </b>\n" + "\t</a>\n" + "</root>";
+    String xml="<root>\n\t<a>\n   <b k=\"x\">\n"
+             + "\t                  <x>x0</x>\n"
+             + "\t                  <y>y0</y>\n"
+             + "\t                  </b>\n"
+             + "\t                <b k=\"y\">\n"
+             + "\t                  <x>x1</x>\n"
+             + "\t                  <y>y1</y>\n"
+             + "\t                  </b>\n"
+             + "\t                </a>\n"
+             + "\t        <a>\n   <b><x>x3</x></b>\n"
+             + "\t                <b><y>y4</y></b>\n"
+             + "\t</a>\n" + "</root>";
     XPathRecordReader rr = new XPathRecordReader("/root/a");
     rr.addField("x", "/root/a/b...@k='x']/x", true);
     rr.addField("y", "/root/a/b...@k='x']/y", true);
@@ -274,31 +330,148 @@
   }
 
   @Test
+  public void  unsupported_Xpaths() {
+    String xml = "<root><b><a x=\"a/b\" h=\"hello-A\"/>  </b></root>";
+    XPathRecordReader rr=null;
+    try {
+      rr = new XPathRecordReader("//b");
+      Assert.fail("A RuntimeException was expected: //b forEach cannot begin 
with '//'.");
+      }
+    catch (RuntimeException ex) {  }
+     try {
+      rr.addField("bold"  ,"b",        false);
+      Assert.fail("A RuntimeException was expected: 'b' xpaths must begin with 
'/'.");
+      }
+    catch (RuntimeException ex) {  }
+
+  }
+
+  @Test
+  public void any_decendent_from_root() {
+    XPathRecordReader rr = new XPathRecordReader("/anyd/contenido");
+    rr.addField("descdend", "//boo",                   true);
+    rr.addField("inr_descd","//boo/i",                false);
+    rr.addField("cont",     "/anyd/contenido",        false);
+    rr.addField("id",       "/anyd/contenido/@id",    false);
+    rr.addField("status",   "/anyd/status",           false);
+    rr.addField("title",    "/anyd/contenido/titulo", 
false,XPathRecordReader.FLATTEN);
+    rr.addField("resume",   "/anyd/contenido/resumen",false);
+    rr.addField("text",     "/anyd/contenido/texto",  false);
+
+    String xml="<anyd>\n"
+             + "  this <boo>top level</boo> is ignored because it is external 
to the forEach\n"
+             + "  <status>as is <boo>this element</boo></status>\n"
+             + "  <contenido id=\"10097\" idioma=\"cat\">\n"
+             + "    This one is <boo>not ignored as its</boo> inside a 
forEach\n"
+             + "    <antetitulo><i> big <boo>antler</boo></i></antetitulo>\n"
+             + "    <titulo>  My <i>flattened <boo>title</boo></i> </titulo>\n"
+             + "    <resumen> My summary <i>skip this!</i>  </resumen>\n"
+             + "    <texto>   <boo>Within the body of</boo>My text</texto>\n"
+             + "    <p>Access <boo>inner <i>sub clauses</i> as 
well</boo></p>\n"
+             + "    </contenido>\n"
+             + "</anyd>";
+
+    List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
+    Assert.assertEquals(1, l.size());
+    Map<String, Object> m = l.get(0);
+    Assert.assertEquals("This one is  inside a forEach", 
m.get("cont").toString().trim());
+    Assert.assertEquals("10097"             ,m.get("id"));
+    Assert.assertEquals("My flattened title",m.get("title").toString().trim());
+    Assert.assertEquals("My summary"        
,m.get("resume").toString().trim());
+    Assert.assertEquals("My text"           ,m.get("text").toString().trim());
+    Assert.assertEquals("not ignored as its",(String) ((List) 
m.get("descdend")).get(0) );
+    Assert.assertEquals("antler"            ,(String) ((List) 
m.get("descdend")).get(1) );
+    Assert.assertEquals("Within the body of",(String) ((List) 
m.get("descdend")).get(2) );
+    Assert.assertEquals("inner  as well"    ,(String) ((List) 
m.get("descdend")).get(3) );
+    Assert.assertEquals("sub clauses"       
,m.get("inr_descd").toString().trim());
+  }
+
+  @Test
+  public void any_decendent_of_a_child1() {
+    XPathRecordReader rr = new XPathRecordReader("/anycd");
+    rr.addField("descdend", "/anycd//boo",         true);
+
+    // same test string as above but checking to see if *all* //boo's are 
collected
+    String xml="<anycd>\n"
+             + "  this <boo>top level</boo> is ignored because it is external 
to the forEach\n"
+             + "  <status>as is <boo>this element</boo></status>\n"
+             + "  <contenido id=\"10097\" idioma=\"cat\">\n"
+             + "    This one is <boo>not ignored as its</boo> inside a 
forEach\n"
+             + "    <antetitulo><i> big <boo>antler</boo></i></antetitulo>\n"
+             + "    <titulo>  My <i>flattened <boo>title</boo></i> </titulo>\n"
+             + "    <resumen> My summary <i>skip this!</i>  </resumen>\n"
+             + "    <texto>   <boo>Within the body of</boo>My text</texto>\n"
+             + "    <p>Access <boo>inner <i>sub clauses</i> as 
well</boo></p>\n"
+             + "    </contenido>\n"
+             + "</anycd>";
+
+    List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
+    Assert.assertEquals(1, l.size());
+    Map<String, Object> m = l.get(0);
+    Assert.assertEquals("top level"         ,(String) ((List) 
m.get("descdend")).get(0) );
+    Assert.assertEquals("this element"      ,(String) ((List) 
m.get("descdend")).get(1) );
+    Assert.assertEquals("not ignored as its",(String) ((List) 
m.get("descdend")).get(2) );
+    Assert.assertEquals("antler"            ,(String) ((List) 
m.get("descdend")).get(3) );
+    Assert.assertEquals("title"             ,(String) ((List) 
m.get("descdend")).get(4) );
+    Assert.assertEquals("Within the body of",(String) ((List) 
m.get("descdend")).get(5) );
+    Assert.assertEquals("inner  as well"    ,(String) ((List) 
m.get("descdend")).get(6) );
+  }
+
+  @Test
+  public void any_decendent_of_a_child2() {
+    XPathRecordReader rr = new XPathRecordReader("/anycd");
+    rr.addField("descdend", "/anycd/contenido//boo",         true);
+
+    // same test string as above but checking to see if *some* //boo's are 
collected
+    String xml="<anycd>\n"
+             + "  this <boo>top level</boo> is ignored because it is external 
to the forEach\n"
+             + "  <status>as is <boo>this element</boo></status>\n"
+             + "  <contenido id=\"10097\" idioma=\"cat\">\n"
+             + "    This one is <boo>not ignored as its</boo> inside a 
forEach\n"
+             + "    <antetitulo><i> big <boo>antler</boo></i></antetitulo>\n"
+             + "    <titulo>  My <i>flattened <boo>title</boo></i> </titulo>\n"
+             + "    <resumen> My summary <i>skip this!</i>  </resumen>\n"
+             + "    <texto>   <boo>Within the body of</boo>My text</texto>\n"
+             + "    <p>Access <boo>inner <i>sub clauses</i> as 
well</boo></p>\n"
+             + "    </contenido>\n"
+             + "</anycd>";
+
+    List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
+    Assert.assertEquals(1, l.size());
+    Map<String, Object> m = l.get(0);
+    Assert.assertEquals("not ignored as its",((List) m.get("descdend")).get(0) 
);
+    Assert.assertEquals("antler"            ,((List) m.get("descdend")).get(1) 
);
+    Assert.assertEquals("title"             ,((List) m.get("descdend")).get(2) 
);
+    Assert.assertEquals("Within the body of",((List) m.get("descdend")).get(3) 
);
+    Assert.assertEquals("inner  as well"    ,((List) m.get("descdend")).get(4) 
);
+  }
+  
+  @Test
   public void another() {
-    String xml = "<root>\n"
+    String xml="<root>\n"
             + "       <contenido id=\"10097\" idioma=\"cat\">\n"
-            + "       <antetitulo></antetitulo>\n" + "       <titulo>\n"
-            + "               This is my title\n" + "       </titulo>\n"
-            + "       <resumen>\n" + "               This is my summary\n"
-            + "       </resumen>\n" + "       <texto>\n"
-            + "               This is the body of my text\n" + "       
</texto>\n"
-            + "       </contenido>\n" + "</root>";
+             + "    <antetitulo></antetitulo>\n"
+             + "    <titulo>    This is my title             </titulo>\n"
+             + "    <resumen>   This is my summary           </resumen>\n"
+             + "    <texto>     This is the body of my text  </texto>\n"
+             + "    </contenido>\n"
+             + "</root>";
     XPathRecordReader rr = new XPathRecordReader("/root/contenido");
     rr.addField("id", "/root/contenido/@id", false);
     rr.addField("title", "/root/contenido/titulo", false);
-    rr.addField("resume", "/root/contenido/resumen", false);
+    rr.addField("resume","/root/contenido/resumen",false);
     rr.addField("text", "/root/contenido/texto", false);
 
     List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
     Assert.assertEquals(1, l.size());
     Map<String, Object> m = l.get(0);
-    Assert.assertEquals("10097", m.get("id").toString().trim());
+    Assert.assertEquals("10097", m.get("id"));
     Assert.assertEquals("This is my title", m.get("title").toString().trim());
-    Assert
-            .assertEquals("This is my summary", 
m.get("resume").toString().trim());
+    Assert.assertEquals("This is my summary", 
m.get("resume").toString().trim());
     Assert.assertEquals("This is the body of my text", m.get("text").toString()
             .trim());
   }
+
   @Test
   public void sameForEachAndXpath(){
     String xml="<root>\n" +
@@ -367,4 +540,31 @@
     Assert.assertEquals("B.2.2",b.get(1));
     Assert.assertEquals("C.2.2",c.get(1));
   }
+
+
+ @Test
+  public void testError(){
+    String malformedXml = "<root>\n" +
+          "    <node>\n" +
+          "        <id>1</id>\n" +
+          "        <desc>test1</desc>\n" +
+          "    </node>\n" +
+          "    <node>\n" +
+          "        <id>2</id>\n" +
+          "        <desc>test2</desc>\n" +
+          "    </node>\n" +
+          "    <node>\n" +
+          "        <id/>3</id>\n" +   // invalid XML
+          "        <desc>test3</desc>\n" +
+          "    </node>\n" +
+          "</root>";
+    XPathRecordReader rr = new XPathRecordReader("/root/node");
+    rr.addField("id", "/root/node/id", true);
+    rr.addField("desc", "/root/node/desc", true);
+    try {
+      rr.getAllRecords(new StringReader(malformedXml));
+      Assert.fail("A RuntimeException was expected: the input XML is 
invalid.");
+      }
+    catch (Exception e) { }
+ }
 }

svn commit: r822154 - in /lucene/solr/trunk/contrib/dataimporthandler: CHANGES.txt src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java

Reply via email to