otis        2002/06/21 08:02:51

  Added:       contributions/XML-Indexing-Demo IndexingRequest.xml
               
contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo
                        IndexFiles.java SearchFiles.java
                        XMLDocumentHandlerDOM.java
                        XMLDocumentHandlerSAX.java
  Log:
  - Unpacked the .zip file, for easier access to the source code.
  
  Revision  Changes    Path
  1.1                  
jakarta-lucene-sandbox/contributions/XML-Indexing-Demo/IndexingRequest.xml
  
  Index: IndexingRequest.xml
  ===================================================================
  <customerInfo>
      <name><![CDATA[Aruna A. Raghavan]]></name>
      <profession><![CDATA[Software Developer]]></profession>
      <addressLine1><![CDATA[6801 West 106th Street]]></addressLine1>
      <addressLine2><![CDATA[#205]]></addressLine2>
      <city><![CDATA[Eagan]]></city>
      <state><![CDATA[MN]]></state>
      <zip><![CDATA[55121]]></zip>
      <country><![CDATA[USA]]></country>
  </customerInfo>
  
  
  
  1.1                  
jakarta-lucene-sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/IndexFiles.java
  
  Index: IndexFiles.java
  ===================================================================
  package org.apache.lucenesandbox.xmlindexingdemo;
  
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   *    "Apache Lucene" must not be used to endorse or promote products
   *    derived from this software without prior written permission. For
   *    written permission, please contact [EMAIL PROTECTED]
   *
   * 5. Products derived from this software may not be called "Apache",
   *    "Apache Lucene", nor may "Apache" appear in their name, without
   *    prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.index.IndexWriter;
  
  import java.io.File;
  import java.util.Date;
  
  /**
   * Command-line demo that indexes one XML file, or every file below a
   * directory, into a Lucene index stored under the relative path "index".
   *
   * Each file is parsed with {@link XMLDocumentHandlerSAX}; the resulting
   * Lucene document is added to the index.
   */
  class IndexFiles
  {
      /**
       * Entry point.
       *
       * @param args args[0] is the file or directory to index
       * @throws Exception any failure during indexing; it is reported on
       *         standard output and then rethrown
       */
      public static void main(String[] args)
        throws Exception
      {
        // Without this check a missing argument surfaces as an
        // ArrayIndexOutOfBoundsException, which tells the user nothing.
        if (args == null || args.length == 0)
        {
            System.out.println("Usage: java " + IndexFiles.class.getName()
                + " <file-or-directory>");
            return;
        }

        try
        {
            Date start = new Date();

            // NOTE(review): the trailing 'true' is presumably Lucene's
            // "create a new index" flag - confirm against the IndexWriter docs.
            IndexWriter writer = new IndexWriter("index", new StandardAnalyzer(), true);
            try
            {
                indexDocs(writer, new File(args[0]));
                writer.optimize();
            }
            finally
            {
                // Always release the writer, even when indexing fails part-way.
                writer.close();
            }

            Date end = new Date();

            System.out.print(end.getTime() - start.getTime());
            System.out.println(" total milliseconds");

        }
        catch (Exception e)
        {
            System.out.println(" caught a " + e.getClass() +
                "\n with message: " + e.getMessage());
            throw e;
        }
      }

      /**
       * Adds a file to the index, or recurses into a directory and adds
       * everything found below it.
       *
       * @param writer the open index writer that receives the documents
       * @param file   a file to parse as XML, or a directory to walk
       * @throws Exception if a directory cannot be listed, or if parsing or
       *         indexing a file fails
       */
      public static void indexDocs(IndexWriter writer, File file)
        throws Exception
      {
        if (file.isDirectory())
        {
            String[] files = file.list();
            // File.list() returns null when the directory cannot be read;
            // fail with a clear message instead of a NullPointerException.
            if (files == null)
            {
                throw new java.io.IOException("cannot list directory " + file);
            }
            for (int i = 0; i < files.length; i++)
            {
                indexDocs(writer, new File(file, files[i]));
            }
        }
        else
        {
            System.out.println("adding " + file);
            XMLDocumentHandlerSAX hdlr = new XMLDocumentHandlerSAX(file);
            writer.addDocument(hdlr.getDocument());
            // For DOM, use
            // XMLDocumentHandlerDOM hdlr = new XMLDocumentHandlerDOM();
            // writer.addDocument(hdlr.createXMLDocument(file));
        }
      }
  }
  
  
  
  1.1                  
jakarta-lucene-sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/SearchFiles.java
  
  Index: SearchFiles.java
  ===================================================================
  package org.apache.lucenesandbox.xmlindexingdemo;
  
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   *    "Apache Lucene" must not be used to endorse or promote products
   *    derived from this software without prior written permission. For
   *    written permission, please contact [EMAIL PROTECTED]
   *
   * 5. Products derived from this software may not be called "Apache",
   *    "Apache Lucene", nor may "Apache" appear in their name, without
   *    prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  import java.io.IOException;
  import java.io.BufferedReader;
  import java.io.InputStreamReader;
  
  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.search.Searcher;
  import org.apache.lucene.search.IndexSearcher;
  import org.apache.lucene.search.Query;
  import org.apache.lucene.search.Hits;
  import org.apache.lucene.queryParser.QueryParser;
  
  /**
   * Interactive command-line demo that reads queries from standard input,
   * runs them against the "name" field of the Lucene index stored under the
   * relative path "index", and prints the address fields of each hit.
   *
   * The session ends at end of input or when an empty line is entered.
   */
  class SearchFiles {
    /** Number of hits printed before the "more (y/n) ?" prompt. */
    private static final int HITS_PER_PAGE = 10;

    /**
     * Entry point.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
      Searcher searcher = null;
      try {
        searcher = new IndexSearcher("index");
        Analyzer analyzer = new StandardAnalyzer();

        BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
        while (true) {
          System.out.print("Query: ");
          String line = in.readLine();

          // readLine() returns null at end of input. The previous check,
          // length() == -1, could never be true, so end of input caused a
          // NullPointerException.
          if (line == null || line.length() == 0)
            break;

          Query query = QueryParser.parse(line, "name", analyzer);
          System.out.println("Searching for: " + query.toString("name"));

          Hits hits = searcher.search(query);
          System.out.println(hits.length() + " total matching documents");

          for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
            int end = Math.min(hits.length(), start + HITS_PER_PAGE);
            for (int i = start; i < end; i++) {
              Document doc = hits.doc(i);
              System.out.println(doc.get("name"));
              System.out.println(doc.get("profession"));
              System.out.println(doc.get("addressLine1"));
              System.out.println(doc.get("addressLine2"));
              System.out.print(doc.get("city"));
              System.out.print(" ");
              System.out.print(doc.get("state"));
              System.out.print(" ");
              System.out.print(doc.get("zip"));
              // Separator was missing, which ran zip and country together.
              System.out.print(" ");
              System.out.println(doc.get("country"));
            }

            if (hits.length() > end) {
              System.out.print("more (y/n) ? ");
              line = in.readLine();
              // Stop paging on end of input, an empty answer, or 'n'.
              if (line == null || line.length() == 0 || line.charAt(0) == 'n')
                break;
            }
          }
        }

      } catch (Exception e) {
        System.out.println(" caught a " + e.getClass() +
                         "\n with message: " + e.getMessage());
      } finally {
        // Release the searcher on every exit path, including failures.
        if (searcher != null) {
          try {
            searcher.close();
          } catch (Exception e) {
            System.out.println(" caught a " + e.getClass() +
                             "\n with message: " + e.getMessage());
          }
        }
      }
    }
  }
  
  
  
  1.1                  
jakarta-lucene-sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/XMLDocumentHandlerDOM.java
  
  Index: XMLDocumentHandlerDOM.java
  ===================================================================
  package org.apache.lucenesandbox.xmlindexingdemo;
  
  import org.w3c.dom.*;
  import org.w3c.dom.Node;
  import javax.xml.parsers.*;
  import org.apache.lucene.document.Field;
  
  import java.io.File;
  
  /**
   *
   */
  public class XMLDocumentHandlerDOM
  {
      public org.apache.lucene.document.Document createXMLDocument(File f)
      {
        org.apache.lucene.document.Document document = new 
org.apache.lucene.document.Document();
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        try
          {
            DocumentBuilder df = dbf.newDocumentBuilder();
            org.w3c.dom.Document d = df.parse(f);
            Node root = d.getDocumentElement();
            traverseTree(root, document);
        }
        catch (Exception e)
          {
            System.out.println("error: " + e);
            e.printStackTrace();
        }
        return document;
      }
  
      static private void traverseTree(Node node, org.apache.lucene.document.Document 
document)
      {
        NodeList nl = node.getChildNodes();
        if (nl.getLength() == 0)
          {
            if (node.getNodeType() == Node.TEXT_NODE)
            {
                Node parentNode = node.getParentNode();
                if (parentNode.getNodeType() == Node.ELEMENT_NODE)
                  {
                    String parentNodeName = parentNode.getNodeName();
  //                String nodeValue = node.getNodeValue();
  //                if (parentNodeName.equals("name"))
  //                {
                        Node siblingNode = node.getNextSibling();
                        if (siblingNode != null)
                          {
                            if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
                            {
                                document.add(Field.Text("name", 
siblingNode.getNodeValue()));
                            }
                        }
  //                }
  //                else if (parentNodeName.equals("profession"))
  //                {
  //                    Node siblingNode = node.getNextSibling();
  //                    if (siblingNode != null)
  //                         {
  //                        if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
  //                             {
  //                            document.add(Field.Text([arentNodeName, 
siblingNode.getNodeValue()));
  //                        }
  //                    }
  //                }
  //                else if (parentNodeName == "addressLine1")
  //                     {
  //                    Node siblingNode = node.getNextSibling();
  //                    if(siblingNode != null)
  //                    {
  //                        if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
  //                        {
  //                            document.add(Field.Text("addressLine1", 
siblingNode.getNodeValue()));
  //                        }
  //                    }
  //                }
  //                else if (parentNodeName.equals("addressLine2"))
  //                {
  //                    Node siblingNode = node.getNextSibling();
  //                    if (siblingNode != null)
  //                    {
  //                        if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
  //                        {
  //                            document.add(Field.Text("addressLine2", 
siblingNode.getNodeValue()));
  //                        }
  //                    }
  //                }
  //                if (parentNodeName.equals("city"))
  //                {
  //                    Node siblingNode = node.getNextSibling();
  //                    if (siblingNode != null)
  //                         {
  //                        if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
  //                        {
  //                            document.add(Field.Text("city", 
siblingNode.getNodeValue()));
  //                        }
  //                    }
  //                }
  //                else if (parentNodeName.equals("zip"))
  //                {
  //                    Node siblingNode = node.getNextSibling();
  //                    if (siblingNode != null)
  //                    {
  //                        if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
  //                        {
  //                            document.add(Field.Text("zip", 
siblingNode.getNodeValue()));
  //                        }
  //                    }
  //                }
  //                else if (parentNodeName.equals("state"))
  //                {
  //                    Node siblingNode = node.getNextSibling();
  //                    if (siblingNode != null)
  //                    {
  //                        if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
  //                        {
  //                            document.add(Field.Text("state", 
siblingNode.getNodeValue()));
  //                        }
  //                    }
  //                }
  //                else if (parentNodeName.equals("country"))
  //                {
  //                    Node siblingNode = node.getNextSibling();
  //                    if (siblingNode != null)
  //                    {
  //                        if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
  //                        {
  //                            document.add(Field.Text("country", 
siblingNode.getNodeValue()));
  //                        }
  //                    }
  //                }
                }
            }
          }
          else
          {
            for(int i=0; i<nl.getLength(); i++)
              {
                traverseTree(nl.item(i), document);
            }
          }
      }
  }
  
  
  
  1.1                  
jakarta-lucene-sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/XMLDocumentHandlerSAX.java
  
  Index: XMLDocumentHandlerSAX.java
  ===================================================================
  package org.apache.lucenesandbox.xmlindexingdemo;
  
  import org.xml.sax.*;
  import org.xml.sax.helpers.*;
  import org.xml.sax.AttributeList;
  import javax.xml.parsers.*;
  
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  
  import java.io.File;
  import java.io.IOException;
  
  /**
   * SAX handler that turns an XML file into a Lucene document: each element
   * becomes a text field named after the element, holding the character data
   * collected between the element's start and end tags.
   *
   * The file is parsed in the constructor; call {@link #getDocument()}
   * afterwards to obtain the result.
   */
  public class XMLDocumentHandlerSAX
      extends HandlerBase
  {
      /** Character data collected for the element currently being read. */
      private StringBuffer elementBuffer = new StringBuffer();

      /** Document under construction; created in {@link #startDocument()}. */
      private Document mDocument;

      /**
       * Parses the given file with this object as the SAX handler.
       *
       * @param xmlFile the XML file to parse
       * @throws ParserConfigurationException if no SAX parser can be created
       * @throws SAXException if the parser reports an error
       * @throws IOException if the file cannot be read
       */
      public XMLDocumentHandlerSAX(File xmlFile)
        throws ParserConfigurationException, SAXException, IOException
      {
        SAXParserFactory spf = SAXParserFactory.newInstance();

        SAXParser parser = spf.newSAXParser();
        parser.parse(xmlFile, this);
      }

      /** Called at document start; creates the empty Lucene document. */
      public void startDocument()
      {
        mDocument = new Document();
      }

      /**
       * Called at element start; discards any text gathered so far so the
       * buffer holds only this element's own character data.
       */
      public void startElement(String localName, AttributeList atts)
        throws SAXException
      {
          elementBuffer.setLength(0);
      }

      /**
       * Called with a run of character data; appends it to the buffer.
       */
      public void characters(char[] text, int start, int length)
      {
        elementBuffer.append(text, start, length);
      }

      /**
       * Called at element end; stores the buffered text as a field named
       * after the element.
       */
      public void endElement(String localName)
        throws SAXException
      {
          mDocument.add(Field.Text(localName, elementBuffer.toString()));
          // Clear the buffer so this element's text does not leak into the
          // field of the enclosing element when its end tag arrives.
          elementBuffer.setLength(0);
      }

      /**
       * @return the document built while parsing
       */
      public Document getDocument()
      {
        return mDocument;
      }
  }
  
  
  

--
To unsubscribe, e-mail:   <mailto:[EMAIL PROTECTED]>
For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>

Reply via email to