I have started to create a set of generic lucene document types that can 
be easily manipulated depending on the fields.
I know others have generated Documents out of PDF.
Is there some place we can add contributed classes to the lucene web 
page?

Here is my current version of XMLDocument, based on the Lucene Document
example. It's a bit slow.
It takes a path (as in the Document example) and, from field name /
xpath pairs (key / value) supplied either as arrays or as a property file,
generates an appropriate lucene document with the specified fields.

I have not tested all permutations of Document; I have only used the
(File, Properties) version, and it works.

Note:
It uses the xalan example ApplyXpath class to evaluate each xpath against
the xml file.

I hope this helps.

--Peter

--------------------------------------------------

package xxx.lucene.xml;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateField;

import org.apache..../ApplyXpath;
import java.util.Properties;
import java.io.File;
import java.util.Enumeration;
import java.io.FileInputStream;

/**
* A utility for making lucene document from an XML source and a set of 
xpaths
* based on Document example from Lucene
*
*/
public class XMLDocument
{
        private XMLDocument() { }

         /**
          * @param file Document that to be converted to a lucene document
          * @param propertyList properties where the key is the field 
name and the value is the
          * XML xpath.
          * @throws FileNotFoundException
          * @throws Exception
          * @return lucene document
          */
        public static Document Document (File file, Properties propertyList)
        throws java.io.FileNotFoundException , Exception
        {
                Document doc = new Document();
                
                // add path
                doc.add(Field.Text("path", file.getPath()));
                
                //add date modified
                doc.add(Field.Keyword("modified", 
DateField.timeToString(file.lastModified())));
                
                //add field list in property list
                 Enumeration e = propertyList.propertyNames();
                 while (e.hasMoreElements())
                 {
                        String key = (String) e.nextElement();
                        String xpath = propertyList.getProperty(key);
                        String[] valueArray = ApplyXpath(file.getPath(),xpath);
                        StringBuffer value = new StringBuffer("");
                        for (int i=0; i < valueArray.length; i++)
                        {
                                value.append(valueArray[i]);
                        }
                        //System.out.println("add key "+key+" wtih value = "+value);
                         filter(key,value);
                        doc.add(Field.Text(key,value.toString()));
                 }
                
                 return doc;
        }

         /**
          * @return lucene document
          * @param fieldNames field names for the lucene document
          * @param file Document that to be converted to a lucene document
          * @param xpaths XML xpaths for the information you want to get
          * @throws Exception
          */
         public static Document Document(File file, java.lang.String[] 
fieldNames, java.lang.String[] xpaths)
         {
             if (fieldNames.length != xpaths.length)
             {
                 throw new IllegalArgumentException ("String arrays are 
not equal size");
             }

             Properties propertyList = new Properties();

             // generate properties from the arrays
             for (int i=0;i<fieldNames.length;i++) {
                 propertyList.setProperty(fieldNames[i],xpaths[i]);
             }

             Document doc = Document (file, propertyList);
             return doc;
         }

         /**
          * @param path path of the Document that to be converted to a 
lucene document
          * @param keys
          * @param xpaths
          * @throws Exception
          * @return
          */
         public static Document Document(String path, String[] 
fieldNames, String[] xpaths)
         throws Exception
         {
             File file = new File(path);
             Document doc = Document (file, fieldNames, xpaths);
             return doc;
         }

         /**
          * @param path path of document you want to convert to a lucene 
document
          * @param propertyList properties where the key is the field 
name and the value is the
          * XML xpath.
          * @throws Exception
          * @return lucene document
          */
         public static Document Document(String path, Properties 
propertyList)
         throws Exception
         {
             File file = new File(path);
             Document doc = Document (file, propertyList);
             return doc;
         }

         /**
          * @param documentPath path of the Document that to be converted 
to a lucene document
          * @param propertyPath path of file containing properties where 
the key is the field name and the value is the
          * XML xpath.
          * @throws Exception
          * @return
          */
         public static Document Document(String documentPath, String 
propertyPath)
         throws Exception
         {
             File file = new File(documentPath);
             FileInputStream fis = new FileInputStream(propertyPath);
             Properties propertyList = new Properties();
             propertyList.load(fis);
             Document doc = Document (file, propertyList);
             return doc;
         }

         /**
          * @param documentFile Document that to be converted to a lucene 
document
          * @param propertyFile file containing properties where the key 
is the field name and the value is the
          * XML xpath.
          * @throws Exception
          * @return
          */
         public static Document Document(File documentFile, File 
propertyFile)
         throws Exception
         {
             FileInputStream fis = new FileInputStream(propertyFile);
             Properties propertyList = new Properties();
             propertyList.load(fis);
             Document doc = Document (documentFile, propertyList);
             return doc;
         }

         private static String filter(String key, StringBuffer value) {
             String newValue;
             newValue = value.toString();
             return newValue;
         }
}


--
To unsubscribe, e-mail:   <mailto:[EMAIL PROTECTED]>
For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>

Reply via email to