I have started to create a set of generic lucene document types that can
be easily manipulated depending on the fields.
I know others have generated Documents out of PDF.
Is there some place we can add contributed classes to the lucene web
page?
Here is my current version of the XMLDocument, based on the Document example from Lucene. It's a bit slow.
It uses a path (taken from Document example) and based on a field name /
xpath pair (key / value) from either an array or property file generates
an appropriate lucene document with the specified fields.
I have not tested all of the Document overloads; I have used the
(File, Properties) one, and it works.
Note:
It uses the xalan example ApplyXpath class to get the xml xpath.
I hope this helps.
--Peter
--------------------------------------------------
package xxx.lucene.xml;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateField;
import org.apache..../ApplyXpath;
import java.util.Properties;
import java.io.File;
import java.util.Enumeration;
import java.io.FileInputStream;
/**
* A utility for making lucene document from an XML source and a set of
xpaths
* based on Document example from Lucene
*
*/
public class XMLDocument
{
private XMLDocument() { }
/**
* @param file Document that to be converted to a lucene document
* @param propertyList properties where the key is the field
name and the value is the
* XML xpath.
* @throws FileNotFoundException
* @throws Exception
* @return lucene document
*/
public static Document Document (File file, Properties propertyList)
throws java.io.FileNotFoundException , Exception
{
Document doc = new Document();
// add path
doc.add(Field.Text("path", file.getPath()));
//add date modified
doc.add(Field.Keyword("modified",
DateField.timeToString(file.lastModified())));
//add field list in property list
Enumeration e = propertyList.propertyNames();
while (e.hasMoreElements())
{
String key = (String) e.nextElement();
String xpath = propertyList.getProperty(key);
String[] valueArray = ApplyXpath(file.getPath(),xpath);
StringBuffer value = new StringBuffer("");
for (int i=0; i < valueArray.length; i++)
{
value.append(valueArray[i]);
}
//System.out.println("add key "+key+" wtih value = "+value);
filter(key,value);
doc.add(Field.Text(key,value.toString()));
}
return doc;
}
/**
* @return lucene document
* @param fieldNames field names for the lucene document
* @param file Document that to be converted to a lucene document
* @param xpaths XML xpaths for the information you want to get
* @throws Exception
*/
public static Document Document(File file, java.lang.String[]
fieldNames, java.lang.String[] xpaths)
{
if (fieldNames.length != xpaths.length)
{
throw new IllegalArgumentException ("String arrays are
not equal size");
}
Properties propertyList = new Properties();
// generate properties from the arrays
for (int i=0;i<fieldNames.length;i++) {
propertyList.setProperty(fieldNames[i],xpaths[i]);
}
Document doc = Document (file, propertyList);
return doc;
}
/**
* @param path path of the Document that to be converted to a
lucene document
* @param keys
* @param xpaths
* @throws Exception
* @return
*/
public static Document Document(String path, String[]
fieldNames, String[] xpaths)
throws Exception
{
File file = new File(path);
Document doc = Document (file, fieldNames, xpaths);
return doc;
}
/**
* @param path path of document you want to convert to a lucene
document
* @param propertyList properties where the key is the field
name and the value is the
* XML xpath.
* @throws Exception
* @return lucene document
*/
public static Document Document(String path, Properties
propertyList)
throws Exception
{
File file = new File(path);
Document doc = Document (file, propertyList);
return doc;
}
/**
* @param documentPath path of the Document that to be converted
to a lucene document
* @param propertyPath path of file containing properties where
the key is the field name and the value is the
* XML xpath.
* @throws Exception
* @return
*/
public static Document Document(String documentPath, String
propertyPath)
throws Exception
{
File file = new File(documentPath);
FileInputStream fis = new FileInputStream(propertyPath);
Properties propertyList = new Properties();
propertyList.load(fis);
Document doc = Document (file, propertyList);
return doc;
}
/**
* @param documentFile Document that to be converted to a lucene
document
* @param propertyFile file containing properties where the key
is the field name and the value is the
* XML xpath.
* @throws Exception
* @return
*/
public static Document Document(File documentFile, File
propertyFile)
throws Exception
{
FileInputStream fis = new FileInputStream(propertyFile);
Properties propertyList = new Properties();
propertyList.load(fis);
Document doc = Document (documentFile, propertyList);
return doc;
}
private static String filter(String key, StringBuffer value) {
String newValue;
newValue = value.toString();
return newValue;
}
}
--
To unsubscribe, e-mail: <mailto:[EMAIL PROTECTED]>
For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>