I have started to create a set of generic Lucene document types that can be easily manipulated depending on the fields. I know others have generated Documents from PDF files. Is there some place where we can add contributed classes to the Lucene web page?
Here my current version of the XMLDocument based on . It's a bit slow. It uses a path (taken from Document example) and based on a field name / xpath pair (key / value) from either an array or property file generates an appropriate lucene document with the specified fields. I have not tested all permutations of Document (I have used the File, Properties) and it works. Note: It uses the xalan example ApplyXpath class to get the xml xpath. I hope this helps. --Peter -------------------------------------------------- package xxx.lucene.xml; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.DateField; import org.apache..../ApplyXpath; import java.util.Properties; import java.io.File; import java.util.Enumeration; import java.io.FileInputStream; /** * A utility for making lucene document from an XML source and a set of xpaths * based on Document example from Lucene * */ public class XMLDocument { private XMLDocument() { } /** * @param file Document that to be converted to a lucene document * @param propertyList properties where the key is the field name and the value is the * XML xpath. 
* @throws FileNotFoundException * @throws Exception * @return lucene document */ public static Document Document (File file, Properties propertyList) throws java.io.FileNotFoundException , Exception { Document doc = new Document(); // add path doc.add(Field.Text("path", file.getPath())); //add date modified doc.add(Field.Keyword("modified", DateField.timeToString(file.lastModified()))); //add field list in property list Enumeration e = propertyList.propertyNames(); while (e.hasMoreElements()) { String key = (String) e.nextElement(); String xpath = propertyList.getProperty(key); String[] valueArray = ApplyXpath(file.getPath(),xpath); StringBuffer value = new StringBuffer(""); for (int i=0; i < valueArray.length; i++) { value.append(valueArray[i]); } //System.out.println("add key "+key+" wtih value = "+value); filter(key,value); doc.add(Field.Text(key,value.toString())); } return doc; } /** * @return lucene document * @param fieldNames field names for the lucene document * @param file Document that to be converted to a lucene document * @param xpaths XML xpaths for the information you want to get * @throws Exception */ public static Document Document(File file, java.lang.String[] fieldNames, java.lang.String[] xpaths) { if (fieldNames.length != xpaths.length) { throw new IllegalArgumentException ("String arrays are not equal size"); } Properties propertyList = new Properties(); // generate properties from the arrays for (int i=0;i<fieldNames.length;i++) { propertyList.setProperty(fieldNames[i],xpaths[i]); } Document doc = Document (file, propertyList); return doc; } /** * @param path path of the Document that to be converted to a lucene document * @param keys * @param xpaths * @throws Exception * @return */ public static Document Document(String path, String[] fieldNames, String[] xpaths) throws Exception { File file = new File(path); Document doc = Document (file, fieldNames, xpaths); return doc; } /** * @param path path of document you want to convert to a lucene 
document * @param propertyList properties where the key is the field name and the value is the * XML xpath. * @throws Exception * @return lucene document */ public static Document Document(String path, Properties propertyList) throws Exception { File file = new File(path); Document doc = Document (file, propertyList); return doc; } /** * @param documentPath path of the Document that to be converted to a lucene document * @param propertyPath path of file containing properties where the key is the field name and the value is the * XML xpath. * @throws Exception * @return */ public static Document Document(String documentPath, String propertyPath) throws Exception { File file = new File(documentPath); FileInputStream fis = new FileInputStream(propertyPath); Properties propertyList = new Properties(); propertyList.load(fis); Document doc = Document (file, propertyList); return doc; } /** * @param documentFile Document that to be converted to a lucene document * @param propertyFile file containing properties where the key is the field name and the value is the * XML xpath. * @throws Exception * @return */ public static Document Document(File documentFile, File propertyFile) throws Exception { FileInputStream fis = new FileInputStream(propertyFile); Properties propertyList = new Properties(); propertyList.load(fis); Document doc = Document (documentFile, propertyList); return doc; } private static String filter(String key, StringBuffer value) { String newValue; newValue = value.toString(); return newValue; } } -- To unsubscribe, e-mail: <mailto:[EMAIL PROTECTED]> For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>