Author: dflorey
Date: Sat Dec 31 03:47:45 2005
New Revision: 360272
URL: http://svn.apache.org/viewcvs?rev=360272&view=rev
Log:
Refactored the PropertyExtractor interface to allow more sophisticated
property extraction.
Modified:
jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java
jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java
jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java
jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java
Modified:
jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java
URL:
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
---
jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java
(original)
+++
jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java
Sat Dec 31 03:47:45 2005
@@ -26,6 +26,9 @@
import java.io.InputStream;
import java.util.Map;
+import org.apache.slide.content.NodeRevisionDescriptor;
+import org.apache.slide.content.NodeRevisionDescriptors;
+
/**
* The AbstractPropertyExtractor class
*/
@@ -45,7 +48,7 @@
this.namespace = namespace;
}
- public abstract Map extract(InputStream content) throws ExtractorException;
+ public abstract Map extract(NodeRevisionDescriptors descriptors,
NodeRevisionDescriptor descriptor, InputStream content) throws
ExtractorException;
/* (non-Javadoc)
* @see org.apache.slide.extractor.Extractor#getContentType()
Modified:
jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java
URL:
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
---
jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java
(original)
+++
jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java
Sat Dec 31 03:47:45 2005
@@ -28,6 +28,7 @@
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
+
import org.apache.slide.content.NodeRevisionDescriptor;
import org.apache.slide.content.NodeRevisionDescriptors;
import org.apache.slide.util.conf.Configurable;
Modified:
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java
URL:
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
---
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java
(original)
+++
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java
Sat Dec 31 03:47:45 2005
@@ -29,6 +29,7 @@
import java.io.InputStream;
import java.io.Reader;
import java.util.Iterator;
+
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
Modified:
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
URL:
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
---
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
(original)
+++
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
Sat Dec 31 03:47:45 2005
@@ -29,6 +29,7 @@
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
+
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
Modified:
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java
URL:
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
---
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java
(original)
+++
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java
Sat Dec 31 03:47:45 2005
@@ -27,6 +27,7 @@
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
+
import org.textmining.text.extraction.WordExtractor;
/**
Modified:
jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java
URL:
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
---
jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java
(original)
+++
jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java
Sat Dec 31 03:47:45 2005
@@ -7,6 +7,7 @@
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+
import org.apache.poi.hpsf.NoPropertySetStreamException;
import org.apache.poi.hpsf.Property;
import org.apache.poi.hpsf.PropertySet;
@@ -16,6 +17,8 @@
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
import org.apache.slide.common.PropertyName;
+import org.apache.slide.content.NodeRevisionDescriptor;
+import org.apache.slide.content.NodeRevisionDescriptors;
import org.apache.slide.util.conf.Configurable;
import org.apache.slide.util.conf.Configuration;
import org.apache.slide.util.conf.ConfigurationException;
@@ -100,7 +103,7 @@
super(uri, contentType, namespace);
}
- public Map extract(InputStream content) throws ExtractorException {
+ public Map extract(NodeRevisionDescriptors descriptors,
NodeRevisionDescriptor descriptor, InputStream content) throws
ExtractorException {
OfficePropertiesListener listener = new
OfficePropertiesListener();
try {
POIFSReader r = new POIFSReader();
Modified:
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java
URL:
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java
(original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java
Sat Dec 31 03:47:45 2005
@@ -28,6 +28,7 @@
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.Reader;
+
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;
Modified:
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java
URL:
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
---
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java
(original)
+++
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java
Sat Dec 31 03:47:45 2005
@@ -26,6 +26,9 @@
import java.io.InputStream;
import java.util.Map;
+import org.apache.slide.content.NodeRevisionDescriptor;
+import org.apache.slide.content.NodeRevisionDescriptors;
+
/**
* The PropertyExtractor interface
*
@@ -36,5 +39,5 @@
* Gets extracted property value from the resource, for example "author"
* for a word doc, ...
*/
- public Map extract(InputStream content) throws ExtractorException;
+ public Map extract(NodeRevisionDescriptors descriptors,
NodeRevisionDescriptor descriptor, InputStream content) throws
ExtractorException;
}
Modified:
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java
URL:
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
---
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java
(original)
+++
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java
Sat Dec 31 03:47:45 2005
@@ -54,7 +54,7 @@
if ( content != null && descriptor != null ) {
List extractor =
ExtractorManager.getInstance().getPropertyExtractors(namespaceName,
descriptors, descriptor);
for ( int i = 0, l = extractor.size(); i < l; i++ ) {
- Map extractedProperties =
((PropertyExtractor)extractor.get(i)).extract(new
ByteArrayInputStream(content.getContentBytes()));
+ Map extractedProperties =
((PropertyExtractor)extractor.get(i)).extract(descriptors, descriptor, new
ByteArrayInputStream(content.getContentBytes()));
for ( Iterator j =
extractedProperties.entrySet().iterator(); j.hasNext(); ) {
Map.Entry entry = (Map.Entry) j.next();
final Object key = entry.getKey();
Modified:
jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java
URL:
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
---
jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java
(original)
+++
jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java
Sat Dec 31 03:47:45 2005
@@ -31,7 +31,10 @@
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+
import org.apache.slide.common.PropertyName;
+import org.apache.slide.content.NodeRevisionDescriptor;
+import org.apache.slide.content.NodeRevisionDescriptors;
import org.apache.slide.util.conf.Configurable;
import org.apache.slide.util.conf.Configuration;
import org.apache.slide.util.conf.ConfigurationException;
@@ -93,7 +96,7 @@
super(uri, contentType, namespace);
}
- public Map extract(InputStream content) throws ExtractorException {
+ public Map extract(NodeRevisionDescriptors descriptors,
NodeRevisionDescriptor descriptor, InputStream content) throws
ExtractorException {
Map properties = new HashMap();
try {
SAXBuilder saxBuilder = new SAXBuilder();
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]