Author: nick
Date: Mon Jan 11 14:55:43 2010
New Revision: 897887

URL: http://svn.apache.org/viewvc?rev=897887&view=rev
Log:
Add PublisherTextExtractor support to ExtractorFactory

Modified:
    poi/trunk/src/documentation/content/xdocs/status.xml
    poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java

Modified: poi/trunk/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/trunk/src/documentation/content/xdocs/status.xml?rev=897887&r1=897886&r2=897887&view=diff
==============================================================================
--- poi/trunk/src/documentation/content/xdocs/status.xml (original)
+++ poi/trunk/src/documentation/content/xdocs/status.xml Mon Jan 11 14:55:43 2010
@@ -34,6 +34,7 @@
 
     <changes>
         <release version="3.7-SNAPSHOT" date="2010-??-??">
+           <action dev="POI-DEVELOPERS" type="add">Add PublisherTextExtractor support to ExtractorFactory</action>
            <action dev="POI-DEVELOPERS" type="add">Add XSLF support for text extraction from tables</action>
            <action dev="POI-DEVELOPERS" type="add">Support attachments as embeded documents within the new OutlookTextExtractor</action>
            <action dev="POI-DEVELOPERS" type="add">Add a text extractor (OutlookTextExtractor) to HSMF for simpler extraction of text from .msg files</action>

Modified: poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java?rev=897887&r1=897886&r2=897887&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java Mon Jan 11 14:55:43 2010
@@ -31,6 +31,7 @@
 import org.apache.poi.POIXMLDocument;
 import org.apache.poi.POIXMLTextExtractor;
 import org.apache.poi.hdgf.extractor.VisioTextExtractor;
+import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
 import org.apache.poi.hslf.extractor.PowerPointExtractor;
 import org.apache.poi.hsmf.MAPIMessage;
 import org.apache.poi.hsmf.datatypes.AttachmentChunks;
@@ -142,6 +143,9 @@
                        if(entry.getName().equals("VisioDocument")) {
                                return new VisioTextExtractor(poifsDir, fs);
                        }
+         if(entry.getName().equals("Quill")) {
+            return new PublisherTextExtractor(poifsDir, fs);
+         }
                        if(
                              entry.getName().equals("__substg1.0_1000001E") ||
                entry.getName().equals("__substg1.0_1000001F") ||

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java?rev=897887&r1=897886&r2=897887&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java Mon Jan 11 14:55:43 2010
@@ -24,6 +24,7 @@
 import org.apache.poi.POITextExtractor;
 import org.apache.poi.POIDataSamples;
 import org.apache.poi.hdgf.extractor.VisioTextExtractor;
+import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
 import org.apache.poi.hslf.extractor.PowerPointExtractor;
 import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
 import org.apache.poi.hssf.extractor.ExcelExtractor;
@@ -62,6 +63,8 @@
    private File msgEmb;
    
    private File vsd;
+   
+   private File pub;
 
    protected void setUp() throws Exception {
       super.setUp();
@@ -86,6 +89,9 @@
       POIDataSamples dgTests = POIDataSamples.getDiagramInstance();
       vsd = dgTests.getFile("Test_Visio-Some_Random_Text.vsd");
       
+      POIDataSamples pubTests = POIDataSamples.getPublisherInstance();
+      pub = pubTests.getFile("Simple.pub");
+      
       POIDataSamples olTests = POIDataSamples.getHSMFInstance();
       msg = olTests.getFile("quick.msg");
       msgEmb = olTests.getFile("attachment_test_msg.msg");
@@ -169,6 +175,15 @@
             ExtractorFactory.createExtractor(vsd).getText().length() > 50
       );
       
+      // Publisher
+      assertTrue(
+            ExtractorFactory.createExtractor(pub)
+            instanceof PublisherTextExtractor
+      );
+      assertTrue(
+            ExtractorFactory.createExtractor(pub).getText().length() > 50
+      );
+      
       // Outlook msg
       assertTrue(
             ExtractorFactory.createExtractor(msg)
@@ -248,6 +263,15 @@
                                ExtractorFactory.createExtractor(new FileInputStream(vsd)).getText().length() > 50
                );
                
+      // Publisher
+      assertTrue(
+            ExtractorFactory.createExtractor(new FileInputStream(pub))
+            instanceof PublisherTextExtractor
+      );
+      assertTrue(
+            ExtractorFactory.createExtractor(new FileInputStream(pub)).getText().length() > 50
+      );
+      
                // Outlook msg
       assertTrue(
             ExtractorFactory.createExtractor(new FileInputStream(msg))
@@ -302,6 +326,15 @@
                assertTrue(
                                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd))).getText().length() > 50
                );
+      
+      // Publisher
+      assertTrue(
+            ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(pub)))
+            instanceof PublisherTextExtractor
+      );
+      assertTrue(
+            ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(pub))).getText().length() > 50
+      );
                
       // Outlook msg
       assertTrue(
@@ -426,6 +459,7 @@
       assertEquals(1, numWord);
 
       // TODO - PowerPoint
+      // TODO - Publisher
       // TODO - Visio
    }
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java?rev=897887&r1=897886&r2=897887&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java Mon Jan 11 14:55:43 2010
@@ -26,6 +26,7 @@
 import org.apache.poi.hpbf.model.qcbits.QCBit;
 import org.apache.poi.hpbf.model.qcbits.QCTextBit;
 import org.apache.poi.hpbf.model.qcbits.QCPLCBit.Type12;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 
 /**
@@ -39,6 +40,9 @@
                super(doc);
                this.doc = doc;
        }
+   public PublisherTextExtractor(DirectoryNode dir, POIFSFileSystem fs) throws IOException {
+      this(new HPBFDocument(dir, fs));
+   }
        public PublisherTextExtractor(POIFSFileSystem fs) throws IOException {
                this(new HPBFDocument(fs));
        }



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to