Author: nick
Date: Fri Jan 8 16:18:52 2010
New Revision: 897249
URL: http://svn.apache.org/viewvc?rev=897249&view=rev
Log:
Rename the outlook extractor to be more consistent with other extractors
Added:
poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java
poi/trunk/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java
Removed:
poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/HSMFTextExtactor.java
poi/trunk/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestHSMFTextExtractor.java
Modified:
poi/trunk/src/documentation/content/xdocs/status.xml
poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
Modified: poi/trunk/src/documentation/content/xdocs/status.xml
URL:
http://svn.apache.org/viewvc/poi/trunk/src/documentation/content/xdocs/status.xml?rev=897249&r1=897248&r2=897249&view=diff
==============================================================================
--- poi/trunk/src/documentation/content/xdocs/status.xml (original)
+++ poi/trunk/src/documentation/content/xdocs/status.xml Fri Jan 8 16:18:52 2010
@@ -34,7 +34,7 @@
<changes>
<release version="3.7-SNAPSHOT" date="2010-??-??">
- <action dev="POI-DEVELOPERS" type="fix">Add a text extractor to HSMF for simpler extraction of text from .msg files</action>
+ <action dev="POI-DEVELOPERS" type="fix">Add a text extractor (OutlookTextExtractor) to HSMF for simpler extraction of text from .msg files</action>
<action dev="POI-DEVELOPERS" type="fix">Some improvements to HSMF parsing of .msg files</action>
<action dev="POI-DEVELOPERS" type="fix">Initialise the link type of HSSFHyperLink, so that getType() on it works</action>
<action dev="POI-DEVELOPERS" type="fix">48425 - improved performance of DateUtil.isCellDateFormatted() </action>
Modified:
poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java?rev=897249&r1=897248&r2=897249&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java Fri Jan 8 16:18:52 2010
@@ -31,7 +31,7 @@
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
-import org.apache.poi.hsmf.extractor.HSMFTextExtactor;
+import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
@@ -142,7 +142,7 @@
if(entry.getName().equals("__substg1.0_1000001E") ||
entry.getName().equals("__substg1.0_0047001E") ||
entry.getName().equals("__substg1.0_0037001E")) {
- return new HSMFTextExtactor(poifsDir, fs);
+ return new OutlookTextExtactor(poifsDir, fs);
}
}
throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
Modified:
poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java?rev=897249&r1=897248&r2=897249&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java Fri Jan 8 16:18:52 2010
@@ -25,7 +25,7 @@
import org.apache.poi.POIDataSamples;
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
-import org.apache.poi.hsmf.extractor.HSMFTextExtactor;
+import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@@ -169,7 +169,7 @@
// Outlook msg
assertTrue(
ExtractorFactory.createExtractor(msg)
- instanceof HSMFTextExtactor
+ instanceof OutlookTextExtactor
);
assertTrue(
ExtractorFactory.createExtractor(msg).getText().length() > 50
@@ -248,7 +248,7 @@
// Outlook msg
assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(msg))
- instanceof HSMFTextExtactor
+ instanceof OutlookTextExtactor
);
assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(msg)).getText().length() > 50
@@ -303,7 +303,7 @@
// Outlook msg
assertTrue(
ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(msg)))
- instanceof HSMFTextExtactor
+ instanceof OutlookTextExtactor
);
assertTrue(
ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(msg))).getText().length() > 50
Added:
poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java?rev=897249&view=auto
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java (added)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java Fri Jan 8 16:18:52 2010
@@ -0,0 +1,81 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hsmf.extractor;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.text.SimpleDateFormat;
+
+import org.apache.poi.POIOLE2TextExtractor;
+import org.apache.poi.hsmf.MAPIMessage;
+import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * A text extractor for HSMF (Outlook) .msg files.
+ * Outputs in a format somewhat like a plain text email.
+ */
+public class OutlookTextExtactor extends POIOLE2TextExtractor {
+ public OutlookTextExtactor(MAPIMessage msg) {
+ super(msg);
+ }
+ public OutlookTextExtactor(DirectoryNode poifsDir, POIFSFileSystem fs) throws IOException {
+ this(new MAPIMessage(poifsDir, fs));
+ }
+ public OutlookTextExtactor(POIFSFileSystem fs) throws IOException {
+ this(new MAPIMessage(fs));
+ }
+ public OutlookTextExtactor(InputStream inp) throws IOException {
+ this(new MAPIMessage(inp));
+ }
+
+ /**
+ * Outputs something a little like a RFC822 email
+ */
+ public String getText() {
+ MAPIMessage msg = (MAPIMessage)document;
+ StringBuffer s = new StringBuffer();
+
+ try {
+ s.append("From: " + msg.getDisplayFrom() + "\n");
+ } catch(ChunkNotFoundException e) {}
+ try {
+ s.append("To: " + msg.getDisplayTo() + "\n");
+ } catch(ChunkNotFoundException e) {}
+ try {
+ if(msg.getDisplayCC().length() > 0)
+ s.append("CC: " + msg.getDisplayCC() + "\n");
+ } catch(ChunkNotFoundException e) {}
+ try {
+ if(msg.getDisplayBCC().length() > 0)
+ s.append("BCC: " + msg.getDisplayBCC() + "\n");
+ } catch(ChunkNotFoundException e) {}
+ try {
+ SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss");
+ s.append("Date: " + f.format(msg.getMessageDate().getTime()) + "\n");
+ } catch(ChunkNotFoundException e) {}
+ try {
+ s.append("Subject: " + msg.getSubject() + "\n");
+ } catch(ChunkNotFoundException e) {}
+ try {
+ s.append("\n" + msg.getTextBody() + "\n");
+ } catch(ChunkNotFoundException e) {}
+
+ return s.toString();
+ }
+}
Added:
poi/trunk/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java?rev=897249&view=auto
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java (added)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java Fri Jan 8 16:18:52 2010
@@ -0,0 +1,95 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hsmf.extractor;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.hsmf.MAPIMessage;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * Tests to verify that the text extractor works
+ */
+public final class TestOutlookTextExtractor extends TestCase {
+ private POIDataSamples samples;
+
+ public TestOutlookTextExtractor() throws IOException {
+ samples = POIDataSamples.getHSMFInstance();
+ }
+
+ private void assertContains(String haystack, String needle) {
+ if(haystack.indexOf(needle) > -1) {
+ return;
+ }
+ fail("'" + needle + "' wasn't found in '" + haystack + "'");
+ }
+
+ public void testQuick() throws Exception {
+ POIFSFileSystem simple = new POIFSFileSystem(
+ new FileInputStream(samples.getFile("quick.msg"))
+ );
+ MAPIMessage msg = new MAPIMessage(simple);
+
+ OutlookTextExtactor ext = new OutlookTextExtactor(msg);
+ String text = ext.getText();
+
+ assertContains(text, "From: Kevin Roast\n");
+ assertContains(text, "To: Kevin Roast\n");
+ assertEquals(-1, text.indexOf("CC:"));
+ assertEquals(-1, text.indexOf("BCC:"));
+ assertContains(text, "Subject: Test the content transformer\n");
+ assertContains(text, "Date: Thu, 14 Jun 2007 09:42:55\n");
+ assertContains(text, "The quick brown fox jumps over the lazy dog");
+ }
+
+ public void testSimple() throws Exception {
+ MAPIMessage msg = new MAPIMessage(new POIFSFileSystem(
+ new FileInputStream(samples.getFile("simple_test_msg.msg"))
+ ));
+
+ OutlookTextExtactor ext = new OutlookTextExtactor(msg);
+ String text = ext.getText();
+
+ assertContains(text, "From: Travis Ferguson\n");
+ assertContains(text, "To: [email protected]\n");
+ assertEquals(-1, text.indexOf("CC:"));
+ assertEquals(-1, text.indexOf("BCC:"));
+ assertContains(text, "Subject: test message\n");
+ assertEquals(-1, text.indexOf("Date:"));
+ assertContains(text, "This is a test message.");
+ }
+
+ public void testConstructors() throws Exception {
+ String inp = (new OutlookTextExtactor(new FileInputStream(
+ samples.getFile("simple_test_msg.msg")
+ )).getText());
+ String poifs = (new OutlookTextExtactor(new POIFSFileSystem(new FileInputStream(
+ samples.getFile("simple_test_msg.msg")
+ ))).getText());
+ String mapi = (new OutlookTextExtactor(new MAPIMessage(new FileInputStream(
+ samples.getFile("simple_test_msg.msg")
+ ))).getText());
+
+ assertEquals(inp, poifs);
+ assertEquals(inp, mapi);
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org