Author: jukka
Date: Thu Jun 25 14:36:26 2009
New Revision: 788364

URL: http://svn.apache.org/viewvc?rev=788364&view=rev
Log:
TIKA-148: The ExcelParsing should scan the cell comments

Patch contributed by Maxim Valyanskiy.

Modified:
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java

Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java?rev=788364&r1=788363&r2=788364&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java Thu Jun 25 14:36:26 2009
@@ -40,6 +40,7 @@
 import org.apache.poi.hssf.record.FormatRecord;
 import org.apache.poi.hssf.record.FormulaRecord;
 import org.apache.poi.hssf.record.HyperlinkRecord;
+import org.apache.poi.hssf.record.TextObjectRecord;
 import org.apache.poi.hssf.record.UnicodeString;
 //import org.apache.poi.hssf.record.HyperlinkRecord;  // FIXME - requires POI release
 import org.apache.poi.hssf.record.LabelRecord;
@@ -134,6 +135,7 @@
             hssfRequest.addListener(listener, NumberRecord.sid);
             hssfRequest.addListener(listener, RKRecord.sid);
             hssfRequest.addListener(listener, HyperlinkRecord.sid);
+            hssfRequest.addListener(listener, TextObjectRecord.sid);
         }
 
         // Create event factory and process Workbook (fire events)
@@ -288,6 +290,10 @@
                     }
                 }
                 break;
+            case TextObjectRecord.sid:
+                TextObjectRecord tor = (TextObjectRecord) record;
+                addTextCell(record, tor.getStr().getString());
+                break;
             }
         }
 
@@ -298,16 +304,21 @@
          * @param record record that holds the cell value
          * @param cell cell value (or <code>null</code>)
          */
-        private void addCell(Record record, Cell cell) {
-            if (currentSheet == null) {
-                // Ignore cells outside sheets
-            } else if (cell == null) {
+        private void addCell(Record record, Cell cell) throws SAXException {
+            if (cell == null) {
                 // Ignore empty cells
-            } else if (record instanceof CellValueRecordInterface) {
+            } else if (currentSheet != null
+                    && record instanceof CellValueRecordInterface) {
+                // Normal cell inside a worksheet
                 CellValueRecordInterface value =
                     (CellValueRecordInterface) record;
                 Point point = new Point(value.getColumn(), value.getRow());
                 currentSheet.put(point, cell);
+            } else {
+                // Cell outside the worksheets
+                handler.startElement("div", "class", "outside");
+                cell.render(handler);
+                handler.endElement("div");
             }
         }
 
@@ -317,8 +328,9 @@
          *
          * @param record record that holds the text value
          * @param text text content, may be <code>null</code>
+         * @throws SAXException 
          */
-        private void addTextCell(Record record, String text) {
+        private void addTextCell(Record record, String text) throws SAXException {
             if (text != null) {
                 text = text.trim();
                 if (text.length() > 0) {


Reply via email to