Author: jukka
Date: Wed Mar 26 11:58:16 2008
New Revision: 641482

URL: http://svn.apache.org/viewvc?rev=641482&view=rev
Log:
TIKA-132: Refactor Excel extractor to parse per sheet and add hyperlink support
    - Further refactoring to simplify cell value handling

Modified:
    incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java?rev=641482&r1=641481&r2=641482&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java Wed Mar 26 11:58:16 2008
@@ -270,12 +270,7 @@
                             && record instanceof CellValueRecordInterface) {
                         CellValueRecordInterface value =
                             (CellValueRecordInterface) record;
-                        Cell cell = getCellValue(record.getSid(), value);
-                        if (cell != null) {
-                            Point point =
-                                new Point(value.getColumn(), value.getRow());
-                            currentSheet.put(point, cell);
-                        }
+                        addCell(record, getCellValue(record.getSid(), value));
                     }
                     break;
             }
@@ -290,8 +285,6 @@
         private Cell getCellValue(
                 short sid, CellValueRecordInterface record)
                 throws SAXException {
-
-            String text = null;
             switch (sid) {
                 /* FormulaRecord: Cell value from a formula */
                 case FormulaRecord.sid:
@@ -299,15 +292,13 @@
 
                 /* LabelRecord: strings stored directly in the cell */
                 case LabelRecord.sid:
-                    text = ((LabelRecord)record).getValue();
-                    break;
+                    return getTextCell(((LabelRecord) record).getValue());
 
                 /* LabelSSTRecord: Ref. a string in the shared string table */
                 case LabelSSTRecord.sid:
                     LabelSSTRecord labelSSTRecord = (LabelSSTRecord) record;
                     int sstIndex = labelSSTRecord.getSSTIndex();
-                    text = sstRecord.getString(sstIndex).getString();
-                    break;
+                    return getTextCell(sstRecord.getString(sstIndex).getString());
 
                 /* NumberRecord: Contains a numeric cell value */
                 case NumberRecord.sid:
@@ -317,11 +308,42 @@
                 case RKRecord.sid:
                     return new NumberCell(((RKRecord)record).getRKNumber());
             }
+            return null;
+        }
+
+        /**
+         * Adds the given cell (unless <code>null</code>) to the current
+         * worksheet (if any) at the position (if any) of the given record.
+         *
+         * @param record record that holds the cell value
+         * @param cell cell value (or <code>null</code>)
+         */
+        private void addCell(Record record, Cell cell) {
+            if (!insideWorksheet) {
+                // Ignore cells outside sheets
+            } else if (cell == null) {
+                // Ignore empty cells
+            } else if (record instanceof CellValueRecordInterface) {
+                CellValueRecordInterface value =
+                    (CellValueRecordInterface) record;
+                Point point = new Point(value.getColumn(), value.getRow());
+                currentSheet.put(point, cell);
+            }
+        }
+
+        /**
+         * Returns a text cell with the given text content. The given text
+         * is trimmed, and ignored if <code>null</code> or empty.
+         *
+         * @param text text content, may be <code>null</code>
+         * @return text cell, or <code>null</code>
+         */
+        private Cell getTextCell(String text) {
             if (text != null) {
                 text = text.trim();
-            }
-            if (text != null && text.length() > 0) {
-                return new TextCell(text);
+                if (text.length() > 0) {
+                    return new TextCell(text);
+                }
             }
             return null;
         }


Reply via email to