Author: jukka
Date: Sun Jan 20 06:46:17 2008
New Revision: 613566

URL: http://svn.apache.org/viewvc?rev=613566&view=rev
Log:
TIKA-105: Excel parser implementation based on POI's Event API
    - Replaced ExcelParser with ExcelEventParser
    - Use a setter for listenForAllRecords
      (JavaBean properties are more flexible
      than constructor arguments)
    - Use debug logging for all output
    - Removed some of the explicit log.isDebugEnabled() checks
      (simplicity over insignificant performance gains)
    - Inlined the trivial debug(Record) method

Added:
    
incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelParser.java
      - copied, changed from r613561, 
incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelEventParser.java
Removed:
    
incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelEventParser.java

Copied: 
incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelParser.java
 (from r613561, 
incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelEventParser.java)
URL: 
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelParser.java?p2=incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelParser.java&p1=incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelEventParser.java&r1=613561&r2=613566&rev=613566&view=diff
==============================================================================
--- 
incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelEventParser.java
 (original)
+++ 
incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelParser.java
 Sun Jan 20 06:46:17 2008
@@ -51,10 +51,6 @@
  * Excel parser implementation which uses POI's Event API
  * to handle the contents of a Workbook.
  * <p>
- * This is an alternative implementation to Tika's
- * {@link ExcelParser} implementation which uses POI's
- * <code>HSSFWorkbook</code> to parse excel files.
- * <p>
  * The Event API uses a much smaller memory footprint than
  * <code>HSSFWorkbook</code> when processing excel files
  * but at the cost of more complexity.
@@ -68,40 +64,40 @@
  * @see <a href="http://poi.apache.org/hssf/how-to.html#event_api">
  * POI Event API How To</a>
  */
-public class ExcelEventParser extends OfficeParser implements Serializable {
+public class ExcelParser extends OfficeParser implements Serializable {
 
     /** Logging instance */
-    private static Log log = LogFactory.getLog(ExcelEventParser.class);
+    private static Log log = LogFactory.getLog(ExcelParser.class);
 
     /**
      * <code>true</code> if the HSSFListener should be registered
-     * to listen for all records or <code>false</code> if the listener
-     * should be configured to only receive specified records.
+     * to listen for all records or <code>false</code> (the default)
+     * if the listener should be configured to only receive specified
+     * records.
      */
-    private final boolean listenForAllRecords;
+    private boolean listenForAllRecords = false;
 
     /**
-     * Create an instance which only listens for the specified
-     * records (i.e. <code>listenForAllRecords</code> is
-     * <code>false</code>).
+     * Returns <code>true</code> if this parser is configured to listen
+     * for all records instead of just the specified few.
      */
-    public ExcelEventParser() {
-        this(false);
+    public boolean isListenForAllRecords() {
+        return listenForAllRecords;
     }
 
     /**
-     * Create an instance specifying whether to listen for all
+     * Specifies whether this parser should listen for all
      * records or just for the specified few.
      * <p>
-     * <strong>Note</strong> This constructor is intended primarily
-     * for testing and debugging - under normal operation
-     * <code>listenForAllRecords</code> should be <code>false</code>.
+     * <strong>Note:</strong> Under normal operation this setting should
+     * be <code>false</code> (the default), but you can experiment with
+     * this setting for testing and debugging purposes.
      *
      * @param listenForAllRecords <code>true</code> if the HSSFListener
      * should be registered to listen for all records or <code>false</code>
      * if the listener should be configured to only receive specified records.
      */
-    public ExcelEventParser(boolean listenForAllRecords) {
+    public void setListenForAllRecords(boolean listenForAllRecords) {
         this.listenForAllRecords = listenForAllRecords;
     }
 
@@ -125,10 +121,7 @@
      */
     protected void extractText(final POIFSFileSystem filesystem,
             final Appendable appendable) throws IOException {
-
-        if (log.isInfoEnabled()) {
-            log.info("Starting listenForAllRecords=" + listenForAllRecords);
-        }
+        log.debug("Starting listenForAllRecords=" + listenForAllRecords);
 
         // Set up listener and register the records we want to process
         TikaHSSFListener listener = new TikaHSSFListener(appendable);
@@ -156,9 +149,7 @@
         HSSFEventFactory eventFactory = new HSSFEventFactory();
         eventFactory.processEvents(hssfRequest, documentInputStream);
 
-        if (log.isInfoEnabled()) {
-            log.info("Processed " + listener.getRecordCount() + " records");
-        }
+        log.debug("Processed " + listener.getRecordCount() + " records");
     }
 
     // ======================================================================
@@ -169,7 +160,7 @@
     private static class TikaHSSFListener implements HSSFListener, 
Serializable {
 
         /** Logging instance */
-        private static Log log = LogFactory.getLog(ExcelEventParser.class);
+        private static Log log = LogFactory.getLog(ExcelParser.class);
 
         private final Appendable appendable;
         private int recordCount;
@@ -230,23 +221,20 @@
                             if (currentSheetIndex < sheetNames.size()) {
                                 currentSheetName = 
sheetNames.get(currentSheetIndex);
                             }
-                            if (log.isDebugEnabled()) {
-                                debug(record, ".Worksheet[" + currentSheetIndex
-                                        + "], Name=[" + currentSheetName + 
"]");
-                            }
+                            debug(record,
+                                    ".Worksheet[" + currentSheetIndex
+                                    + "], Name=[" + currentSheetName + "]");
                             addText(currentSheetName);
                             break;
                         default:
-                            if (log.isDebugEnabled()) {
-                                debug(record, "[" + bofRecordType + "]");
-                            }
+                            debug(record, "[" + bofRecordType + "]");
                             break;
                     }
                     break;
 
                 /* EOFRecord: indicates end of workbook, worksheet etc. 
records */
                 case EOFRecord.sid:
-                    debug(record);
+                    debug(record, "");
                     bofRecordType = 0;
                     break;
 
@@ -254,9 +242,7 @@
                 case DateWindow1904Record.sid:
                     DateWindow1904Record dw1904Rec = 
(DateWindow1904Record)record;
                     use1904windowing = (dw1904Rec.getWindowing() == 1);
-                    if (log.isDebugEnabled()) {
-                        debug(record, "[" + use1904windowing + "]");
-                    }
+                    debug(record, "[" + use1904windowing + "]");
                     break;
 
                 /* CountryRecord: holds the default and current country */
@@ -264,16 +250,15 @@
                     CountryRecord countryRecord = (CountryRecord)record;
                     defualtCountry = countryRecord.getDefaultCountry();
                     currentCountry = countryRecord.getCurrentCountry();
-                    if (log.isDebugEnabled()) {
-                        debug(record, " default=[" + defualtCountry
-                                + "], current=[" + currentCountry + "]");
-                    }
+                    debug(record,
+                            " default=[" + defualtCountry
+                            + "], current=[" + currentCountry + "]");
                     break;
 
                 /* SSTRecord: holds all the strings for LabelSSTRecords */
                 case SSTRecord.sid:
                     sstRecord = (SSTRecord)record;
-                    debug(record);
+                    debug(record, "");
                     break;
 
                 /* BoundSheetRecord: Worksheet index record */
@@ -281,10 +266,9 @@
                     BoundSheetRecord boundSheetRecord = 
(BoundSheetRecord)record;
                     String sheetName = boundSheetRecord.getSheetname();
                     sheetNames.add(sheetName);
-                    if (log.isDebugEnabled()) {
-                        debug(record, "[" + sheetNames.size()
-                                + "], Name=[" + sheetName + "]");
-                    }
+                    debug(record,
+                            "[" + sheetNames.size()
+                            + "], Name=[" + sheetName + "]");
                     break;
 
                 /* FormatRecord */
@@ -293,9 +277,7 @@
                     String dataFormat = formatRecord.getFormatString();
                     short formatIdx = formatRecord.getIndexCode();
                     formats.put(formatIdx, dataFormat);
-                    if (log.isDebugEnabled()) {
-                        debug(record, "[" + formatIdx + "]=[" + dataFormat + 
"]");
-                    }
+                    debug(record, "[" + formatIdx + "]=[" + dataFormat + "]");
                     break;
 
                 /* ExtendedFormatRecord */
@@ -305,10 +287,9 @@
                         short dataFormatIdx = xFormatRecord.getFormatIndex();
                         if (dataFormatIdx > 0) {
                             extendedFormats.put(currentXFormatIdx, 
dataFormatIdx);
-                            if (log.isDebugEnabled()) {
-                                debug(record, "[" + currentXFormatIdx
-                                        + "]=FormatRecord[" + dataFormatIdx + 
"]");
-                            }
+                            debug(record,
+                                    "[" + currentXFormatIdx
+                                    + "]=FormatRecord[" + dataFormatIdx + "]");
                         }
                     }
                     currentXFormatIdx++;
@@ -319,7 +300,7 @@
                             && record instanceof CellValueRecordInterface) {
                         processCellValue(sid, 
(CellValueRecordInterface)record);
                     } else {
-                        debug(record);
+                        debug(record, "");
                     }
                     break;
             }
@@ -453,15 +434,6 @@
                 }
             }
             return text;
-        }
-
-        /**
-         * Record debugging.
-         *
-         * @param record The Record
-         */
-        private void debug(Record record) {
-            debug(record, "");
         }
 
         /**


Reply via email to