Author: nick
Date: Thu Dec 27 04:40:05 2007
New Revision: 607058

URL: http://svn.apache.org/viewvc?rev=607058&view=rev
Log:
Make a start on a text extractor for xlsx files

Added:
    poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/POIXMLTextExtractor.java   (with props)
    poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/
    poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java   (with props)
    poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/
    poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLWorkbook.java   (with props)
    poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/
    poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java   (with props)
Modified:
    poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/POIXMLDocument.java

Modified: poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/POIXMLDocument.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/POIXMLDocument.java?rev=607058&r1=607057&r2=607058&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/POIXMLDocument.java (original)
+++ poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/POIXMLDocument.java Thu Dec 27 04:40:05 2007
@@ -16,6 +16,8 @@
 ==================================================================== */
 package org.apache.poi;
 
+import org.apache.poi.hxf.HXFDocument;
+
 /** 
  * Parent class of all UserModel POI XML (ooxml) 
  *  implementations.
@@ -23,5 +25,9 @@
  *  for the XML based classes.
  */
 public abstract class POIXMLDocument {
-       // TODO
+       private HXFDocument document;
+
+       protected POIXMLDocument(HXFDocument document) {
+               this.document = document;
+       }
 }

Added: poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/POIXMLTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/POIXMLTextExtractor.java?rev=607058&view=auto
==============================================================================
--- poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/POIXMLTextExtractor.java (added)
+++ poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/POIXMLTextExtractor.java Thu Dec 27 04:40:05 2007
@@ -0,0 +1,31 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi;
+
+public abstract class POIXMLTextExtractor extends POITextExtractor {
+       /** The POIXMLDocument that's open */
+       protected POIXMLDocument document;
+
+       /**
+        * Creates a new text extractor for the given document
+        */
+       public POIXMLTextExtractor(POIXMLDocument document) {
+               super(null);
+               
+               this.document = document;
+       }
+}

Propchange: poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/POIXMLTextExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java?rev=607058&view=auto
==============================================================================
--- poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java (added)
+++ poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java Thu Dec 27 04:40:05 2007
@@ -0,0 +1,113 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hssf.extractor;
+
+import java.io.IOException;
+
+import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.hssf.HSSFXML;
+import org.apache.poi.hssf.usermodel.HSSFXMLWorkbook;
+import org.apache.xmlbeans.XmlException;
+import org.openxml4j.exceptions.OpenXML4JException;
+import org.openxml4j.opc.Package;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCellFormula;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
+
+public class HXFExcelExtractor extends POIXMLTextExtractor {
+       private HSSFXMLWorkbook workbook;
+       private boolean includeSheetNames = true;
+       private boolean formulasNotResults = false;
+       
+       public HXFExcelExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
+               this(new HSSFXMLWorkbook(
+                               new HSSFXML(container)
+               ));
+       }
+       public HXFExcelExtractor(HSSFXMLWorkbook workbook) {
+               super(workbook);
+               this.workbook = workbook;
+       }
+
+       /**
+        * Should sheet names be included? Default is true
+        */
+       public void setIncludeSheetNames(boolean includeSheetNames) {
+               this.includeSheetNames = includeSheetNames;
+       }
+       /**
+        * Should we return the formula itself, and not
+        *  the result it produces? Default is false
+        */
+       public void setFormulasNotResults(boolean formulasNotResults) {
+               this.formulasNotResults = formulasNotResults;
+       }
+       
+       /**
+        * Retreives the text contents of the file
+        */
+       public String getText() {
+               StringBuffer text = new StringBuffer();
+               
+               CTSheet[] sheetRefs =
+                       workbook._getHSSFXML().getSheetReferences().getSheetArray();
+               for(int i=0; i<sheetRefs.length; i++) {
+                       try {
+                               CTWorksheet sheet =
+                                       workbook._getHSSFXML().getSheet(sheetRefs[i]);
+                               CTRow[] rows =
+                                       sheet.getSheetData().getRowArray();
+                               
+                               if(i > 0) {
+                                       text.append("\n");
+                               }
+                               if(includeSheetNames) {
+                                       text.append(sheetRefs[i].getName() + "\n");
+                               }
+                               
+                               for(int j=0; j<rows.length; j++) {
+                                       CTCell[] cells = rows[j].getCArray();
+                                       for(int k=0; k<cells.length; k++) {
+                                               CTCell cell = cells[k];
+                                               if(k > 0) {
+                                                       text.append("\t");
+                                               }
+                                               
+                                               // Is it a formula one?
+                                               if(cell.getF() != null) {
+                                                       if(formulasNotResults) {
+                                                               text.append(cell.getF().getStringValue());
+                                                       } else {
+                                                               text.append(cell.getV());
+                                                       }
+                                               } else {
+                                                       // Probably just want the v value
+                                                       text.append(cell.getV());
+                                               }
+                                       }
+                                       text.append("\n");
+                               }
+                       } catch(Exception e) {
+                               throw new RuntimeException(e);
+                       }
+               }
+               
+               return text.toString();
+       }
+}

Propchange: poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLWorkbook.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLWorkbook.java?rev=607058&view=auto
==============================================================================
--- poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLWorkbook.java (added)
+++ poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLWorkbook.java Thu Dec 27 04:40:05 2007
@@ -0,0 +1,33 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hssf.usermodel;
+
+import org.apache.poi.POIXMLDocument;
+import org.apache.poi.hssf.HSSFXML;
+
+public class HSSFXMLWorkbook extends POIXMLDocument {
+       private HSSFXML hssfXML;
+       
+       public HSSFXMLWorkbook(HSSFXML xml) {
+               super(xml);
+               this.hssfXML = xml;
+       }
+       
+       public HSSFXML _getHSSFXML() {
+               return hssfXML;
+       }
+}

Propchange: poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLWorkbook.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java?rev=607058&view=auto
==============================================================================
--- poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java (added)
+++ poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java Thu Dec 27 04:40:05 2007
@@ -0,0 +1,75 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hssf.extractor;
+
+import java.io.File;
+
+import org.apache.poi.hssf.HSSFXML;
+import org.apache.poi.hssf.usermodel.HSSFXMLWorkbook;
+import org.apache.poi.hxf.HXFDocument;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests for HXFExcelExtractor
+ */
+public class TestHXFExcelExtractor extends TestCase {
+       /**
+        * A very simple file
+        */
+       private HSSFXML xmlA;
+       /**
+        * A fairly complex file
+        */
+       private HSSFXML xmlB;
+
+       protected void setUp() throws Exception {
+               super.setUp();
+               
+               File fileA = new File(
+                               System.getProperty("HSSF.testdata.path") +
+                               File.separator + "sample.xlsx"
+               );
+               File fileB = new File(
+                               System.getProperty("HSSF.testdata.path") +
+                               File.separator + "AverageTaxRates.xlsx"
+               );
+               
+               xmlA = new HSSFXML(HXFDocument.openPackage(fileA));
+               xmlB = new HSSFXML(HXFDocument.openPackage(fileB));
+       }
+
+       /**
+        * Get text out of the simple file
+        */
+       public void testGetSimpleText() throws Exception {
+               new HXFExcelExtractor(xmlA.getPackage());
+               new HXFExcelExtractor(new HSSFXMLWorkbook(xmlA));
+               
+               HXFExcelExtractor extractor = 
+                       new HXFExcelExtractor(xmlA.getPackage());
+               extractor.getText();
+               
+               String text = extractor.getText();
+               assertTrue(text.length() > 0);
+               System.err.println(text);
+               
+               // Check sheet names
+               assertTrue(text.startsWith("Sheet1"));
+               assertTrue(text.endsWith("Sheet3\n"));
+       }
+}

Propchange: poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native



---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to