Author: nick
Date: Thu Dec 27 05:02:17 2007
New Revision: 607063

URL: http://svn.apache.org/viewvc?rev=607063&view=rev
Log:
Improve the xlsx text extraction, and have proper tests for it

Added:
    
poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java
   (with props)
Modified:
    
poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java
    
poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java

Modified: 
poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java?rev=607063&r1=607062&r2=607063&view=diff
==============================================================================
--- 
poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java
 (original)
+++ 
poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java
 Thu Dec 27 05:02:17 2007
@@ -20,6 +20,7 @@
 
 import org.apache.poi.POIXMLTextExtractor;
 import org.apache.poi.hssf.HSSFXML;
+import org.apache.poi.hssf.usermodel.HSSFXMLCell;
 import org.apache.poi.hssf.usermodel.HSSFXMLWorkbook;
 import org.apache.xmlbeans.XmlException;
 import org.openxml4j.exceptions.OpenXML4JException;
@@ -89,16 +90,18 @@
                                                        text.append("\t");
                                                }
                                                
+                                               boolean done = false;
+                                               
                                                // Is it a formula one?
                                                if(cell.getF() != null) {
                                                        if(formulasNotResults) {
                                                                
text.append(cell.getF().getStringValue());
-                                                       } else {
-                                                               
text.append(cell.getV());
+                                                               done = true;
                                                        }
-                                               } else {
-                                                       // Probably just want 
the v value
-                                                       
text.append(cell.getV());
+                                               }
+                                               if(!done) {
+                                                       HSSFXMLCell uCell = new 
HSSFXMLCell(cell);
+                                                       
text.append(uCell.getStringValue());
                                                }
                                        }
                                        text.append("\n");

Added: 
poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java?rev=607063&view=auto
==============================================================================
--- 
poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java
 (added)
+++ 
poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java
 Thu Dec 27 05:02:17 2007
@@ -0,0 +1,48 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hssf.usermodel;
+
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
+
+/**
+ * User facing wrapper around an underlying cell object.
+ * Holds a reference to the raw {@link CTCell} it was constructed
+ *  with and reads from it on every call; nothing is copied or cached.
+ */
+public class HSSFXMLCell {
+       private CTCell cell; // the wrapped raw cell; assigned only in the constructor
+       public HSSFXMLCell(CTCell rawCell) { // stores rawCell as-is; no null check is made here
+               this.cell = rawCell;
+       }
+       
+       /**
+        * Returns the cell's contents as a string. No type-based
+        *  formatting is applied yet (see the TODO in the body).
+        * The checks are made in this order: a non-null getV() is
+        *  returned unchanged; otherwise, if getIs() is non-null, its
+        *  getT() is returned; otherwise the result of getS() is
+        *  returned as a decimal string.
+        * NOTE(review): getS() looks like the cell's style index rather
+        *  than cell content - confirm this last fallback is intended.
+        *
+        * @return the string chosen by the checks described above
+        */
+       public String getStringValue() {
+               if(cell.getV() != null) {
+                       return cell.getV();
+               }
+               if(cell.getIs() != null) {
+                       return cell.getIs().getT();
+               }
+               // TODO: Formatting
+               return Long.toString(cell.getS());
+       }
+       
+       public String toString() { // the result of getR(), then " - ", then getStringValue()
+               return cell.getR() + " - " + getStringValue(); 
+       }
+}

Propchange: 
poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: 
poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java?rev=607063&r1=607062&r2=607063&view=diff
==============================================================================
--- 
poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java
 (original)
+++ 
poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java
 Thu Dec 27 05:02:17 2007
@@ -66,10 +66,78 @@
                
                String text = extractor.getText();
                assertTrue(text.length() > 0);
-               System.err.println(text);
                
                // Check sheet names
                assertTrue(text.startsWith("Sheet1"));
                assertTrue(text.endsWith("Sheet3\n"));
+               
+               // Now without sheet names, will have just the cell text
+               extractor.setIncludeSheetNames(false);
+               text = extractor.getText();
+               assertEquals(
+                               "0\t111\n" +
+                               "1\t222\n" +
+                               "2\t333\n" +
+                               "3\t444\n" +
+                               "4\t555\n" +
+                               "5\t666\n" +
+                               "6\t777\n" +
+                               "7\t888\n" +
+                               "8\t999\n" +
+                               "9\t4995\n" +
+                               "\n\n", text);
+               
+               // Now get formulas not their values
+               extractor.setFormulasNotResults(true);
+               text = extractor.getText();
+               assertEquals(
+                               "0\t111\n" +
+                               "1\t222\n" +
+                               "2\t333\n" +
+                               "3\t444\n" +
+                               "4\t555\n" +
+                               "5\t666\n" +
+                               "6\t777\n" +
+                               "7\t888\n" +
+                               "8\t999\n" +
+                               "9\tSUM(B1:B9)\n" +
+                               "\n\n", text);
+               
+               // With sheet names too
+               extractor.setIncludeSheetNames(true);
+               text = extractor.getText();
+               assertEquals(
+                               "Sheet1\n" +
+                               "0\t111\n" +
+                               "1\t222\n" +
+                               "2\t333\n" +
+                               "3\t444\n" +
+                               "4\t555\n" +
+                               "5\t666\n" +
+                               "6\t777\n" +
+                               "7\t888\n" +
+                               "8\t999\n" +
+                               "9\tSUM(B1:B9)\n\n" +
+                               "Sheet2\n\n" +
+                               "Sheet3\n"
+                               , text);
+       }
+       
+       public void testGetComplexText() throws Exception { // checks how the text extracted from xmlB begins
+               new HXFExcelExtractor(xmlB.getPackage()); // result discarded; presumably a smoke test that building from a package succeeds
+               new HXFExcelExtractor(new HSSFXMLWorkbook(xmlB)); // result discarded; presumably the same check for building from a workbook
+               
+               HXFExcelExtractor extractor = 
+                       new HXFExcelExtractor(xmlB.getPackage());
+               extractor.getText(); // result discarded; getText() is called again just below
+               
+               String text = extractor.getText();
+               assertTrue(text.length() > 0);
+               
+               // Might not have all formatting it should do!
+               assertTrue(text.startsWith( // only the start of the text is compared, not the whole output
+                                               "Avgtxfull\n" +
+                                               "3\t13\t3\t2\t2\t3\t2\t"        
+               ));
        }
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to