Author: nick
Date: Thu Dec 27 05:02:17 2007
New Revision: 607063
URL: http://svn.apache.org/viewvc?rev=607063&view=rev
Log:
Improve the xlsx text extraction, and have proper tests for it
Added:
poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java
(with props)
Modified:
poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java
poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java
Modified:
poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java?rev=607063&r1=607062&r2=607063&view=diff
==============================================================================
---
poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java
(original)
+++
poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java
Thu Dec 27 05:02:17 2007
@@ -20,6 +20,7 @@
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hssf.HSSFXML;
+import org.apache.poi.hssf.usermodel.HSSFXMLCell;
import org.apache.poi.hssf.usermodel.HSSFXMLWorkbook;
import org.apache.xmlbeans.XmlException;
import org.openxml4j.exceptions.OpenXML4JException;
@@ -89,16 +90,18 @@
text.append("\t");
}
+ boolean done = false;
+
// Is it a formula one?
if(cell.getF() != null) {
if(formulasNotResults) {
text.append(cell.getF().getStringValue());
- } else {
-
text.append(cell.getV());
+ done = true;
}
- } else {
- // Probably just want
the v value
-
text.append(cell.getV());
+ }
+ if(!done) {
+ HSSFXMLCell uCell = new
HSSFXMLCell(cell);
+
text.append(uCell.getStringValue());
}
}
text.append("\n");
Added:
poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java?rev=607063&view=auto
==============================================================================
---
poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java
(added)
+++
poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java
Thu Dec 27 05:02:17 2007
@@ -0,0 +1,48 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hssf.usermodel;
+
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
+
+/**
+ * User-facing wrapper around an underlying raw {@link CTCell}, giving string access to the cell's contents
+ */
+public class HSSFXMLCell {
+ private CTCell cell; // the wrapped raw cell, stored exactly as passed to the constructor
+ public HSSFXMLCell(CTCell rawCell) { // no null check is done on rawCell here
+ this.cell = rawCell;
+ }
+
+ /**
+ * Returns the cell's contents as a string: the v value when set, otherwise the t text of the is element (presumably
+ * the inline string) when set, otherwise the s value rendered as a decimal number. No formatting is applied yet.
+ */
+ public String getStringValue() {
+ if(cell.getV() != null) {
+ return cell.getV();
+ }
+ if(cell.getIs() != null) {
+ return cell.getIs().getT();
+ }
+ // TODO: Formatting. NOTE(review): getS() looks like a style index rather than cell content - confirm this fallback is intended
+ return Long.toString(cell.getS());
+ }
+
+ public String toString() { // renders as the cell's r value, then " - ", then getStringValue()
+ return cell.getR() + " - " + getStringValue();
+ }
+}
Propchange:
poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified:
poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java?rev=607063&r1=607062&r2=607063&view=diff
==============================================================================
---
poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java
(original)
+++
poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java
Thu Dec 27 05:02:17 2007
@@ -66,10 +66,78 @@
String text = extractor.getText();
assertTrue(text.length() > 0);
- System.err.println(text);
// Check sheet names
assertTrue(text.startsWith("Sheet1"));
assertTrue(text.endsWith("Sheet3\n"));
+
+ // Now without sheet names, will have just the cell text
+ extractor.setIncludeSheetNames(false);
+ text = extractor.getText();
+ assertEquals(
+ "0\t111\n" +
+ "1\t222\n" +
+ "2\t333\n" +
+ "3\t444\n" +
+ "4\t555\n" +
+ "5\t666\n" +
+ "6\t777\n" +
+ "7\t888\n" +
+ "8\t999\n" +
+ "9\t4995\n" +
+ "\n\n", text);
+
+ // Now get formulas not their values
+ extractor.setFormulasNotResults(true);
+ text = extractor.getText();
+ assertEquals(
+ "0\t111\n" +
+ "1\t222\n" +
+ "2\t333\n" +
+ "3\t444\n" +
+ "4\t555\n" +
+ "5\t666\n" +
+ "6\t777\n" +
+ "7\t888\n" +
+ "8\t999\n" +
+ "9\tSUM(B1:B9)\n" +
+ "\n\n", text);
+
+ // With sheet names too
+ extractor.setIncludeSheetNames(true);
+ text = extractor.getText();
+ assertEquals(
+ "Sheet1\n" +
+ "0\t111\n" +
+ "1\t222\n" +
+ "2\t333\n" +
+ "3\t444\n" +
+ "4\t555\n" +
+ "5\t666\n" +
+ "6\t777\n" +
+ "7\t888\n" +
+ "8\t999\n" +
+ "9\tSUM(B1:B9)\n\n" +
+ "Sheet2\n\n" +
+ "Sheet3\n"
+ , text);
+ }
+
+ public void testGetComplexText() throws Exception { // checks text extraction against xmlB, which is set up elsewhere in this test class
+ new HXFExcelExtractor(xmlB.getPackage()); // both constructor forms are invoked; the instances are discarded
+ new HXFExcelExtractor(new HSSFXMLWorkbook(xmlB));
+
+ HXFExcelExtractor extractor =
+ new HXFExcelExtractor(xmlB.getPackage());
+ extractor.getText(); // result discarded; getText() is called again just below
+
+ String text = extractor.getText();
+ assertTrue(text.length() > 0);
+
+ // Only the start of the output is checked, as the extractor might not yet apply all the formatting it should
+ assertTrue(text.startsWith(
+ "Avgtxfull\n" +
+ "3\t13\t3\t2\t2\t3\t2\t"
+ ));
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]