Author: jukka
Date: Sun Jan 25 21:08:53 2009
New Revision: 737581
URL: http://svn.apache.org/viewvc?rev=737581&view=rev
Log:
TIKA-189: Text extraction from Excel files juxtaposes cells
It seems like row and column numbering starts at zero. Fixed the code so that
values in the first two columns don't get concatenated.
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
lucene/tika/trunk/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java?rev=737581&r1=737580&r2=737581&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java Sun Jan 25 21:08:53 2009
@@ -343,8 +343,8 @@
handler.startElement("tbody");
// Process Rows
- int currentRow = 1;
- int currentColumn = 1;
+ int currentRow = 0;
+ int currentColumn = 0;
handler.startElement("tr");
handler.startElement("td");
for (Map.Entry<Point, Cell> entry : currentSheet.entrySet()) {
@@ -354,7 +354,7 @@
handler.startElement("tr");
handler.startElement("td");
currentRow++;
- currentColumn = 1;
+ currentColumn = 0;
}
while (currentColumn < entry.getKey().x) {
Modified:
lucene/tika/trunk/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java?rev=737581&r1=737580&r2=737581&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java (original)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java Sun Jan 25 21:08:53 2009
@@ -42,10 +42,12 @@
String content = handler.toString();
assertTrue(content.contains("Sample Excel Worksheet"));
assertTrue(content.contains("Numbers and their Squares"));
+ assertTrue(content.contains("\t\tNumber\tSquare"));
assertTrue(content.contains("9"));
assertFalse(content.contains("9.0"));
assertTrue(content.contains("196"));
assertFalse(content.contains("196.0"));
+ System.out.println(content);
} finally {
input.close();
}