Author: tallison
Date: Tue Jun 20 18:11:34 2017
New Revision: 1799360
URL: http://svn.apache.org/viewvc?rev=1799360&view=rev
Log:
bug 61045 -- allow for (and log!) extra bytes in FormatRecord.
Added:
poi/trunk/test-data/spreadsheet/61045_govdocs1_626534.xls (with props)
Modified:
poi/site/src/documentation/content/xdocs/status.xml
poi/trunk/src/java/org/apache/poi/hssf/record/DimensionsRecord.java
poi/trunk/src/java/org/apache/poi/hssf/record/FormatRecord.java
poi/trunk/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
Modified: poi/site/src/documentation/content/xdocs/status.xml
URL:
http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/status.xml?rev=1799360&r1=1799359&r2=1799360&view=diff
==============================================================================
--- poi/site/src/documentation/content/xdocs/status.xml (original)
+++ poi/site/src/documentation/content/xdocs/status.xml Tue Jun 20 18:11:34 2017
@@ -58,6 +58,7 @@
<release version="3.17-beta1" date="2017-07-??">
<actions>
+ <action dev="PD" type="fix" fixes-bug="61045" module="HSSF">Allow for extra bytes in FormatRecord</action>
<action dev="PD" type="add" fixes-bug="52063" module="SS
Common">Add formula support for LOOKUP(lookup_value, array).</action>
<action dev="PD" type="fix" fixes-bug="61169" module="SL Common">Text
with Japanese characters overflows textbox</action>
<action dev="PD" type="add" module="XSSF">XSSFTable improved support
for creating columns and setting table areas, without needing to use CT
classes</action>
Modified: poi/trunk/src/java/org/apache/poi/hssf/record/DimensionsRecord.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/hssf/record/DimensionsRecord.java?rev=1799360&r1=1799359&r2=1799360&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/hssf/record/DimensionsRecord.java
(original)
+++ poi/trunk/src/java/org/apache/poi/hssf/record/DimensionsRecord.java Tue Jun
20 18:11:34 2017
@@ -20,6 +20,8 @@
package org.apache.poi.hssf.record;
import org.apache.poi.util.LittleEndianOutput;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
/**
* Title: Dimensions Record<P>
@@ -32,6 +34,9 @@ import org.apache.poi.util.LittleEndianO
*/
public final class DimensionsRecord extends StandardRecord implements
Cloneable {
+
+ private static final POILogger logger =
POILogFactory.getLogger(DimensionsRecord.class);
+
public final static short sid = 0x200;
private int field_1_first_row;
private int field_2_last_row; // plus 1
@@ -50,6 +55,11 @@ public final class DimensionsRecord exte
field_3_first_col = in.readShort();
field_4_last_col = in.readShort();
field_5_zero = in.readShort();
+ //POI-61045 -- in practice, there can be an extra 2 bytes
+ if (in.available() == 2) {
+ logger.log(POILogger.INFO, "DimensionsRecord has extra 2 bytes.");
+ in.readShort();
+ }
}
/**
Modified: poi/trunk/src/java/org/apache/poi/hssf/record/FormatRecord.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/hssf/record/FormatRecord.java?rev=1799360&r1=1799359&r2=1799360&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/hssf/record/FormatRecord.java (original)
+++ poi/trunk/src/java/org/apache/poi/hssf/record/FormatRecord.java Tue Jun 20
18:11:34 2017
@@ -18,7 +18,10 @@
package org.apache.poi.hssf.record;
import org.apache.poi.util.HexDump;
+import org.apache.poi.util.LittleEndianConsts;
import org.apache.poi.util.LittleEndianOutput;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
import org.apache.poi.util.StringUtil;
/**
@@ -28,6 +31,9 @@ import org.apache.poi.util.StringUtil;
* REFERENCE: PG 317 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)
*/
public final class FormatRecord extends StandardRecord implements Cloneable {
+
+ private static final POILogger logger =
POILogFactory.getLogger(FormatRecord.class);
+
public final static short sid = 0x041E;
private final int field_1_index_code;
@@ -52,9 +58,9 @@ public final class FormatRecord extends
field_3_hasMultibyte = (in.readByte() & 0x01) != 0;
if (field_3_hasMultibyte) {
- field_4_formatstring = in.readUnicodeLEString(field_3_unicode_len);
+ field_4_formatstring = readStringCommon(in, field_3_unicode_len,
false);
} else {
- field_4_formatstring =
in.readCompressedUnicode(field_3_unicode_len);
+ field_4_formatstring = readStringCommon(in, field_3_unicode_len,
true);
}
}
@@ -113,4 +119,55 @@ public final class FormatRecord extends
public FormatRecord clone() {
return new FormatRecord(this);
}
+
+ private static String readStringCommon(RecordInputStream ris, int
requestedLength, boolean pIsCompressedEncoding) {
+ //custom copy of ris.readUnicodeLEString to allow for extra bytes at the end
+
+ // Sanity check to detect garbage string lengths
+ if (requestedLength < 0 || requestedLength > 0x100000) { // 0x100000 = 1,048,576 (~1 million) chars
+ throw new IllegalArgumentException("Bad requested string length ("
+ requestedLength + ")");
+ }
+ char[] buf = null;
+ boolean isCompressedEncoding = pIsCompressedEncoding;
+ int availableChars = isCompressedEncoding ? ris.remaining() :
ris.remaining() / LittleEndianConsts.SHORT_SIZE;
+ //everything worked out. Great!
+ int remaining = ris.remaining();
+ if (requestedLength == availableChars) {
+ buf = new char[requestedLength];
+ } else {
+ //sometimes in older Excel 97 .xls files,
+ //the requested length is wrong.
+ //Read all available characters.
+ buf = new char[availableChars];
+ }
+ for (int i = 0; i < buf.length; i++) {
+ char ch;
+ if (isCompressedEncoding) {
+ ch = (char) ris.readUByte();
+ } else {
+ ch = (char) ris.readShort();
+ }
+ buf[i] = ch;
+ }
+
+ //TIKA-2154's file shows that even in a unicode string
+ //there can be a remaining byte (without proper final '00')
+ //that should be read as a byte
+ if (ris.available() == 1) {
+ char[] tmp = new char[buf.length+1];
+ System.arraycopy(buf, 0, tmp, 0, buf.length);
+ tmp[buf.length] = (char)ris.readUByte();
+ buf = tmp;
+ }
+
+ if (ris.available() > 0) {
+ logger.log(POILogger.INFO, "FormatRecord has "+ris.available()+"
unexplained bytes. Silently skipping");
+ //swallow what's left
+ while (ris.available() > 0) {
+ ris.readByte();
+ }
+ }
+ return new String(buf);
+ }
+
}
Modified:
poi/trunk/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java?rev=1799360&r1=1799359&r2=1799360&view=diff
==============================================================================
---
poi/trunk/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
(original)
+++
poi/trunk/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
Tue Jun 20 18:11:34 2017
@@ -17,11 +17,11 @@
package org.apache.poi.hssf.extractor;
+import static org.apache.poi.POITestCase.assertContains;
+import static org.apache.poi.POITestCase.assertStartsWith;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
-import static org.apache.poi.POITestCase.assertContains;
-import static org.apache.poi.POITestCase.assertStartsWith;
import java.io.File;
import java.io.IOException;
@@ -388,4 +388,13 @@ public final class TestExcelExtractor {
assertNotNull(extractor.getText());
extractor.close();
}
+
+ @Test
+ public void test61045() throws IOException {
+ //bug 61045. File is govdocs1 626534
+ ExcelExtractor extractor =
createExtractor("61045_govdocs1_626534.xls");
+ String txt = extractor.getText();
+ assertContains(txt, "NONBUSINESS");
+ }
+
}
Added: poi/trunk/test-data/spreadsheet/61045_govdocs1_626534.xls
URL:
http://svn.apache.org/viewvc/poi/trunk/test-data/spreadsheet/61045_govdocs1_626534.xls?rev=1799360&view=auto
==============================================================================
Binary file - no diff available.
Propchange: poi/trunk/test-data/spreadsheet/61045_govdocs1_626534.xls
------------------------------------------------------------------------------
svn:mime-type = application/vnd.ms-excel
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]