jheight 2003/10/13 23:54:00
Modified: src/java/org/apache/poi/hssf/record SSTDeserializer.java
SSTRecord.java
src/testcases/org/apache/poi/hssf/record
TestSSTDeserializer.java TestSSTRecord.java
Log:
Fixed the double byte bugs in SSTDeserializer. Testcases provided in bugs 15556 and
22742 now work.
Patch for the rel 2.0 branch will follow shortly.
PR: 15556, 22742
Revision Changes Path
1.7 +47 -32
jakarta-poi/src/java/org/apache/poi/hssf/record/SSTDeserializer.java
Index: SSTDeserializer.java
===================================================================
RCS file:
/home/cvs/jakarta-poi/src/java/org/apache/poi/hssf/record/SSTDeserializer.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- SSTDeserializer.java 18 Sep 2003 02:10:50 -0000 1.6
+++ SSTDeserializer.java 14 Oct 2003 06:53:59 -0000 1.7
@@ -62,13 +62,14 @@
* Handles the task of deserializing a SST string. The two main entry points are
*
* @author Glen Stampoultzis (glens at apache.org)
+ * @author Jason Height (jheight at apache.org)
*/
class SSTDeserializer
{
private BinaryTree strings;
- /** this is the number of characters we expect in the first sub-record in a
subsequent continuation record */
- private int continuationExpectedChars;
+ /** this is the number of characters that have been read prior to the
continuation */
+ private int continuationReadChars;
/** this is the string we were working on before hitting the end of the current
record. This string is NOT finished. */
private String unfinishedString;
/** this is true if the string uses wide characters */
@@ -82,6 +83,7 @@
/** Number of characters in current string */
private int charCount;
private int extensionLength;
+ private int continueSkipBytes = 0;
public SSTDeserializer( BinaryTree strings )
@@ -93,13 +95,14 @@
private void initVars()
{
runCount = 0;
- continuationExpectedChars = 0;
+ continuationReadChars = 0;
unfinishedString = "";
// bytesInCurrentSegment = 0;
// stringDataOffset = 0;
wideChar = false;
richText = false;
extendedText = false;
+ continueSkipBytes = 0;
}
/**
@@ -107,14 +110,15 @@
* strings may span across multiple continuations. Read the SST record
* carefully before beginning to hack.
*/
- public void manufactureStrings( final byte[] data, final int initialOffset,
short dataSize )
+ public void manufactureStrings( final byte[] data, final int initialOffset)
{
initVars();
int offset = initialOffset;
- while ( ( offset - initialOffset ) < dataSize )
+ final int dataSize = data.length;
+ while ( offset < dataSize )
{
- int remaining = dataSize - offset + initialOffset;
+ int remaining = dataSize - offset;
if ( ( remaining > 0 ) && ( remaining < LittleEndianConsts.SHORT_SIZE )
)
{
@@ -122,26 +126,31 @@
}
if ( remaining == LittleEndianConsts.SHORT_SIZE )
{
- setContinuationExpectedChars( LittleEndian.getUShort( data, offset
) );
+ //JMH Dont know about this
+ setContinuationCharsRead( 0 );//LittleEndian.getUShort( data,
offset ) );
unfinishedString = "";
break;
}
charCount = LittleEndian.getUShort( data, offset );
+ int charsRead = charCount;
readStringHeader( data, offset );
boolean stringContinuesOverContinuation = remaining < totalStringSize();
if ( stringContinuesOverContinuation )
{
- int remainingBytes = ( initialOffset + dataSize ) - offset -
stringHeaderOverhead();
- setContinuationExpectedChars( charCount - calculateCharCount(
remainingBytes ) );
- charCount -= getContinuationExpectedChars();
+ int remainingBytes = dataSize - offset - stringHeaderOverhead();
+ //Only read the size of the string or whatever is left before the
+ //continuation
+ charsRead = Math.min(charsRead, calculateCharCount( remainingBytes
));
+ setContinuationCharsRead( charsRead );
+ if (charsRead == charCount) {
+ //Since all of the characters will have been read, but the entire
string (including formatting runs etc)
+ //hasnt, Compute the number of bytes to skip when the continue
record starts
+ continueSkipBytes = offsetForContinuedRecord(0) - (remainingBytes
- calculateByteCount(charsRead));
}
- else
- {
- setContinuationExpectedChars( 0 );
}
- processString( data, offset, charCount );
+ processString( data, offset, charsRead );
offset += totalStringSize();
- if ( getContinuationExpectedChars() != 0 )
+ if ( stringContinuesOverContinuation )
{
break;
}
@@ -222,6 +231,7 @@
UnicodeString string = new UnicodeString( UnicodeString.sid,
(short) unicodeStringBuffer.length,
unicodeStringBuffer );
+ setContinuationCharsRead( calculateCharCount(bytesRead));
if ( isStringFinished() )
{
@@ -238,7 +248,7 @@
private boolean isStringFinished()
{
- return getContinuationExpectedChars() == 0;
+ return getContinuationCharsRead() == charCount;
}
/**
@@ -300,8 +310,9 @@
{
if ( isStringFinished() )
{
+ final int offset = continueSkipBytes;
initVars();
- manufactureStrings( record, 0, (short) record.length );
+ manufactureStrings( record, offset);
}
else
{
@@ -329,13 +340,12 @@
*/
private void readStringRemainder( final byte[] record )
{
- int stringRemainderSizeInBytes = calculateByteCount(
getContinuationExpectedChars() );
-// stringDataOffset = LittleEndianConsts.BYTE_SIZE;
+ int stringRemainderSizeInBytes = calculateByteCount(
charCount-getContinuationCharsRead() );
byte[] unicodeStringData = new byte[SSTRecord.STRING_MINIMAL_OVERHEAD
- + calculateByteCount( getContinuationExpectedChars() )];
+ + stringRemainderSizeInBytes];
// write the string length
- LittleEndian.putShort( unicodeStringData, 0, (short)
getContinuationExpectedChars() );
+ LittleEndian.putShort( unicodeStringData, 0, (short)
(charCount-getContinuationCharsRead()) );
// write the options flag
unicodeStringData[LittleEndianConsts.SHORT_SIZE] = createOptionByte(
wideChar, richText, extendedText );
@@ -344,7 +354,7 @@
// past all the overhead of the str_data array
arraycopy( record, LittleEndianConsts.BYTE_SIZE, unicodeStringData,
SSTRecord.STRING_MINIMAL_OVERHEAD,
- unicodeStringData.length - SSTRecord.STRING_MINIMAL_OVERHEAD );
+ stringRemainderSizeInBytes );
// use special constructor to create the final string
UnicodeString string = new UnicodeString( UnicodeString.sid,
@@ -355,7 +365,7 @@
addToStringTable( strings, integer, string );
int newOffset = offsetForContinuedRecord( stringRemainderSizeInBytes );
- manufactureStrings( record, newOffset, (short) ( record.length - newOffset
) );
+ manufactureStrings( record, newOffset);
}
/**
@@ -387,8 +397,12 @@
private int offsetForContinuedRecord( int stringRemainderSizeInBytes )
{
- return stringRemainderSizeInBytes + LittleEndianConsts.BYTE_SIZE
- + runCount * LittleEndianConsts.INT_SIZE + extensionLength;
+ int offset = stringRemainderSizeInBytes + runCount *
LittleEndianConsts.INT_SIZE + extensionLength;
+ if (stringRemainderSizeInBytes != 0)
+ //If a portion of the string remains then the wideChar options byte is
repeated,
+ //so need to skip this.
+ offset += + LittleEndianConsts.BYTE_SIZE;
+ return offset;
}
private byte createOptionByte( boolean wideChar, boolean richText, boolean
farEast )
@@ -408,17 +422,18 @@
int dataLengthInBytes = record.length - LittleEndianConsts.BYTE_SIZE;
byte[] unicodeStringData = new byte[record.length +
LittleEndianConsts.SHORT_SIZE];
- LittleEndian.putShort( unicodeStringData, (byte) 0, (short)
calculateCharCount( dataLengthInBytes ) );
+ int charsRead = calculateCharCount( dataLengthInBytes );
+ LittleEndian.putShort( unicodeStringData, (byte) 0, (short) charsRead );
arraycopy( record, 0, unicodeStringData, LittleEndianConsts.SHORT_SIZE,
record.length );
UnicodeString ucs = new UnicodeString( UnicodeString.sid, (short)
unicodeStringData.length, unicodeStringData );
unfinishedString = unfinishedString + ucs.getString();
- setContinuationExpectedChars( getContinuationExpectedChars() -
calculateCharCount( dataLengthInBytes ) );
+ setContinuationCharsRead( charsRead );
}
private boolean stringSpansContinuation( int continuationSizeInBytes )
{
- return calculateByteCount( getContinuationExpectedChars() ) >
continuationSizeInBytes;
+ return calculateByteCount( charCount - getContinuationCharsRead() ) >
continuationSizeInBytes;
}
/**
@@ -426,14 +441,14 @@
* sub-record in a subsequent continuation record
*/
- int getContinuationExpectedChars()
+ int getContinuationCharsRead()
{
- return continuationExpectedChars;
+ return continuationReadChars;
}
- private void setContinuationExpectedChars( final int count )
+ private void setContinuationCharsRead( final int count )
{
- continuationExpectedChars = count;
+ continuationReadChars = count;
}
private int calculateByteCount( final int character_count )
1.11 +1 -1 jakarta-poi/src/java/org/apache/poi/hssf/record/SSTRecord.java
Index: SSTRecord.java
===================================================================
RCS file: /home/cvs/jakarta-poi/src/java/org/apache/poi/hssf/record/SSTRecord.java,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- SSTRecord.java 25 Sep 2003 07:18:08 -0000 1.10
+++ SSTRecord.java 14 Oct 2003 06:53:59 -0000 1.11
@@ -482,7 +482,7 @@
field_2_num_unique_strings = LittleEndian.getInt( data, 4 + offset );
field_3_strings = new BinaryTree();
deserializer = new SSTDeserializer(field_3_strings);
- deserializer.manufactureStrings( data, 8 + offset, (short)(size - 8) );
+ deserializer.manufactureStrings( data, 8 + offset);
}
1.4 +5 -5
jakarta-poi/src/testcases/org/apache/poi/hssf/record/TestSSTDeserializer.java
Index: TestSSTDeserializer.java
===================================================================
RCS file:
/home/cvs/jakarta-poi/src/testcases/org/apache/poi/hssf/record/TestSSTDeserializer.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- TestSSTDeserializer.java 30 Apr 2003 04:39:04 -0000 1.3
+++ TestSSTDeserializer.java 14 Oct 2003 06:54:00 -0000 1.4
@@ -88,7 +88,7 @@
byte[] bytes = HexRead.readData( _test_file_path + File.separator +
"richtextdata.txt", "header" );
BinaryTree strings = new BinaryTree();
SSTDeserializer deserializer = new SSTDeserializer( strings );
- deserializer.manufactureStrings( bytes, 0, (short)bytes.length );
+ deserializer.manufactureStrings( bytes, 0);
byte[] continueBytes = HexRead.readData( _test_file_path + File.separator +
"richtextdata.txt", "continue1" );
deserializer.processContinueRecord( continueBytes );
@@ -101,7 +101,7 @@
byte[] bytes = HexRead.readData( _test_file_path + File.separator +
"evencontinuation.txt", "header" );
BinaryTree strings = new BinaryTree();
SSTDeserializer deserializer = new SSTDeserializer( strings );
- deserializer.manufactureStrings( bytes, 0, (short)bytes.length );
+ deserializer.manufactureStrings( bytes, 0);
byte[] continueBytes = HexRead.readData( _test_file_path + File.separator +
"evencontinuation.txt", "continue1" );
deserializer.processContinueRecord( continueBytes );
@@ -119,7 +119,7 @@
byte[] bytes = HexRead.readData( _test_file_path + File.separator +
"stringacross2continuations.txt", "header" );
BinaryTree strings = new BinaryTree();
SSTDeserializer deserializer = new SSTDeserializer( strings );
- deserializer.manufactureStrings( bytes, 0, (short)bytes.length );
+ deserializer.manufactureStrings( bytes, 0);
bytes = HexRead.readData( _test_file_path + File.separator +
"stringacross2continuations.txt", "continue1" );
deserializer.processContinueRecord( bytes );
bytes = HexRead.readData( _test_file_path + File.separator +
"stringacross2continuations.txt", "continue2" );
@@ -136,7 +136,7 @@
byte[] bytes = HexRead.readData( _test_file_path + File.separator +
"extendedtextstrings.txt", "rich-header" );
BinaryTree strings = new BinaryTree();
SSTDeserializer deserializer = new SSTDeserializer( strings );
- deserializer.manufactureStrings( bytes, 0, (short)bytes.length );
+ deserializer.manufactureStrings( bytes, 0);
byte[] continueBytes = HexRead.readData( _test_file_path + File.separator +
"extendedtextstrings.txt", "rich-continue1" );
deserializer.processContinueRecord( continueBytes );
@@ -146,7 +146,7 @@
bytes = HexRead.readData( _test_file_path + File.separator +
"extendedtextstrings.txt", "norich-header" );
strings = new BinaryTree();
deserializer = new SSTDeserializer( strings );
- deserializer.manufactureStrings( bytes, 0, (short)bytes.length );
+ deserializer.manufactureStrings( bytes, 0);
continueBytes = HexRead.readData( _test_file_path + File.separator +
"extendedtextstrings.txt", "norich-continue1" );
deserializer.processContinueRecord( continueBytes );
1.7 +2 -2
jakarta-poi/src/testcases/org/apache/poi/hssf/record/TestSSTRecord.java
Index: TestSSTRecord.java
===================================================================
RCS file:
/home/cvs/jakarta-poi/src/testcases/org/apache/poi/hssf/record/TestSSTRecord.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- TestSSTRecord.java 30 Apr 2003 04:39:04 -0000 1.6
+++ TestSSTRecord.java 14 Oct 2003 06:54:00 -0000 1.7
@@ -429,7 +429,7 @@
assertEquals( 1464, record.getNumStrings() );
assertEquals( 688, record.getNumUniqueStrings() );
assertEquals( 492, record.countStrings() );
- assertEquals( 1, record.getDeserializer().getContinuationExpectedChars() );
+//jmh assertEquals( 1,
record.getDeserializer().getContinuationExpectedChars() );
assertEquals( "Consolidated B-24J Liberator The Dragon & His Tai",
record.getDeserializer().getUnfinishedString() );
// assertEquals( 52, record.getDeserializer().getTotalLength() );
@@ -448,7 +448,7 @@
assertEquals( 0, record.getNumStrings() );
assertEquals( 0, record.getNumUniqueStrings() );
assertEquals( 0, record.countStrings() );
- assertEquals( 0, record.getDeserializer().getContinuationExpectedChars() );
+ assertEquals( 0, record.getDeserializer().getContinuationCharsRead() );
assertEquals( "", record.getDeserializer().getUnfinishedString() );
// assertEquals( 0, record.getDeserializer().getTotalLength() );
// assertEquals( 0, record.getDeserializer().getStringDataOffset() );
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]