Author: sergey
Date: Thu Aug 18 14:29:59 2011
New Revision: 1159244
URL: http://svn.apache.org/viewvc?rev=1159244&view=rev
Log:
fix 51678 -- Extracting text from Bug51524.zip is slow
Modified:
poi/trunk/src/documentation/content/xdocs/status.xml
poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHWPFOldDocument.java
Modified: poi/trunk/src/documentation/content/xdocs/status.xml
URL:
http://svn.apache.org/viewvc/poi/trunk/src/documentation/content/xdocs/status.xml?rev=1159244&r1=1159243&r2=1159244&view=diff
==============================================================================
--- poi/trunk/src/documentation/content/xdocs/status.xml (original)
+++ poi/trunk/src/documentation/content/xdocs/status.xml Thu Aug 18 14:29:59
2011
@@ -34,6 +34,7 @@
<changes>
<release version="3.8-beta4" date="2011-??-??">
+ <action dev="poi-developers" type="fix">51678 - Extracting text
from Bug51524.zip is slow</action>
<action dev="poi-developers" type="fix">51671 - HWPFDocument.write
based on NPOIFSFileSystem throws a NullPointerException</action>
<action dev="poi-developers" type="add">support for tables and
hyperlinks in XSLF</action>
<action dev="poi-developers" type="fix">51535 - correct signed vs
unsigned short reading in NDocumentInputStream</action>
Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java?rev=1159244&r1=1159243&r2=1159244&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
(original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java Thu
Aug 18 14:29:59 2011
@@ -21,6 +21,8 @@ import java.lang.ref.WeakReference;
import java.util.List;
import java.util.NoSuchElementException;
+import org.apache.poi.hwpf.model.BytePropertyNode;
+
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.model.CHPX;
@@ -770,16 +772,28 @@ public class Range { // TODO -instantiab
return null;
}
- int[] point = findRange( _paragraphs, _parStart,
- Math.max( chpx.getStart(), _start ), chpx.getEnd() );
-
- if ( point[0] >= _paragraphs.size() )
+ short istd;
+ if ( this instanceof Paragraph )
{
- return null;
+ istd = ((Paragraph) this)._istd;
}
+ else
+ {
+ int[] point = findRange( _paragraphs,
+ Math.max( chpx.getStart(), _start ),
+ Math.min( chpx.getEnd(), _end ) );
+
+ initParagraphs();
+ int parStart = Math.max( point[0], _parStart );
+
+ if ( parStart >= _paragraphs.size() )
+ {
+ return null;
+ }
- PAPX papx = _paragraphs.get( point[0] );
- short istd = papx.getIstd();
+ PAPX papx = _paragraphs.get( point[0] );
+ istd = papx.getIstd();
+ }
CharacterRun chp = new CharacterRun( chpx, _doc.getStyleSheet(), istd,
this );
@@ -924,7 +938,7 @@ public class Range { // TODO -instantiab
*/
private void initParagraphs() {
if (!_parRangeFound) {
- int[] point = findRange(_paragraphs, _parStart, _start,
_end);
+ int[] point = findRange(_paragraphs, _start, _end);
_parStart = point[0];
_parEnd = point[1];
_parRangeFound = true;
@@ -936,7 +950,7 @@ public class Range { // TODO -instantiab
*/
private void initCharacterRuns() {
if (!_charRangeFound) {
- int[] point = findRange(_characters, _charStart,
_start, _end);
+ int[] point = findRange(_characters, _start, _end);
_charStart = point[0];
_charEnd = point[1];
_charRangeFound = true;
@@ -955,6 +969,105 @@ public class Range { // TODO -instantiab
}
}
+ private static int binarySearchStart( List<? extends PropertyNode<?>> rpl,
+ int start )
+ {
+ if ( rpl.get( 0 ).getStart() >= start )
+ return 0;
+
+ int low = 0;
+ int high = rpl.size() - 1;
+
+ while ( low <= high )
+ {
+ int mid = ( low + high ) >>> 1;
+ PropertyNode<?> node = rpl.get( mid );
+
+ if ( node.getStart() < start )
+ {
+ low = mid + 1;
+ }
+ else if ( node.getStart() > start )
+ {
+ high = mid - 1;
+ }
+ else
+ {
+ assert node.getStart() == start;
+ return mid;
+ }
+ }
+ assert low != 0;
+ return low - 1;
+ }
+
+ private static int binarySearchEnd( List<? extends PropertyNode<?>> rpl,
+ int foundStart, int end )
+ {
+ if ( rpl.get( rpl.size() - 1 ).getEnd() <= end )
+ return rpl.size() - 1;
+
+ int low = foundStart;
+ int high = rpl.size() - 1;
+
+ while ( low <= high )
+ {
+ int mid = ( low + high ) >>> 1;
+ PropertyNode<?> node = rpl.get( mid );
+
+ if ( node.getEnd() < end )
+ {
+ low = mid + 1;
+ }
+ else if ( node.getEnd() > end )
+ {
+ high = mid - 1;
+ }
+ else
+ {
+ assert node.getEnd() == end;
+ return mid;
+ }
+ }
+ assert 0 <= low && low < rpl.size();
+
+ return low;
+ }
+
+ /**
+ * Used to find the list indexes of a particular property.
+ *
+ * @param rpl
+ * A list of property nodes.
+ * @param min
+ * A hint on where to start looking.
+ * @param start
+ * The starting character offset.
+ * @param end
+ * The ending character offset.
+ * @return An int array of length 2. The first int is the start index and
+ * the second int is the end index.
+ */
+ private int[] findRange( List<? extends PropertyNode<?>> rpl, int start,
+ int end )
+ {
+ int startIndex = binarySearchStart( rpl, start );
+ while ( startIndex > 0 && rpl.get( startIndex - 1 ).getStart() >=
start )
+ startIndex--;
+
+ int endIndex = binarySearchEnd( rpl, startIndex, end );
+ while ( endIndex < rpl.size() - 1
+ && rpl.get( endIndex + 1 ).getEnd() <= end )
+ endIndex--;
+
+ if ( startIndex < 0 || startIndex >= rpl.size()
+ || startIndex > endIndex || endIndex < 0
+ || endIndex >= rpl.size() )
+ throw new AssertionError();
+
+ return new int[] { startIndex, endIndex + 1 };
+ }
+
/**
* Used to find the list indexes of a particular property.
*
@@ -971,7 +1084,7 @@ public class Range { // TODO -instantiab
*/
private int[] findRange(List<? extends PropertyNode<?>> rpl, int min,
int start, int end) {
int x = min;
-
+
if ( rpl.size() == min )
return new int[] { min, min };
Modified:
poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHWPFOldDocument.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHWPFOldDocument.java?rev=1159244&r1=1159243&r2=1159244&view=diff
==============================================================================
---
poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHWPFOldDocument.java
(original)
+++
poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHWPFOldDocument.java
Thu Aug 18 14:29:59 2011
@@ -42,7 +42,7 @@ public final class TestHWPFOldDocument e
// Check
assertEquals(1, doc.getRange().numSections());
assertEquals(1, doc.getRange().numParagraphs());
- assertEquals(1, doc.getRange().numCharacterRuns());
+ assertEquals(2, doc.getRange().numCharacterRuns());
assertEquals(
"The quick brown fox jumps over the lazy dog\r",
@@ -96,7 +96,7 @@ public final class TestHWPFOldDocument e
assertEquals(5, doc.getRange().getParagraph(4).numCharacterRuns());
assertEquals(1, doc.getRange().getParagraph(5).numCharacterRuns());
// Normal, superscript for 4th, normal
- assertEquals(3, doc.getRange().getParagraph(6).numCharacterRuns());
+ assertEquals(4, doc.getRange().getParagraph(6).numCharacterRuns());
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]