This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new e2ec95baa TIKA-4299 -- clean up pagination in AbstractPDF2XHTML. (#1924)
e2ec95baa is described below
commit e2ec95baa7b64243caeff2e44a73902da52d9104
Author: Tim Allison <[email protected]>
AuthorDate: Wed Aug 21 09:07:57 2024 -0400
TIKA-4299 -- clean up pagination in AbstractPDF2XHTML. (#1924)
---
.../apache/tika/parser/pdf/AbstractPDF2XHTML.java | 22 ----------------------
1 file changed, 22 deletions(-)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
index 53e28514f..f9a6c27ff 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
@@ -180,7 +180,6 @@ class AbstractPDF2XHTML extends PDFTextStripper {
private final Set<COSBase> extractedFiles = new HashSet<>();
//zero-based pageIndex
int pageIndex = 0;
- int startPage = -1;
//private in PDFTextStripper...must have own copy because we override processpages
int unmappedUnicodeCharsPerPage = 0;
int totalCharsPerPage = 0;
@@ -1358,18 +1357,6 @@ class AbstractPDF2XHTML extends PDFTextStripper {
*/
@Override
protected void processPages(PDPageTree pages) throws IOException {
- //we currently need this hack because we aren't able to increment
- //the private currentPageNo in PDFTextStripper,
- //and PDFTextStripper's processPage relies on that variable
- //being >= startPage when deciding whether or not to process a page
- // See:
- // if (currentPageNo >= startPage && currentPageNo <= endPage
- // && (startBookmarkPageNumber == -1 ||
- // currentPageNo >= startBookmarkPageNumber)
- // && (endBookmarkPageNumber == -1 ||
- // currentPageNo <= endBookmarkPageNumber))
- // {
- super.setStartPage(1);
for (PDPage page : pages) {
if (getCurrentPageNo() >= getStartPage() && getCurrentPageNo() <= getEndPage()) {
processPage(page);
@@ -1392,15 +1379,6 @@ class AbstractPDF2XHTML extends PDFTextStripper {
"to implement this.");
}
- @Override
- public int getStartPage() {
- return startPage;
- }
-
- @Override
- public void setStartPage(int startPage) {
- this.startPage = startPage;
- }
@Override
protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code,