This is an automated email from the ASF dual-hosted git repository.
tilman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 185714ebf TIKA-4327: improve my TODO comment
185714ebf is described below
commit 185714ebf16a68f57d6d43672bae2fff91d6f9f9
Author: Tilman Hausherr <[email protected]>
AuthorDate: Wed Jan 15 10:25:48 2025 +0100
TIKA-4327: improve my TODO comment
---
.../main/java/org/apache/tika/parser/pdf/PDFMarkedContent2XHTML.java | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFMarkedContent2XHTML.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFMarkedContent2XHTML.java
index 5ee8cbab6..7d8386eeb 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFMarkedContent2XHTML.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFMarkedContent2XHTML.java
@@ -280,6 +280,8 @@ public class PDFMarkedContent2XHTML extends PDF2XHTML {
// and maybe dereference COSObject first, i.e. before the first "if"?
// No, because we're using the object key for a map
// However, we could replace ObjectRef with COSBase for currentPageRef.
+ // This way we could also get rid of findPages because that logic is in the
+ // iterator of PageTree which we get by calling PDDocument.getPages()
COSDictionary dict = (COSDictionary) ((COSObject)
kids).getObject();
COSName type = dict.getCOSName(COSName.TYPE);
if (COSName.OBJR.equals(type)) {