This is an automated email from the ASF dual-hosted git repository.
tilman pushed a commit to branch branch_2x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_2x by this push:
new 82421ed60 TIKA-4311: simplify
82421ed60 is described below
commit 82421ed6035dbf5f1b8d880b2fdace009d66b87f
Author: Tilman Hausherr <[email protected]>
AuthorDate: Wed Sep 18 08:47:51 2024 +0200
TIKA-4311: simplify
---
.../java/org/apache/tika/parser/pdf/PDFMarkedContent2XHTML.java | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFMarkedContent2XHTML.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFMarkedContent2XHTML.java
index b745fea02..cc829fc07 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFMarkedContent2XHTML.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFMarkedContent2XHTML.java
@@ -37,7 +37,6 @@ import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
-import
org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDObjectReference;
import
org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot;
import
org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
import org.apache.pdfbox.text.PDFMarkedContentExtractor;
@@ -281,9 +280,8 @@ public class PDFMarkedContent2XHTML extends PDF2XHTML {
// and maybe dereference COSObject first, i.e. before the first
"if"?
COSDictionary dict = (COSDictionary) ((COSObject)
kids).getObject();
COSName type = dict.getCOSName(COSName.TYPE);
- if (COSName.getPDFName(PDObjectReference.TYPE).equals(type)) //
OBJR
- {
- recurse(dict.getDictionaryObject(COSName.OBJ),
currentPageRef,depth + 1, paragraphs,
+ if (COSName.OBJR.equals(type)) {
+ recurse(dict.getDictionaryObject(COSName.OBJ), currentPageRef,
depth + 1, paragraphs,
roleMap);
}