This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4171b in repository https://gitbox.apache.org/repos/asf/tika.git
commit 63cbe674e409ed05bb810488084beddb3da43a35
Author: tallison <talli...@apache.org>
AuthorDate: Sat Mar 23 15:19:28 2024 -0400

    TIKA-4171 -- fix regression when field names are missing in the XFAExtractor
---
 .../src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java  | 3 +++
 .../src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java
index 14bb07b1d..a79e942e8 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java
@@ -123,6 +123,9 @@ class XFAExtractor {
                     (field.toolTip == null || field.toolTip.trim().length() == 0) ?
                             fieldName : field.toolTip;
             String[] fieldValues = pdfObjRToValues.getValues(fieldName);
+            if (fieldValues.length == 0) {
+                fieldValues = new String[]{""};
+            }
             for (String fieldValue : fieldValues) {
                 AttributesImpl attrs = new AttributesImpl();
                 attrs.addAttribute("", "fieldName", "fieldName", "CDATA", fieldName);
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 6e9167f37..6eb0b4a0a 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -986,7 +986,7 @@ public class PDFParserTest extends TikaTest {
         while (matcher.find()) {
             listItems++;
         }
-        assertEquals(24, listItems);
+        assertEquals(27, listItems);
     }
 
     @Test