This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4171b
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 63cbe674e409ed05bb810488084beddb3da43a35
Author: tallison <talli...@apache.org>
AuthorDate: Sat Mar 23 15:19:28 2024 -0400

    TIKA-4171 -- fix regression when field names are missing in the XFAExtractor
---
 .../src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java         | 3 +++
 .../src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java        | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java
index 14bb07b1d..a79e942e8 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java
@@ -123,6 +123,9 @@ class XFAExtractor {
                     (field.toolTip == null || field.toolTip.trim().length() == 0) ? fieldName :
                             field.toolTip;
             String[] fieldValues = pdfObjRToValues.getValues(fieldName);
+            if (fieldValues.length == 0) {
+                fieldValues = new String[]{""};
+            }
             for (String fieldValue : fieldValues) {
                 AttributesImpl attrs = new AttributesImpl();
                 attrs.addAttribute("", "fieldName", "fieldName", "CDATA", fieldName);
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 6e9167f37..6eb0b4a0a 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -986,7 +986,7 @@ public class PDFParserTest extends TikaTest {
         while (matcher.find()) {
             listItems++;
         }
-        assertEquals(24, listItems);
+        assertEquals(27, listItems);
     }
 
     @Test

Reply via email to