This is an automated email from the ASF dual-hosted git repository.

tballison pushed a commit to branch TIKA-4756
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 4afc6b5009c2f33da19938ebf0b1bde1c2d9b69f
Author: tballison <[email protected]>
AuthorDate: Thu Jun 11 06:43:22 2026 +0200

    TIKA-4756 -- add HAS_SIGNATURE_FIELDS
---
 .../src/main/java/org/apache/tika/metadata/PDF.java      |  6 ++++++
 .../main/java/org/apache/tika/parser/pdf/PDFParser.java  | 14 ++++++++++----
 .../java/org/apache/tika/parser/pdf/PDFParserTest.java   | 16 +++++++++++++---
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/metadata/PDF.java b/tika-core/src/main/java/org/apache/tika/metadata/PDF.java
index f852189365..51451e71df 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/PDF.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/PDF.java
@@ -151,6 +151,12 @@ public interface PDF {
      */
     Property HAS_ACROFORM_FIELDS = Property.internalBoolean(PDF_PREFIX + 
"hasAcroFormFields");
 
+    /**
+     * Has at least one AcroForm signature field (/FT /Sig), whether or not it has been signed.
+     * For documents that have been actually signed, see {@link TikaCoreProperties#HAS_SIGNATURE}.
+     */
+    Property HAS_SIGNATURE_FIELDS = Property.internalBoolean(PDF_PREFIX + 
"hasSignatureFields");
+
     Property HAS_MARKED_CONTENT = Property.internalBoolean(PDF_PREFIX + 
"hasMarkedContent");
 
     /**
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
index f4e734f532..25aa853e54 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
@@ -53,6 +53,7 @@ import org.apache.pdfbox.pdmodel.fixup.PDDocumentFixup;
 import org.apache.pdfbox.pdmodel.fixup.processor.AcroFormDefaultsProcessor;
 import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature;
 import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
+import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
@@ -398,13 +399,19 @@ public class PDFParser implements Parser, RenderingParser {
     }
 
     private void extractSignatures(PDDocument pdfDocument, Metadata metadata) {
+        List<PDSignatureField> sigFields = pdfDocument.getSignatureFields();
+        if (sigFields.isEmpty()) {
+            return;
+        }
+        metadata.set(PDF.HAS_SIGNATURE_FIELDS, true);
+
         boolean hasSignature = false;
-        for (PDSignature signature : pdfDocument.getSignatureDictionaries()) {
+        for (PDSignatureField sigField : sigFields) {
+            PDSignature signature = sigField.getSignature();
             if (signature == null) {
                 continue;
             }
             PDMetadataExtractor.addNotNull(signature.getName(), metadata, 
TikaCoreProperties.SIGNATURE_NAME);
-
             Calendar date = signature.getSignDate();
             if (date != null) {
                 metadata.add(TikaCoreProperties.SIGNATURE_DATE, date);
@@ -414,11 +421,10 @@ public class PDFParser implements Parser, RenderingParser {
             PDMetadataExtractor.addNotNull(signature.getLocation(), metadata, 
TikaCoreProperties.SIGNATURE_LOCATION);
             PDMetadataExtractor.addNotNull(signature.getReason(), metadata, 
TikaCoreProperties.SIGNATURE_REASON);
             hasSignature = true;
-
         }
 
         if (hasSignature) {
-            metadata.set(TikaCoreProperties.HAS_SIGNATURE, hasSignature);
+            metadata.set(TikaCoreProperties.HAS_SIGNATURE, true);
         }
     }
 
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 947a45dbdd..987d2c7083 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -621,17 +621,27 @@ public class PDFParserTest extends TikaTest {
     //TIKA-1226
     @Test
     public void testSignatureInAcroForm() throws Exception {
-        //The current test doc does not contain any content in the signature area.
-        //This just tests that a RuntimeException is not thrown.
-        //TODO: find a better test file for this issue.
         XMLResult result = getXML("testPDF_acroform3.pdf");
         Metadata m = result.metadata;
         assertEquals("true", m.get(PDF.HAS_XMP));
         assertEquals("true", m.get(PDF.HAS_ACROFORM_FIELDS));
         assertEquals("false", m.get(PDF.HAS_XFA));
+        assertEquals("true", m.get(PDF.HAS_SIGNATURE_FIELDS));
+        assertNull(m.get(TikaCoreProperties.HAS_SIGNATURE));
         assertContains("<li>aTextField: TIKA-1226</li>", result.xml);
     }
 
+    //TIKA-4756
+    @Test
+    public void testUnsignedSignatureField() throws Exception {
+        // PDF has an AcroForm with /SigFlags 1 and a /Sig type field, but no actual signature value.
+        // Should detect the signature field but not report hasSignature.
+        Metadata m = getXML("testPDF_sigflags.pdf").metadata;
+        assertEquals("true", m.get(PDF.HAS_ACROFORM_FIELDS));
+        assertEquals("true", m.get(PDF.HAS_SIGNATURE_FIELDS));
+        assertNull(m.get(TikaCoreProperties.HAS_SIGNATURE));
+    }
+
     @Test
     public void testSingleCloseDoc() throws Exception {
         //TIKA-1341

Reply via email to