This is an automated email from the ASF dual-hosted git repository.

tilman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new cb08f56d9 TIKA-4442: add test
cb08f56d9 is described below

commit cb08f56d9670a32cff0c8dafab9c915033603a61
Author: Tilman Hausherr <[email protected]>
AuthorDate: Wed Jun 25 09:40:35 2025 +0200

    TIKA-4442: add test
---
 .../apache/tika/parser/pdf/CustomTikaXMPTest.java  |  17 ++++
 .../resources/test-documents/xmp/TIKA-4442.xmp     | 101 +++++++++++++++++++++
 2 files changed, 118 insertions(+)

diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/CustomTikaXMPTest.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/CustomTikaXMPTest.java
index ded6cffd5..9b7907b4d 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/CustomTikaXMPTest.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/CustomTikaXMPTest.java
@@ -30,6 +30,7 @@ import org.apache.tika.TikaTest;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.PDF;
+import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.utils.XMLReaderUtils;
 
@@ -68,6 +69,22 @@ public class CustomTikaXMPTest extends TikaTest {
                 metadata.getDate(PDF.PDFVT_MODIFIED).toInstant().toString());
     }
 
+    /**
+     * TIKA-4442: Test unusual Dublin Core properties.
+     *
+     * @throws Exception if extracting the metadata from the XMP test file fails
+     */
+    @Test
+    public void testDublinCore() throws Exception {
+        Metadata metadata = extract("TIKA-4442.xmp"); // test file based on 
file 188032
+        assertEquals("research papers", metadata.get(TikaCoreProperties.TYPE));
+        assertEquals("doi:1234/S56789", 
metadata.get(TikaCoreProperties.IDENTIFIER));
+        assertEquals("en", metadata.get(TikaCoreProperties.LANGUAGE));
+        assertEquals("International Union of Thinkology", 
metadata.get(TikaCoreProperties.PUBLISHER));
+        assertEquals("Relation", metadata.get(TikaCoreProperties.RELATION));
+        assertEquals("Journal of Thinkology", 
metadata.get(TikaCoreProperties.SOURCE));
+    }
+
     private Metadata extract(String xmpFileName) throws IOException, 
TikaException, SAXException {
         try (InputStream is = getResourceAsStream("/test-documents/xmp/" + 
xmpFileName)) {
             Document doc = XMLReaderUtils.buildDOM(is);
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/test-documents/xmp/TIKA-4442.xmp
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/test-documents/xmp/TIKA-4442.xmp
new file mode 100644
index 000000000..8ff773629
--- /dev/null
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/test-documents/xmp/TIKA-4442.xmp
@@ -0,0 +1,101 @@
+<?xpacket begin="﻿" id="W5M0MpCehiHzreSzNTczkc9d"?>
+<x:xmpmeta xmlns:x="adobe:ns:meta/">
+       <rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'
+                xmlns:iX='http://ns.adobe.com/iX/1.0/'>
+               <rdf:Description rdf:about=""
+                                
xmlns:xapRights='http://ns.adobe.com/xap/1.0/rights/'>
+                       <xapRights:Marked>True</xapRights:Marked>
+                       <xapRights:UsageTerms>
+                               <rdf:Alt>
+                                       <rdf:li 
xml:lang="x-default">http://journals.iucr.org/services/termsofuse.html</rdf:li>
+                               </rdf:Alt>
+                       </xapRights:UsageTerms>
+               </rdf:Description>
+               <rdf:Description rdf:about=""
+                                xmlns:dc="http://purl.org/dc/elements/1.1/">
+                       <dc:identifier>doi:1234/S56789</dc:identifier>
+                       <dc:source>Journal of Thinkology</dc:source>
+                       <dc:type>
+                               <rdf:Bag>
+                                       <rdf:li xml:lang="x-default">research 
papers</rdf:li>
+                               </rdf:Bag>
+                       </dc:type>
+                       <dc:format>application/pdf</dc:format>
+                       <dc:title>
+                               <rdf:Alt>
+                                       <rdf:li xml:lang="x-default">The 
minimization of thoughts while raw dogging</rdf:li>
+                               </rdf:Alt>
+                       </dc:title>
+                       <dc:language>
+                               <rdf:Bag>
+                                       <rdf:li xml:lang="x-default">en</rdf:li>
+                               </rdf:Bag>
+                       </dc:language>
+                       <dc:description>
+                               <rdf:Alt>
+                                       <rdf:li xml:lang="x-default">Thinking: 
is it needed?</rdf:li>
+                               </rdf:Alt>
+                       </dc:description>
+                       <dc:date>
+                               <rdf:Seq>
+                                       <rdf:li>1939-07-17</rdf:li>
+                               </rdf:Seq>
+                       </dc:date>
+                       <dc:publisher>
+                               <rdf:Bag>
+                                       <rdf:li 
xml:lang="x-default">International Union of Thinkology</rdf:li>
+                               </rdf:Bag>
+                       </dc:publisher>
+                       <dc:relation>
+                               <rdf:Bag>
+                                       <rdf:li>Relation</rdf:li>
+                               </rdf:Bag>
+                       </dc:relation>
+                       <dc:subject>
+                               <rdf:Bag>
+                                       <rdf:li>THOUGHTS</rdf:li>
+                                       <rdf:li>HAPPINESS</rdf:li>
+                                       <rdf:li>FEAR</rdf:li>
+                                       <rdf:li>ANGER</rdf:li>
+                                       <rdf:li>DESPAIR</rdf:li>
+                               </rdf:Bag>
+                       </dc:subject>
+                       <dc:creator>
+                               <rdf:Seq>
+                                       <rdf:li>Dorothy</rdf:li>
+                                       <rdf:li>Toto</rdf:li>
+                                       <rdf:li>Scarecrow</rdf:li>
+                                       <rdf:li>Tin Man</rdf:li>
+                                       <rdf:li>Cowardly Lion</rdf:li>
+                               </rdf:Seq>
+                       </dc:creator>
+                       <dc:rights>
+                               <rdf:Alt>
+                                       <rdf:li xml:lang="x-default">Copyright 
(c) 1939 International Union of Thinkology</rdf:li>
+                               </rdf:Alt>
+                       </dc:rights>
+               </rdf:Description>
+       </rdf:RDF>
+</x:xmpmeta>
+                                                                               
                     
+                                                                               
                     
+                                                                               
                     
+                                                                               
                     
+                                                                               
                     
+                                                                               
                     
+                                                                               
                     
+                                                                               
                     
+                                                                               
                     
+                                                                               
                     
+                                                                               
                     
+                                                                               
                     
+                                                                               
                     
+                                                                               
                     
+                                                                               
                     
+                                                                               
                     
+                                                                               
                     
+                                                                               
                     
+                                                                               
                     
+                                                                               
                     
+                           
+<?xpacket end="r"?>

Reply via email to