This is an automated email from the ASF dual-hosted git repository.
tilman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new cb08f56d9 TIKA-4442: add test
cb08f56d9 is described below
commit cb08f56d9670a32cff0c8dafab9c915033603a61
Author: Tilman Hausherr <[email protected]>
AuthorDate: Wed Jun 25 09:40:35 2025 +0200
TIKA-4442: add test
---
.../apache/tika/parser/pdf/CustomTikaXMPTest.java | 17 ++++
.../resources/test-documents/xmp/TIKA-4442.xmp | 101 +++++++++++++++++++++
2 files changed, 118 insertions(+)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/CustomTikaXMPTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/CustomTikaXMPTest.java
index ded6cffd5..9b7907b4d 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/CustomTikaXMPTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/CustomTikaXMPTest.java
@@ -30,6 +30,7 @@ import org.apache.tika.TikaTest;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.PDF;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.utils.XMLReaderUtils;
@@ -68,6 +69,22 @@ public class CustomTikaXMPTest extends TikaTest {
metadata.getDate(PDF.PDFVT_MODIFIED).toInstant().toString());
}
+    /**
+     * TIKA-4442: Test unusual Dublin Core properties.
+     *
+     * @throws Exception if the XMP test resource cannot be read or parsed
+     */
+    @Test
+    public void testDublinCore() throws Exception {
+        Metadata metadata = extract("TIKA-4442.xmp"); // test file based on file 188032
+        assertEquals("research papers", metadata.get(TikaCoreProperties.TYPE));
+        assertEquals("doi:1234/S56789", metadata.get(TikaCoreProperties.IDENTIFIER));
+        assertEquals("en", metadata.get(TikaCoreProperties.LANGUAGE));
+        assertEquals("International Union of Thinkology", metadata.get(TikaCoreProperties.PUBLISHER));
+        assertEquals("Relation", metadata.get(TikaCoreProperties.RELATION));
+        assertEquals("Journal of Thinkology", metadata.get(TikaCoreProperties.SOURCE));
+    }
+
private Metadata extract(String xmpFileName) throws IOException,
TikaException, SAXException {
try (InputStream is = getResourceAsStream("/test-documents/xmp/" +
xmpFileName)) {
Document doc = XMLReaderUtils.buildDOM(is);
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/test-documents/xmp/TIKA-4442.xmp b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/test-documents/xmp/TIKA-4442.xmp
new file mode 100644
index 000000000..8ff773629
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/test-documents/xmp/TIKA-4442.xmp
@@ -0,0 +1,101 @@
+<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
+<x:xmpmeta xmlns:x="adobe:ns:meta/">
+ <rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'
+ xmlns:iX='http://ns.adobe.com/iX/1.0/'>
+ <rdf:Description rdf:about=""
+ xmlns:xapRights='http://ns.adobe.com/xap/1.0/rights/'>
+ <xapRights:Marked>True</xapRights:Marked>
+ <xapRights:UsageTerms>
+ <rdf:Alt>
+ <rdf:li xml:lang="x-default">http://journals.iucr.org/services/termsofuse.html</rdf:li>
+ </rdf:Alt>
+ </xapRights:UsageTerms>
+ </rdf:Description>
+ <rdf:Description rdf:about=""
+ xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <dc:identifier>doi:1234/S56789</dc:identifier>
+ <dc:source>Journal of Thinkology</dc:source>
+ <dc:type>
+ <rdf:Bag>
+ <rdf:li xml:lang="x-default">research papers</rdf:li>
+ </rdf:Bag>
+ </dc:type>
+ <dc:format>application/pdf</dc:format>
+ <dc:title>
+ <rdf:Alt>
+ <rdf:li xml:lang="x-default">The minimization of thoughts while raw dogging</rdf:li>
+ </rdf:Alt>
+ </dc:title>
+ <dc:language>
+ <rdf:Bag>
+ <rdf:li xml:lang="x-default">en</rdf:li>
+ </rdf:Bag>
+ </dc:language>
+ <dc:description>
+ <rdf:Alt>
+ <rdf:li xml:lang="x-default">Thinking: is it needed?</rdf:li>
+ </rdf:Alt>
+ </dc:description>
+ <dc:date>
+ <rdf:Seq>
+ <rdf:li>1939-07-17</rdf:li>
+ </rdf:Seq>
+ </dc:date>
+ <dc:publisher>
+ <rdf:Bag>
+ <rdf:li xml:lang="x-default">International Union of Thinkology</rdf:li>
+ </rdf:Bag>
+ </dc:publisher>
+ <dc:relation>
+ <rdf:Bag>
+ <rdf:li>Relation</rdf:li>
+ </rdf:Bag>
+ </dc:relation>
+ <dc:subject>
+ <rdf:Bag>
+ <rdf:li>THOUGHTS</rdf:li>
+ <rdf:li>HAPPINESS</rdf:li>
+ <rdf:li>FEAR</rdf:li>
+ <rdf:li>ANGER</rdf:li>
+ <rdf:li>DESPAIR</rdf:li>
+ </rdf:Bag>
+ </dc:subject>
+ <dc:creator>
+ <rdf:Seq>
+ <rdf:li>Dorothy</rdf:li>
+ <rdf:li>Toto</rdf:li>
+ <rdf:li>Scarecrow</rdf:li>
+ <rdf:li>Tin Man</rdf:li>
+ <rdf:li>Cowardly Lion</rdf:li>
+ </rdf:Seq>
+ </dc:creator>
+ <dc:rights>
+ <rdf:Alt>
+ <rdf:li xml:lang="x-default">Copyright (c) 1939 International Union of Thinkology</rdf:li>
+ </rdf:Alt>
+ </dc:rights>
+ </rdf:Description>
+ </rdf:RDF>
+</x:xmpmeta>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<?xpacket end="r"?>