This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4426 in repository https://gitbox.apache.org/repos/asf/tika.git
commit 40468104b633e64673f753a2c9a22877a9e401ce Author: tallison <[email protected]> AuthorDate: Fri May 30 09:27:42 2025 -0400 TIKA-4426 -- add img: prefix to unknown metadata keys in image formats --- .../tika/parser/image/ImageMetadataExtractor.java | 8 +- .../org/apache/tika/parser/image/ImageParser.java | 20 ++- .../apache/tika/parser/image/HeifParserTest.java | 6 +- .../parser/image/ImageMetadataExtractorTest.java | 2 +- .../apache/tika/parser/image/ImageParserTest.java | 134 ++++++++++----------- .../apache/tika/parser/image/JpegParserTest.java | 2 +- .../apache/tika/parser/image/WebPParserTest.java | 8 +- .../tika/parser/ocr/TesseractOCRParserTest.java | 7 +- .../org/apache/tika/parser/pdf/PDFParserTest.java | 5 +- 9 files changed, 95 insertions(+), 97 deletions(-) diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java index cabf2345b..69f271940 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java @@ -77,12 +77,14 @@ import org.apache.tika.utils.XMLReaderUtils; * As of 2.8.0 the library supports webp. */ public class ImageMetadataExtractor { + //TODO: add this to the signatures from the actual parse private static final ParseContext EMPTY_PARSE_CONTEXT = new ParseContext(); private static final String GEO_DECIMAL_FORMAT_STRING = "#.######"; // 6 dp seems to be reasonable - private static final String ICC_NS = "ICC" + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER; + public static final String UNKNOWN_IMG_NS = "img" + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER; + public static final String ICC_NS = "ICC" + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER; private final Metadata metadata; private DirectoryHandler[] handlers; @@ -292,11 +294,11 @@ public class ImageMetadataExtractor { value = Boolean.FALSE.toString(); } if (directory instanceof ExifDirectoryBase) { - metadata.set(directory.getName() + ":" + name, value); + metadata.set(UNKNOWN_IMG_NS + directory.getName() + ":" + name, value); } else if (directory instanceof IccDirectory) { metadata.set(ICC_NS + name, value); } else { - metadata.set(name, value); + metadata.set(UNKNOWN_IMG_NS + name, value); } } } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageParser.java index 9fea04e16..0bee1042c 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageParser.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageParser.java @@ -70,12 +70,6 @@ public class ImageParser extends AbstractImageParser { private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(TMP_SUPPORTED); - private static void setIfPresent(Metadata metadata, String imageIOkey, String tikaKey) { - if (metadata.get(imageIOkey) != null) { - metadata.set(tikaKey, metadata.get(imageIOkey)); - } - } - private static void setIfPresent(Metadata metadata, String imageIOkey, Property tikaProp) { if (metadata.get(imageIOkey) != null) { String v = metadata.get(imageIOkey); @@ -112,7 +106,7 @@ public class ImageParser extends AbstractImageParser { int length = map.getLength(); if (length == 1) { - metadata.add(parents, normalize(map.item(0).getNodeValue())); + metadata.add(ImageMetadataExtractor.UNKNOWN_IMG_NS + parents, normalize(map.item(0).getNodeValue())); } else if (length > 1) { StringBuilder value = new StringBuilder(); for (int i = 0; i < length; i++) { @@ -124,7 +118,7 @@ public class ImageParser extends AbstractImageParser { value.append("="); value.append(normalize(attr.getNodeValue())); } - metadata.add(parents, value.toString()); + metadata.add(ImageMetadataExtractor.UNKNOWN_IMG_NS + parents, value.toString()); } } @@ -181,8 +175,8 @@ public class ImageParser extends AbstractImageParser { } metadata.set(Metadata.IMAGE_WIDTH, Integer.toString(reader.getWidth(0))); metadata.set(Metadata.IMAGE_LENGTH, Integer.toString(reader.getHeight(0))); - metadata.set("height", Integer.toString(reader.getHeight(0))); - metadata.set("width", Integer.toString(reader.getWidth(0))); + metadata.set(ImageMetadataExtractor.UNKNOWN_IMG_NS + "height", Integer.toString(reader.getHeight(0))); + metadata.set(ImageMetadataExtractor.UNKNOWN_IMG_NS + "width", Integer.toString(reader.getWidth(0))); loadMetadata(reader.getImageMetadata(0), metadata); } @@ -193,10 +187,10 @@ public class ImageParser extends AbstractImageParser { // Translate certain Metadata tags from the ImageIO // specific namespace into the general Tika one - setIfPresent(metadata, "CommentExtensions CommentExtension", + setIfPresent(metadata, ImageMetadataExtractor.UNKNOWN_IMG_NS + "CommentExtensions CommentExtension", TikaCoreProperties.COMMENTS); - setIfPresent(metadata, "markerSequence com", TikaCoreProperties.COMMENTS); - setIfPresent(metadata, "Data BitsPerSample", Metadata.BITS_PER_SAMPLE); + setIfPresent(metadata, ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence com", TikaCoreProperties.COMMENTS); + setIfPresent(metadata, ImageMetadataExtractor.UNKNOWN_IMG_NS + "Data BitsPerSample", Metadata.BITS_PER_SAMPLE); } catch (IIOException e) { // TIKA-619: There is a known bug in the Sun API when dealing with GIF images // which Tika will just ignore. diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/HeifParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/HeifParserTest.java index 6f023a441..0f72082aa 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/HeifParserTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/HeifParserTest.java @@ -48,9 +48,9 @@ public class HeifParserTest { parser.parse(stream, new DefaultHandler(), metadata, new ParseContext()); - assertEquals("heic", metadata.get("Major Brand")); - assertEquals("512 pixels", metadata.get("Width")); - assertEquals("512 pixels", metadata.get("Height")); + assertEquals("heic", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Major Brand")); + assertEquals("512 pixels", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Width")); + assertEquals("512 pixels", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Height")); assertEquals("image/heic", metadata.get(Metadata.CONTENT_TYPE)); assertEquals("23.177917", metadata.get(Metadata.LATITUDE)); assertEquals("113.394317", metadata.get(Metadata.LONGITUDE)); diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java index ab8f0c805..267659c00 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java @@ -129,7 +129,7 @@ public class ImageMetadataExtractorTest { Mockito.when(d.getTags()).thenReturn(tags); Metadata metadata = new Metadata(); new ImageMetadataExtractor.CopyUnknownFieldsHandler().handle(d, metadata); - assertEquals("t1", metadata.get("Image Description")); + assertEquals("t1", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Image Description")); assertNull(metadata.get(TikaCoreProperties.SUBJECT), "keywords should be excluded from bulk copy because it is a defined field"); assertNull(metadata.get(TikaCoreProperties.DESCRIPTION)); diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java index 085c7bd81..b66ef1c12 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java @@ -42,14 +42,14 @@ public class ImageParserTest extends TikaTest { parser.parse(stream, new DefaultHandler(), metadata, new ParseContext()); } - assertEquals("75", metadata.get("height")); - assertEquals("100", metadata.get("width")); - assertEquals("8 8 8", metadata.get("Data BitsPerSample")); - assertEquals("1.0", metadata.get("Dimension PixelAspectRatio")); + assertEquals("75", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "height")); + assertEquals("100", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "width")); + assertEquals("8 8 8", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Data BitsPerSample")); + assertEquals("1.0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension PixelAspectRatio")); //TODO: figure out why we're getting 0.35273367 in Ubuntu, but not Windows //assertEquals("0", metadata.get("Dimension VerticalPhysicalPixelSpacing")); //assertEquals("0", metadata.get("Dimension HorizontalPhysicalPixelSpacing")); - assertEquals("BI_RGB", metadata.get("Compression CompressionTypeName")); + assertEquals("BI_RGB", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression CompressionTypeName")); assertEquals("image/bmp", metadata.get("Content-Type")); assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH)); @@ -66,31 +66,31 @@ public class ImageParserTest extends TikaTest { parser.parse(stream, new DefaultHandler(), metadata, new ParseContext()); } - assertEquals("75", metadata.get("height")); - assertEquals("100", metadata.get("width")); - assertEquals("true", metadata.get("Compression Lossless")); - assertEquals("Normal", metadata.get("Dimension ImageOrientation")); - assertEquals("lzw", metadata.get("Compression CompressionTypeName")); - assertEquals("0", metadata.get("Dimension HorizontalPixelOffset")); + assertEquals("75", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "height")); + assertEquals("100", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "width")); + assertEquals("true", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression Lossless")); + assertEquals("Normal", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension ImageOrientation")); + assertEquals("lzw", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression CompressionTypeName")); + assertEquals("0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension HorizontalPixelOffset")); assertEquals("imageLeftPosition=0, imageTopPosition=0, imageWidth=100, " + - "imageHeight=75, interlaceFlag=false", metadata.get("ImageDescriptor")); - assertEquals("Index", metadata.get("Data SampleFormat")); - assertEquals("3", metadata.get("Chroma NumChannels")); - assertEquals("1", metadata.get("Compression NumProgressiveScans")); - assertEquals("RGB", metadata.get("Chroma ColorSpaceType")); + "imageHeight=75, interlaceFlag=false", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "ImageDescriptor")); + assertEquals("Index", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Data SampleFormat")); + assertEquals("3", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma NumChannels")); + assertEquals("1", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression NumProgressiveScans")); + assertEquals("RGB", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma ColorSpaceType")); assertEquals("Licensed to the Apache Software Foundation (ASF) under " + "one or more contributor license agreements. See the NOTICE file " + "distributed with this work for additional information regarding " + - "copyright ownership.", metadata.get("CommentExtensions CommentExtension")); + "copyright ownership.", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "CommentExtensions CommentExtension")); assertEquals("value=Licensed to the Apache Software Foundation (ASF) under one " + "or more contributor license agreements. See the NOTICE file " + "distributed with this work for additional information regarding " + "copyright ownership., encoding=ISO-8859-1, compression=none", - metadata.get("Text TextEntry")); - assertEquals("true", metadata.get("Chroma BlackIsZero")); + metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Text TextEntry")); + assertEquals("true", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma BlackIsZero")); assertEquals("disposalMethod=none, userInputFlag=false, transparentColorFlag=false, " + - "delayTime=0, transparentColorIndex=0", metadata.get("GraphicControlExtension")); - assertEquals("0", metadata.get("Dimension VerticalPixelOffset")); + "delayTime=0, transparentColorIndex=0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "GraphicControlExtension")); + assertEquals("0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension VerticalPixelOffset")); assertEquals("image/gif", metadata.get("Content-Type")); assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH)); @@ -110,40 +110,40 @@ public class ImageParserTest extends TikaTest { parser.parse(stream, new DefaultHandler(), metadata, new ParseContext()); } - assertEquals("75", metadata.get("height")); - assertEquals("100", metadata.get("width")); - assertEquals("0.35277778", metadata.get("Dimension VerticalPixelSize")); - assertEquals("false", metadata.get("Compression Lossless")); - assertEquals("class=0, htableId=0", metadata.get("markerSequence dht dhtable")); + assertEquals("75", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "height")); + assertEquals("100", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "width")); + assertEquals("0.35277778", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension VerticalPixelSize")); + assertEquals("false", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression Lossless")); + assertEquals("class=0, htableId=0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence dht dhtable")); assertEquals("majorVersion=1, minorVersion=1, resUnits=1, Xdensity=72, " + - "Ydensity=72, thumbWidth=0, thumbHeight=0", metadata.get("JPEGvariety app0JFIF")); - assertEquals("225", metadata.get("markerSequence unknown")); + "Ydensity=72, thumbWidth=0, thumbHeight=0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "JPEGvariety app0JFIF")); + assertEquals("225", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence unknown")); assertEquals("componentSelector=1, dcHuffTable=0, acHuffTable=0", - metadata.get("markerSequence sos scanComponentSpec")); - assertEquals("normal", metadata.get("Dimension ImageOrientation")); - assertEquals("1.0", metadata.get("Dimension PixelAspectRatio")); - assertEquals("elementPrecision=0, qtableId=0", metadata.get("markerSequence dqt dqtable")); + metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence sos scanComponentSpec")); + assertEquals("normal", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension ImageOrientation")); + assertEquals("1.0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension PixelAspectRatio")); + assertEquals("elementPrecision=0, qtableId=0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence dqt dqtable")); assertEquals("numScanComponents=3, startSpectralSelection=0, " + "endSpectralSelection=63, approxHigh=0, approxLow=0", - metadata.get("markerSequence sos")); + metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence sos")); assertEquals("componentId=1, HsamplingFactor=1, " + "VsamplingFactor=1, QtableSelector=0", - metadata.get("markerSequence sof componentSpec")); - assertEquals("JPEG", metadata.get("Compression CompressionTypeName")); - assertEquals("0.35277778", metadata.get("Dimension HorizontalPixelSize")); + metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence sof componentSpec")); + assertEquals("JPEG", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression CompressionTypeName")); + assertEquals("0.35277778", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension HorizontalPixelSize")); assertEquals("Licensed to the Apache Software Foundation (ASF) under one or " + "more contributor license agreements. See the NOTICE file " + "distributed with this work for additional information " + - "regarding copyright ownership.", metadata.get("markerSequence com")); - assertEquals("3", metadata.get("Chroma NumChannels")); - assertEquals("1", metadata.get("Compression NumProgressiveScans")); - assertEquals("YCbCr", metadata.get("Chroma ColorSpaceType")); + "regarding copyright ownership.", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence com")); + assertEquals("3", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma NumChannels")); + assertEquals("1", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression NumProgressiveScans")); + assertEquals("YCbCr", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma ColorSpaceType")); assertEquals("keyword=comment, value=Licensed to the Apache Software Foundation " + "(ASF) under one or more contributor license agreements. See the NOTICE" + " file distributed with this work for additional information regarding " + - "copyright ownership.", metadata.get("Text TextEntry")); + "copyright ownership.", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Text TextEntry")); assertEquals("image/jpeg", metadata.get("Content-Type")); assertEquals("process=0, samplePrecision=8, numLines=75, samplesPerLine=100, " + - "numFrameComponents=3", metadata.get("markerSequence sof")); + "numFrameComponents=3", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence sof")); assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH)); assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH)); @@ -162,41 +162,41 @@ public class ImageParserTest extends TikaTest { parser.parse(stream, new DefaultHandler(), metadata, new ParseContext()); } - assertEquals("75", metadata.get("height")); - assertEquals("100", metadata.get("width")); - assertEquals("0.35273367", metadata.get("Dimension VerticalPixelSize")); - assertEquals("8 8 8", metadata.get("Data BitsPerSample")); - assertEquals("Perceptual", metadata.get("sRGB")); - assertEquals("true", metadata.get("Compression Lossless")); + assertEquals("75", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "height")); + assertEquals("100", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "width")); + assertEquals("0.35273367", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension VerticalPixelSize")); + assertEquals("8 8 8", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Data BitsPerSample")); + assertEquals("Perceptual", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "sRGB")); + assertEquals("true", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression Lossless")); assertEquals("year=2008, month=5, day=6, hour=6, minute=18, second=47", - metadata.get("tIME")); - assertEquals("Normal", metadata.get("Dimension ImageOrientation")); - assertEquals("1.0", metadata.get("Dimension PixelAspectRatio")); + metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "tIME")); + assertEquals("Normal", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension ImageOrientation")); + assertEquals("1.0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension PixelAspectRatio")); assertEquals("keyword=Comment, value=Licensed to the Apache Software Foundation " + "(ASF) under one or more contributor license agreements. See the " + "NOTICE file distributed with this work for additional information " + - "regarding copyright ownership.", metadata.get("tEXt tEXtEntry")); - assertEquals("deflate", metadata.get("Compression CompressionTypeName")); - assertEquals("UnsignedIntegral", metadata.get("Data SampleFormat")); - assertEquals("0.35273367", metadata.get("Dimension HorizontalPixelSize")); - assertEquals("none", metadata.get("Transparency Alpha")); + "regarding copyright ownership.", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "tEXt tEXtEntry")); + assertEquals("deflate", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression CompressionTypeName")); + assertEquals("UnsignedIntegral", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Data SampleFormat")); + assertEquals("0.35273367", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension HorizontalPixelSize")); + assertEquals("none", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Transparency Alpha")); assertEquals("pixelsPerUnitXAxis=2835, pixelsPerUnitYAxis=2835, unitSpecifier=meter", - metadata.get("pHYs")); - assertEquals("3", metadata.get("Chroma NumChannels")); - assertEquals("1", metadata.get("Compression NumProgressiveScans")); - assertEquals("RGB", metadata.get("Chroma ColorSpaceType")); + metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "pHYs")); + assertEquals("3", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma NumChannels")); + assertEquals("1", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression NumProgressiveScans")); + assertEquals("RGB", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma ColorSpaceType")); assertEquals("keyword=Comment, value=Licensed to the Apache Software Foundation " + "(ASF) under one or more contributor license agreements. See the " + "NOTICE file distributed with this work for additional information " + "regarding copyright ownership., encoding=ISO-8859-1, compression=none", - metadata.get("Text TextEntry")); - assertEquals("PixelInterleaved", metadata.get("Data PlanarConfiguration")); + metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Text TextEntry")); + assertEquals("PixelInterleaved", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Data PlanarConfiguration")); assertEquals("width=100, height=75, bitDepth=8, colorType=RGB, " + "compressionMethod=deflate, filterMethod=adaptive, interlaceMethod=none", - metadata.get("IHDR")); - assertEquals("true", metadata.get("Chroma BlackIsZero")); + metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "IHDR")); + assertEquals("true", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma BlackIsZero")); assertEquals("year=2008, month=5, day=6, hour=6, minute=18, second=47", - metadata.get("Document ImageModificationTime")); + metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Document ImageModificationTime")); assertEquals("image/png", metadata.get("Content-Type")); assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH)); @@ -212,8 +212,8 @@ public class ImageParserTest extends TikaTest { try (InputStream stream = getResourceAsStream("/test-documents/testJBIG2.jb2")) { parser.parse(stream, new DefaultHandler(), metadata, new ParseContext()); } - assertEquals("78", metadata.get("height")); - assertEquals("328", metadata.get("width")); + assertEquals("78", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "height")); + assertEquals("328", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "width")); assertEquals("image/x-jbig2", metadata.get("Content-Type")); assertEquals(1, metadata.getInt(TikaCoreProperties.NUM_IMAGES)); } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JpegParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JpegParserTest.java index 9775b9464..76a7367cb 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JpegParserTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JpegParserTest.java @@ -73,7 +73,7 @@ public class JpegParserTest { // Check that EXIF/TIFF tags come through with their raw values too // (This may be removed for Tika 1.0, as we support more of them // with explicit Metadata entries) - assertEquals("Canon EOS 40D", metadata.get("Exif IFD0:Model")); + assertEquals("Canon EOS 40D", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Exif IFD0:Model")); // Common tags assertEquals("2009-10-02T23:02:49", metadata.get(TikaCoreProperties.MODIFIED)); diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/WebPParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/WebPParserTest.java index 60baf04e6..0eb8cc02d 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/WebPParserTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/WebPParserTest.java @@ -41,10 +41,10 @@ public class WebPParserTest extends TikaTest { public void testSimple() throws Exception { Metadata metadata = getXML("testWebp_Alpha_Lossy.webp").metadata; - assertEquals("301", metadata.get("Image Height")); - assertEquals("400", metadata.get("Image Width")); - assertEquals("true", metadata.get("Has Alpha")); - assertEquals("false", metadata.get("Is Animation")); + assertEquals("301", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Image Height")); + assertEquals("400", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Image Width")); + assertEquals("true", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Has Alpha")); + assertEquals("false", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Is Animation")); assertEquals("image/webp", metadata.get(Metadata.CONTENT_TYPE)); metadata = getXML("testWebp_Alpha_Lossless.webp").metadata; diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java index 7a898e3f6..f8f3cf878 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java @@ -37,6 +37,7 @@ import org.apache.tika.parser.AutoDetectParser; import org.apache.tika.parser.DefaultParser; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; +import org.apache.tika.parser.image.ImageMetadataExtractor; import org.apache.tika.parser.image.ImageParser; import org.apache.tika.parser.pdf.PDFParserConfig; import org.apache.tika.sax.BasicContentHandlerFactory; @@ -220,7 +221,7 @@ public class TesseractOCRParserTest extends TikaTest { //gif Metadata m = getXML("testGIF.gif").metadata; assertTrue(m.names().length > 20); - assertEquals("RGB", m.get("Chroma ColorSpaceType")); + assertEquals("RGB", m.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma ColorSpaceType")); //jpg m = getXML("testOCR.jpg").metadata; @@ -239,13 +240,13 @@ public class TesseractOCRParserTest extends TikaTest { m = getXML("testPNG.png").metadata; assertEquals("100", m.get(Metadata.IMAGE_WIDTH)); assertEquals("75", m.get(Metadata.IMAGE_LENGTH)); - assertEquals("UnsignedIntegral", m.get("Data SampleFormat")); + assertEquals("UnsignedIntegral", m.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Data SampleFormat")); //tiff m = getXML("testTIFF.tif").metadata; assertEquals("100", m.get(Metadata.IMAGE_WIDTH)); assertEquals("75", m.get(Metadata.IMAGE_LENGTH)); - assertEquals("72 dots per inch", m.get("Exif IFD0:Y Resolution")); + assertEquals("72 dots per inch", m.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Exif IFD0:Y Resolution")); } @Test diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java index 78f54c4f1..49b0042cb 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java @@ -49,6 +49,7 @@ import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; import org.apache.tika.parser.RecursiveParserWrapper; import org.apache.tika.parser.external.ExternalParser; +import org.apache.tika.parser.image.ImageMetadataExtractor; import org.apache.tika.parser.ocr.TesseractOCRConfig; import org.apache.tika.parser.ocr.TesseractOCRParser; import org.apache.tika.parser.xml.XMLProfiler; @@ -355,8 +356,8 @@ public class PDFParserTest extends TikaTest { fail("Exception: " + metadatas.get(1).get(key)); } } - assertEquals("91", metadatas.get(1).get("height")); - assertEquals("352", metadatas.get(1).get("width")); + assertEquals("91", metadatas.get(1).get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "height")); + assertEquals("352", metadatas.get(1).get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "width")); assertNull(metadatas.get(0).get(TikaCoreProperties.RESOURCE_NAME_KEY)); assertEquals("image0.jb2", metadatas.get(1).get(TikaCoreProperties.RESOURCE_NAME_KEY));
