This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 677912188 TIKA-4426 -- add img: prefix to unknown metadata keys in
image formats (#2227)
677912188 is described below
commit 6779121885135e771b44fa9d3c47f5759f99f05f
Author: Tim Allison <[email protected]>
AuthorDate: Fri May 30 09:48:37 2025 -0400
TIKA-4426 -- add img: prefix to unknown metadata keys in image formats
(#2227)
---
.../tika/parser/image/ImageMetadataExtractor.java | 8 +-
.../org/apache/tika/parser/image/ImageParser.java | 20 ++-
.../apache/tika/parser/image/HeifParserTest.java | 6 +-
.../parser/image/ImageMetadataExtractorTest.java | 2 +-
.../apache/tika/parser/image/ImageParserTest.java | 134 ++++++++++-----------
.../apache/tika/parser/image/JpegParserTest.java | 2 +-
.../apache/tika/parser/image/WebPParserTest.java | 8 +-
.../tika/parser/ocr/TesseractOCRParserTest.java | 7 +-
.../org/apache/tika/parser/pdf/PDFParserTest.java | 5 +-
9 files changed, 95 insertions(+), 97 deletions(-)
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
index cabf2345b..69f271940 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
@@ -77,12 +77,14 @@ import org.apache.tika.utils.XMLReaderUtils;
* As of 2.8.0 the library supports webp.
*/
public class ImageMetadataExtractor {
+
//TODO: add this to the signatures from the actual parse
private static final ParseContext EMPTY_PARSE_CONTEXT = new ParseContext();
private static final String GEO_DECIMAL_FORMAT_STRING = "#.######";
// 6 dp seems to be reasonable
- private static final String ICC_NS = "ICC" +
TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
+ public static final String UNKNOWN_IMG_NS = "img" +
TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
+ public static final String ICC_NS = "ICC" +
TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
private final Metadata metadata;
private DirectoryHandler[] handlers;
@@ -292,11 +294,11 @@ public class ImageMetadataExtractor {
value = Boolean.FALSE.toString();
}
if (directory instanceof ExifDirectoryBase) {
- metadata.set(directory.getName() + ":" + name,
value);
+ metadata.set(UNKNOWN_IMG_NS + directory.getName()
+ ":" + name, value);
} else if (directory instanceof IccDirectory) {
metadata.set(ICC_NS + name, value);
} else {
- metadata.set(name, value);
+ metadata.set(UNKNOWN_IMG_NS + name, value);
}
}
}
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageParser.java
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageParser.java
index 9fea04e16..0bee1042c 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageParser.java
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageParser.java
@@ -70,12 +70,6 @@ public class ImageParser extends AbstractImageParser {
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.unmodifiableSet(TMP_SUPPORTED);
- private static void setIfPresent(Metadata metadata, String imageIOkey,
String tikaKey) {
- if (metadata.get(imageIOkey) != null) {
- metadata.set(tikaKey, metadata.get(imageIOkey));
- }
- }
-
private static void setIfPresent(Metadata metadata, String imageIOkey,
Property tikaProp) {
if (metadata.get(imageIOkey) != null) {
String v = metadata.get(imageIOkey);
@@ -112,7 +106,7 @@ public class ImageParser extends AbstractImageParser {
int length = map.getLength();
if (length == 1) {
- metadata.add(parents, normalize(map.item(0).getNodeValue()));
+ metadata.add(ImageMetadataExtractor.UNKNOWN_IMG_NS + parents,
normalize(map.item(0).getNodeValue()));
} else if (length > 1) {
StringBuilder value = new StringBuilder();
for (int i = 0; i < length; i++) {
@@ -124,7 +118,7 @@ public class ImageParser extends AbstractImageParser {
value.append("=");
value.append(normalize(attr.getNodeValue()));
}
- metadata.add(parents, value.toString());
+ metadata.add(ImageMetadataExtractor.UNKNOWN_IMG_NS + parents,
value.toString());
}
}
@@ -181,8 +175,8 @@ public class ImageParser extends AbstractImageParser {
}
metadata.set(Metadata.IMAGE_WIDTH,
Integer.toString(reader.getWidth(0)));
metadata.set(Metadata.IMAGE_LENGTH,
Integer.toString(reader.getHeight(0)));
- metadata.set("height",
Integer.toString(reader.getHeight(0)));
- metadata.set("width",
Integer.toString(reader.getWidth(0)));
+ metadata.set(ImageMetadataExtractor.UNKNOWN_IMG_NS +
"height", Integer.toString(reader.getHeight(0)));
+ metadata.set(ImageMetadataExtractor.UNKNOWN_IMG_NS +
"width", Integer.toString(reader.getWidth(0)));
loadMetadata(reader.getImageMetadata(0), metadata);
}
@@ -193,10 +187,10 @@ public class ImageParser extends AbstractImageParser {
// Translate certain Metadata tags from the ImageIO
// specific namespace into the general Tika one
- setIfPresent(metadata, "CommentExtensions CommentExtension",
+ setIfPresent(metadata, ImageMetadataExtractor.UNKNOWN_IMG_NS +
"CommentExtensions CommentExtension",
TikaCoreProperties.COMMENTS);
- setIfPresent(metadata, "markerSequence com",
TikaCoreProperties.COMMENTS);
- setIfPresent(metadata, "Data BitsPerSample",
Metadata.BITS_PER_SAMPLE);
+ setIfPresent(metadata, ImageMetadataExtractor.UNKNOWN_IMG_NS +
"markerSequence com", TikaCoreProperties.COMMENTS);
+ setIfPresent(metadata, ImageMetadataExtractor.UNKNOWN_IMG_NS +
"Data BitsPerSample", Metadata.BITS_PER_SAMPLE);
} catch (IIOException e) {
// TIKA-619: There is a known bug in the Sun API when dealing with
GIF images
// which Tika will just ignore.
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/HeifParserTest.java
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/HeifParserTest.java
index 6f023a441..0f72082aa 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/HeifParserTest.java
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/HeifParserTest.java
@@ -48,9 +48,9 @@ public class HeifParserTest {
parser.parse(stream, new DefaultHandler(), metadata, new
ParseContext());
- assertEquals("heic", metadata.get("Major Brand"));
- assertEquals("512 pixels", metadata.get("Width"));
- assertEquals("512 pixels", metadata.get("Height"));
+ assertEquals("heic",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Major Brand"));
+ assertEquals("512 pixels",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Width"));
+ assertEquals("512 pixels",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Height"));
assertEquals("image/heic", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("23.177917", metadata.get(Metadata.LATITUDE));
assertEquals("113.394317", metadata.get(Metadata.LONGITUDE));
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
index ab8f0c805..267659c00 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
@@ -129,7 +129,7 @@ public class ImageMetadataExtractorTest {
Mockito.when(d.getTags()).thenReturn(tags);
Metadata metadata = new Metadata();
new ImageMetadataExtractor.CopyUnknownFieldsHandler().handle(d,
metadata);
- assertEquals("t1", metadata.get("Image Description"));
+ assertEquals("t1", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "Image Description"));
assertNull(metadata.get(TikaCoreProperties.SUBJECT),
"keywords should be excluded from bulk copy because it is a
defined field");
assertNull(metadata.get(TikaCoreProperties.DESCRIPTION));
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
index 085c7bd81..b66ef1c12 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
@@ -42,14 +42,14 @@ public class ImageParserTest extends TikaTest {
parser.parse(stream, new DefaultHandler(), metadata, new
ParseContext());
}
- assertEquals("75", metadata.get("height"));
- assertEquals("100", metadata.get("width"));
- assertEquals("8 8 8", metadata.get("Data BitsPerSample"));
- assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
+ assertEquals("75", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "height"));
+ assertEquals("100", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "width"));
+ assertEquals("8 8 8",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Data BitsPerSample"));
+ assertEquals("1.0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "Dimension PixelAspectRatio"));
//TODO: figure out why we're getting 0.35273367 in Ubuntu, but not
Windows
//assertEquals("0", metadata.get("Dimension
VerticalPhysicalPixelSpacing"));
//assertEquals("0", metadata.get("Dimension
HorizontalPhysicalPixelSpacing"));
- assertEquals("BI_RGB", metadata.get("Compression
CompressionTypeName"));
+ assertEquals("BI_RGB",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression
CompressionTypeName"));
assertEquals("image/bmp", metadata.get("Content-Type"));
assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
@@ -66,31 +66,31 @@ public class ImageParserTest extends TikaTest {
parser.parse(stream, new DefaultHandler(), metadata, new
ParseContext());
}
- assertEquals("75", metadata.get("height"));
- assertEquals("100", metadata.get("width"));
- assertEquals("true", metadata.get("Compression Lossless"));
- assertEquals("Normal", metadata.get("Dimension ImageOrientation"));
- assertEquals("lzw", metadata.get("Compression CompressionTypeName"));
- assertEquals("0", metadata.get("Dimension HorizontalPixelOffset"));
+ assertEquals("75", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "height"));
+ assertEquals("100", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "width"));
+ assertEquals("true",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression Lossless"));
+ assertEquals("Normal",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension
ImageOrientation"));
+ assertEquals("lzw", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "Compression CompressionTypeName"));
+ assertEquals("0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS +
"Dimension HorizontalPixelOffset"));
assertEquals("imageLeftPosition=0, imageTopPosition=0, imageWidth=100,
" +
- "imageHeight=75, interlaceFlag=false",
metadata.get("ImageDescriptor"));
- assertEquals("Index", metadata.get("Data SampleFormat"));
- assertEquals("3", metadata.get("Chroma NumChannels"));
- assertEquals("1", metadata.get("Compression NumProgressiveScans"));
- assertEquals("RGB", metadata.get("Chroma ColorSpaceType"));
+ "imageHeight=75, interlaceFlag=false",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "ImageDescriptor"));
+ assertEquals("Index",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Data SampleFormat"));
+ assertEquals("3", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS +
"Chroma NumChannels"));
+ assertEquals("1", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS +
"Compression NumProgressiveScans"));
+ assertEquals("RGB", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "Chroma ColorSpaceType"));
assertEquals("Licensed to the Apache Software Foundation (ASF) under "
+
"one or more contributor license agreements. See the NOTICE
file " +
"distributed with this work for additional information
regarding " +
- "copyright ownership.", metadata.get("CommentExtensions
CommentExtension"));
+ "copyright ownership.",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "CommentExtensions
CommentExtension"));
assertEquals("value=Licensed to the Apache Software Foundation (ASF)
under one " +
"or more contributor license agreements. See the
NOTICE file " +
"distributed with this work for additional information
regarding " +
"copyright ownership., encoding=ISO-8859-1,
compression=none",
- metadata.get("Text TextEntry"));
- assertEquals("true", metadata.get("Chroma BlackIsZero"));
+ metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Text
TextEntry"));
+ assertEquals("true",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma BlackIsZero"));
assertEquals("disposalMethod=none, userInputFlag=false,
transparentColorFlag=false, " +
- "delayTime=0, transparentColorIndex=0",
metadata.get("GraphicControlExtension"));
- assertEquals("0", metadata.get("Dimension VerticalPixelOffset"));
+ "delayTime=0, transparentColorIndex=0",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS +
"GraphicControlExtension"));
+ assertEquals("0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS +
"Dimension VerticalPixelOffset"));
assertEquals("image/gif", metadata.get("Content-Type"));
assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
@@ -110,40 +110,40 @@ public class ImageParserTest extends TikaTest {
parser.parse(stream, new DefaultHandler(), metadata, new
ParseContext());
}
- assertEquals("75", metadata.get("height"));
- assertEquals("100", metadata.get("width"));
- assertEquals("0.35277778", metadata.get("Dimension
VerticalPixelSize"));
- assertEquals("false", metadata.get("Compression Lossless"));
- assertEquals("class=0, htableId=0", metadata.get("markerSequence dht
dhtable"));
+ assertEquals("75", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "height"));
+ assertEquals("100", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "width"));
+ assertEquals("0.35277778",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension
VerticalPixelSize"));
+ assertEquals("false",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression Lossless"));
+ assertEquals("class=0, htableId=0",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence dht
dhtable"));
assertEquals("majorVersion=1, minorVersion=1, resUnits=1, Xdensity=72,
" +
- "Ydensity=72, thumbWidth=0, thumbHeight=0",
metadata.get("JPEGvariety app0JFIF"));
- assertEquals("225", metadata.get("markerSequence unknown"));
+ "Ydensity=72, thumbWidth=0, thumbHeight=0",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "JPEGvariety app0JFIF"));
+ assertEquals("225", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "markerSequence unknown"));
assertEquals("componentSelector=1, dcHuffTable=0, acHuffTable=0",
- metadata.get("markerSequence sos scanComponentSpec"));
- assertEquals("normal", metadata.get("Dimension ImageOrientation"));
- assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
- assertEquals("elementPrecision=0, qtableId=0",
metadata.get("markerSequence dqt dqtable"));
+ metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS +
"markerSequence sos scanComponentSpec"));
+ assertEquals("normal",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension
ImageOrientation"));
+ assertEquals("1.0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "Dimension PixelAspectRatio"));
+ assertEquals("elementPrecision=0, qtableId=0",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence dqt
dqtable"));
assertEquals("numScanComponents=3, startSpectralSelection=0, " +
"endSpectralSelection=63, approxHigh=0, approxLow=0",
- metadata.get("markerSequence sos"));
+ metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS +
"markerSequence sos"));
assertEquals("componentId=1, HsamplingFactor=1, " +
"VsamplingFactor=1, QtableSelector=0",
- metadata.get("markerSequence sof componentSpec"));
- assertEquals("JPEG", metadata.get("Compression CompressionTypeName"));
- assertEquals("0.35277778", metadata.get("Dimension
HorizontalPixelSize"));
+ metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS +
"markerSequence sof componentSpec"));
+ assertEquals("JPEG",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression
CompressionTypeName"));
+ assertEquals("0.35277778",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension
HorizontalPixelSize"));
assertEquals("Licensed to the Apache Software Foundation (ASF) under
one or " +
"more contributor license agreements. See the NOTICE file " +
"distributed with this work for additional information " +
- "regarding copyright ownership.", metadata.get("markerSequence
com"));
- assertEquals("3", metadata.get("Chroma NumChannels"));
- assertEquals("1", metadata.get("Compression NumProgressiveScans"));
- assertEquals("YCbCr", metadata.get("Chroma ColorSpaceType"));
+ "regarding copyright ownership.",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence com"));
+ assertEquals("3", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS +
"Chroma NumChannels"));
+ assertEquals("1", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS +
"Compression NumProgressiveScans"));
+ assertEquals("YCbCr",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma ColorSpaceType"));
assertEquals("keyword=comment, value=Licensed to the Apache Software
Foundation " +
"(ASF) under one or more contributor license agreements. See
the NOTICE" +
" file distributed with this work for additional information
regarding " +
- "copyright ownership.", metadata.get("Text TextEntry"));
+ "copyright ownership.",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Text TextEntry"));
assertEquals("image/jpeg", metadata.get("Content-Type"));
assertEquals("process=0, samplePrecision=8, numLines=75,
samplesPerLine=100, " +
- "numFrameComponents=3", metadata.get("markerSequence sof"));
+ "numFrameComponents=3",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence sof"));
assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
@@ -162,41 +162,41 @@ public class ImageParserTest extends TikaTest {
parser.parse(stream, new DefaultHandler(), metadata, new
ParseContext());
}
- assertEquals("75", metadata.get("height"));
- assertEquals("100", metadata.get("width"));
- assertEquals("0.35273367", metadata.get("Dimension
VerticalPixelSize"));
- assertEquals("8 8 8", metadata.get("Data BitsPerSample"));
- assertEquals("Perceptual", metadata.get("sRGB"));
- assertEquals("true", metadata.get("Compression Lossless"));
+ assertEquals("75", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "height"));
+ assertEquals("100", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "width"));
+ assertEquals("0.35273367",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension
VerticalPixelSize"));
+ assertEquals("8 8 8",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Data BitsPerSample"));
+ assertEquals("Perceptual",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "sRGB"));
+ assertEquals("true",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression Lossless"));
assertEquals("year=2008, month=5, day=6, hour=6, minute=18, second=47",
- metadata.get("tIME"));
- assertEquals("Normal", metadata.get("Dimension ImageOrientation"));
- assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
+ metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "tIME"));
+ assertEquals("Normal",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension
ImageOrientation"));
+ assertEquals("1.0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "Dimension PixelAspectRatio"));
assertEquals("keyword=Comment, value=Licensed to the Apache Software
Foundation " +
"(ASF) under one or more contributor license agreements. See
the " +
"NOTICE file distributed with this work for additional
information " +
- "regarding copyright ownership.", metadata.get("tEXt
tEXtEntry"));
- assertEquals("deflate", metadata.get("Compression
CompressionTypeName"));
- assertEquals("UnsignedIntegral", metadata.get("Data SampleFormat"));
- assertEquals("0.35273367", metadata.get("Dimension
HorizontalPixelSize"));
- assertEquals("none", metadata.get("Transparency Alpha"));
+ "regarding copyright ownership.",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "tEXt tEXtEntry"));
+ assertEquals("deflate",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression
CompressionTypeName"));
+ assertEquals("UnsignedIntegral",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Data SampleFormat"));
+ assertEquals("0.35273367",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension
HorizontalPixelSize"));
+ assertEquals("none",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Transparency Alpha"));
assertEquals("pixelsPerUnitXAxis=2835, pixelsPerUnitYAxis=2835,
unitSpecifier=meter",
- metadata.get("pHYs"));
- assertEquals("3", metadata.get("Chroma NumChannels"));
- assertEquals("1", metadata.get("Compression NumProgressiveScans"));
- assertEquals("RGB", metadata.get("Chroma ColorSpaceType"));
+ metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "pHYs"));
+ assertEquals("3", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS +
"Chroma NumChannels"));
+ assertEquals("1", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS +
"Compression NumProgressiveScans"));
+ assertEquals("RGB", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "Chroma ColorSpaceType"));
assertEquals("keyword=Comment, value=Licensed to the Apache Software
Foundation " +
"(ASF) under one or more contributor license
agreements. See the " +
"NOTICE file distributed with this work for additional
information " +
"regarding copyright ownership., encoding=ISO-8859-1,
compression=none",
- metadata.get("Text TextEntry"));
- assertEquals("PixelInterleaved", metadata.get("Data
PlanarConfiguration"));
+ metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Text
TextEntry"));
+ assertEquals("PixelInterleaved",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Data
PlanarConfiguration"));
assertEquals("width=100, height=75, bitDepth=8, colorType=RGB, " +
"compressionMethod=deflate, filterMethod=adaptive,
interlaceMethod=none",
- metadata.get("IHDR"));
- assertEquals("true", metadata.get("Chroma BlackIsZero"));
+ metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "IHDR"));
+ assertEquals("true",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma BlackIsZero"));
assertEquals("year=2008, month=5, day=6, hour=6, minute=18, second=47",
- metadata.get("Document ImageModificationTime"));
+ metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Document
ImageModificationTime"));
assertEquals("image/png", metadata.get("Content-Type"));
assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
@@ -212,8 +212,8 @@ public class ImageParserTest extends TikaTest {
try (InputStream stream =
getResourceAsStream("/test-documents/testJBIG2.jb2")) {
parser.parse(stream, new DefaultHandler(), metadata, new
ParseContext());
}
- assertEquals("78", metadata.get("height"));
- assertEquals("328", metadata.get("width"));
+ assertEquals("78", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "height"));
+ assertEquals("328", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "width"));
assertEquals("image/x-jbig2", metadata.get("Content-Type"));
assertEquals(1, metadata.getInt(TikaCoreProperties.NUM_IMAGES));
}
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JpegParserTest.java
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JpegParserTest.java
index 9775b9464..76a7367cb 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JpegParserTest.java
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JpegParserTest.java
@@ -73,7 +73,7 @@ public class JpegParserTest {
// Check that EXIF/TIFF tags come through with their raw values too
// (This may be removed for Tika 1.0, as we support more of them
// with explicit Metadata entries)
- assertEquals("Canon EOS 40D", metadata.get("Exif IFD0:Model"));
+ assertEquals("Canon EOS 40D",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Exif IFD0:Model"));
// Common tags
assertEquals("2009-10-02T23:02:49",
metadata.get(TikaCoreProperties.MODIFIED));
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/WebPParserTest.java
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/WebPParserTest.java
index 60baf04e6..0eb8cc02d 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/WebPParserTest.java
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/WebPParserTest.java
@@ -41,10 +41,10 @@ public class WebPParserTest extends TikaTest {
public void testSimple() throws Exception {
Metadata metadata = getXML("testWebp_Alpha_Lossy.webp").metadata;
- assertEquals("301", metadata.get("Image Height"));
- assertEquals("400", metadata.get("Image Width"));
- assertEquals("true", metadata.get("Has Alpha"));
- assertEquals("false", metadata.get("Is Animation"));
+ assertEquals("301", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "Image Height"));
+ assertEquals("400", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS
+ "Image Width"));
+ assertEquals("true",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Has Alpha"));
+ assertEquals("false",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Is Animation"));
assertEquals("image/webp", metadata.get(Metadata.CONTENT_TYPE));
metadata = getXML("testWebp_Alpha_Lossless.webp").metadata;
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
index 7a898e3f6..f8f3cf878 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
@@ -37,6 +37,7 @@ import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.DefaultParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.image.ImageMetadataExtractor;
import org.apache.tika.parser.image.ImageParser;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.BasicContentHandlerFactory;
@@ -220,7 +221,7 @@ public class TesseractOCRParserTest extends TikaTest {
//gif
Metadata m = getXML("testGIF.gif").metadata;
assertTrue(m.names().length > 20);
- assertEquals("RGB", m.get("Chroma ColorSpaceType"));
+ assertEquals("RGB", m.get(ImageMetadataExtractor.UNKNOWN_IMG_NS +
"Chroma ColorSpaceType"));
//jpg
m = getXML("testOCR.jpg").metadata;
@@ -239,13 +240,13 @@ public class TesseractOCRParserTest extends TikaTest {
m = getXML("testPNG.png").metadata;
assertEquals("100", m.get(Metadata.IMAGE_WIDTH));
assertEquals("75", m.get(Metadata.IMAGE_LENGTH));
- assertEquals("UnsignedIntegral", m.get("Data SampleFormat"));
+ assertEquals("UnsignedIntegral",
m.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Data SampleFormat"));
//tiff
m = getXML("testTIFF.tif").metadata;
assertEquals("100", m.get(Metadata.IMAGE_WIDTH));
assertEquals("75", m.get(Metadata.IMAGE_LENGTH));
- assertEquals("72 dots per inch", m.get("Exif IFD0:Y Resolution"));
+ assertEquals("72 dots per inch",
m.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Exif IFD0:Y Resolution"));
}
@Test
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 78f54c4f1..49b0042cb 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -49,6 +49,7 @@ import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.RecursiveParserWrapper;
import org.apache.tika.parser.external.ExternalParser;
+import org.apache.tika.parser.image.ImageMetadataExtractor;
import org.apache.tika.parser.ocr.TesseractOCRConfig;
import org.apache.tika.parser.ocr.TesseractOCRParser;
import org.apache.tika.parser.xml.XMLProfiler;
@@ -355,8 +356,8 @@ public class PDFParserTest extends TikaTest {
fail("Exception: " + metadatas.get(1).get(key));
}
}
- assertEquals("91", metadatas.get(1).get("height"));
- assertEquals("352", metadatas.get(1).get("width"));
+ assertEquals("91",
metadatas.get(1).get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "height"));
+ assertEquals("352",
metadatas.get(1).get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "width"));
assertNull(metadatas.get(0).get(TikaCoreProperties.RESOURCE_NAME_KEY));
assertEquals("image0.jb2",
metadatas.get(1).get(TikaCoreProperties.RESOURCE_NAME_KEY));