This is an automated email from the ASF dual-hosted git repository.

kinow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-imaging.git

commit abac6c05136548ebe83dd93c978daa4a8bf6bd8f
Author: Yavuz Kavus <[email protected]>
AuthorDate: Sun Mar 14 00:48:45 2021 +0300

    [IMAGING-168] Encoding Support for IPTC metadata when reading an image 
metadata and writing an image metadata.
---
 .../imaging/formats/jpeg/iptc/IptcParser.java      | 61 ++++++++++++++++++++--
 1 file changed, 57 insertions(+), 4 deletions(-)

diff --git 
a/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcParser.java 
b/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcParser.java
index 8814075..6cde3dd 100644
--- a/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcParser.java
+++ b/src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcParser.java
@@ -29,6 +29,7 @@ import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.ByteOrder;
+import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -36,6 +37,7 @@ import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
@@ -65,6 +67,10 @@ public class IptcParser extends BinaryFileParser {
      */
     private static final List<Integer> PHOTOSHOP_IGNORED_BLOCK_TYPE = 
Arrays.asList(1084, 1085, 1086, 1087);
 
+    private static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1;
+    private static final int ENV_TAG_CODED_CHARACTER_SET = 90;
+    private static final byte[] CHARACTER_ESCAPE_SEQUENCE = {'\u001B', '%', 
'G'};
+
     public IptcParser() {
         setByteOrder(ByteOrder.BIG_ENDIAN);
     }
@@ -143,6 +149,7 @@ public class IptcParser extends BinaryFileParser {
     }
 
     protected List<IptcRecord> parseIPTCBlock(final byte[] bytes) {
+        Charset charset = DEFAULT_CHARSET;
         final List<IptcRecord> elements = new ArrayList<>();
 
         int index = 0;
@@ -202,6 +209,11 @@ public class IptcParser extends BinaryFileParser {
             // Debug.debug("recordSize", recordSize + " (0x"
             // + Integer.toHexString(recordSize) + ")");
 
+            if (recordNumber == IptcConstants.IPTC_ENVELOPE_RECORD_NUMBER && 
recordType == ENV_TAG_CODED_CHARACTER_SET) {
+                charset = findCharset(recordData);
+                continue;
+            }
+
             if (recordNumber != 
IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER) {
                 continue;
             }
@@ -237,7 +249,7 @@ public class IptcParser extends BinaryFileParser {
             // continue;
             // }
 
-            final String value = new String(recordData, 
StandardCharsets.ISO_8859_1);
+            final String value = new String(recordData, charset);
 
             final IptcType iptcType = IptcTypeLookup.getIptcType(recordType);
 
@@ -405,9 +417,25 @@ public class IptcParser extends BinaryFileParser {
 
     public byte[] writeIPTCBlock(List<IptcRecord> elements)
             throws ImageWriteException, IOException {
+        Charset charset = DEFAULT_CHARSET;
+        for (final IptcRecord element : elements) {
+            final byte[] recordData = element.getValue().getBytes(charset);
+            if (!new String(recordData, charset).equals(element.getValue())) {
+                charset = StandardCharsets.UTF_8;
+                break;
+            }
+        }
         byte[] blockData;
         final ByteArrayOutputStream baos = new ByteArrayOutputStream();
         try (BinaryOutputStream bos = new BinaryOutputStream(baos, 
getByteOrder())) {
+            if (charset != null && !charset.equals(DEFAULT_CHARSET)) {
+                bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
+                bos.write(IptcConstants.IPTC_ENVELOPE_RECORD_NUMBER);
+                bos.write(ENV_TAG_CODED_CHARACTER_SET);
+                byte[] codedCharset = charset.equals(StandardCharsets.UTF_8) ? 
CHARACTER_ESCAPE_SEQUENCE: charset.name().getBytes(StandardCharsets.ISO_8859_1);
+                bos.write2Bytes(codedCharset.length);
+                bos.write(codedCharset);
+            }
 
             // first, right record version record
             bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
@@ -440,11 +468,13 @@ public class IptcParser extends BinaryFileParser {
                 }
                 bos.write(element.iptcType.getType());
 
-                final byte[] recordData = 
element.getValue().getBytes(StandardCharsets.ISO_8859_1);
-                if (!new String(recordData, 
StandardCharsets.ISO_8859_1).equals(element.getValue())) {
+                final byte[] recordData = element.getValue().getBytes(charset);
+                /*
+                if (!new String(recordData, 
charset).equals(element.getValue())) {
                     throw new ImageWriteException(
-                            "Invalid record value, not ISO-8859-1");
+                            "Invalid record value, not " + charset.name());
                 }
+                */
 
                 bos.write2Bytes(recordData.length);
                 bos.write(recordData);
@@ -456,4 +486,27 @@ public class IptcParser extends BinaryFileParser {
         return blockData;
     }
 
+    private Charset findCharset(byte[] codedCharset) {
+        String codedCharsetString = new String(codedCharset, 
StandardCharsets.ISO_8859_1);
+        try {
+            if (Charset.isSupported(codedCharsetString)) {
+                return Charset.forName(codedCharsetString);
+            }
+        } catch (IllegalArgumentException e) { }
+        // check if encoding is a escape sequence
+        // normalize encoding byte sequence
+        byte[] codedCharsetNormalized = new byte[codedCharset.length];
+        int j = 0;
+        for (int i = 0; i < codedCharset.length; i++) {
+            if (codedCharset[i] != ' ') {
+                codedCharsetNormalized[j++] = codedCharset[i];
+            }
+        }
+
+        if (Objects.deepEquals(codedCharsetNormalized, 
CHARACTER_ESCAPE_SEQUENCE)) {
+            return StandardCharsets.UTF_8;
+        }
+        return DEFAULT_CHARSET;
+    }
+
 }

Reply via email to