yavuzkavus commented on a change in pull request #124:
URL: https://github.com/apache/commons-imaging/pull/124#discussion_r593807945



##########
File path: 
src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcParser.java
##########
@@ -143,6 +149,7 @@ public PhotoshopApp13Data parsePhotoshopSegment(final 
byte[] bytes, final boolea
     }
 
     protected List<IptcRecord> parseIPTCBlock(final byte[] bytes) {
+        Charset charset = DEFAULT_CHARSET;

Review comment:
       Starting with default charset

##########
File path: 
src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcParser.java
##########
@@ -202,6 +209,11 @@ public PhotoshopApp13Data parsePhotoshopSegment(final 
byte[] bytes, final boolea
             // Debug.debug("recordSize", recordSize + " (0x"
             // + Integer.toHexString(recordSize) + ")");
 
+            if( recordNumber==IptcConstants.IPTC_ENVELOPE_RECORD_NUMBER && 
recordType==ENV_TAG_CODED_CHARACTER_SET ) {
+                charset = findCharset(recordData);
+                continue;
+            }

Review comment:
       If the record is the envelope record's coded character set tag, determine the charset from its data and skip to the next record.

##########
File path: 
src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcParser.java
##########
@@ -440,11 +468,13 @@ public PhotoshopApp13Data parsePhotoshopSegment(final 
byte[] bytes, final boolea
                 }
                 bos.write(element.iptcType.getType());
 
-                final byte[] recordData = 
element.getValue().getBytes(StandardCharsets.ISO_8859_1);
-                if (!new String(recordData, 
StandardCharsets.ISO_8859_1).equals(element.getValue())) {
+                final byte[] recordData = element.getValue().getBytes(charset);
+                /*
+                if (!new String(recordData, 
charset).equals(element.getValue())) {
                     throw new ImageWriteException(
-                            "Invalid record value, not ISO-8859-1");
+                            "Invalid record value, not " + charset.name());
                 }
+                */

Review comment:
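       This round-trip check is no longer necessary: writeIPTCBlock now switches to UTF-8 up front when a value cannot be represented in the default charset.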

##########
File path: 
src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcParser.java
##########
@@ -405,9 +417,25 @@ public PhotoshopApp13Data parsePhotoshopSegment(final 
byte[] bytes, final boolea
 
     public byte[] writeIPTCBlock(List<IptcRecord> elements)
             throws ImageWriteException, IOException {
+        Charset charset = DEFAULT_CHARSET;
+        for (final IptcRecord element : elements) {
+            final byte[] recordData = element.getValue().getBytes(charset);
+            if (!new String(recordData, charset).equals(element.getValue())) {
+                charset = StandardCharsets.UTF_8;
+                break;
+            }
+        }

Review comment:
       Check whether every block value can be encoded in the default charset 
without loss. If not, use UTF-8 instead.

##########
File path: 
src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcParser.java
##########
@@ -237,7 +249,7 @@ public PhotoshopApp13Data parsePhotoshopSegment(final 
byte[] bytes, final boolea
             // continue;
             // }
 
-            final String value = new String(recordData, 
StandardCharsets.ISO_8859_1);
+            final String value = new String(recordData, charset);

Review comment:
       Use the charset detected from the block instead of the hard-coded ISO-8859-1.

##########
File path: 
src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcParser.java
##########
@@ -405,9 +417,25 @@ public PhotoshopApp13Data parsePhotoshopSegment(final 
byte[] bytes, final boolea
 
     public byte[] writeIPTCBlock(List<IptcRecord> elements)
             throws ImageWriteException, IOException {
+        Charset charset = DEFAULT_CHARSET;
+        for (final IptcRecord element : elements) {
+            final byte[] recordData = element.getValue().getBytes(charset);
+            if (!new String(recordData, charset).equals(element.getValue())) {
+                charset = StandardCharsets.UTF_8;
+                break;
+            }
+        }
         byte[] blockData;
         final ByteArrayOutputStream baos = new ByteArrayOutputStream();
         try (BinaryOutputStream bos = new BinaryOutputStream(baos, 
getByteOrder())) {
+            if( charset!=null && !charset.equals(DEFAULT_CHARSET) ) {
+                bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
+                bos.write(IptcConstants.IPTC_ENVELOPE_RECORD_NUMBER);
+                bos.write(ENV_TAG_CODED_CHARACTER_SET);
+                byte[] codedCharset = charset.equals(StandardCharsets.UTF_8) ? 
CHARACTER_ESCAPE_SEQUENCE: charset.name().getBytes(StandardCharsets.ISO_8859_1);
+                bos.write2Bytes(codedCharset.length);
+                bos.write(codedCharset);
+            }

Review comment:
       Write the coded character set tag if the charset is not null and is not the 
default charset (ISO-8859-1). In the case of UTF-8, CHARACTER_ESCAPE_SEQUENCE is written, as is the common convention.

##########
File path: 
src/main/java/org/apache/commons/imaging/formats/jpeg/iptc/IptcParser.java
##########
@@ -456,4 +486,26 @@ public PhotoshopApp13Data parsePhotoshopSegment(final 
byte[] bytes, final boolea
         return blockData;
     }
 
+    private Charset findCharset(byte[] codedCharset) {
+        String codedCharsetString = new String(codedCharset);
+        try {
+            if (Charset.isSupported(codedCharsetString)) {
+                return Charset.forName(codedCharsetString);
+            }
+        } catch (IllegalArgumentException e) { }
+        // check if encoding is a escape sequence
+        // normalize encoding byte sequence
+        byte[] codedCharsetNormalized = new byte[codedCharset.length];
+        int j = 0;
+        for (int i = 0; i < codedCharset.length; i++) {
+            if (codedCharset[i] != ' ') {
+                codedCharsetNormalized[j++] = codedCharset[i];
+            }
+        }
+
+        if( Objects.deepEquals(codedCharsetNormalized, 
CHARACTER_ESCAPE_SEQUENCE) )
+            return StandardCharsets.UTF_8;
+        return DEFAULT_CHARSET;
+    }
+

Review comment:
       Charset detection method.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to