exceptionfactory commented on code in PR #8350:
URL: https://github.com/apache/nifi/pull/8350#discussion_r1476854242


##########
nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestUnpackContent.java:
##########
@@ -222,6 +225,88 @@ public void testInvalidZip() throws IOException {
             flowFile.assertContentEquals(path.toFile());
         }
     }
+    @Test
+    public void testZipEncodingField() {
+        final TestRunner unpackRunner = TestRunners.newTestRunner(new 
UnpackContent());
+        unpackRunner.setProperty(UnpackContent.PACKAGING_FORMAT, 
UnpackContent.PackageFormat.ZIP_FORMAT.toString());
+        unpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET, 
"invalid-encoding");
+        unpackRunner.assertNotValid();
+        unpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET, "IBM437");
+        unpackRunner.assertValid();
+        unpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET, "Cp437");
+        unpackRunner.assertValid();
+        unpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET, 
StandardCharsets.ISO_8859_1.name());
+        unpackRunner.assertValid();
+        unpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET, 
StandardCharsets.UTF_8.name());
+        unpackRunner.assertValid();
+
+    }
+    @Test
+    public void testZipWithCp437Encoding() throws IOException {
+        String zipFilename = "windows-with-cp437.zip";
+        final TestRunner unpackRunner = TestRunners.newTestRunner(new 
UnpackContent());
+        final TestRunner autoUnpackRunner = TestRunners.newTestRunner(new 
UnpackContent());
+        unpackRunner.setProperty(UnpackContent.PACKAGING_FORMAT, 
UnpackContent.PackageFormat.ZIP_FORMAT.toString());
+        unpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET, "Cp437");
+        
unpackRunner.setProperty(UnpackContent.ALLOW_STORED_ENTRIES_WITH_DATA_DESCRIPTOR,
 "true"); // just forces this to be exercised
+
+        autoUnpackRunner.setProperty(UnpackContent.PACKAGING_FORMAT, 
UnpackContent.PackageFormat.AUTO_DETECT_FORMAT.toString());
+        autoUnpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET, 
"Cp437");
+
+        unpackRunner.enqueue(dataPath.resolve(zipFilename));
+        unpackRunner.enqueue(dataPath.resolve(zipFilename));
+
+        Map<String, String> attributes = new HashMap<>(1);
+        attributes.put("mime.type", "application/zip");
+        autoUnpackRunner.enqueue(dataPath.resolve(zipFilename), attributes);
+        autoUnpackRunner.enqueue(dataPath.resolve(zipFilename), attributes);
+        unpackRunner.run(2);
+        autoUnpackRunner.run(2);
+
+        unpackRunner.assertTransferCount(UnpackContent.REL_FAILURE, 0);
+        autoUnpackRunner.assertTransferCount(UnpackContent.REL_FAILURE, 0);
+
+        final List<MockFlowFile> unpacked =
+            
unpackRunner.getFlowFilesForRelationship(UnpackContent.REL_SUCCESS);
+        for (final MockFlowFile flowFile : unpacked) {
+            final String filename = 
flowFile.getAttribute(CoreAttributes.FILENAME.key());
+            assertTrue(StringUtils.containsNone(filename, "?"), "filename 
contains '?': " + filename);
+            assertTrue(StringUtils.containsNone(filename, "�"), "filename 
contains '�': " + filename);
+            final String path = 
flowFile.getAttribute(CoreAttributes.PATH.key());
+            assertTrue(StringUtils.containsNone(path, "?"), "path contains 
'?': " + path);
+            assertTrue(StringUtils.containsNone(path, "�"), "path contains 
'�': " + path);

Review Comment:
   These tests for other characters are helpful, but it is not clear which 
character the literal actually represents. Instead of embedding the literal 
unknown character here, I recommend using the Java [Unicode 
Character](https://www.baeldung.com/java-unicode-character-from-code-point-hex-string#understanding-the-hexadecimal-number-after-u)
 escape with its hexadecimal code point (for example, `\uFFFD` if the Unicode 
replacement character is what is intended) for readability.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to