exceptionfactory commented on code in PR #8350:
URL: https://github.com/apache/nifi/pull/8350#discussion_r1476854242
##########
nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestUnpackContent.java:
##########
@@ -222,6 +225,88 @@ public void testInvalidZip() throws IOException {
flowFile.assertContentEquals(path.toFile());
}
}
+ @Test
+ public void testZipEncodingField() {
+ final TestRunner unpackRunner = TestRunners.newTestRunner(new
UnpackContent());
+ unpackRunner.setProperty(UnpackContent.PACKAGING_FORMAT,
UnpackContent.PackageFormat.ZIP_FORMAT.toString());
+ unpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET,
"invalid-encoding");
+ unpackRunner.assertNotValid();
+ unpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET, "IBM437");
+ unpackRunner.assertValid();
+ unpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET, "Cp437");
+ unpackRunner.assertValid();
+ unpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET,
StandardCharsets.ISO_8859_1.name());
+ unpackRunner.assertValid();
+ unpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET,
StandardCharsets.UTF_8.name());
+ unpackRunner.assertValid();
+
+ }
+ @Test
+ public void testZipWithCp437Encoding() throws IOException {
+ String zipFilename = "windows-with-cp437.zip";
+ final TestRunner unpackRunner = TestRunners.newTestRunner(new
UnpackContent());
+ final TestRunner autoUnpackRunner = TestRunners.newTestRunner(new
UnpackContent());
+ unpackRunner.setProperty(UnpackContent.PACKAGING_FORMAT,
UnpackContent.PackageFormat.ZIP_FORMAT.toString());
+ unpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET, "Cp437");
+
unpackRunner.setProperty(UnpackContent.ALLOW_STORED_ENTRIES_WITH_DATA_DESCRIPTOR,
"true"); // just forces this to be exercised
+
+ autoUnpackRunner.setProperty(UnpackContent.PACKAGING_FORMAT,
UnpackContent.PackageFormat.AUTO_DETECT_FORMAT.toString());
+ autoUnpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET,
"Cp437");
+
+ unpackRunner.enqueue(dataPath.resolve(zipFilename));
+ unpackRunner.enqueue(dataPath.resolve(zipFilename));
+
+ Map<String, String> attributes = new HashMap<>(1);
+ attributes.put("mime.type", "application/zip");
+ autoUnpackRunner.enqueue(dataPath.resolve(zipFilename), attributes);
+ autoUnpackRunner.enqueue(dataPath.resolve(zipFilename), attributes);
+ unpackRunner.run(2);
+ autoUnpackRunner.run(2);
+
+ unpackRunner.assertTransferCount(UnpackContent.REL_FAILURE, 0);
+ autoUnpackRunner.assertTransferCount(UnpackContent.REL_FAILURE, 0);
+
+ final List<MockFlowFile> unpacked =
+
unpackRunner.getFlowFilesForRelationship(UnpackContent.REL_SUCCESS);
+ for (final MockFlowFile flowFile : unpacked) {
+ final String filename =
flowFile.getAttribute(CoreAttributes.FILENAME.key());
+ assertTrue(StringUtils.containsNone(filename, "?"), "filename
contains '?': " + filename);
+ assertTrue(StringUtils.containsNone(filename, "�"), "filename
contains '�': " + filename);
+ final String path =
flowFile.getAttribute(CoreAttributes.PATH.key());
+ assertTrue(StringUtils.containsNone(path, "?"), "path contains
'?': " + path);
+ assertTrue(StringUtils.containsNone(path, "�"), "path contains
'�': " + path);
Review Comment:
These tests for other characters are helpful, but it is not clear from the
source which character is actually being checked. Instead of using the literal
unknown character here, I recommend using the Java [Unicode
Character](https://www.baeldung.com/java-unicode-character-from-code-point-hex-string#understanding-the-hexadecimal-number-after-u)
escape with the hexadecimal number (for example, `\uFFFD` if the Unicode
replacement character is what is intended) for readability.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]