Github user mxm commented on a diff in the pull request:
https://github.com/apache/flink/pull/2618#discussion_r83007178
--- Diff:
flink-fs-tests/src/test/java/org/apache/flink/hdfstests/ContinuousFileProcessingTests.java
---
@@ -336,237 +348,294 @@ public int compare(String o1, String o2) {
Assert.assertEquals(expectedFileContents.get(fileIdx),
cntntStr.toString());
}
- for(org.apache.hadoop.fs.Path file: filesCreated) {
+ for (org.apache.hadoop.fs.Path file: filesCreated) {
hdfs.delete(file, false);
}
}
- private static class PathFilter extends FilePathFilter {
-
- @Override
- public boolean filterPath(Path filePath) {
- return filePath.getName().startsWith("**");
- }
- }
+ //// Monitoring Function Tests
//////
@Test
public void testFilePathFiltering() throws Exception {
- Set<String> uniqFilesFound = new HashSet<>();
Set<org.apache.hadoop.fs.Path> filesCreated = new HashSet<>();
+ Set<String> filesKept = new TreeSet<>();
// create the files to be discarded
for (int i = 0; i < NO_OF_FILES; i++) {
- Tuple2<org.apache.hadoop.fs.Path, String> file =
fillWithData(hdfsURI, "**file", i, "This is test line.");
+ Tuple2<org.apache.hadoop.fs.Path, String> file =
createFileAndFillWithData(hdfsURI, "**file", i, "This is test line.");
filesCreated.add(file.f0);
}
// create the files to be kept
for (int i = 0; i < NO_OF_FILES; i++) {
- Tuple2<org.apache.hadoop.fs.Path, String> file =
fillWithData(hdfsURI, "file", i, "This is test line.");
+ Tuple2<org.apache.hadoop.fs.Path, String> file =
+ createFileAndFillWithData(hdfsURI, "file", i,
"This is test line.");
filesCreated.add(file.f0);
+ filesKept.add(file.f0.getName());
}
TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
format.setFilesFilter(new PathFilter());
+
ContinuousFileMonitoringFunction<String> monitoringFunction =
new ContinuousFileMonitoringFunction<>(format, hdfsURI,
FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);
+ final FileVerifyingSourceContext context =
+ new FileVerifyingSourceContext(new OneShotLatch(),
monitoringFunction, 0, -1);
+
monitoringFunction.open(new Configuration());
- monitoringFunction.run(new
TestingSourceContext(monitoringFunction, uniqFilesFound));
+ monitoringFunction.run(context);
- Assert.assertEquals(NO_OF_FILES, uniqFilesFound.size());
- for(int i = 0; i < NO_OF_FILES; i++) {
- org.apache.hadoop.fs.Path file = new
org.apache.hadoop.fs.Path(hdfsURI + "/file" + i);
-
Assert.assertTrue(uniqFilesFound.contains(file.toString()));
- }
+ Assert.assertArrayEquals(filesKept.toArray(),
context.getSeenFiles().toArray());
- for(org.apache.hadoop.fs.Path file: filesCreated) {
+ // finally delete the files created for the test.
+ for (org.apache.hadoop.fs.Path file: filesCreated) {
hdfs.delete(file, false);
}
}
+ private static class PathFilter extends FilePathFilter {
--- End diff --
You moved this class, but you're only using it in one test case, so you
can simply make it an anonymous class.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---