Github user frreiss commented on a diff in the pull request:
https://github.com/apache/spark/pull/15262#discussion_r80826485
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
---
@@ -330,15 +353,42 @@ class FileStreamSourceSuite extends
FileStreamSourceTest {
val filtered = textStream.filter($"value" contains "keep")
testStream(filtered)(
- AddTextFileData("drop1\nkeep2\nkeep3", src, tmp),
+ AddTextLocalFileData("drop1\nkeep2\nkeep3", src, tmp),
+ CheckAnswer("keep2", "keep3"),
+ StopStream,
+ AddTextLocalFileData("drop4\nkeep5\nkeep6", src, tmp),
+ StartStream(),
+ CheckAnswer("keep2", "keep3", "keep5", "keep6"),
+ AddTextLocalFileData("drop7\nkeep8\nkeep9", src, tmp),
+ CheckAnswer("keep2", "keep3", "keep5", "keep6", "keep8", "keep9")
+ )
+ }
+ }
+
+ test("read from text files using hdfs") {
+ withTempDirs { case (_src, tmp) =>
+ // Create a mini dfs cluster.
+ System.clearProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA)
+ val conf = new HdfsConfiguration()
+ conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, tmp.getAbsolutePath)
+ val cluster = new MiniDFSCluster.Builder(conf).build()
+ val hdfsHomeDirectory: Path = cluster.getFileSystem.getHomeDirectory
+ cluster.getFileSystem.mkdirs(hdfsHomeDirectory)
+ cluster.waitClusterUp()
+ val textStream = createFileStream("text", hdfsHomeDirectory.toString)
+ val filtered = textStream.filter($"value" contains "keep")
+ val src = hdfsHomeDirectory
+ testStream(filtered)(
+ AddTextHDFSFileData("drop1\nkeep2\nkeep3", src, tmp, conf),
CheckAnswer("keep2", "keep3"),
StopStream,
- AddTextFileData("drop4\nkeep5\nkeep6", src, tmp),
+ AddTextHDFSFileData("drop4\nkeep5\nkeep6", src, tmp, conf),
StartStream(),
CheckAnswer("keep2", "keep3", "keep5", "keep6"),
- AddTextFileData("drop7\nkeep8\nkeep9", src, tmp),
+ AddTextHDFSFileData("drop7\nkeep8\nkeep9", src, tmp, conf),
CheckAnswer("keep2", "keep3", "keep5", "keep6", "keep8", "keep9")
)
+ cluster.shutdown()
--- End diff --
You'll probably want to move this cleanup code (`cluster.shutdown()`) into a
`finally` block, or a similar teardown hook, so that it is called even if
another part of the test case fails.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]