gaborgsomogyi commented on a change in pull request #27620: [SPARK-30866][SS]
FileStreamSource: Cache fetched list of files beyond maxFilesPerTrigger as
unread files
URL: https://github.com/apache/spark/pull/27620#discussion_r408784236
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
##########
@@ -1980,3 +2089,23 @@ class ExistsThrowsExceptionFileSystem extends
RawLocalFileSystem {
object ExistsThrowsExceptionFileSystem {
val scheme = s"FileStreamSourceSuite${math.abs(Random.nextInt)}fs"
}
+
+// Test-only Hadoop filesystem that counts, per path, how many times
+// `listStatus` is invoked, so tests can assert on file-listing frequency.
+// Counts are kept in the companion object's `pathToNumListStatusCalled` map
+// (declared below; shared across instances — presumably reset between tests;
+// NOTE(review): confirm reset behavior in the companion object, not fully
+// visible in this hunk).
+class CountListingLocalFileSystem extends RawLocalFileSystem {
+ import CountListingLocalFileSystem._
+
+  // Scheme comes from the companion object; it is randomized so this test
+  // filesystem does not clash with others registered in the same JVM.
+  override def getUri: URI = {
+    URI.create(s"$scheme:///")
+  }
+
+  // Record one call for this path (creating the counter on first use),
+  // then delegate to the real local-filesystem listing.
+  override def listStatus(f: Path): Array[FileStatus] = {
+    val path = f.toUri.getPath
+    val curVal = pathToNumListStatusCalled.getOrElseUpdate(path, new
AtomicLong(0))
+    curVal.incrementAndGet()
+    super.listStatus(f)
+  }
+}
+
+object CountListingLocalFileSystem {
+ val scheme = s"FileStreamSourceSuite${math.abs(Random.nextInt)}fs"
 Review comment:
   Maybe we can use the object name (e.g. `CountListingLocalFileSystem`) in
 the scheme string, since there are multiple filesystems declared here?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]