rdblue commented on a change in pull request #2021:
URL: https://github.com/apache/iceberg/pull/2021#discussion_r560582607



##########
File path: spark3-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DynamicFileFilterExec.scala
##########
@@ -47,13 +49,45 @@ case class DynamicFileFilterExec(
   override protected def doExecute(): RDD[InternalRow] = scanExec.execute()
   override protected def doExecuteColumnar(): RDD[ColumnarBatch] = 
scanExec.executeColumnar()
 
+  protected def doPrepare(): Unit
+
+  override def simpleString(maxFields: Int): String = {
+    s"DynamicFileFilterExec${truncatedString(output, "[", ", ", "]", 
maxFields)}"
+  }
+}
+
+case class DynamicFileFilterExec(
+    scanExec: SparkPlan,
+    fileFilterExec: SparkPlan,
+    @transient filterable: SupportsFileFilter)
+  extends DynamicFileFilterExecBase(scanExec, fileFilterExec, filterable) {
+
   override protected def doPrepare(): Unit = {
     val rows = fileFilterExec.executeCollect()
     val matchedFileLocations = rows.map(_.getString(0))
     filterable.filterFiles(matchedFileLocations.toSet.asJava)
   }
+}
 
-  override def simpleString(maxFields: Int): String = {
-    s"DynamicFileFilterExec${truncatedString(output, "[", ", ", "]", 
maxFields)}"
+case class DynamicFileFilterWithCountCheckExec(
+    scanExec: SparkPlan,
+    fileFilterExec: SparkPlan,
+    @transient filterable: SupportsFileFilter,
+    filesAccumulator: SetAccumulator[String],
+    @transient targetTableName: String)
+  extends DynamicFileFilterExecBase(scanExec, fileFilterExec, filterable)  {
+
+  override protected def doPrepare(): Unit = {
+    val rows = fileFilterExec.executeCollect()
+    if (rows.size > 0) {
+      val msg =
+        s"""
+           |The same row of target table `$targetTableName` was identified 
more than

Review comment:
       Nit: this error message starts with a newline, because the triple-quoted literal opens with a line break right after `s"""`.
       Can you start the literal with `s"""The ...` instead?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to