cloud-fan commented on a change in pull request #23208: [SPARK-25530][SQL] data source v2 API refactor (batch write) URL: https://github.com/apache/spark/pull/23208#discussion_r247850691
########## File path: sql/core/src/test/scala/org/apache/spark/sql/sources/v2/SimpleWritableDataSource.scala ########## @@ -68,22 +65,50 @@ class SimpleWritableDataSource extends DataSourceV2 new CSVReaderFactory(serializableConf) } - override def readSchema(): StructType = writeSchema + override def readSchema(): StructType = tableSchema } - override def getTable(options: DataSourceOptions): Table = { - val path = new Path(options.get("path").get()) - val conf = SparkContext.getActive.get.hadoopConfiguration - new SimpleBatchTable { - override def newScanBuilder(options: DataSourceOptions): ScanBuilder = { - new MyScanBuilder(path.toUri.toString, conf) + class MyWriteBuilder(path: String) extends WriteBuilder with SupportsSaveMode { + private var queryId: String = _ + private var mode: SaveMode = _ + + override def withQueryId(queryId: String): WriteBuilder = { + this.queryId = queryId + this + } + + override def mode(mode: SaveMode): WriteBuilder = { + this.mode = mode + this + } + + override def buildForBatch(): BatchWrite = { + assert(mode != null) + + val hadoopPath = new Path(path) + val hadoopConf = SparkContext.getActive.get.hadoopConfiguration + val fs = hadoopPath.getFileSystem(hadoopConf) + + if (mode == SaveMode.ErrorIfExists) { + if (fs.exists(hadoopPath)) { + throw new RuntimeException("data already exists.") + } + } + if (mode == SaveMode.Ignore) { + if (fs.exists(hadoopPath)) { + return null + } + } + if (mode == SaveMode.Overwrite) { + fs.delete(hadoopPath, true) Review comment: Because of the ambiguity of SaveMode, I think a data source is free to define its own behavior. It's fine that this simple data source in the test defines the overwrite behavior in this way. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org