cloud-fan commented on code in PR #47301: URL: https://github.com/apache/spark/pull/47301#discussion_r1687308878
##########
sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala:
##########
@@ -330,6 +350,18 @@ trait CreateTableWriter[T] extends WriteConfigMethods[CreateTableWriter[T]] {
    */
   def partitionedBy(column: Column, columns: Column*): CreateTableWriter[T]

+  /**
+   * Clusters the output by the given columns on the storage. The rows with matching values in
+   * the specified clustering columns will be consolidated within the same group.
+   *
+   * For instance, if you cluster a dataset by date, the data sharing the same date will be stored
+   * together in a file. This arrangement improves query efficiency when you apply selective
+   * filters to these clustering columns, thanks to data skipping.
+   *
+   * @since 4.0.0
+   */
+  def clusterBy(colName: String, colNames: String*): CreateTableWriter[T]

Review Comment:
add `@scala.annotation.varargs`?

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org