Repository: spark Updated Branches: refs/heads/branch-2.1 def3690f6 -> ec0d6e21e
[DOC] bucketing is applicable to all file-based data sources ## What changes were proposed in this pull request? Starting Spark 2.1.0, the bucketing feature is available for all file-based data sources. This patch fixes some function docs that haven't yet been updated to reflect that. ## How was this patch tested? N/A Author: Reynold Xin <r...@databricks.com> Closes #16349 from rxin/ds-doc. (cherry picked from commit 2e861df96eacd821edbbd9883121bff67611074f) Signed-off-by: Reynold Xin <r...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ec0d6e21 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ec0d6e21 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ec0d6e21 Branch: refs/heads/branch-2.1 Commit: ec0d6e21ed85164fd7eb519ec1d017497122c55c Parents: def3690 Author: Reynold Xin <r...@databricks.com> Authored: Wed Dec 21 23:46:33 2016 -0800 Committer: Reynold Xin <r...@databricks.com> Committed: Wed Dec 21 23:46:38 2016 -0800 ---------------------------------------------------------------------- .../src/main/scala/org/apache/spark/sql/DataFrameWriter.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/ec0d6e21/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index fa8e8cb..44c407d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -150,7 +150,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { * predicates on the partitioned columns. 
In order for partitioning to work well, the number * of distinct values in each column should typically be less than tens of thousands. * - * This was initially applicable for Parquet but in 1.5+ covers JSON, text, ORC and avro as well. + * This is applicable for all file-based data sources (e.g. Parquet, JSON) starting Spark 2.1.0. * * @since 1.4.0 */ @@ -164,7 +164,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { * Buckets the output by the given columns. If specified, the output is laid out on the file * system similar to Hive's bucketing scheme. * - * This is applicable for Parquet, JSON and ORC. + * This is applicable for all file-based data sources (e.g. Parquet, JSON) starting Spark 2.1.0. * * @since 2.0 */ @@ -178,7 +178,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { /** * Sorts the output in each bucket by the given columns. * - * This is applicable for Parquet, JSON and ORC. + * This is applicable for all file-based data sources (e.g. Parquet, JSON) starting Spark 2.1.0. * * @since 2.0 */ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org