This is an automated email from the ASF dual-hosted git repository. yao pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push: new a00235b [SPARK-34760][EXAMPLES] Replace `favorite_color` with `age` in JavaSQLDataSourceExample a00235b is described below commit a00235b004ceb345e599096f176b2176b0501ea4 Author: zengruios <578395...@qq.com> AuthorDate: Thu Mar 18 22:53:58 2021 +0800 [SPARK-34760][EXAMPLES] Replace `favorite_color` with `age` in JavaSQLDataSourceExample ### What changes were proposed in this pull request? In JavaSparkSQLExample, executing 'peopleDF.write().partitionBy("favorite_color").bucketBy(42,"name").saveAsTable("people_partitioned_bucketed");' throws an exception: 'Exception in thread "main" org.apache.spark.sql.AnalysisException: partition column favorite_color is not defined in table people_partitioned_bucketed, defined table columns are: age, name;' Change the column favorite_color to age. ### Why are the changes needed? Run JavaSparkSQLExample successfully. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? Tested in JavaSparkSQLExample. Closes #31851 from zengruios/SPARK-34760. 
Authored-by: zengruios <578395...@qq.com> Signed-off-by: Kent Yao <y...@apache.org> (cherry picked from commit 5570f817b2862c2680546f35c412bb06779ae1c9) Signed-off-by: Kent Yao <y...@apache.org> --- .../spark/examples/sql/JavaSQLDataSourceExample.java | 6 +++--- .../apache/spark/examples/sql/JavaSparkSQLExample.java | 16 ++++++++-------- examples/src/main/python/sql/datasource.py | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java index 46e740d..cb34db1 100644 --- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java +++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java @@ -204,15 +204,15 @@ public class JavaSQLDataSourceExample { .save("namesPartByColor.parquet"); // $example off:write_partitioning$ // $example on:write_partition_and_bucket$ - peopleDF + usersDF .write() .partitionBy("favorite_color") .bucketBy(42, "name") - .saveAsTable("people_partitioned_bucketed"); + .saveAsTable("users_partitioned_bucketed"); // $example off:write_partition_and_bucket$ spark.sql("DROP TABLE IF EXISTS people_bucketed"); - spark.sql("DROP TABLE IF EXISTS people_partitioned_bucketed"); + spark.sql("DROP TABLE IF EXISTS users_partitioned_bucketed"); } private static void runBasicParquetExample(SparkSession spark) { diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQLExample.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQLExample.java index 8605852..86a9045 100644 --- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQLExample.java +++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQLExample.java @@ -65,7 +65,7 @@ public class JavaSparkSQLExample { // $example on:create_ds$ public static class Person implements Serializable { private String name; - 
private int age; + private long age; public String getName() { return name; @@ -75,11 +75,11 @@ public class JavaSparkSQLExample { this.name = name; } - public int getAge() { + public long getAge() { return age; } - public void setAge(int age) { + public void setAge(long age) { this.age = age; } } @@ -225,11 +225,11 @@ public class JavaSparkSQLExample { // +---+----+ // Encoders for most common types are provided in class Encoders - Encoder<Integer> integerEncoder = Encoders.INT(); - Dataset<Integer> primitiveDS = spark.createDataset(Arrays.asList(1, 2, 3), integerEncoder); - Dataset<Integer> transformedDS = primitiveDS.map( - (MapFunction<Integer, Integer>) value -> value + 1, - integerEncoder); + Encoder<Long> longEncoder = Encoders.LONG(); + Dataset<Long> primitiveDS = spark.createDataset(Arrays.asList(1L, 2L, 3L), longEncoder); + Dataset<Long> transformedDS = primitiveDS.map( + (MapFunction<Long, Long>) value -> value + 1L, + longEncoder); transformedDS.collect(); // Returns [2, 3, 4] // DataFrames can be converted to a Dataset by providing a class. Mapping based on name diff --git a/examples/src/main/python/sql/datasource.py b/examples/src/main/python/sql/datasource.py index 8c146ba..3bc31a0 100644 --- a/examples/src/main/python/sql/datasource.py +++ b/examples/src/main/python/sql/datasource.py @@ -104,7 +104,7 @@ def basic_datasource_example(spark): .write .partitionBy("favorite_color") .bucketBy(42, "name") - .saveAsTable("people_partitioned_bucketed")) + .saveAsTable("users_partitioned_bucketed")) # $example off:write_partition_and_bucket$ # $example on:manual_load_options$ @@ -135,7 +135,7 @@ def basic_datasource_example(spark): # $example off:direct_sql$ spark.sql("DROP TABLE IF EXISTS people_bucketed") - spark.sql("DROP TABLE IF EXISTS people_partitioned_bucketed") + spark.sql("DROP TABLE IF EXISTS users_partitioned_bucketed") def parquet_example(spark): -