This is an automated email from the ASF dual-hosted git repository. yao pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 71a2f48 [SPARK-34760][EXAMPLES] Replace `favorite_color` with `age` in JavaSQLDataSourceExample 71a2f48 is described below commit 71a2f48c6b61822f775e9603ef604187c9e0d081 Author: zengruios <578395...@qq.com> AuthorDate: Thu Mar 18 22:53:58 2021 +0800 [SPARK-34760][EXAMPLES] Replace `favorite_color` with `age` in JavaSQLDataSourceExample ### What changes were proposed in this pull request? In JavaSparkSQLExample, executing 'peopleDF.write().partitionBy("favorite_color").bucketBy(42,"name").saveAsTable("people_partitioned_bucketed");' throws an exception: 'Exception in thread "main" org.apache.spark.sql.AnalysisException: partition column favorite_color is not defined in table people_partitioned_bucketed, defined table columns are: age, name;' Change the column favorite_color to age. ### Why are the changes needed? Run JavaSparkSQLExample successfully. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? Tested in JavaSparkSQLExample. Closes #31851 from zengruios/SPARK-34760. 
Authored-by: zengruios <578395...@qq.com> Signed-off-by: Kent Yao <y...@apache.org> (cherry picked from commit 5570f817b2862c2680546f35c412bb06779ae1c9) Signed-off-by: Kent Yao <y...@apache.org> --- .../spark/examples/sql/JavaSQLDataSourceExample.java | 6 +++--- .../apache/spark/examples/sql/JavaSparkSQLExample.java | 16 ++++++++-------- examples/src/main/python/sql/datasource.py | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java index 2295225..f4d4329 100644 --- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java +++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java @@ -188,15 +188,15 @@ public class JavaSQLDataSourceExample { .save("namesPartByColor.parquet"); // $example off:write_partitioning$ // $example on:write_partition_and_bucket$ - peopleDF + usersDF .write() .partitionBy("favorite_color") .bucketBy(42, "name") - .saveAsTable("people_partitioned_bucketed"); + .saveAsTable("users_partitioned_bucketed"); // $example off:write_partition_and_bucket$ spark.sql("DROP TABLE IF EXISTS people_bucketed"); - spark.sql("DROP TABLE IF EXISTS people_partitioned_bucketed"); + spark.sql("DROP TABLE IF EXISTS users_partitioned_bucketed"); } private static void runBasicParquetExample(SparkSession spark) { diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQLExample.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQLExample.java index 8605852..86a9045 100644 --- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQLExample.java +++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQLExample.java @@ -65,7 +65,7 @@ public class JavaSparkSQLExample { // $example on:create_ds$ public static class Person implements Serializable { private String name; - 
private int age; + private long age; public String getName() { return name; @@ -75,11 +75,11 @@ public class JavaSparkSQLExample { this.name = name; } - public int getAge() { + public long getAge() { return age; } - public void setAge(int age) { + public void setAge(long age) { this.age = age; } } @@ -225,11 +225,11 @@ public class JavaSparkSQLExample { // +---+----+ // Encoders for most common types are provided in class Encoders - Encoder<Integer> integerEncoder = Encoders.INT(); - Dataset<Integer> primitiveDS = spark.createDataset(Arrays.asList(1, 2, 3), integerEncoder); - Dataset<Integer> transformedDS = primitiveDS.map( - (MapFunction<Integer, Integer>) value -> value + 1, - integerEncoder); + Encoder<Long> longEncoder = Encoders.LONG(); + Dataset<Long> primitiveDS = spark.createDataset(Arrays.asList(1L, 2L, 3L), longEncoder); + Dataset<Long> transformedDS = primitiveDS.map( + (MapFunction<Long, Long>) value -> value + 1L, + longEncoder); transformedDS.collect(); // Returns [2, 3, 4] // DataFrames can be converted to a Dataset by providing a class. Mapping based on name diff --git a/examples/src/main/python/sql/datasource.py b/examples/src/main/python/sql/datasource.py index 9f8fdd7..29bea14 100644 --- a/examples/src/main/python/sql/datasource.py +++ b/examples/src/main/python/sql/datasource.py @@ -86,7 +86,7 @@ def basic_datasource_example(spark): .write .partitionBy("favorite_color") .bucketBy(42, "name") - .saveAsTable("people_partitioned_bucketed")) + .saveAsTable("users_partitioned_bucketed")) # $example off:write_partition_and_bucket$ # $example on:manual_load_options$ @@ -117,7 +117,7 @@ def basic_datasource_example(spark): # $example off:direct_sql$ spark.sql("DROP TABLE IF EXISTS people_bucketed") - spark.sql("DROP TABLE IF EXISTS people_partitioned_bucketed") + spark.sql("DROP TABLE IF EXISTS users_partitioned_bucketed") def parquet_example(spark): -