This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 67bd52ce060e [SPARK-47619][PYTHON][DOCS] Refine docstring of `to_json/from_json` 67bd52ce060e is described below commit 67bd52ce060e86ff8582c4e7e40fa9c924e73fc5 Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Thu Mar 28 13:48:57 2024 +0900 [SPARK-47619][PYTHON][DOCS] Refine docstring of `to_json/from_json` ### What changes were proposed in this pull request? This PR refines docstring of `to_json/from_json` with more descriptive examples. ### Why are the changes needed? For better API reference documentation. ### Does this PR introduce _any_ user-facing change? Yes, it fixes user-facing documentation. ### How was this patch tested? Manually tested. GitHub Actions should verify them. ### Was this patch authored or co-authored using generative AI tooling? No Closes #45742 from HyukjinKwon/SPARK-47619. Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/sql/functions/builtin.py | 147 ++++++++++++++++++++++++-------- 1 file changed, 110 insertions(+), 37 deletions(-) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index 99a2375965c2..59167ad9e736 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -15066,29 +15066,66 @@ def from_json( Examples -------- - >>> from pyspark.sql.types import * - >>> data = [(1, '''{"a": 1}''')] + Example 1: Parsing JSON with a specified schema + + >>> import pyspark.sql.functions as sf + >>> from pyspark.sql.types import StructType, StructField, IntegerType >>> schema = StructType([StructField("a", IntegerType())]) - >>> df = spark.createDataFrame(data, ("key", "value")) - >>> df.select(from_json(df.value, schema).alias("json")).collect() - [Row(json=Row(a=1))] - >>> df.select(from_json(df.value, "a INT").alias("json")).collect() - [Row(json=Row(a=1))] - >>> df.select(from_json(df.value, 
"MAP<STRING,INT>").alias("json")).collect() - [Row(json={'a': 1})] - >>> data = [(1, '''[{"a": 1}]''')] + >>> df = spark.createDataFrame([(1, '''{"a": 1}''')], ("key", "value")) + >>> df.select(sf.from_json(df.value, schema).alias("json")).show() + +----+ + |json| + +----+ + | {1}| + +----+ + + Example 2: Parsing JSON with a DDL-formatted string. + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([(1, '''{"a": 1}''')], ("key", "value")) + >>> df.select(sf.from_json(df.value, "a INT").alias("json")).show() + +----+ + |json| + +----+ + | {1}| + +----+ + + Example 3: Parsing JSON into a MapType + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([(1, '''{"a": 1}''')], ("key", "value")) + >>> df.select(sf.from_json(df.value, "MAP<STRING,INT>").alias("json")).show() + +--------+ + | json| + +--------+ + |{a -> 1}| + +--------+ + + Example 4: Parsing JSON into an ArrayType of StructType + + >>> import pyspark.sql.functions as sf + >>> from pyspark.sql.types import ArrayType, StructType, StructField, IntegerType >>> schema = ArrayType(StructType([StructField("a", IntegerType())])) - >>> df = spark.createDataFrame(data, ("key", "value")) - >>> df.select(from_json(df.value, schema).alias("json")).collect() - [Row(json=[Row(a=1)])] - >>> schema = schema_of_json(lit('''{"a": 0}''')) - >>> df.select(from_json(df.value, schema).alias("json")).collect() - [Row(json=Row(a=None))] - >>> data = [(1, '''[1, 2, 3]''')] + >>> df = spark.createDataFrame([(1, '''[{"a": 1}]''')], ("key", "value")) + >>> df.select(sf.from_json(df.value, schema).alias("json")).show() + +-----+ + | json| + +-----+ + |[{1}]| + +-----+ + + Example 5: Parsing JSON into an ArrayType + + >>> import pyspark.sql.functions as sf + >>> from pyspark.sql.types import ArrayType, IntegerType >>> schema = ArrayType(IntegerType()) - >>> df = spark.createDataFrame(data, ("key", "value")) - >>> df.select(from_json(df.value, schema).alias("json")).collect() - [Row(json=[1, 2, 
3])] + >>> df = spark.createDataFrame([(1, '''[1, 2, 3]''')], ("key", "value")) + >>> df.select(sf.from_json(df.value, schema).alias("json")).show() + +---------+ + | json| + +---------+ + |[1, 2, 3]| + +---------+ """ if isinstance(schema, DataType): @@ -15129,28 +15166,64 @@ def to_json(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Co Examples -------- + Example 1: Converting a StructType column to JSON + + >>> import pyspark.sql.functions as sf >>> from pyspark.sql import Row - >>> from pyspark.sql.types import * >>> data = [(1, Row(age=2, name='Alice'))] >>> df = spark.createDataFrame(data, ("key", "value")) - >>> df.select(to_json(df.value).alias("json")).collect() - [Row(json='{"age":2,"name":"Alice"}')] + >>> df.select(sf.to_json(df.value).alias("json")).show(truncate=False) + +------------------------+ + |json | + +------------------------+ + |{"age":2,"name":"Alice"}| + +------------------------+ + + Example 2: Converting an ArrayType column to JSON + + >>> import pyspark.sql.functions as sf + >>> from pyspark.sql import Row >>> data = [(1, [Row(age=2, name='Alice'), Row(age=3, name='Bob')])] >>> df = spark.createDataFrame(data, ("key", "value")) - >>> df.select(to_json(df.value).alias("json")).collect() - [Row(json='[{"age":2,"name":"Alice"},{"age":3,"name":"Bob"}]')] - >>> data = [(1, {"name": "Alice"})] - >>> df = spark.createDataFrame(data, ("key", "value")) - >>> df.select(to_json(df.value).alias("json")).collect() - [Row(json='{"name":"Alice"}')] - >>> data = [(1, [{"name": "Alice"}, {"name": "Bob"}])] - >>> df = spark.createDataFrame(data, ("key", "value")) - >>> df.select(to_json(df.value).alias("json")).collect() - [Row(json='[{"name":"Alice"},{"name":"Bob"}]')] - >>> data = [(1, ["Alice", "Bob"])] - >>> df = spark.createDataFrame(data, ("key", "value")) - >>> df.select(to_json(df.value).alias("json")).collect() - [Row(json='["Alice","Bob"]')] + >>> df.select(sf.to_json(df.value).alias("json")).show(truncate=False) + 
+-------------------------------------------------+ + |json | + +-------------------------------------------------+ + |[{"age":2,"name":"Alice"},{"age":3,"name":"Bob"}]| + +-------------------------------------------------+ + + Example 3: Converting a MapType column to JSON + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([(1, {"name": "Alice"})], ("key", "value")) + >>> df.select(sf.to_json(df.value).alias("json")).show(truncate=False) + +----------------+ + |json | + +----------------+ + |{"name":"Alice"}| + +----------------+ + + Example 4: Converting an ArrayType of MapType column to JSON + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([(1, [{"name": "Alice"}, {"name": "Bob"}])], ("key", "value")) + >>> df.select(sf.to_json(df.value).alias("json")).show(truncate=False) + +---------------------------------+ + |json | + +---------------------------------+ + |[{"name":"Alice"},{"name":"Bob"}]| + +---------------------------------+ + + Example 5: Converting a simple ArrayType column to JSON + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([(1, ["Alice", "Bob"])], ("key", "value")) + >>> df.select(sf.to_json(df.value).alias("json")).show(truncate=False) + +---------------+ + |json | + +---------------+ + |["Alice","Bob"]| + +---------------+ """ return _invoke_function("to_json", _to_java_column(col), _options_to_str(options)) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org