This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 67bd52ce060e [SPARK-47619][PYTHON][DOCS] Refine docstring of
`to_json/from_json`
67bd52ce060e is described below
commit 67bd52ce060e86ff8582c4e7e40fa9c924e73fc5
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Thu Mar 28 13:48:57 2024 +0900
[SPARK-47619][PYTHON][DOCS] Refine docstring of `to_json/from_json`
### What changes were proposed in this pull request?
This PR refines docstring of `to_json/from_json` with more descriptive
examples.
### Why are the changes needed?
For better API reference documentation.
### Does this PR introduce _any_ user-facing change?
Yes, it fixes user-facing documentation.
### How was this patch tested?
Manually tested. GitHub Actions should verify them.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #45742 from HyukjinKwon/SPARK-47619.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/functions/builtin.py | 147 ++++++++++++++++++++++++--------
1 file changed, 110 insertions(+), 37 deletions(-)
diff --git a/python/pyspark/sql/functions/builtin.py
b/python/pyspark/sql/functions/builtin.py
index 99a2375965c2..59167ad9e736 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -15066,29 +15066,66 @@ def from_json(
Examples
--------
- >>> from pyspark.sql.types import *
- >>> data = [(1, '''{"a": 1}''')]
+ Example 1: Parsing JSON with a specified schema
+
+ >>> import pyspark.sql.functions as sf
+ >>> from pyspark.sql.types import StructType, StructField, IntegerType
>>> schema = StructType([StructField("a", IntegerType())])
- >>> df = spark.createDataFrame(data, ("key", "value"))
- >>> df.select(from_json(df.value, schema).alias("json")).collect()
- [Row(json=Row(a=1))]
- >>> df.select(from_json(df.value, "a INT").alias("json")).collect()
- [Row(json=Row(a=1))]
- >>> df.select(from_json(df.value,
"MAP<STRING,INT>").alias("json")).collect()
- [Row(json={'a': 1})]
- >>> data = [(1, '''[{"a": 1}]''')]
+ >>> df = spark.createDataFrame([(1, '''{"a": 1}''')], ("key", "value"))
+ >>> df.select(sf.from_json(df.value, schema).alias("json")).show()
+ +----+
+ |json|
+ +----+
+ | {1}|
+ +----+
+
+    Example 2: Parsing JSON with a DDL-formatted string
+
+ >>> import pyspark.sql.functions as sf
+ >>> df = spark.createDataFrame([(1, '''{"a": 1}''')], ("key", "value"))
+ >>> df.select(sf.from_json(df.value, "a INT").alias("json")).show()
+ +----+
+ |json|
+ +----+
+ | {1}|
+ +----+
+
+ Example 3: Parsing JSON into a MapType
+
+ >>> import pyspark.sql.functions as sf
+ >>> df = spark.createDataFrame([(1, '''{"a": 1}''')], ("key", "value"))
+ >>> df.select(sf.from_json(df.value,
"MAP<STRING,INT>").alias("json")).show()
+ +--------+
+ | json|
+ +--------+
+ |{a -> 1}|
+ +--------+
+
+ Example 4: Parsing JSON into an ArrayType of StructType
+
+ >>> import pyspark.sql.functions as sf
+ >>> from pyspark.sql.types import ArrayType, StructType, StructField,
IntegerType
>>> schema = ArrayType(StructType([StructField("a", IntegerType())]))
- >>> df = spark.createDataFrame(data, ("key", "value"))
- >>> df.select(from_json(df.value, schema).alias("json")).collect()
- [Row(json=[Row(a=1)])]
- >>> schema = schema_of_json(lit('''{"a": 0}'''))
- >>> df.select(from_json(df.value, schema).alias("json")).collect()
- [Row(json=Row(a=None))]
- >>> data = [(1, '''[1, 2, 3]''')]
+ >>> df = spark.createDataFrame([(1, '''[{"a": 1}]''')], ("key", "value"))
+ >>> df.select(sf.from_json(df.value, schema).alias("json")).show()
+ +-----+
+ | json|
+ +-----+
+ |[{1}]|
+ +-----+
+
+ Example 5: Parsing JSON into an ArrayType
+
+ >>> import pyspark.sql.functions as sf
+ >>> from pyspark.sql.types import ArrayType, IntegerType
>>> schema = ArrayType(IntegerType())
- >>> df = spark.createDataFrame(data, ("key", "value"))
- >>> df.select(from_json(df.value, schema).alias("json")).collect()
- [Row(json=[1, 2, 3])]
+ >>> df = spark.createDataFrame([(1, '''[1, 2, 3]''')], ("key", "value"))
+ >>> df.select(sf.from_json(df.value, schema).alias("json")).show()
+ +---------+
+ | json|
+ +---------+
+ |[1, 2, 3]|
+ +---------+
"""
if isinstance(schema, DataType):
@@ -15129,28 +15166,64 @@ def to_json(col: "ColumnOrName", options:
Optional[Dict[str, str]] = None) -> Co
Examples
--------
+ Example 1: Converting a StructType column to JSON
+
+ >>> import pyspark.sql.functions as sf
>>> from pyspark.sql import Row
- >>> from pyspark.sql.types import *
>>> data = [(1, Row(age=2, name='Alice'))]
>>> df = spark.createDataFrame(data, ("key", "value"))
- >>> df.select(to_json(df.value).alias("json")).collect()
- [Row(json='{"age":2,"name":"Alice"}')]
+ >>> df.select(sf.to_json(df.value).alias("json")).show(truncate=False)
+ +------------------------+
+ |json |
+ +------------------------+
+ |{"age":2,"name":"Alice"}|
+ +------------------------+
+
+ Example 2: Converting an ArrayType column to JSON
+
+ >>> import pyspark.sql.functions as sf
+ >>> from pyspark.sql import Row
>>> data = [(1, [Row(age=2, name='Alice'), Row(age=3, name='Bob')])]
>>> df = spark.createDataFrame(data, ("key", "value"))
- >>> df.select(to_json(df.value).alias("json")).collect()
- [Row(json='[{"age":2,"name":"Alice"},{"age":3,"name":"Bob"}]')]
- >>> data = [(1, {"name": "Alice"})]
- >>> df = spark.createDataFrame(data, ("key", "value"))
- >>> df.select(to_json(df.value).alias("json")).collect()
- [Row(json='{"name":"Alice"}')]
- >>> data = [(1, [{"name": "Alice"}, {"name": "Bob"}])]
- >>> df = spark.createDataFrame(data, ("key", "value"))
- >>> df.select(to_json(df.value).alias("json")).collect()
- [Row(json='[{"name":"Alice"},{"name":"Bob"}]')]
- >>> data = [(1, ["Alice", "Bob"])]
- >>> df = spark.createDataFrame(data, ("key", "value"))
- >>> df.select(to_json(df.value).alias("json")).collect()
- [Row(json='["Alice","Bob"]')]
+ >>> df.select(sf.to_json(df.value).alias("json")).show(truncate=False)
+ +-------------------------------------------------+
+ |json |
+ +-------------------------------------------------+
+ |[{"age":2,"name":"Alice"},{"age":3,"name":"Bob"}]|
+ +-------------------------------------------------+
+
+ Example 3: Converting a MapType column to JSON
+
+ >>> import pyspark.sql.functions as sf
+ >>> df = spark.createDataFrame([(1, {"name": "Alice"})], ("key", "value"))
+ >>> df.select(sf.to_json(df.value).alias("json")).show(truncate=False)
+ +----------------+
+ |json |
+ +----------------+
+ |{"name":"Alice"}|
+ +----------------+
+
+    Example 4: Converting an ArrayType of MapType column to JSON
+
+ >>> import pyspark.sql.functions as sf
+ >>> df = spark.createDataFrame([(1, [{"name": "Alice"}, {"name":
"Bob"}])], ("key", "value"))
+ >>> df.select(sf.to_json(df.value).alias("json")).show(truncate=False)
+ +---------------------------------+
+ |json |
+ +---------------------------------+
+ |[{"name":"Alice"},{"name":"Bob"}]|
+ +---------------------------------+
+
+ Example 5: Converting a simple ArrayType column to JSON
+
+ >>> import pyspark.sql.functions as sf
+ >>> df = spark.createDataFrame([(1, ["Alice", "Bob"])], ("key", "value"))
+ >>> df.select(sf.to_json(df.value).alias("json")).show(truncate=False)
+ +---------------+
+ |json |
+ +---------------+
+ |["Alice","Bob"]|
+ +---------------+
"""
return _invoke_function("to_json", _to_java_column(col),
_options_to_str(options))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]