gene-db commented on code in PR #46122:
URL: https://github.com/apache/spark/pull/46122#discussion_r1571377546
##########
python/pyspark/sql/variant_utils.py:
##########
@@ -86,19 +88,40 @@ class VariantUtils:
DECIMAL8 = 9
# 16-byte decimal. Content is 1-byte scale + 16-byte little-endian signed integer.
DECIMAL16 = 10
+ # Date value. Content is 4-byte little-endian signed integer that represents the number of days
+ # from the Unix epoch.
+ DATE = 11
+ # Timestamp value. Content is 8-byte little-endian signed integer that represents the number of
+ # microseconds elapsed since the Unix epoch, 1970-01-01 00:00:00 UTC. It is displayed to users in
+ # their local time zones and may be displayed differently depending on the execution environment.
+ TIMESTAMP = 12
+ # Timestamp_ntz value. It has the same content as `TIMESTAMP` but should always be interpreted
+ # as if the local time zone is UTC.
+ TIMESTAMP_NTZ = 13
+ # 4-byte IEEE float.
+ FLOAT = 14
+ # Binary value. The content is (4-byte little-endian unsigned integer representing the binary
+ # size) + (size bytes of binary content).
+ BINARY = 15
# Long string value. The content is (4-byte little-endian unsigned integer representing the
# string size) + (size bytes of string content).
LONG_STR = 16
U32_SIZE = 4
+ EPOCH = datetime.datetime(year = 1970, month = 1, day = 1, hour = 0, minute = 0, second = 0,
+ tzinfo = datetime.timezone.utc)
+ EPOCH_NTZ = datetime.datetime(year = 1970, month = 1, day = 1, hour = 0, minute = 0, second = 0)
+
+ # The valid zone ids can be found here:
+ # https://gist.github.com/heyalexej/8bf688fd67d7199be4a1682b3eec7568
@classmethod
- def to_json(cls, value: bytes, metadata: bytes) -> str:
+ def to_json(cls, value: bytes, metadata: bytes, zone_id: str = "UTC") -> str:
"""
Convert the VariantVal to a JSON string.
Review Comment:
Can you update this docstring to mention the `zone_id` parameter, and to note
that UTC is the default?
##########
python/pyspark/sql/variant_utils.py:
##########
@@ -86,19 +88,40 @@ class VariantUtils:
DECIMAL8 = 9
# 16-byte decimal. Content is 1-byte scale + 16-byte little-endian signed integer.
DECIMAL16 = 10
+ # Date value. Content is 4-byte little-endian signed integer that represents the number of days
+ # from the Unix epoch.
+ DATE = 11
+ # Timestamp value. Content is 8-byte little-endian signed integer that represents the number of
+ # microseconds elapsed since the Unix epoch, 1970-01-01 00:00:00 UTC. It is displayed to users in
+ # their local time zones and may be displayed differently depending on the execution environment.
+ TIMESTAMP = 12
+ # Timestamp_ntz value. It has the same content as `TIMESTAMP` but should always be interpreted
+ # as if the local time zone is UTC.
+ TIMESTAMP_NTZ = 13
+ # 4-byte IEEE float.
+ FLOAT = 14
+ # Binary value. The content is (4-byte little-endian unsigned integer representing the binary
+ # size) + (size bytes of binary content).
+ BINARY = 15
# Long string value. The content is (4-byte little-endian unsigned integer representing the
# string size) + (size bytes of string content).
LONG_STR = 16
U32_SIZE = 4
+ EPOCH = datetime.datetime(year = 1970, month = 1, day = 1, hour = 0, minute = 0, second = 0,
+ tzinfo = datetime.timezone.utc)
+ EPOCH_NTZ = datetime.datetime(year = 1970, month = 1, day = 1, hour = 0, minute = 0, second = 0)
+
+ # The valid zone ids can be found here:
+ # https://gist.github.com/heyalexej/8bf688fd67d7199be4a1682b3eec7568
@classmethod
- def to_json(cls, value: bytes, metadata: bytes) -> str:
+ def to_json(cls, value: bytes, metadata: bytes, zone_id: str = "UTC") -> str:
Review Comment:
This reminds me: in `sql/types.py`, we have a `VariantVal` which calls this
`to_json`. However, I think only `__str__` calls `to_json`. Should we add a
`toJson()` method that takes an optional `zone_id`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]