HyukjinKwon commented on code in PR #46921:
URL: https://github.com/apache/spark/pull/46921#discussion_r1633926772
##########
python/pyspark/sql/streaming/listener.py:
##########
@@ -795,10 +825,22 @@ def prettyJson(self) -> str:
def __str__(self) -> str:
return self.prettyJson
+ def __repr__(self) -> str:
+ return self.prettyJson
+
+ def __getitem__(self, key):
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ internal_key = "_" + key
+ setattr(self, internal_key, value)
+
class SourceProgress:
"""
.. versionadded:: 3.4.0
Review Comment:
```suggestion
.. versionadded:: 3.4.0
```
##########
python/pyspark/sql/streaming/listener.py:
##########
@@ -644,11 +670,15 @@ def prettyJson(self) -> str:
def __str__(self) -> str:
return self.prettyJson
+ def __repr__(self) -> str:
+ return self.prettyJson
+
-class StateOperatorProgress:
+class StateOperatorProgress(dict):
"""
.. versionadded:: 3.4.0
Review Comment:
```
.. versionchanged:: 4.0.0
Becomes a subclass of dict
```
seems to be missing here.
##########
python/pyspark/sql/streaming/listener.py:
##########
@@ -500,6 +504,19 @@ def fromJson(cls, j: Dict[str, Any]) ->
"StreamingQueryProgress":
else {},
)
+ def __getitem__(self, key):
+ # Before Spark 4.0, StreamingQuery.lastProgress returns a dict, which
casts id and runId
+ # to string. But here they are UUID.
+ # To prevent breaking change, also cast them to string when accessed
with __getitem__.
+ if key == "id" or key == "runId":
+ return str(getattr(self, key))
+ else:
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
Review Comment:
Hmmm .. but the end users can't access this value if I am reading this
correctly?
##########
python/pyspark/sql/streaming/listener.py:
##########
@@ -935,10 +977,22 @@ def prettyJson(self) -> str:
def __str__(self) -> str:
return self.prettyJson
+ def __repr__(self) -> str:
+ return self.prettyJson
+
+ def __getitem__(self, key):
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ internal_key = "_" + key
+ setattr(self, internal_key, value)
+
class SinkProgress:
"""
.. versionadded:: 3.4.0
+ .. versionchanged:: 4.0.0
Review Comment:
```suggestion
.. versionchanged:: 4.0.0
```
##########
python/pyspark/sql/streaming/listener.py:
##########
@@ -397,9 +397,11 @@ def errorClassOnException(self) -> Optional[str]:
return self._errorClassOnException
-class StreamingQueryProgress:
+class StreamingQueryProgress(dict):
"""
.. versionadded:: 3.4.0
Review Comment:
```suggestion
.. versionadded:: 3.4.0
```
##########
python/pyspark/sql/streaming/listener.py:
##########
@@ -795,10 +825,22 @@ def prettyJson(self) -> str:
def __str__(self) -> str:
return self.prettyJson
+ def __repr__(self) -> str:
+ return self.prettyJson
+
+ def __getitem__(self, key):
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ internal_key = "_" + key
+ setattr(self, internal_key, value)
+
class SourceProgress:
Review Comment:
This doesn't seem to be a dict(?)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]