itholic commented on code in PR #42956:
URL: https://github.com/apache/spark/pull/42956#discussion_r1328217955
##########
python/pyspark/pandas/tests/connect/test_parity_internal.py:
##########
@@ -15,18 +15,86 @@
# limitations under the License.
#
import unittest
+import pandas as pd
from pyspark.pandas.tests.test_internal import InternalFrameTestsMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+from pyspark.pandas.internal import (
+ InternalFrame,
+ SPARK_DEFAULT_INDEX_NAME,
+ SPARK_INDEX_NAME_FORMAT,
+)
+from pyspark.pandas.utils import spark_column_equals
class InternalFrameParityTests(
InternalFrameTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase
):
- @unittest.skip("TODO(SPARK-43654): Enable
InternalFrameParityTests.test_from_pandas.")
def test_from_pandas(self):
- super().test_from_pandas()
+ pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
Review Comment:
> what about simplify the tests by comparing the column string
representations
Yeah, this is exactly what `spark_column_equals` currently does:
```python
if is_remote():
# (unrelated code omitted)
return repr(left) == repr(right)
else:
return left._jc.equals(right._jc)
```
However, this does not work when comparing
`internal.spark_column_for(("a",))` with `sdf["a"]`, because for some reason
their string representations differ, as shown below:
```python
import pandas as pd
from pyspark.pandas.internal import InternalFrame
pdf = pd.DataFrame({"a": [1, 2, 3]})
internal = InternalFrame.from_pandas(pdf)
sdf = internal.spark_frame
repr(internal.spark_column_for(("a",)))
# "Column<'`a`'>"
repr(sdf["a"])
# "Column<'a'>"
```
Do you happen to have any idea why backticks surround the column name in
Spark Connect?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]