dch nguyen created SPARK-37930:
----------------------------------

Summary: Fix DataFrame select subset with duplicated columns
Key: SPARK-37930
URL: https://issues.apache.org/jira/browse/SPARK-37930
Project: Spark
Issue Type: Bug
Components: PySpark
Affects Versions: 3.3.0
Reporter: dch nguyen

pandas
{code:java}
>>> pdf
   a
0  1
1  2
2  3
3  4
>>> pdf[['a', 'a']]
   a  a
0  1  1
1  2  2
2  3  3
3  4  4
{code}

pandas on spark
{code:java}
>>> psdf
   a
0  1
1  2
2  3
3  4
>>> psdf[['a', 'a']]
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/u02/spark/python/pyspark/pandas/frame.py", line 12077, in __repr__
    pdf = self._get_or_create_repr_pandas_cache(max_display_count)
  File "/u02/spark/python/pyspark/pandas/frame.py", line 12068, in _get_or_create_repr_pandas_cache
    self, "_repr_pandas_cache", {n: self.head(n + 1)._to_internal_pandas()}
  File "/u02/spark/python/pyspark/pandas/frame.py", line 12063, in _to_internal_pandas
    return self._internal.to_pandas_frame
  File "/u02/spark/python/pyspark/pandas/utils.py", line 576, in wrapped_lazy_property
    setattr(self, attr_name, fn(self))
  File "/u02/spark/python/pyspark/pandas/internal.py", line 1055, in to_pandas_frame
    return InternalFrame.restore_index(pdf, **self.arguments_for_restore_index)
  File "/u02/spark/python/pyspark/pandas/internal.py", line 1156, in restore_index
    pdf.columns = pd.Index(
  File "/u02/venv3.9-2/lib/python3.9/site-packages/pandas/core/generic.py", line 5500, in __setattr__
    return object.__setattr__(self, name, value)
  File "pandas/_libs/properties.pyx", line 70, in pandas._libs.properties.AxisProperty.__set__
  File "/u02/venv3.9-2/lib/python3.9/site-packages/pandas/core/generic.py", line 766, in _set_axis
    self._mgr.set_axis(axis, labels)
  File "/u02/venv3.9-2/lib/python3.9/site-packages/pandas/core/internals/managers.py", line 216, in set_axis
    self._validate_set_axis(axis, new_labels)
  File "/u02/venv3.9-2/lib/python3.9/site-packages/pandas/core/internals/base.py", line 57, in _validate_set_axis
    raise ValueError(
ValueError: Length mismatch: Expected axis has 4 elements, new values have 2 elements
{code}

--
This message was sent by Atlassian Jira
(v8.20.1#820001)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org