This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 76e8fe9 ARROW-4867: [Python] Respect ordering of columns argument
passed to Table.from_pandas
76e8fe9 is described below
commit 76e8fe98d9d61a58ed706c448697e8474fabd30f
Author: Wes McKinney <[email protected]>
AuthorDate: Sat Mar 16 17:33:26 2019 +0100
ARROW-4867: [Python] Respect ordering of columns argument passed to
Table.from_pandas
I read through the discussion on ARROW-3766 where this change was
originally made, and I think there was a misunderstanding about a comment I
made where I said "The columns argument in Table.from_pandas is just for column
filtering". I admit it's a bit ambiguous what's the right thing to do, but it
seems like the user intent of passing `columns` is to use that order in the
resulting schema, but not error on columns that are not found. We could also
introduce "null" type columns for "n [...]
Author: Wes McKinney <[email protected]>
Closes #3930 from wesm/ARROW-4867 and squashes the following commits:
4b4ad64e <Wes McKinney> Respect ordering of columns argument passed to
Table.from_pandas
---
python/pyarrow/pandas_compat.py | 4 +---
python/pyarrow/tests/test_convert_pandas.py | 9 +++++----
2 files changed, 6 insertions(+), 7 deletions(-)
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index 10038de..90a0ad6 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -411,9 +411,7 @@ def _resolve_columns_of_interest(df, schema, columns):
elif schema is not None:
columns = schema.names
elif columns is not None:
- # columns is only for filtering, the function must keep the column
- # ordering of either the dataframe or the passed schema
- columns = [c for c in df.columns if c in columns]
+ columns = [c for c in columns if c in df.columns]
else:
columns = df.columns
diff --git a/python/pyarrow/tests/test_convert_pandas.py
b/python/pyarrow/tests/test_convert_pandas.py
index 16e2a76..0aab444 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -140,7 +140,8 @@ class TestConvertMetadata(object):
assert table.column(0).name == '0'
def test_from_pandas_with_columns(self):
- df = pd.DataFrame({0: [1, 2, 3], 1: [1, 3, 3], 2: [2, 4, 5]})
+ df = pd.DataFrame({0: [1, 2, 3], 1: [1, 3, 3], 2: [2, 4, 5]},
+ columns=[1, 0])
table = pa.Table.from_pandas(df, columns=[0, 1])
expected = pa.Table.from_pandas(df[[0, 1]])
@@ -2495,15 +2496,15 @@ def
test_table_from_pandas_columns_argument_only_does_filtering():
columns1 = ['arrays', 'floats', 'partition']
schema1 = pa.schema([
- ('partition', pa.int64()),
('arrays', pa.list_(pa.int64())),
('floats', pa.float64()),
+ ('partition', pa.int64())
])
columns2 = ['floats', 'partition']
schema2 = pa.schema([
- ('partition', pa.int64()),
- ('floats', pa.float64())
+ ('floats', pa.float64()),
+ ('partition', pa.int64())
])
table1 = pa.Table.from_pandas(df, columns=columns1, preserve_index=False)