This is an automated email from the ASF dual-hosted git repository.
alenka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 1140a53f51 GH-34703: [Python] Set copy=False explicitly when creating
a pandas Series (#34593)
1140a53f51 is described below
commit 1140a53f51a35c37008d97d43b2d00cff4ae2caa
Author: Patrick Hoefler <[email protected]>
AuthorDate: Thu Mar 23 07:07:18 2023 -0400
GH-34703: [Python] Set copy=False explicitly when creating a pandas Series
(#34593)
### Rationale for this change
pandas will change the default for creating a Series from an array (numpy,
arrow, ...) to copy=True when Copy-on-Write is enabled. To avoid this extra
copy when pyarrow creates a Series internally, we have to specify copy=False
explicitly.
### What changes are included in this PR?
Setting copy=False when creating a Series
### Are these changes tested?
This is equivalent to the current default behavior, so there is no reason to
add any additional tests.
### Are there any user-facing changes?
no
cc @jorisvandenbossche
* Closes: #34703
Authored-by: Patrick Hoefler <[email protected]>
Signed-off-by: Alenka Frim <[email protected]>
---
python/pyarrow/array.pxi | 6 +++---
python/pyarrow/pandas_compat.py | 4 ++--
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 4f8f9b6bf9..778d59058f 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1664,7 +1664,7 @@ cdef _array_like_to_pandas(obj, options, types_mapper):
else:
dtype = None
- result = pandas_api.series(arr, dtype=dtype, name=name)
+ result = pandas_api.series(arr, dtype=dtype, name=name, copy=False)
if (isinstance(original_type, TimestampType) and
original_type.tz is not None and
@@ -3065,7 +3065,7 @@ cdef class ExtensionArray(Array):
# pandas ExtensionDtype that implements conversion from pyarrow
if hasattr(pandas_dtype, '__from_arrow__'):
arr = pandas_dtype.__from_arrow__(self)
- return pandas_api.series(arr)
+ return pandas_api.series(arr, copy=False)
# otherwise convert the storage array with the base implementation
return Array._to_pandas(self.storage, options, **kwargs)
@@ -3169,7 +3169,7 @@ cdef object get_values(object obj, bint* is_series):
result = obj
is_series[0] = False
else:
- result = pandas_api.series(obj).values
+ result = pandas_api.series(obj, copy=False).values
is_series[0] = False
return result
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index a6de60e87b..00e8613717 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -1015,9 +1015,9 @@ def _extract_index_level(table, result_table, field_name,
values = values.copy()
if isinstance(col.type, pa.lib.TimestampType) and col.type.tz is not None:
- index_level = make_tz_aware(pd.Series(values), col.type.tz)
+ index_level = make_tz_aware(pd.Series(values, copy=False), col.type.tz)
else:
- index_level = pd.Series(values, dtype=values.dtype)
+ index_level = pd.Series(values, dtype=values.dtype, copy=False)
result_table = result_table.remove_column(
result_table.schema.get_field_index(field_name)
)