This is an automated email from the ASF dual-hosted git repository. raulcd pushed a commit to branch maint-15.0.x in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 215bcf9a5fc2a729bc4bc142f7a01a8a0dfd670b Author: Rok Mihevc <[email protected]> AuthorDate: Tue Jan 23 12:43:05 2024 +0100 GH-38655: [C++] "iso_calendar" kernel returns incorrect results for array length > 32 (#39360) ### Rationale for this change When defining `StructArray`'s field builders for `ISOCalendar` we don't pre-allocate memory and then use unsafe append. This causes the resulting array to be at most 32 rows long. ### What changes are included in this PR? This introduces required memory pre-allocation in the `ISOCalendar` c++ kernel. ### Are these changes tested? This adds a test for the Python wrapper. ### Are there any user-facing changes? Fixes the behavior of `iso_calendar` kernel. * Closes: #38655 Lead-authored-by: Rok Mihevc <[email protected]> Co-authored-by: Joris Van den Bossche <[email protected]> Signed-off-by: Joris Van den Bossche <[email protected]> --- cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc | 2 +- python/pyarrow/tests/test_compute.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index a88ce38936..f49e201492 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -1510,7 +1510,7 @@ struct ISOCalendar { for (int i = 0; i < 3; i++) { field_builders.push_back( checked_cast<BuilderType*>(struct_builder->field_builder(i))); - RETURN_NOT_OK(field_builders[i]->Reserve(1)); + RETURN_NOT_OK(field_builders[i]->Reserve(in.length)); } auto visit_null = [&]() { return struct_builder->AppendNull(); }; std::function<Status(typename InType::c_type arg)> visit_value; diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 7c5a134d33..9ceb2fd730 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -2255,6 +2255,19 @@ def 
test_extract_datetime_components(): _check_datetime_components(timestamps, timezone) +@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"]) +def test_iso_calendar_longer_array(unit): + # https://github.com/apache/arrow/issues/38655 + # ensure correct result for array length > 32 + arr = pa.array([datetime.datetime(2022, 1, 2, 9)]*50, pa.timestamp(unit)) + result = pc.iso_calendar(arr) + expected = pa.StructArray.from_arrays( + [[2021]*50, [52]*50, [7]*50], + names=['iso_year', 'iso_week', 'iso_day_of_week'] + ) + assert result.equals(expected) + + @pytest.mark.pandas @pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(), reason="Timezone database is not installed on Windows")
