Re: [PR] GH-48277: [C++][Parquet] unpack with shuffle algorithm [arrow]

via GitHub Wed, 11 Feb 2026 05:22:43 -0800


AntoinePrv commented on code in PR #47994:
URL: https://github.com/apache/arrow/pull/47994#discussion_r2793246448



##########
cpp/src/arrow/util/bpacking_simd_kernel_internal.h:
##########
@@ -0,0 +1,1106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/// Simd integer unpacking kernels, that is small functions that efficiently 
operate over
+/// a fixed input size.
+///
+/// This is a generalization of the algorithm from Daniel Lemire and Leonid 
Boytsov,
+/// Decoding billions of integers per second through vectorization, Software 
Practice &
+/// Experience 45 (1), 2015.
+/// http://arxiv.org/abs/1209.2137
+/// 
https://github.com/fast-pack/LittleIntPacker/blob/master/src/horizontalpacking32.c
+
+#pragma once
+
+#include <array>
+#include <concepts>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <numeric>
+#include <utility>
+
+#include <xsimd/xsimd.hpp>
+
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bpacking_dispatch_internal.h"
+#include "arrow/util/type_traits.h"
+
+namespace arrow::internal::bpacking {
+
+template <typename T, std::size_t N>
+constexpr std::array<T, N> BuildConstantArray(T val) {
+  std::array<T, N> out = {};
+  for (auto& v : out) {
+    v = val;
+  }
+  return out;
+}
+
+template <typename Arr>
+constexpr Arr BuildConstantArrayLike(typename Arr::value_type val) {
+  return BuildConstantArray<typename Arr::value_type, 
std::tuple_size_v<Arr>>(val);
+}
+
+/*********************
+ *  xsimd utilities  *
+ *********************/
+
+/// Simple constexpr maximum element suited for non empty arrays.
+template <typename T, std::size_t N>
+constexpr T max_value(const std::array<T, N>& arr) {
+  static_assert(N > 0);
+  T out = 0;
+  for (const T& v : arr) {
+    if (v > out) {
+      out = v;
+    }
+  }
+  return out;
+}
+
+template <std::array kArr, typename Arch, std::size_t... Is>
+constexpr auto array_to_batch_constant_impl(std::index_sequence<Is...>) {
+  using Array = std::decay_t<decltype(kArr)>;
+  using value_type = Array::value_type;
+
+  return xsimd::batch_constant<value_type, Arch, kArr[Is]...>{};
+}
+
+/// Make a ``xsimd::batch_constant`` from a static constexpr array.
+template <std::array kArr, typename Arch>
+constexpr auto array_to_batch_constant() {
+  return array_to_batch_constant_impl<kArr, Arch>(
+      std::make_index_sequence<kArr.size()>());
+}
+
+template <std::unsigned_integral Uint, typename Arch>
+xsimd::batch<uint8_t, Arch> load_val_as(const uint8_t* in) {
+  const Uint val = util::SafeLoadAs<Uint>(in);
+  const auto batch = xsimd::batch<Uint, Arch>(val);
+  return xsimd::bitwise_cast<uint8_t>(batch);
+}
+
+template <int kBytes, typename Arch>
+xsimd::batch<uint8_t, Arch> safe_load_bytes(const uint8_t* in) {
+  if constexpr (kBytes <= sizeof(uint64_t)) {
+    return load_val_as<SizedUint<kBytes>, Arch>(in);
+  }
+  using simd_bytes = xsimd::batch<uint8_t, Arch>;
+  return simd_bytes::load_unaligned(in);
+}
+
+template <std::integral Int, int kOffset, int kLength, typename Arr>
+constexpr auto select_stride_impl(Arr shifts) {
+  std::array<Int, shifts.size() / kLength> out{};
+  for (std::size_t i = 0; i < out.size(); ++i) {
+    out[i] = shifts[kLength * i + kOffset];
+  }
+  return out;
+}
+
+/// Select indices in a batch constant according to a given stride.
+///
+/// Given a ``xsimd::batch_constant`` over a given integer type ``Int``, 
return a new
+/// batch constant over a larger integer type ``ToInt`` that select one of 
every "stride"
+/// element from the input.
+/// Because batch constants can only contain a predetermined number of values, 
the
+/// "stride" is determined as the ratio of the sizes of the output and input 
integer
+/// types.
+/// The ``kOffset`` is used to determine which sequence of values picked by 
the stride.
+/// Equivalently, it is the index of the first value selected.
+///
+/// For instance given an input with the following values
+///         |0|1|2|3|4|5|6|7|
+/// and a stride of 2 (e.g. sizeof(uint16_t)/sizeof(uint8_t)), an offset of 0 
would
+/// return the values:
+///         |0|2|4|6|
+/// while an offset of 1 would return the values:
+///         |1|3|5|7|
+template <typename ToInt, int kOffset, typename Int, typename Arch, Int... 
kShifts>
+constexpr auto select_stride(xsimd::batch_constant<Int, Arch, kShifts...>) {
+  constexpr auto kStridesArr =
+      select_stride_impl<ToInt, kOffset, sizeof(ToInt) / sizeof(Int)>(
+          std::array{kShifts...});
+  return array_to_batch_constant<kStridesArr, Arch>();
+}
+
+/// Whether we are compiling for the SSE2 or above in the SSE family.
+template <typename Arch>
+constexpr bool IsSse2 = std::is_base_of_v<xsimd::sse2, Arch>;
+
+/// Whether we are compiling for the AVX2 or above in the SSE family.

Review Comment:
   In xsimd, there is `avx2+fma3` that is currently implemented as inheriting 
from `avx2`. Still I added extra clarifications.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] GH-48277: [C++][Parquet] unpack with shuffle algorithm [arrow]

Reply via email to