AntoinePrv commented on code in PR #47994: URL: https://github.com/apache/arrow/pull/47994#discussion_r2793246448
########## cpp/src/arrow/util/bpacking_simd_kernel_internal.h: ########## @@ -0,0 +1,1106 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// Simd integer unpacking kernels, that is small functions that efficiently operate over +/// a fixed input size. +/// +/// This is a generalization of the algorithm from Daniel Lemire and Leonid Boytsov, +/// Decoding billions of integers per second through vectorization, Software Practice & +/// Experience 45 (1), 2015. +/// http://arxiv.org/abs/1209.2137 +/// https://github.com/fast-pack/LittleIntPacker/blob/master/src/horizontalpacking32.c + +#pragma once + +#include <array> +#include <concepts> +#include <cstdint> +#include <cstring> +#include <limits> +#include <numeric> +#include <utility> + +#include <xsimd/xsimd.hpp> + +#include "arrow/util/bit_util.h" +#include "arrow/util/bpacking_dispatch_internal.h" +#include "arrow/util/type_traits.h" + +namespace arrow::internal::bpacking { + +template <typename T, std::size_t N> +constexpr std::array<T, N> BuildConstantArray(T val) { + std::array<T, N> out = {}; + for (auto& v : out) { + v = val; + } + return out; +} + +template <typename Arr> +constexpr Arr BuildConstantArrayLike(typename Arr::value_type val) { + return BuildConstantArray<typename Arr::value_type, std::tuple_size_v<Arr>>(val); +} + +/********************* + * xsimd utilities * + *********************/ + +/// Simple constexpr maximum element suited for non empty arrays. +template <typename T, std::size_t N> +constexpr T max_value(const std::array<T, N>& arr) { + static_assert(N > 0); + T out = 0; + for (const T& v : arr) { + if (v > out) { + out = v; + } + } + return out; +} + +template <std::array kArr, typename Arch, std::size_t... Is> +constexpr auto array_to_batch_constant_impl(std::index_sequence<Is...>) { + using Array = std::decay_t<decltype(kArr)>; + using value_type = Array::value_type; + + return xsimd::batch_constant<value_type, Arch, kArr[Is]...>{}; +} + +/// Make a ``xsimd::batch_constant`` from a static constexpr array. +template <std::array kArr, typename Arch> +constexpr auto array_to_batch_constant() { + return array_to_batch_constant_impl<kArr, Arch>( + std::make_index_sequence<kArr.size()>()); +} + +template <std::unsigned_integral Uint, typename Arch> +xsimd::batch<uint8_t, Arch> load_val_as(const uint8_t* in) { + const Uint val = util::SafeLoadAs<Uint>(in); + const auto batch = xsimd::batch<Uint, Arch>(val); + return xsimd::bitwise_cast<uint8_t>(batch); +} + +template <int kBytes, typename Arch> +xsimd::batch<uint8_t, Arch> safe_load_bytes(const uint8_t* in) { + if constexpr (kBytes <= sizeof(uint64_t)) { + return load_val_as<SizedUint<kBytes>, Arch>(in); + } + using simd_bytes = xsimd::batch<uint8_t, Arch>; + return simd_bytes::load_unaligned(in); +} + +template <std::integral Int, int kOffset, int kLength, typename Arr> +constexpr auto select_stride_impl(Arr shifts) { + std::array<Int, shifts.size() / kLength> out{}; + for (std::size_t i = 0; i < out.size(); ++i) { + out[i] = shifts[kLength * i + kOffset]; + } + return out; +} + +/// Select indices in a batch constant according to a given stride. +/// +/// Given a ``xsimd::batch_constant`` over a given integer type ``Int``, return a new +/// batch constant over a larger integer type ``ToInt`` that select one of every "stride" +/// element from the input. +/// Because batch constants can only contain a predetermined number of values, the +/// "stride" is determined as the ratio of the sizes of the output and input integer +/// types. +/// The ``kOffset`` is used to determine which sequence of values picked by the stride. +/// Equivalently, it is the index of the first value selected. +/// +/// For instance given an input with the following values +/// |0|1|2|3|4|5|6|7| +/// and a stride of 2 (e.g. sizeof(uint16_t)/sizeof(uint8_t)), an offset of 0 would +/// return the values: +/// |0|2|4|6| +/// while an offset of 1 would return the values: +/// |1|3|5|7| +template <typename ToInt, int kOffset, typename Int, typename Arch, Int... kShifts> +constexpr auto select_stride(xsimd::batch_constant<Int, Arch, kShifts...>) { + constexpr auto kStridesArr = + select_stride_impl<ToInt, kOffset, sizeof(ToInt) / sizeof(Int)>( + std::array{kShifts...}); + return array_to_batch_constant<kStridesArr, Arch>(); +} + +/// Whether we are compiling for the SSE2 or above in the SSE family. +template <typename Arch> +constexpr bool IsSse2 = std::is_base_of_v<xsimd::sse2, Arch>; + +/// Whether we are compiling for the AVX2 or above in the SSE family. Review Comment: In xsimd, there is `avx2+fma3` that is currently implemented as inheriting from `avx2`. Still I added extra clarifications. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
