AntoinePrv commented on code in PR #47994: URL: https://github.com/apache/arrow/pull/47994#discussion_r2783173930
########## cpp/src/arrow/util/bpacking_simd_kernel_internal.h: ########## @@ -0,0 +1,790 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// Simd integer unpacking kernels, that is small functions that efficiently operate over +/// a fixed input size. +/// +/// This a generalization of the algorithm from Daniel Lemire and Leonid Boytsov, +/// Decoding billions of integers per second through vectorization, Software Practice & +/// Experience 45 (1), 2015. +/// http://arxiv.org/abs/1209.2137 +/// https://github.com/fast-pack/LittleIntPacker/blob/master/src/horizontalpacking32.c + +#pragma once + +#include <array> +#include <cstdint> +#include <cstring> +#include <numeric> +#include <utility> + +#include <xsimd/xsimd.hpp> + +#include "arrow/util/bit_util.h" +#include "arrow/util/bpacking_dispatch_internal.h" +#include "arrow/util/type_traits.h" + +namespace arrow::internal { + +/********************* + * xsimd utilities * + *********************/ + +/// Simple constexpr maximum element suited for non empty arrays. +template <typename T, std::size_t N> +constexpr T max_value(const std::array<T, N>& arr) { + static_assert(N > 0); + T out = 0; + for (const T& v : arr) { + if (v > out) { + out = v; + } + } + return out; +} + +template <std::array kArr, typename Arch, std::size_t... Is> +constexpr auto array_to_batch_constant_impl(std::index_sequence<Is...>) { + using Array = std::decay_t<decltype(kArr)>; + using value_type = typename Array::value_type; + + return xsimd::batch_constant<value_type, Arch, kArr[Is]...>{}; +} + +/// Make a ``xsimd::batch_constant`` from a static constexpr array. +template <std::array kArr, typename Arch> +constexpr auto array_to_batch_constant() { + return array_to_batch_constant_impl<kArr, Arch>( + std::make_index_sequence<kArr.size()>()); +} + +template <typename Int, int kOffset, int kLength, typename Arr> +constexpr auto select_stride_impl(Arr shifts) { + std::array<Int, shifts.size() / kLength> out{}; + for (std::size_t i = 0; i < out.size(); ++i) { + out[i] = shifts[kLength * i + kOffset]; + } + return out; +} + +template <typename ToInt, int kOffset, typename Int, typename Arch, Int... kShifts> +constexpr auto select_stride(xsimd::batch_constant<Int, Arch, kShifts...>) { + constexpr auto kStridesArr = + select_stride_impl<ToInt, kOffset, sizeof(ToInt) / sizeof(Int)>( + std::array{kShifts...}); + return array_to_batch_constant<kStridesArr, Arch>(); +} + +template <typename Arch> +constexpr bool HasSse2 = std::is_base_of_v<xsimd::sse2, Arch>; + +template <typename Arch> +constexpr bool HasAvx2 = std::is_base_of_v<xsimd::avx2, Arch>; + +/// Wrapper around ``xsimd::bitwise_lshift`` with optimizations for non implemented sizes. +// +// We replace the variable left shift by a variable multiply with a power of two. +// +// This trick is borrowed from Daniel Lemire and Leonid Boytsov, Decoding billions of +// integers per second through vectorization, Software Practice & Experience 45 (1), 2015. +// http://arxiv.org/abs/1209.2137 +// +/// TODO(xsimd) Tracking in https://github.com/xtensor-stack/xsimd/pull/1220 +template <typename Arch, typename Int, Int... kShifts> +auto left_shift(const xsimd::batch<Int, Arch>& batch, + xsimd::batch_constant<Int, Arch, kShifts...> shifts) + -> xsimd::batch<Int, Arch> { + constexpr bool kHasSse2 = HasSse2<Arch>; + constexpr bool kHasAvx2 = HasAvx2<Arch>; + static_assert(!(kHasSse2 && kHasAvx2), "The hierarchy are different in xsimd"); + + constexpr auto kMults = xsimd::make_batch_constant<Int, 1, Arch>() << shifts; + + constexpr auto IntSize = sizeof(Int); + + // Sizes and architecture for which there is no variable left shift and there is a + // multiplication + if constexpr ( // + (kHasSse2 && (IntSize == sizeof(uint16_t) || IntSize == sizeof(uint32_t))) // + || (kHasAvx2 && (IntSize == sizeof(uint16_t))) // + ) { + return batch * kMults; + } + + // Architecture for which there is no variable left shift on uint8_t but a fallback + // exists for uint16_t. + if constexpr ((kHasSse2 || kHasAvx2) && (IntSize == sizeof(uint8_t))) { + const auto batch16 = xsimd::bitwise_cast<uint16_t>(batch); + + constexpr auto kShifts0 = select_stride<uint16_t, 0>(shifts); + const auto shifted0 = left_shift(batch16, kShifts0) & 0x00FF; + + constexpr auto kShifts1 = select_stride<uint16_t, 1>(shifts); + const auto shifted1 = left_shift(batch16 & 0xFF00, kShifts1); + + return xsimd::bitwise_cast<Int>(shifted0 | shifted1); + } + + return batch << shifts; +} + +/// Fallback for variable shift right. +/// +/// When we know that the relevant bits will not overflow, we can instead shift left all +/// values to align them with the one with the largest right shifts followed by a constant +/// shift on all values. +/// In doing so, we replace the variable left shift by a variable multiply with a power of +/// two. +/// +/// This trick is borrowed from Daniel Lemire and Leonid Boytsov, Decoding billions of +/// integers per second through vectorization, Software Practice & Experience 45 (1), +/// 2015. http://arxiv.org/abs/1209.2137 +template <typename Arch, typename Int, Int... kShifts> +auto right_shift_by_excess(const xsimd::batch<Int, Arch>& batch, Review Comment: It does not work in all cases but works for us. I think this may never be part of `xsimd`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
