This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 366dbe1e73 GH-15281: [C++] Replace bytes_view alias with span (#36334)
366dbe1e73 is described below
commit 366dbe1e737fc2fb838180bd1598f5ccd0482310
Author: Benjamin Kietzman <[email protected]>
AuthorDate: Tue Jul 11 03:29:04 2023 -0700
GH-15281: [C++] Replace bytes_view alias with span (#36334)
### Rationale for this change
basic_string_view was never really intended to support types other than
chars; the template which the STL intends for use in this situation is std::span
### What changes are included in this PR?
Since std::span is added in c++20, a minimal polyfill is added named
util::span.
- doesn't include support for static extents
- some accessors like `front()` are not present in util::span
### User facing changes:
- The broken alias bytes_view and its header are removed. I don't expect
this to trouble anybody much since it isn't used in much of our API, and if
it's really necessary it will be trivial to add it back with `using bytes_view
= span<uint8_t const>;` or so
* Closes: #15281
Lead-authored-by: Benjamin Kietzman <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/buffer.h | 7 +-
cpp/src/arrow/util/CMakeLists.txt | 1 +
cpp/src/arrow/util/bitmap.h | 29 +++---
cpp/src/arrow/util/bytes_view.h | 29 ------
cpp/src/arrow/util/span.h | 132 ++++++++++++++++++++++++
cpp/src/arrow/util/span_test.cc | 205 ++++++++++++++++++++++++++++++++++++++
6 files changed, 352 insertions(+), 51 deletions(-)
diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h
index 65f1abda16..238768b815 100644
--- a/cpp/src/arrow/buffer.h
+++ b/cpp/src/arrow/buffer.h
@@ -28,7 +28,6 @@
#include "arrow/device.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
-#include "arrow/util/bytes_view.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
@@ -167,13 +166,9 @@ class ARROW_EXPORT Buffer {
/// \brief View buffer contents as a std::string_view
/// \return std::string_view
explicit operator std::string_view() const {
- return std::string_view(reinterpret_cast<const char*>(data_), size_);
+ return {reinterpret_cast<const char*>(data_), static_cast<size_t>(size_)};
}
- /// \brief View buffer contents as a util::bytes_view
- /// \return util::bytes_view
- explicit operator util::bytes_view() const { return util::bytes_view(data_,
size_); }
-
/// \brief Return a pointer to the buffer's data
///
/// The buffer has to be a CPU buffer (`is_cpu()` is true).
diff --git a/cpp/src/arrow/util/CMakeLists.txt
b/cpp/src/arrow/util/CMakeLists.txt
index 4d36465578..3cecab3a63 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -60,6 +60,7 @@ add_arrow_test(utility-test
reflection_test.cc
rows_to_batches_test.cc
small_vector_test.cc
+ span_test.cc
stl_util_test.cc
string_test.cc
tdigest_test.cc
diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h
index a6df1e561e..4750e697fc 100644
--- a/cpp/src/arrow/util/bitmap.h
+++ b/cpp/src/arrow/util/bitmap.h
@@ -33,10 +33,10 @@
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/bitmap_reader.h"
#include "arrow/util/bitmap_writer.h"
-#include "arrow/util/bytes_view.h"
#include "arrow/util/compare.h"
#include "arrow/util/endian.h"
#include "arrow/util/functional.h"
+#include "arrow/util/span.h"
#include "arrow/util/string_builder.h"
#include "arrow/util/visibility.h"
@@ -49,9 +49,6 @@ namespace internal {
class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
public util::EqualityComparable<Bitmap> {
public:
- template <typename Word>
- using View = std::basic_string_view<Word>;
-
Bitmap() = default;
Bitmap(const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length)
@@ -72,17 +69,17 @@ class ARROW_EXPORT Bitmap : public
util::ToStringOstreamable<Bitmap>,
Bitmap Slice(int64_t offset) const {
if (mutable_data_ != NULLPTR) {
- return Bitmap(mutable_data_, offset_ + offset, length_ - offset);
+ return {mutable_data_, offset_ + offset, length_ - offset};
} else {
- return Bitmap(data_, offset_ + offset, length_ - offset);
+ return {data_, offset_ + offset, length_ - offset};
}
}
Bitmap Slice(int64_t offset, int64_t length) const {
if (mutable_data_ != NULLPTR) {
- return Bitmap(mutable_data_, offset_ + offset, length);
+ return {mutable_data_, offset_ + offset, length};
} else {
- return Bitmap(data_, offset_ + offset, length);
+ return {data_, offset_ + offset, length};
}
}
@@ -158,7 +155,7 @@ class ARROW_EXPORT Bitmap : public
util::ToStringOstreamable<Bitmap>,
Bitmap bitmaps[N];
int64_t offsets[N];
int64_t bit_length = BitLength(bitmaps_arg, N);
- View<Word> words[N];
+ util::span<const Word> words[N];
for (size_t i = 0; i < N; ++i) {
bitmaps[i] = bitmaps_arg[i];
offsets[i] = bitmaps[i].template word_offset<Word>();
@@ -386,15 +383,15 @@ class ARROW_EXPORT Bitmap : public
util::ToStringOstreamable<Bitmap>,
/// number of bits in this Bitmap
int64_t length() const { return length_; }
- /// string_view of all bytes which contain any bit in this Bitmap
- util::bytes_view bytes() const {
+ /// span of all bytes which contain any bit in this Bitmap
+ util::span<const uint8_t> bytes() const {
auto byte_offset = offset_ / 8;
auto byte_count = bit_util::CeilDiv(offset_ + length_, 8) - byte_offset;
- return util::bytes_view(data_ + byte_offset, byte_count);
+ return {data_ + byte_offset, static_cast<size_t>(byte_count)};
}
private:
- /// string_view of all Words which contain any bit in this Bitmap
+ /// span of all Words which contain any bit in this Bitmap
///
/// For example, given Word=uint16_t and a bitmap spanning bits [20, 36)
/// words() would span bits [16, 48).
@@ -407,15 +404,15 @@ class ARROW_EXPORT Bitmap : public
util::ToStringOstreamable<Bitmap>,
/// \warning The words may contain bytes which lie outside the buffer or are
/// uninitialized.
template <typename Word>
- View<Word> words() const {
+ util::span<const Word> words() const {
auto bytes_addr = reinterpret_cast<intptr_t>(bytes().data());
auto words_addr = bytes_addr - bytes_addr % sizeof(Word);
auto word_byte_count =
bit_util::RoundUpToPowerOf2(static_cast<int64_t>(bytes_addr +
bytes().size()),
static_cast<int64_t>(sizeof(Word))) -
words_addr;
- return View<Word>(reinterpret_cast<const Word*>(words_addr),
- word_byte_count / sizeof(Word));
+ return {reinterpret_cast<const Word*>(words_addr),
+ static_cast<size_t>(word_byte_count / sizeof(Word))};
}
/// offset of first bit relative to words<Word>().data()
diff --git a/cpp/src/arrow/util/bytes_view.h b/cpp/src/arrow/util/bytes_view.h
deleted file mode 100644
index b1aacc96ed..0000000000
--- a/cpp/src/arrow/util/bytes_view.h
+++ /dev/null
@@ -1,29 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <string_view>
-
-namespace arrow {
-namespace util {
-
-using bytes_view = std::basic_string_view<uint8_t>;
-
-} // namespace util
-} // namespace arrow
diff --git a/cpp/src/arrow/util/span.h b/cpp/src/arrow/util/span.h
new file mode 100644
index 0000000000..4254fec75e
--- /dev/null
+++ b/cpp/src/arrow/util/span.h
@@ -0,0 +1,132 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <type_traits>
+
+namespace arrow::util {
+
+/// std::span polyfill.
+///
+/// Does not support static extents.
+template <typename T>
+class span {
+ static_assert(sizeof(T),
+ R"(
+std::span allows contiguous_iterators instead of just pointers, the enforcement
+of which requires T to be a complete type. arrow::util::span does not support
+contiguous_iterators, but T is still required to be a complete type to prevent
+writing code which would break when it is replaced by std::span.)");
+
+ public:
+ using element_type = T;
+ using value_type = std::remove_cv_t<T>;
+ using iterator = T*;
+ using const_iterator = T const*;
+
+ span() = default;
+ span(const span&) = default;
+ span& operator=(const span&) = default;
+
+ template <typename M, typename = std::enable_if_t<std::is_same_v<T, M
const>>>
+ // NOLINTNEXTLINE runtime/explicit
+ constexpr span(span<M> mut) : span{mut.data(), mut.size()} {}
+
+ constexpr span(T* data, size_t count) : data_{data}, size_{count} {}
+
+ constexpr span(T* begin, T* end)
+ : data_{begin}, size_{static_cast<size_t>(end - begin)} {}
+
+ template <
+ typename R,
+ typename DisableUnlessConstructibleFromDataAndSize =
+ decltype(span<T>(std::data(std::declval<R>()),
std::size(std::declval<R>()))),
+ typename DisableUnlessSimilarTypes = std::enable_if_t<std::is_same_v<
+
std::decay_t<std::remove_pointer_t<decltype(std::data(std::declval<R>()))>>,
+ std::decay_t<T>>>>
+ // NOLINTNEXTLINE runtime/explicit, non-const reference
+ constexpr span(R&& range) : span{std::data(range), std::size(range)} {}
+
+ constexpr T* begin() const { return data_; }
+ constexpr T* end() const { return data_ + size_; }
+ constexpr T* data() const { return data_; }
+
+ constexpr size_t size() const { return size_; }
+ constexpr size_t size_bytes() const { return size_ * sizeof(T); }
+ constexpr bool empty() const { return size_ == 0; }
+
+ constexpr T& operator[](size_t i) { return data_[i]; }
+ constexpr const T& operator[](size_t i) const { return data_[i]; }
+
+ constexpr span subspan(size_t offset) const {
+ if (offset > size_) return {data_, data_};
+ return {data_ + offset, size_ - offset};
+ }
+
+ constexpr span subspan(size_t offset, size_t count) const {
+ auto out = subspan(offset);
+ if (count < out.size_) {
+ out.size_ = count;
+ }
+ return out;
+ }
+
+ constexpr bool operator==(span const& other) const {
+ if (size_ != other.size_) return false;
+
+ if constexpr (std::is_integral_v<T>) {
+ if (size_ == 0) {
+ return true; // memcmp does not handle null pointers, even if size_
== 0
+ }
+ return std::memcmp(data_, other.data_, size_bytes()) == 0;
+ } else {
+ T* ptr = data_;
+ for (T const& e : other) {
+ if (*ptr++ != e) return false;
+ }
+ return true;
+ }
+ }
+ constexpr bool operator!=(span const& other) const { return !(*this ==
other); }
+
+ private:
+ T* data_{};
+ size_t size_{};
+};
+
+template <typename R>
+span(R& range) -> span<std::remove_pointer_t<decltype(std::data(range))>>;
+
+template <typename T>
+span(T*, size_t) -> span<T>;
+
+template <typename T>
+constexpr span<std::byte const> as_bytes(span<T> s) {
+ return {reinterpret_cast<std::byte const*>(s.data()), s.size_bytes()};
+}
+
+template <typename T>
+constexpr span<std::byte> as_writable_bytes(span<T> s) {
+ return {reinterpret_cast<std::byte*>(s.data()), s.size_bytes()};
+}
+
+} // namespace arrow::util
diff --git a/cpp/src/arrow/util/span_test.cc b/cpp/src/arrow/util/span_test.cc
new file mode 100644
index 0000000000..fcbb49f71e
--- /dev/null
+++ b/cpp/src/arrow/util/span_test.cc
@@ -0,0 +1,205 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstddef>
+#include <iostream>
+
+#include <gtest/gtest.h>
+
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/matchers.h"
+#include "arrow/util/span.h"
+
+using testing::ElementsAre;
+using testing::ElementsAreArray;
+using testing::PrintToString;
+
+namespace arrow::util {
+
+template <typename T>
+std::ostream& operator<<(std::ostream& os, const span<T>& span) {
+ // Inefficient but good enough for testing
+ os << PrintToString(std::vector(span.begin(), span.end()));
+ return os;
+}
+
+TEST(Span, Construction) {
+ // const spans may be constructed from mutable spans
+ static_assert(std::is_constructible_v<span<const int>, span<int>>);
+ // ... but mutable spans may be constructed from const spans
+ static_assert(!std::is_constructible_v<span<int>, span<const int>>);
+
+ int arr[] = {1, 2, 3};
+ constexpr int const_arr[] = {7, 8, 9};
+
+ static_assert(std::is_constructible_v<span<int>, decltype(arr)&>);
+ static_assert(!std::is_constructible_v<span<int>, decltype(const_arr)&>);
+
+ static_assert(std::is_constructible_v<span<const int>, decltype(arr)&>);
+ static_assert(std::is_constructible_v<span<const int>,
decltype(const_arr)&>);
+ static_assert(std::is_constructible_v<span<const int>, span<int>>);
+
+ static_assert(std::is_constructible_v<span<int>, std::vector<int>&>);
+ static_assert(!std::is_constructible_v<span<int>, const std::vector<int>&>);
+ static_assert(!std::is_constructible_v<span<int>, std::vector<int>&&>);
+
+ static_assert(std::is_constructible_v<span<const int>, std::vector<int>&>);
+ static_assert(std::is_constructible_v<span<const int>, const
std::vector<int>&>);
+ // const spans may even be constructed from rvalue ranges
+ static_assert(std::is_constructible_v<span<const int>, std::vector<int>&&>);
+
+ EXPECT_THAT(span<const int>(const_arr), ElementsAreArray(const_arr));
+ EXPECT_THAT(span<int>(arr), ElementsAreArray(arr));
+
+ static_assert(!std::is_constructible_v<span<const unsigned>,
decltype(const_arr)&>);
+ static_assert(!std::is_constructible_v<span<const std::byte>,
decltype(const_arr)&>);
+}
+
+TEST(Span, TemplateArgumentDeduction) {
+ int arr[3];
+ const int const_arr[] = {1, 2, 3};
+ std::vector<int> vec;
+ const std::vector<int> const_vec;
+ static_assert(std::is_same_v<decltype(span(arr)), span<int>>);
+ static_assert(std::is_same_v<decltype(span(vec)), span<int>>);
+ static_assert(std::is_same_v<decltype(span(const_arr)), span<const int>>);
+ static_assert(std::is_same_v<decltype(span(const_vec)), span<const int>>);
+}
+
+TEST(Span, Size) {
+ int arr[] = {1, 2, 3};
+ EXPECT_EQ(span(arr).size(), 3);
+ EXPECT_EQ(span(arr).size_bytes(), sizeof(int) * 3);
+
+ std::vector<int> vec;
+ EXPECT_TRUE(span(vec).empty());
+ EXPECT_EQ(span(vec).size(), 0);
+ EXPECT_EQ(span(vec).size_bytes(), 0);
+
+ vec.resize(999);
+ EXPECT_FALSE(span(vec).empty());
+ EXPECT_EQ(span(vec).size(), 999);
+ EXPECT_EQ(span(vec).size_bytes(), sizeof(int) * 999);
+}
+
+TEST(Span, Equality) {
+ auto check_eq = [](auto l, auto r) {
+ ARROW_SCOPED_TRACE("l = ", l, ", r = ", r);
+ EXPECT_TRUE(l == r);
+ EXPECT_FALSE(l != r);
+ };
+ auto check_ne = [](auto l, auto r) {
+ ARROW_SCOPED_TRACE("l = ", l, ", r = ", r);
+ EXPECT_TRUE(l != r);
+ EXPECT_FALSE(l == r);
+ };
+
+ {
+ // exercise integral branch with memcmp
+ check_eq(span<int>(), span<int>());
+
+ int arr[] = {1, 2, 3};
+ check_eq(span(arr), span(arr));
+ check_eq(span(arr).subspan(1), span(arr).subspan(1));
+ check_ne(span(arr).subspan(1), span(arr).subspan(2));
+
+ std::vector<int> vec{1, 2, 3};
+ check_eq(span(vec), span(arr));
+ check_eq(span(vec).subspan(1), span(arr).subspan(1));
+
+ vec = {2, 3, 4};
+ check_ne(span(vec), span(arr));
+ check_eq(span(vec).subspan(0, 2), span(arr).subspan(1));
+
+ // 0-sized
+ vec = {};
+ check_ne(span(vec), span(arr));
+ check_eq(span(vec), span(arr).subspan(3));
+ }
+ {
+ // exercise non-integral branch with for loop
+ check_eq(span<std::string>(), span<std::string>());
+
+ std::string arr[] = {"a", "b", "c"};
+ check_eq(span(arr), span(arr));
+ check_eq(span(arr).subspan(1), span(arr).subspan(1));
+
+ std::vector<std::string> vec{"a", "b", "c"};
+ check_eq(span(vec), span(arr));
+ check_eq(span(vec).subspan(1), span(arr).subspan(1));
+
+ vec = {"b", "c", "d"};
+ check_ne(span(vec), span(arr));
+ check_eq(span(vec).subspan(0, 2), span(arr).subspan(1));
+
+ // 0-sized
+ vec = {};
+ check_ne(span(vec), span(arr));
+ check_eq(span(vec), span(arr).subspan(3));
+ }
+}
+
+TEST(Span, SubSpan) {
+ int arr[] = {1, 2, 3};
+ span s(arr);
+
+ auto ExpectIdentical = [](span<int> l, span<int> r) {
+ EXPECT_EQ(l.data(), r.data());
+ EXPECT_EQ(l.size(), r.size());
+ };
+
+ ExpectIdentical(s.subspan(0), s);
+ ExpectIdentical(s.subspan(0, s.size()), s);
+
+ for (size_t offset = 0; offset < s.size(); ++offset) {
+ span expected(arr + offset, s.size() - offset);
+ ExpectIdentical(s.subspan(offset), expected);
+ ExpectIdentical(s.subspan(offset, s.size() * 3), expected);
+ }
+ EXPECT_TRUE(s.subspan(s.size()).empty());
+ EXPECT_TRUE(s.subspan(s.size() * 3).empty());
+
+ for (size_t length = 0; length < s.size(); ++length) {
+ span expected(arr, length);
+ ExpectIdentical(s.subspan(0, length), expected);
+ }
+
+ ExpectIdentical(s.subspan(1, 1), span(arr + 1, 1));
+}
+
+TEST(Span, Mutation) {
+ size_t arr[] = {9, 9, 9, 9, 9};
+
+ span s(arr);
+ for (size_t i = 0; i < s.size(); ++i) {
+ s[i] = i * i;
+ }
+
+ EXPECT_THAT(arr, ElementsAre(0, 1, 4, 9, 16));
+
+ auto set = [](span<size_t> lhs, size_t rhs) {
+ for (size_t& i : lhs) {
+ i = rhs;
+ }
+ };
+ set(span(arr), 0);
+ set(span(arr).subspan(1), 1);
+ set(span(arr).subspan(2, 2), 23);
+ EXPECT_THAT(arr, ElementsAre(0, 1, 23, 23, 1));
+}
+
+} // namespace arrow::util