This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 366dbe1e73 GH-15281: [C++] Replace bytes_view alias with span (#36334)
366dbe1e73 is described below

commit 366dbe1e737fc2fb838180bd1598f5ccd0482310
Author: Benjamin Kietzman <[email protected]>
AuthorDate: Tue Jul 11 03:29:04 2023 -0700

    GH-15281: [C++] Replace bytes_view alias with span (#36334)
    
    
    
    ### Rationale for this change
    
    basic_string_view was never really intended to support types other than 
chars; the template which the STL intends for use in this situation is std::span
    
    ### What changes are included in this PR?
    
    Since std::span is added in c++20, a minimal polyfill is added named 
util::span.
    - doesn't include support for static extents
    - some accessors like `front()` are not present in util::span
    
    ### User facing changes:
    
    - The broken alias bytes_view and its header are removed. I don't expect 
this to trouble anybody much since it isn't used in much of our API, and if 
it's really necessary it will be trivial to add it back with `using bytes_view 
= span<uint8_t const>;` or so
    
    * Closes: #15281
    
    Lead-authored-by: Benjamin Kietzman <[email protected]>
    Co-authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/buffer.h            |   7 +-
 cpp/src/arrow/util/CMakeLists.txt |   1 +
 cpp/src/arrow/util/bitmap.h       |  29 +++---
 cpp/src/arrow/util/bytes_view.h   |  29 ------
 cpp/src/arrow/util/span.h         | 132 ++++++++++++++++++++++++
 cpp/src/arrow/util/span_test.cc   | 205 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 352 insertions(+), 51 deletions(-)

diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h
index 65f1abda16..238768b815 100644
--- a/cpp/src/arrow/buffer.h
+++ b/cpp/src/arrow/buffer.h
@@ -28,7 +28,6 @@
 #include "arrow/device.h"
 #include "arrow/status.h"
 #include "arrow/type_fwd.h"
-#include "arrow/util/bytes_view.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/visibility.h"
 
@@ -167,13 +166,9 @@ class ARROW_EXPORT Buffer {
   /// \brief View buffer contents as a std::string_view
   /// \return std::string_view
   explicit operator std::string_view() const {
-    return std::string_view(reinterpret_cast<const char*>(data_), size_);
+    return {reinterpret_cast<const char*>(data_), static_cast<size_t>(size_)};
   }
 
-  /// \brief View buffer contents as a util::bytes_view
-  /// \return util::bytes_view
-  explicit operator util::bytes_view() const { return util::bytes_view(data_, 
size_); }
-
   /// \brief Return a pointer to the buffer's data
   ///
   /// The buffer has to be a CPU buffer (`is_cpu()` is true).
diff --git a/cpp/src/arrow/util/CMakeLists.txt 
b/cpp/src/arrow/util/CMakeLists.txt
index 4d36465578..3cecab3a63 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -60,6 +60,7 @@ add_arrow_test(utility-test
                reflection_test.cc
                rows_to_batches_test.cc
                small_vector_test.cc
+               span_test.cc
                stl_util_test.cc
                string_test.cc
                tdigest_test.cc
diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h
index a6df1e561e..4750e697fc 100644
--- a/cpp/src/arrow/util/bitmap.h
+++ b/cpp/src/arrow/util/bitmap.h
@@ -33,10 +33,10 @@
 #include "arrow/util/bitmap_ops.h"
 #include "arrow/util/bitmap_reader.h"
 #include "arrow/util/bitmap_writer.h"
-#include "arrow/util/bytes_view.h"
 #include "arrow/util/compare.h"
 #include "arrow/util/endian.h"
 #include "arrow/util/functional.h"
+#include "arrow/util/span.h"
 #include "arrow/util/string_builder.h"
 #include "arrow/util/visibility.h"
 
@@ -49,9 +49,6 @@ namespace internal {
 class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
                             public util::EqualityComparable<Bitmap> {
  public:
-  template <typename Word>
-  using View = std::basic_string_view<Word>;
-
   Bitmap() = default;
 
   Bitmap(const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length)
@@ -72,17 +69,17 @@ class ARROW_EXPORT Bitmap : public 
util::ToStringOstreamable<Bitmap>,
 
   Bitmap Slice(int64_t offset) const {
     if (mutable_data_ != NULLPTR) {
-      return Bitmap(mutable_data_, offset_ + offset, length_ - offset);
+      return {mutable_data_, offset_ + offset, length_ - offset};
     } else {
-      return Bitmap(data_, offset_ + offset, length_ - offset);
+      return {data_, offset_ + offset, length_ - offset};
     }
   }
 
   Bitmap Slice(int64_t offset, int64_t length) const {
     if (mutable_data_ != NULLPTR) {
-      return Bitmap(mutable_data_, offset_ + offset, length);
+      return {mutable_data_, offset_ + offset, length};
     } else {
-      return Bitmap(data_, offset_ + offset, length);
+      return {data_, offset_ + offset, length};
     }
   }
 
@@ -158,7 +155,7 @@ class ARROW_EXPORT Bitmap : public 
util::ToStringOstreamable<Bitmap>,
     Bitmap bitmaps[N];
     int64_t offsets[N];
     int64_t bit_length = BitLength(bitmaps_arg, N);
-    View<Word> words[N];
+    util::span<const Word> words[N];
     for (size_t i = 0; i < N; ++i) {
       bitmaps[i] = bitmaps_arg[i];
       offsets[i] = bitmaps[i].template word_offset<Word>();
@@ -386,15 +383,15 @@ class ARROW_EXPORT Bitmap : public 
util::ToStringOstreamable<Bitmap>,
   /// number of bits in this Bitmap
   int64_t length() const { return length_; }
 
-  /// string_view of all bytes which contain any bit in this Bitmap
-  util::bytes_view bytes() const {
+  /// span of all bytes which contain any bit in this Bitmap
+  util::span<const uint8_t> bytes() const {
     auto byte_offset = offset_ / 8;
     auto byte_count = bit_util::CeilDiv(offset_ + length_, 8) - byte_offset;
-    return util::bytes_view(data_ + byte_offset, byte_count);
+    return {data_ + byte_offset, static_cast<size_t>(byte_count)};
   }
 
  private:
-  /// string_view of all Words which contain any bit in this Bitmap
+  /// span of all Words which contain any bit in this Bitmap
   ///
   /// For example, given Word=uint16_t and a bitmap spanning bits [20, 36)
   /// words() would span bits [16, 48).
@@ -407,15 +404,15 @@ class ARROW_EXPORT Bitmap : public 
util::ToStringOstreamable<Bitmap>,
   /// \warning The words may contain bytes which lie outside the buffer or are
   /// uninitialized.
   template <typename Word>
-  View<Word> words() const {
+  util::span<const Word> words() const {
     auto bytes_addr = reinterpret_cast<intptr_t>(bytes().data());
     auto words_addr = bytes_addr - bytes_addr % sizeof(Word);
     auto word_byte_count =
         bit_util::RoundUpToPowerOf2(static_cast<int64_t>(bytes_addr + 
bytes().size()),
                                     static_cast<int64_t>(sizeof(Word))) -
         words_addr;
-    return View<Word>(reinterpret_cast<const Word*>(words_addr),
-                      word_byte_count / sizeof(Word));
+    return {reinterpret_cast<const Word*>(words_addr),
+            static_cast<size_t>(word_byte_count / sizeof(Word))};
   }
 
   /// offset of first bit relative to words<Word>().data()
diff --git a/cpp/src/arrow/util/bytes_view.h b/cpp/src/arrow/util/bytes_view.h
deleted file mode 100644
index b1aacc96ed..0000000000
--- a/cpp/src/arrow/util/bytes_view.h
+++ /dev/null
@@ -1,29 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <string_view>
-
-namespace arrow {
-namespace util {
-
-using bytes_view = std::basic_string_view<uint8_t>;
-
-}  // namespace util
-}  // namespace arrow
diff --git a/cpp/src/arrow/util/span.h b/cpp/src/arrow/util/span.h
new file mode 100644
index 0000000000..4254fec75e
--- /dev/null
+++ b/cpp/src/arrow/util/span.h
@@ -0,0 +1,132 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <type_traits>
+
+namespace arrow::util {
+
+/// std::span polyfill.
+///
+/// Does not support static extents.
+template <typename T>
+class span {
+  static_assert(sizeof(T),
+                R"(
+std::span allows contiguous_iterators instead of just pointers, the enforcement
+of which requires T to be a complete type. arrow::util::span does not support
+contiguous_iterators, but T is still required to be a complete type to prevent
+writing code which would break when it is replaced by std::span.)");
+
+ public:
+  using element_type = T;
+  using value_type = std::remove_cv_t<T>;
+  using iterator = T*;
+  using const_iterator = T const*;
+
+  span() = default;
+  span(const span&) = default;
+  span& operator=(const span&) = default;
+
+  template <typename M, typename = std::enable_if_t<std::is_same_v<T, M 
const>>>
+  // NOLINTNEXTLINE runtime/explicit
+  constexpr span(span<M> mut) : span{mut.data(), mut.size()} {}
+
+  constexpr span(T* data, size_t count) : data_{data}, size_{count} {}
+
+  constexpr span(T* begin, T* end)
+      : data_{begin}, size_{static_cast<size_t>(end - begin)} {}
+
+  template <
+      typename R,
+      typename DisableUnlessConstructibleFromDataAndSize =
+          decltype(span<T>(std::data(std::declval<R>()), 
std::size(std::declval<R>()))),
+      typename DisableUnlessSimilarTypes = std::enable_if_t<std::is_same_v<
+          
std::decay_t<std::remove_pointer_t<decltype(std::data(std::declval<R>()))>>,
+          std::decay_t<T>>>>
+  // NOLINTNEXTLINE runtime/explicit, non-const reference
+  constexpr span(R&& range) : span{std::data(range), std::size(range)} {}
+
+  constexpr T* begin() const { return data_; }
+  constexpr T* end() const { return data_ + size_; }
+  constexpr T* data() const { return data_; }
+
+  constexpr size_t size() const { return size_; }
+  constexpr size_t size_bytes() const { return size_ * sizeof(T); }
+  constexpr bool empty() const { return size_ == 0; }
+
+  constexpr T& operator[](size_t i) { return data_[i]; }
+  constexpr const T& operator[](size_t i) const { return data_[i]; }
+
+  constexpr span subspan(size_t offset) const {
+    if (offset > size_) return {data_, data_};
+    return {data_ + offset, size_ - offset};
+  }
+
+  constexpr span subspan(size_t offset, size_t count) const {
+    auto out = subspan(offset);
+    if (count < out.size_) {
+      out.size_ = count;
+    }
+    return out;
+  }
+
+  constexpr bool operator==(span const& other) const {
+    if (size_ != other.size_) return false;
+
+    if constexpr (std::is_integral_v<T>) {
+      if (size_ == 0) {
+        return true;  // memcmp does not handle null pointers, even if size_ 
== 0
+      }
+      return std::memcmp(data_, other.data_, size_bytes()) == 0;
+    } else {
+      T* ptr = data_;
+      for (T const& e : other) {
+        if (*ptr++ != e) return false;
+      }
+      return true;
+    }
+  }
+  constexpr bool operator!=(span const& other) const { return !(*this == 
other); }
+
+ private:
+  T* data_{};
+  size_t size_{};
+};
+
+template <typename R>
+span(R& range) -> span<std::remove_pointer_t<decltype(std::data(range))>>;
+
+template <typename T>
+span(T*, size_t) -> span<T>;
+
+template <typename T>
+constexpr span<std::byte const> as_bytes(span<T> s) {
+  return {reinterpret_cast<std::byte const*>(s.data()), s.size_bytes()};
+}
+
+template <typename T>
+constexpr span<std::byte> as_writable_bytes(span<T> s) {
+  return {reinterpret_cast<std::byte*>(s.data()), s.size_bytes()};
+}
+
+}  // namespace arrow::util
diff --git a/cpp/src/arrow/util/span_test.cc b/cpp/src/arrow/util/span_test.cc
new file mode 100644
index 0000000000..fcbb49f71e
--- /dev/null
+++ b/cpp/src/arrow/util/span_test.cc
@@ -0,0 +1,205 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstddef>
+#include <iostream>
+
+#include <gtest/gtest.h>
+
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/matchers.h"
+#include "arrow/util/span.h"
+
+using testing::ElementsAre;
+using testing::ElementsAreArray;
+using testing::PrintToString;
+
+namespace arrow::util {
+
+template <typename T>
+std::ostream& operator<<(std::ostream& os, const span<T>& span) {
+  // Inefficient but good enough for testing
+  os << PrintToString(std::vector(span.begin(), span.end()));
+  return os;
+}
+
+TEST(Span, Construction) {
+  // const spans may be constructed from mutable spans
+  static_assert(std::is_constructible_v<span<const int>, span<int>>);
+  // ... but mutable spans may be constructed from const spans
+  static_assert(!std::is_constructible_v<span<int>, span<const int>>);
+
+  int arr[] = {1, 2, 3};
+  constexpr int const_arr[] = {7, 8, 9};
+
+  static_assert(std::is_constructible_v<span<int>, decltype(arr)&>);
+  static_assert(!std::is_constructible_v<span<int>, decltype(const_arr)&>);
+
+  static_assert(std::is_constructible_v<span<const int>, decltype(arr)&>);
+  static_assert(std::is_constructible_v<span<const int>, 
decltype(const_arr)&>);
+  static_assert(std::is_constructible_v<span<const int>, span<int>>);
+
+  static_assert(std::is_constructible_v<span<int>, std::vector<int>&>);
+  static_assert(!std::is_constructible_v<span<int>, const std::vector<int>&>);
+  static_assert(!std::is_constructible_v<span<int>, std::vector<int>&&>);
+
+  static_assert(std::is_constructible_v<span<const int>, std::vector<int>&>);
+  static_assert(std::is_constructible_v<span<const int>, const 
std::vector<int>&>);
+  // const spans may even be constructed from rvalue ranges
+  static_assert(std::is_constructible_v<span<const int>, std::vector<int>&&>);
+
+  EXPECT_THAT(span<const int>(const_arr), ElementsAreArray(const_arr));
+  EXPECT_THAT(span<int>(arr), ElementsAreArray(arr));
+
+  static_assert(!std::is_constructible_v<span<const unsigned>, 
decltype(const_arr)&>);
+  static_assert(!std::is_constructible_v<span<const std::byte>, 
decltype(const_arr)&>);
+}
+
+TEST(Span, TemplateArgumentDeduction) {
+  int arr[3];
+  const int const_arr[] = {1, 2, 3};
+  std::vector<int> vec;
+  const std::vector<int> const_vec;
+  static_assert(std::is_same_v<decltype(span(arr)), span<int>>);
+  static_assert(std::is_same_v<decltype(span(vec)), span<int>>);
+  static_assert(std::is_same_v<decltype(span(const_arr)), span<const int>>);
+  static_assert(std::is_same_v<decltype(span(const_vec)), span<const int>>);
+}
+
+TEST(Span, Size) {
+  int arr[] = {1, 2, 3};
+  EXPECT_EQ(span(arr).size(), 3);
+  EXPECT_EQ(span(arr).size_bytes(), sizeof(int) * 3);
+
+  std::vector<int> vec;
+  EXPECT_TRUE(span(vec).empty());
+  EXPECT_EQ(span(vec).size(), 0);
+  EXPECT_EQ(span(vec).size_bytes(), 0);
+
+  vec.resize(999);
+  EXPECT_FALSE(span(vec).empty());
+  EXPECT_EQ(span(vec).size(), 999);
+  EXPECT_EQ(span(vec).size_bytes(), sizeof(int) * 999);
+}
+
+TEST(Span, Equality) {
+  auto check_eq = [](auto l, auto r) {
+    ARROW_SCOPED_TRACE("l = ", l, ", r = ", r);
+    EXPECT_TRUE(l == r);
+    EXPECT_FALSE(l != r);
+  };
+  auto check_ne = [](auto l, auto r) {
+    ARROW_SCOPED_TRACE("l = ", l, ", r = ", r);
+    EXPECT_TRUE(l != r);
+    EXPECT_FALSE(l == r);
+  };
+
+  {
+    // exercise integral branch with memcmp
+    check_eq(span<int>(), span<int>());
+
+    int arr[] = {1, 2, 3};
+    check_eq(span(arr), span(arr));
+    check_eq(span(arr).subspan(1), span(arr).subspan(1));
+    check_ne(span(arr).subspan(1), span(arr).subspan(2));
+
+    std::vector<int> vec{1, 2, 3};
+    check_eq(span(vec), span(arr));
+    check_eq(span(vec).subspan(1), span(arr).subspan(1));
+
+    vec = {2, 3, 4};
+    check_ne(span(vec), span(arr));
+    check_eq(span(vec).subspan(0, 2), span(arr).subspan(1));
+
+    // 0-sized
+    vec = {};
+    check_ne(span(vec), span(arr));
+    check_eq(span(vec), span(arr).subspan(3));
+  }
+  {
+    // exercise non-integral branch with for loop
+    check_eq(span<std::string>(), span<std::string>());
+
+    std::string arr[] = {"a", "b", "c"};
+    check_eq(span(arr), span(arr));
+    check_eq(span(arr).subspan(1), span(arr).subspan(1));
+
+    std::vector<std::string> vec{"a", "b", "c"};
+    check_eq(span(vec), span(arr));
+    check_eq(span(vec).subspan(1), span(arr).subspan(1));
+
+    vec = {"b", "c", "d"};
+    check_ne(span(vec), span(arr));
+    check_eq(span(vec).subspan(0, 2), span(arr).subspan(1));
+
+    // 0-sized
+    vec = {};
+    check_ne(span(vec), span(arr));
+    check_eq(span(vec), span(arr).subspan(3));
+  }
+}
+
+TEST(Span, SubSpan) {
+  int arr[] = {1, 2, 3};
+  span s(arr);
+
+  auto ExpectIdentical = [](span<int> l, span<int> r) {
+    EXPECT_EQ(l.data(), r.data());
+    EXPECT_EQ(l.size(), r.size());
+  };
+
+  ExpectIdentical(s.subspan(0), s);
+  ExpectIdentical(s.subspan(0, s.size()), s);
+
+  for (size_t offset = 0; offset < s.size(); ++offset) {
+    span expected(arr + offset, s.size() - offset);
+    ExpectIdentical(s.subspan(offset), expected);
+    ExpectIdentical(s.subspan(offset, s.size() * 3), expected);
+  }
+  EXPECT_TRUE(s.subspan(s.size()).empty());
+  EXPECT_TRUE(s.subspan(s.size() * 3).empty());
+
+  for (size_t length = 0; length < s.size(); ++length) {
+    span expected(arr, length);
+    ExpectIdentical(s.subspan(0, length), expected);
+  }
+
+  ExpectIdentical(s.subspan(1, 1), span(arr + 1, 1));
+}
+
+TEST(Span, Mutation) {
+  size_t arr[] = {9, 9, 9, 9, 9};
+
+  span s(arr);
+  for (size_t i = 0; i < s.size(); ++i) {
+    s[i] = i * i;
+  }
+
+  EXPECT_THAT(arr, ElementsAre(0, 1, 4, 9, 16));
+
+  auto set = [](span<size_t> lhs, size_t rhs) {
+    for (size_t& i : lhs) {
+      i = rhs;
+    }
+  };
+  set(span(arr), 0);
+  set(span(arr).subspan(1), 1);
+  set(span(arr).subspan(2, 2), 23);
+  EXPECT_THAT(arr, ElementsAre(0, 1, 23, 23, 1));
+}
+
+}  // namespace arrow::util

Reply via email to