paleolimbot commented on code in PR #404:
URL: https://github.com/apache/arrow-nanoarrow/pull/404#discussion_r1531439477


##########
src/nanoarrow/nanoarrow.hpp:
##########
@@ -548,6 +558,366 @@ class VectorArrayStream {
 
 /// @}
 
+struct Nothing {};
+constexpr Nothing NA{};
+
+template <typename T>
+class Maybe {
+ public:
+  Maybe() : nothing_{Nothing{}}, is_something_{false} {}
+  Maybe(Nothing) : Maybe{} {}
+
+  Maybe(T something)  // NOLINT(google-explicit-constructor)
+      : something_{something}, is_something_{true} {}
+
+  explicit constexpr operator bool() const { return is_something_; }
+
+  const T& operator*() const { return something_; }
+
+  friend inline bool operator==(Maybe l, Maybe r) {
+    if (l.is_something_ != r.is_something_) return false;
+    return l.is_something_ ? l.something_ == r.something_ : true;
+  }
+  friend inline bool operator!=(Maybe l, Maybe r) { return !(l == r); }
+
+  T value_or(T val) const { return is_something_ ? something_ : val; }
+
+ private:
+  static_assert(std::is_trivially_copyable<T>::value, "");
+  static_assert(std::is_trivially_destructible<T>::value, "");
+
+  union {
+    Nothing nothing_;
+    T something_;
+  };
+  bool is_something_;
+};
+
+template <typename Get>
+struct RandomAccessRange {
+  Get get;
+  int64_t size;
+
+  using value_type = decltype(std::declval<Get>()(0));
+
+  struct const_iterator {
+    int64_t i;
+    const RandomAccessRange* range;
+    bool operator==(const_iterator other) const { return i == other.i; }
+    bool operator!=(const_iterator other) const { return i != other.i; }
+    const_iterator& operator++() { return ++i, *this; }
+    value_type operator*() const { return range->get(i); }
+  };
+
+  const_iterator begin() const { return {0, this}; }
+  const_iterator end() const { return {size, this}; }
+};
+
+template <typename Next>
+struct InputRange {
+  Next next;
+  using ValueOrFalsy = decltype(std::declval<Next>()());
+
+  static_assert(std::is_convertible<ValueOrFalsy, bool>::value, "");
+  using value_type = decltype(*std::declval<ValueOrFalsy>());
+
+  struct iterator {
+    InputRange* range;
+    ValueOrFalsy stashed;
+
+    bool operator==(iterator other) const {
+      if (stashed) false;
+      // The stashed value is falsy, so the LHS iterator is at the end.
+      // Check if RHS iterator is the end sentinel.
+      return other.range == nullptr;
+    }
+    bool operator!=(iterator other) const { return !(*this == other); }
+
+    iterator& operator++() {
+      stashed = range->next();
+      return *this;
+    }
+    value_type operator*() const { return *stashed; }
+  };
+
+  iterator begin() { return {this}; }
+  iterator end() { return {}; }
+};
+
+// C++17 could do all of this with a lambda
+template <typename T>
+class ViewAs {
+ private:
+  struct Get {
+    const uint8_t* validity;
+    const void* values;
+    int64_t offset;
+
+    Maybe<T> operator()(int64_t i) const {
+      i += offset;
+      if (validity == nullptr || ArrowBitGet(validity, i)) {
+        if (std::is_same<T, bool>::value) {
+          return ArrowBitGet(static_cast<const uint8_t*>(values), i);
+        } else {
+          return static_cast<const T*>(values)[i];
+        }
+      }
+      return Nothing{};
+    }
+  };
+
+  RandomAccessRange<Get> range_;
+
+ public:
+  ViewAs(const ArrowArrayView* array_view)
+      : range_{
+            Get{
+                array_view->buffer_views[0].data.as_uint8,
+                array_view->buffer_views[1].data.data,
+                array_view->offset,
+            },
+            array_view->length,
+        } {}
+
+  ViewAs(const ArrowArray* array)
+      : range_{
+            Get{
+                static_cast<const uint8_t*>(array->buffers[0]),
+                array->buffers[1],
+                /*offset=*/0,
+            },
+            array->length,
+        } {}
+
+  using value_type = typename RandomAccessRange<Get>::value_type;
+  using const_iterator = typename RandomAccessRange<Get>::const_iterator;
+  const_iterator begin() const { return range_.begin(); }
+  const_iterator end() const { return range_.end(); }
+  value_type operator[](int64_t i) const { return range_.get(i); }
+};
+
+template <int OffsetSize = 0>
+class ViewAsBytes {
+ private:
+  using OffsetType = typename std::conditional<OffsetSize == 32, int32_t, 
int64_t>::type;
+
+  struct Get {
+    const uint8_t* validity;
+    const void* offsets;
+    const char* data;
+    int64_t offset;
+
+    Maybe<ArrowStringView> operator()(int64_t i) const {
+      i += offset;
+      auto* offsets = static_cast<const OffsetType*>(this->offsets);
+      if (validity == nullptr || ArrowBitGet(validity, i)) {
+        return ArrowStringView{data + offsets[i], offsets[i + 1] - offsets[i]};
+      }
+      return Nothing{};
+    }
+  };
+
+  RandomAccessRange<Get> range_;
+
+ public:
+  ViewAsBytes(const ArrowArrayView* array_view)
+      : range_{
+            Get{
+                array_view->buffer_views[0].data.as_uint8,
+                array_view->buffer_views[1].data.data,
+                array_view->buffer_views[2].data.as_char,
+                array_view->offset,
+            },
+            array_view->length,
+        } {}
+
+  ViewAsBytes(const ArrowArray* array)
+      : range_{
+            Get{
+                static_cast<const uint8_t*>(array->buffers[0]),
+                array->buffers[1],
+                static_cast<const char*>(array->buffers[2]),
+                /*offset=*/0,
+            },
+            array->length,
+        } {}
+
+  using value_type = typename RandomAccessRange<Get>::value_type;
+  using const_iterator = typename RandomAccessRange<Get>::const_iterator;
+  const_iterator begin() const { return range_.begin(); }
+  const_iterator end() const { return range_.end(); }
+  value_type operator[](int64_t i) const { return range_.get(i); }
+};
+
+template <>
+class ViewAsBytes<0> {
+ private:
+  struct Get {
+    const uint8_t* validity;
+    const char* data;
+    int64_t offset;
+    int fixed_size;
+
+    Maybe<ArrowStringView> operator()(int64_t i) const {
+      i += offset;
+      if (validity == nullptr || ArrowBitGet(validity, i)) {
+        return ArrowStringView{data + i * fixed_size, fixed_size};
+      }
+      return Nothing{};
+    }
+  };
+
+  RandomAccessRange<Get> range_;
+
+ public:
+  ViewAsBytes(const ArrowArrayView* array_view, int fixed_size)
+      : range_{
+            Get{
+                array_view->buffer_views[0].data.as_uint8,
+                array_view->buffer_views[1].data.as_char,
+                array_view->offset,
+                fixed_size,
+            },
+            array_view->length,
+        } {}
+
+  ViewAsBytes(const ArrowArray* array, int fixed_size)
+      : range_{
+            Get{
+                static_cast<const uint8_t*>(array->buffers[0]),
+                static_cast<const char*>(array->buffers[1]),
+                /*offset=*/0,
+                fixed_size,
+            },
+            array->length,
+        } {}
+
+  using value_type = typename RandomAccessRange<Get>::value_type;
+  using const_iterator = typename RandomAccessRange<Get>::const_iterator;
+  const_iterator begin() const { return range_.begin(); }
+  const_iterator end() const { return range_.end(); }
+  value_type operator[](int64_t i) const { return range_.get(i); }
+};
+
+struct ErrorWithCode : ArrowError {
+  ErrorWithCode() : ArrowError{}, code{NANOARROW_OK} {}
+  ArrowErrorCode code;
+  constexpr operator bool() const { return code != NANOARROW_OK; }
+};
+
+class ViewStream {
+ public:
+  ViewStream(ArrowArrayStream* stream, ArrowErrorCode* code, ArrowError* error)
+      : range_{Next{this, stream, ArrowArray{}}}, code_{code}, error_{error} {}
+
+  ViewStream(ArrowArrayStream* stream, ErrorWithCode* error)
+      : ViewStream{stream, &error->code, error} {}
+
+ private:
+  struct Next {
+    ViewStream* self;
+    ArrowArrayStream* stream;
+    ArrowArray array;

Review Comment:
   Could this be `UniqueArray`? That would collapse your `if (array.release) 
...` to `array.reset()`. If it can't be, does `Next` need a deleter to ensure 
that `array` doesn't leak?



##########
src/nanoarrow/nanoarrow.hpp:
##########
@@ -548,6 +558,366 @@ class VectorArrayStream {
 
 /// @}
 
+struct Nothing {};
+constexpr Nothing NA{};
+
+template <typename T>
+class Maybe {
+ public:
+  Maybe() : nothing_{Nothing{}}, is_something_{false} {}
+  Maybe(Nothing) : Maybe{} {}
+
+  Maybe(T something)  // NOLINT(google-explicit-constructor)
+      : something_{something}, is_something_{true} {}
+
+  explicit constexpr operator bool() const { return is_something_; }
+
+  const T& operator*() const { return something_; }
+
+  friend inline bool operator==(Maybe l, Maybe r) {
+    if (l.is_something_ != r.is_something_) return false;
+    return l.is_something_ ? l.something_ == r.something_ : true;
+  }
+  friend inline bool operator!=(Maybe l, Maybe r) { return !(l == r); }
+
+  T value_or(T val) const { return is_something_ ? something_ : val; }
+
+ private:
+  static_assert(std::is_trivially_copyable<T>::value, "");
+  static_assert(std::is_trivially_destructible<T>::value, "");
+
+  union {
+    Nothing nothing_;
+    T something_;
+  };
+  bool is_something_;
+};
+
+template <typename Get>
+struct RandomAccessRange {
+  Get get;
+  int64_t size;
+
+  using value_type = decltype(std::declval<Get>()(0));
+
+  struct const_iterator {
+    int64_t i;
+    const RandomAccessRange* range;
+    bool operator==(const_iterator other) const { return i == other.i; }
+    bool operator!=(const_iterator other) const { return i != other.i; }
+    const_iterator& operator++() { return ++i, *this; }
+    value_type operator*() const { return range->get(i); }
+  };
+
+  const_iterator begin() const { return {0, this}; }
+  const_iterator end() const { return {size, this}; }
+};
+
+template <typename Next>
+struct InputRange {
+  Next next;
+  using ValueOrFalsy = decltype(std::declval<Next>()());
+
+  static_assert(std::is_convertible<ValueOrFalsy, bool>::value, "");
+  using value_type = decltype(*std::declval<ValueOrFalsy>());
+
+  struct iterator {
+    InputRange* range;
+    ValueOrFalsy stashed;
+
+    bool operator==(iterator other) const {
+      if (stashed) false;
+      // The stashed value is falsy, so the LHS iterator is at the end.
+      // Check if RHS iterator is the end sentinel.
+      return other.range == nullptr;
+    }
+    bool operator!=(iterator other) const { return !(*this == other); }
+
+    iterator& operator++() {
+      stashed = range->next();
+      return *this;
+    }
+    value_type operator*() const { return *stashed; }
+  };
+
+  iterator begin() { return {this}; }
+  iterator end() { return {}; }
+};
+
+// C++17 could do all of this with a lambda
+template <typename T>
+class ViewAs {

Review Comment:
   At first glance I wonder if `ViewAs` is too generic but I also don't have a 
better idea and I also love how it simplifies the tests (`ViewArrayAs`? But 
maybe this could also be used to view a `ArrowBufferView` or `ArrowBuffer`?)



##########
src/nanoarrow/nanoarrow.hpp:
##########
@@ -15,11 +15,21 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <stdexcept>
+#include <sys/wait.h>
 #include <string>
 #include <vector>
 
 #include "nanoarrow.h"
+#include "nanoarrow/nanoarrow_types.h"

Review Comment:
   I think you can remove this one (clangd often gets confused and adds it in). 
sys/wait.h also seems like it may have been added by accident?



##########
src/nanoarrow/nanoarrow.hpp:
##########
@@ -548,6 +558,366 @@ class VectorArrayStream {
 
 /// @}
 
+struct Nothing {};
+constexpr Nothing NA{};
+
+template <typename T>
+class Maybe {
+ public:
+  Maybe() : nothing_{Nothing{}}, is_something_{false} {}
+  Maybe(Nothing) : Maybe{} {}
+
+  Maybe(T something)  // NOLINT(google-explicit-constructor)
+      : something_{something}, is_something_{true} {}
+
+  explicit constexpr operator bool() const { return is_something_; }
+
+  const T& operator*() const { return something_; }
+
+  friend inline bool operator==(Maybe l, Maybe r) {
+    if (l.is_something_ != r.is_something_) return false;
+    return l.is_something_ ? l.something_ == r.something_ : true;
+  }
+  friend inline bool operator!=(Maybe l, Maybe r) { return !(l == r); }
+
+  T value_or(T val) const { return is_something_ ? something_ : val; }
+
+ private:
+  static_assert(std::is_trivially_copyable<T>::value, "");
+  static_assert(std::is_trivially_destructible<T>::value, "");
+
+  union {
+    Nothing nothing_;
+    T something_;
+  };
+  bool is_something_;
+};
+
+template <typename Get>
+struct RandomAccessRange {
+  Get get;
+  int64_t size;
+
+  using value_type = decltype(std::declval<Get>()(0));
+
+  struct const_iterator {
+    int64_t i;
+    const RandomAccessRange* range;
+    bool operator==(const_iterator other) const { return i == other.i; }
+    bool operator!=(const_iterator other) const { return i != other.i; }
+    const_iterator& operator++() { return ++i, *this; }
+    value_type operator*() const { return range->get(i); }
+  };
+
+  const_iterator begin() const { return {0, this}; }
+  const_iterator end() const { return {size, this}; }
+};
+
+template <typename Next>
+struct InputRange {
+  Next next;
+  using ValueOrFalsy = decltype(std::declval<Next>()());
+
+  static_assert(std::is_convertible<ValueOrFalsy, bool>::value, "");
+  using value_type = decltype(*std::declval<ValueOrFalsy>());
+
+  struct iterator {
+    InputRange* range;
+    ValueOrFalsy stashed;
+
+    bool operator==(iterator other) const {
+      if (stashed) false;
+      // The stashed value is falsy, so the LHS iterator is at the end.
+      // Check if RHS iterator is the end sentinel.
+      return other.range == nullptr;
+    }
+    bool operator!=(iterator other) const { return !(*this == other); }
+
+    iterator& operator++() {
+      stashed = range->next();
+      return *this;
+    }
+    value_type operator*() const { return *stashed; }
+  };
+
+  iterator begin() { return {this}; }
+  iterator end() { return {}; }
+};
+
+// C++17 could do all of this with a lambda
+template <typename T>
+class ViewAs {
+ private:
+  struct Get {
+    const uint8_t* validity;
+    const void* values;
+    int64_t offset;
+
+    Maybe<T> operator()(int64_t i) const {
+      i += offset;
+      if (validity == nullptr || ArrowBitGet(validity, i)) {
+        if (std::is_same<T, bool>::value) {
+          return ArrowBitGet(static_cast<const uint8_t*>(values), i);
+        } else {
+          return static_cast<const T*>(values)[i];
+        }
+      }
+      return Nothing{};
+    }
+  };
+
+  RandomAccessRange<Get> range_;
+
+ public:
+  ViewAs(const ArrowArrayView* array_view)
+      : range_{
+            Get{
+                array_view->buffer_views[0].data.as_uint8,
+                array_view->buffer_views[1].data.data,
+                array_view->offset,
+            },
+            array_view->length,
+        } {}
+
+  ViewAs(const ArrowArray* array)
+      : range_{
+            Get{
+                static_cast<const uint8_t*>(array->buffers[0]),
+                array->buffers[1],
+                /*offset=*/0,
+            },
+            array->length,
+        } {}
+
+  using value_type = typename RandomAccessRange<Get>::value_type;
+  using const_iterator = typename RandomAccessRange<Get>::const_iterator;
+  const_iterator begin() const { return range_.begin(); }
+  const_iterator end() const { return range_.end(); }
+  value_type operator[](int64_t i) const { return range_.get(i); }
+};
+
+template <int OffsetSize = 0>
+class ViewAsBytes {
+ private:
+  using OffsetType = typename std::conditional<OffsetSize == 32, int32_t, 
int64_t>::type;
+
+  struct Get {
+    const uint8_t* validity;
+    const void* offsets;
+    const char* data;
+    int64_t offset;
+
+    Maybe<ArrowStringView> operator()(int64_t i) const {
+      i += offset;
+      auto* offsets = static_cast<const OffsetType*>(this->offsets);
+      if (validity == nullptr || ArrowBitGet(validity, i)) {
+        return ArrowStringView{data + offsets[i], offsets[i + 1] - offsets[i]};
+      }
+      return Nothing{};
+    }
+  };
+
+  RandomAccessRange<Get> range_;
+
+ public:
+  ViewAsBytes(const ArrowArrayView* array_view)
+      : range_{
+            Get{
+                array_view->buffer_views[0].data.as_uint8,
+                array_view->buffer_views[1].data.data,
+                array_view->buffer_views[2].data.as_char,
+                array_view->offset,
+            },
+            array_view->length,
+        } {}
+
+  ViewAsBytes(const ArrowArray* array)
+      : range_{
+            Get{
+                static_cast<const uint8_t*>(array->buffers[0]),
+                array->buffers[1],
+                static_cast<const char*>(array->buffers[2]),
+                /*offset=*/0,
+            },
+            array->length,
+        } {}
+
+  using value_type = typename RandomAccessRange<Get>::value_type;
+  using const_iterator = typename RandomAccessRange<Get>::const_iterator;
+  const_iterator begin() const { return range_.begin(); }
+  const_iterator end() const { return range_.end(); }
+  value_type operator[](int64_t i) const { return range_.get(i); }
+};
+
+template <>
+class ViewAsBytes<0> {
+ private:
+  struct Get {
+    const uint8_t* validity;
+    const char* data;
+    int64_t offset;
+    int fixed_size;
+
+    Maybe<ArrowStringView> operator()(int64_t i) const {
+      i += offset;
+      if (validity == nullptr || ArrowBitGet(validity, i)) {
+        return ArrowStringView{data + i * fixed_size, fixed_size};
+      }
+      return Nothing{};
+    }
+  };
+
+  RandomAccessRange<Get> range_;
+
+ public:
+  ViewAsBytes(const ArrowArrayView* array_view, int fixed_size)
+      : range_{
+            Get{
+                array_view->buffer_views[0].data.as_uint8,
+                array_view->buffer_views[1].data.as_char,
+                array_view->offset,
+                fixed_size,
+            },
+            array_view->length,
+        } {}
+
+  ViewAsBytes(const ArrowArray* array, int fixed_size)
+      : range_{
+            Get{
+                static_cast<const uint8_t*>(array->buffers[0]),
+                static_cast<const char*>(array->buffers[1]),
+                /*offset=*/0,
+                fixed_size,
+            },
+            array->length,
+        } {}
+
+  using value_type = typename RandomAccessRange<Get>::value_type;
+  using const_iterator = typename RandomAccessRange<Get>::const_iterator;
+  const_iterator begin() const { return range_.begin(); }
+  const_iterator end() const { return range_.end(); }
+  value_type operator[](int64_t i) const { return range_.get(i); }
+};
+
+struct ErrorWithCode : ArrowError {
+  ErrorWithCode() : ArrowError{}, code{NANOARROW_OK} {}
+  ArrowErrorCode code;
+  constexpr operator bool() const { return code != NANOARROW_OK; }
+};
+
+class ViewStream {
+ public:
+  ViewStream(ArrowArrayStream* stream, ArrowErrorCode* code, ArrowError* error)
+      : range_{Next{this, stream, ArrowArray{}}}, code_{code}, error_{error} {}
+
+  ViewStream(ArrowArrayStream* stream, ErrorWithCode* error)
+      : ViewStream{stream, &error->code, error} {}
+
+ private:
+  struct Next {
+    ViewStream* self;
+    ArrowArrayStream* stream;
+    ArrowArray array;
+
+    ArrowArray* operator()() {
+      if (array.release) {
+        ArrowArrayRelease(&array);
+      }
+      *self->code_ = ArrowArrayStreamGetNext(stream, &array, self->error_);
+      if (*self->code_ != NANOARROW_OK) {
+        NANOARROW_DCHECK(array.release == nullptr);
+        return nullptr;
+      }
+      if (array.release == nullptr) {
+        return nullptr;
+      }
+      return &array;
+    }
+  };
+
+  InputRange<Next> range_;
+  ArrowError* error_;
+  ArrowErrorCode* code_;
+
+ public:
+  using iterator = typename InputRange<Next>::iterator;
+  iterator begin() { return range_.begin(); }
+  iterator end() { return range_.end(); }
+};
+
+#if NANOARROW_HAS_ZIP

Review Comment:
   I love the Zip! I wonder if `dev/benchmarks/c/utils.h` would be a better 
home for this at the moment since that's where it can be used. The range-based 
iterator and the `ViewAs` have direct connections to nanoarrow C structs and so 
this header feels like a good place for them...these are generic helpers and 
I'm not sure that *this* header is the best home (for now).



##########
src/nanoarrow/nanoarrow.hpp:
##########
@@ -548,6 +558,366 @@ class VectorArrayStream {
 
 /// @}
 
+struct Nothing {};
+constexpr Nothing NA{};
+
+template <typename T>
+class Maybe {
+ public:
+  Maybe() : nothing_{Nothing{}}, is_something_{false} {}
+  Maybe(Nothing) : Maybe{} {}
+
+  Maybe(T something)  // NOLINT(google-explicit-constructor)
+      : something_{something}, is_something_{true} {}
+
+  explicit constexpr operator bool() const { return is_something_; }
+
+  const T& operator*() const { return something_; }
+
+  friend inline bool operator==(Maybe l, Maybe r) {
+    if (l.is_something_ != r.is_something_) return false;
+    return l.is_something_ ? l.something_ == r.something_ : true;
+  }
+  friend inline bool operator!=(Maybe l, Maybe r) { return !(l == r); }
+
+  T value_or(T val) const { return is_something_ ? something_ : val; }
+
+ private:
+  static_assert(std::is_trivially_copyable<T>::value, "");
+  static_assert(std::is_trivially_destructible<T>::value, "");
+
+  union {
+    Nothing nothing_;
+    T something_;
+  };
+  bool is_something_;
+};
+
+template <typename Get>
+struct RandomAccessRange {
+  Get get;
+  int64_t size;
+
+  using value_type = decltype(std::declval<Get>()(0));
+
+  struct const_iterator {
+    int64_t i;
+    const RandomAccessRange* range;
+    bool operator==(const_iterator other) const { return i == other.i; }
+    bool operator!=(const_iterator other) const { return i != other.i; }
+    const_iterator& operator++() { return ++i, *this; }
+    value_type operator*() const { return range->get(i); }
+  };
+
+  const_iterator begin() const { return {0, this}; }
+  const_iterator end() const { return {size, this}; }
+};
+
+template <typename Next>
+struct InputRange {
+  Next next;
+  using ValueOrFalsy = decltype(std::declval<Next>()());
+
+  static_assert(std::is_convertible<ValueOrFalsy, bool>::value, "");
+  using value_type = decltype(*std::declval<ValueOrFalsy>());
+
+  struct iterator {
+    InputRange* range;
+    ValueOrFalsy stashed;
+
+    bool operator==(iterator other) const {
+      if (stashed) false;
+      // The stashed value is falsy, so the LHS iterator is at the end.
+      // Check if RHS iterator is the end sentinel.
+      return other.range == nullptr;
+    }
+    bool operator!=(iterator other) const { return !(*this == other); }
+
+    iterator& operator++() {
+      stashed = range->next();
+      return *this;
+    }
+    value_type operator*() const { return *stashed; }
+  };
+
+  iterator begin() { return {this}; }
+  iterator end() { return {}; }
+};
+
+// C++17 could do all of this with a lambda
+template <typename T>
+class ViewAs {
+ private:
+  struct Get {
+    const uint8_t* validity;
+    const void* values;
+    int64_t offset;
+
+    Maybe<T> operator()(int64_t i) const {
+      i += offset;
+      if (validity == nullptr || ArrowBitGet(validity, i)) {
+        if (std::is_same<T, bool>::value) {
+          return ArrowBitGet(static_cast<const uint8_t*>(values), i);
+        } else {
+          return static_cast<const T*>(values)[i];
+        }
+      }
+      return Nothing{};
+    }
+  };
+
+  RandomAccessRange<Get> range_;
+
+ public:
+  ViewAs(const ArrowArrayView* array_view)
+      : range_{
+            Get{
+                array_view->buffer_views[0].data.as_uint8,
+                array_view->buffer_views[1].data.data,
+                array_view->offset,
+            },
+            array_view->length,
+        } {}
+
+  ViewAs(const ArrowArray* array)
+      : range_{
+            Get{
+                static_cast<const uint8_t*>(array->buffers[0]),
+                array->buffers[1],
+                /*offset=*/0,
+            },
+            array->length,
+        } {}
+
+  using value_type = typename RandomAccessRange<Get>::value_type;
+  using const_iterator = typename RandomAccessRange<Get>::const_iterator;
+  const_iterator begin() const { return range_.begin(); }
+  const_iterator end() const { return range_.end(); }
+  value_type operator[](int64_t i) const { return range_.get(i); }
+};
+
+template <int OffsetSize = 0>
+class ViewAsBytes {
+ private:
+  using OffsetType = typename std::conditional<OffsetSize == 32, int32_t, 
int64_t>::type;
+
+  struct Get {
+    const uint8_t* validity;
+    const void* offsets;
+    const char* data;
+    int64_t offset;
+
+    Maybe<ArrowStringView> operator()(int64_t i) const {
+      i += offset;
+      auto* offsets = static_cast<const OffsetType*>(this->offsets);
+      if (validity == nullptr || ArrowBitGet(validity, i)) {
+        return ArrowStringView{data + offsets[i], offsets[i + 1] - offsets[i]};
+      }
+      return Nothing{};
+    }
+  };
+
+  RandomAccessRange<Get> range_;
+
+ public:
+  ViewAsBytes(const ArrowArrayView* array_view)
+      : range_{
+            Get{
+                array_view->buffer_views[0].data.as_uint8,
+                array_view->buffer_views[1].data.data,
+                array_view->buffer_views[2].data.as_char,
+                array_view->offset,
+            },
+            array_view->length,
+        } {}
+
+  ViewAsBytes(const ArrowArray* array)
+      : range_{
+            Get{
+                static_cast<const uint8_t*>(array->buffers[0]),
+                array->buffers[1],
+                static_cast<const char*>(array->buffers[2]),
+                /*offset=*/0,
+            },
+            array->length,
+        } {}
+
+  using value_type = typename RandomAccessRange<Get>::value_type;
+  using const_iterator = typename RandomAccessRange<Get>::const_iterator;
+  const_iterator begin() const { return range_.begin(); }
+  const_iterator end() const { return range_.end(); }
+  value_type operator[](int64_t i) const { return range_.get(i); }
+};
+
+template <>
+class ViewAsBytes<0> {

Review Comment:
   I'm not sure I understand the significance of the 0 here?



##########
src/nanoarrow/nanoarrow.hpp:
##########
@@ -548,6 +558,366 @@ class VectorArrayStream {
 
 /// @}
 
+struct Nothing {};
+constexpr Nothing NA{};
+
+template <typename T>
+class Maybe {
+ public:
+  Maybe() : nothing_{Nothing{}}, is_something_{false} {}
+  Maybe(Nothing) : Maybe{} {}
+
+  Maybe(T something)  // NOLINT(google-explicit-constructor)
+      : something_{something}, is_something_{true} {}
+
+  explicit constexpr operator bool() const { return is_something_; }
+
+  const T& operator*() const { return something_; }
+
+  friend inline bool operator==(Maybe l, Maybe r) {
+    if (l.is_something_ != r.is_something_) return false;
+    return l.is_something_ ? l.something_ == r.something_ : true;
+  }
+  friend inline bool operator!=(Maybe l, Maybe r) { return !(l == r); }
+
+  T value_or(T val) const { return is_something_ ? something_ : val; }
+
+ private:
+  static_assert(std::is_trivially_copyable<T>::value, "");
+  static_assert(std::is_trivially_destructible<T>::value, "");
+
+  union {
+    Nothing nothing_;
+    T something_;
+  };
+  bool is_something_;
+};
+
+template <typename Get>
+struct RandomAccessRange {
+  Get get;
+  int64_t size;
+
+  using value_type = decltype(std::declval<Get>()(0));
+
+  struct const_iterator {
+    int64_t i;
+    const RandomAccessRange* range;
+    bool operator==(const_iterator other) const { return i == other.i; }
+    bool operator!=(const_iterator other) const { return i != other.i; }
+    const_iterator& operator++() { return ++i, *this; }
+    value_type operator*() const { return range->get(i); }
+  };
+
+  const_iterator begin() const { return {0, this}; }
+  const_iterator end() const { return {size, this}; }
+};
+
+template <typename Next>
+struct InputRange {
+  Next next;
+  using ValueOrFalsy = decltype(std::declval<Next>()());
+
+  static_assert(std::is_convertible<ValueOrFalsy, bool>::value, "");
+  using value_type = decltype(*std::declval<ValueOrFalsy>());
+
+  struct iterator {
+    InputRange* range;
+    ValueOrFalsy stashed;
+
+    bool operator==(iterator other) const {
+      if (stashed) false;
+      // The stashed value is falsy, so the LHS iterator is at the end.
+      // Check if RHS iterator is the end sentinel.
+      return other.range == nullptr;
+    }
+    bool operator!=(iterator other) const { return !(*this == other); }
+
+    iterator& operator++() {
+      stashed = range->next();
+      return *this;
+    }
+    value_type operator*() const { return *stashed; }
+  };
+
+  iterator begin() { return {this}; }
+  iterator end() { return {}; }
+};
+
+// C++17 could do all of this with a lambda
+template <typename T>
+class ViewAs {
+ private:
+  struct Get {
+    const uint8_t* validity;
+    const void* values;
+    int64_t offset;
+
+    Maybe<T> operator()(int64_t i) const {
+      i += offset;
+      if (validity == nullptr || ArrowBitGet(validity, i)) {
+        if (std::is_same<T, bool>::value) {
+          return ArrowBitGet(static_cast<const uint8_t*>(values), i);
+        } else {
+          return static_cast<const T*>(values)[i];
+        }
+      }
+      return Nothing{};
+    }
+  };
+
+  RandomAccessRange<Get> range_;
+
+ public:
+  ViewAs(const ArrowArrayView* array_view)
+      : range_{
+            Get{
+                array_view->buffer_views[0].data.as_uint8,
+                array_view->buffer_views[1].data.data,
+                array_view->offset,
+            },
+            array_view->length,
+        } {}
+
+  ViewAs(const ArrowArray* array)
+      : range_{
+            Get{
+                static_cast<const uint8_t*>(array->buffers[0]),
+                array->buffers[1],
+                /*offset=*/0,
+            },
+            array->length,
+        } {}
+
+  using value_type = typename RandomAccessRange<Get>::value_type;
+  using const_iterator = typename RandomAccessRange<Get>::const_iterator;
+  const_iterator begin() const { return range_.begin(); }
+  const_iterator end() const { return range_.end(); }
+  value_type operator[](int64_t i) const { return range_.get(i); }
+};
+
+template <int OffsetSize = 0>
+class ViewAsBytes {
+ private:
+  using OffsetType = typename std::conditional<OffsetSize == 32, int32_t, 
int64_t>::type;
+
+  struct Get {
+    const uint8_t* validity;
+    const void* offsets;
+    const char* data;
+    int64_t offset;
+
+    Maybe<ArrowStringView> operator()(int64_t i) const {
+      i += offset;
+      auto* offsets = static_cast<const OffsetType*>(this->offsets);
+      if (validity == nullptr || ArrowBitGet(validity, i)) {
+        return ArrowStringView{data + offsets[i], offsets[i + 1] - offsets[i]};
+      }
+      return Nothing{};
+    }
+  };
+
+  RandomAccessRange<Get> range_;
+
+ public:
+  ViewAsBytes(const ArrowArrayView* array_view)
+      : range_{
+            Get{
+                array_view->buffer_views[0].data.as_uint8,
+                array_view->buffer_views[1].data.data,
+                array_view->buffer_views[2].data.as_char,
+                array_view->offset,
+            },
+            array_view->length,
+        } {}
+
+  ViewAsBytes(const ArrowArray* array)
+      : range_{
+            Get{
+                static_cast<const uint8_t*>(array->buffers[0]),
+                array->buffers[1],
+                static_cast<const char*>(array->buffers[2]),
+                /*offset=*/0,
+            },
+            array->length,
+        } {}
+
+  using value_type = typename RandomAccessRange<Get>::value_type;
+  using const_iterator = typename RandomAccessRange<Get>::const_iterator;
+  const_iterator begin() const { return range_.begin(); }
+  const_iterator end() const { return range_.end(); }
+  value_type operator[](int64_t i) const { return range_.get(i); }
+};
+
+template <>
+class ViewAsBytes<0> {
+ private:
+  struct Get {
+    const uint8_t* validity;
+    const char* data;
+    int64_t offset;
+    int fixed_size;
+
+    Maybe<ArrowStringView> operator()(int64_t i) const {
+      i += offset;
+      if (validity == nullptr || ArrowBitGet(validity, i)) {
+        return ArrowStringView{data + i * fixed_size, fixed_size};
+      }
+      return Nothing{};
+    }
+  };
+
+  RandomAccessRange<Get> range_;
+
+ public:
+  ViewAsBytes(const ArrowArrayView* array_view, int fixed_size)
+      : range_{
+            Get{
+                array_view->buffer_views[0].data.as_uint8,
+                array_view->buffer_views[1].data.as_char,
+                array_view->offset,
+                fixed_size,
+            },
+            array_view->length,
+        } {}
+
+  ViewAsBytes(const ArrowArray* array, int fixed_size)
+      : range_{
+            Get{
+                static_cast<const uint8_t*>(array->buffers[0]),
+                static_cast<const char*>(array->buffers[1]),
+                /*offset=*/0,
+                fixed_size,
+            },
+            array->length,
+        } {}
+
+  using value_type = typename RandomAccessRange<Get>::value_type;
+  using const_iterator = typename RandomAccessRange<Get>::const_iterator;
+  const_iterator begin() const { return range_.begin(); }
+  const_iterator end() const { return range_.end(); }
+  value_type operator[](int64_t i) const { return range_.get(i); }
+};
+
+struct ErrorWithCode : ArrowError {
+  ErrorWithCode() : ArrowError{}, code{NANOARROW_OK} {}
+  ArrowErrorCode code;
+  constexpr operator bool() const { return code != NANOARROW_OK; }
+};

Review Comment:
   The error handling is definitely the most annoying part of having the 
range-based for be perfect (in the absence of exceptions). It's not perfect, but
   
   ```cpp
   ViewStream viewer(stream, &error);
   for (ArrowArray* item : viewer) {
     // stuff
   }
   NANOARROW_RETURN_NOT_OK(viewer.code());
   ```
   
   ...is the same number of lines as 
   
   ```cpp
   ErrorWithCode error;
   for (ArrowArray* item : ViewStream(stream, &error)) {
     // stuff
   }
   NANOARROW_RETURN_NOT_OK(error.code);
   ```
   
   ...and invents one less concept. It would be a mega bonus if we could crash 
(in debug mode) if somebody forgets to check the return code (not checking the 
return code is a frequent source of crashes at unrelated lines of code that has 
been a bit of a struggle to communicate to users).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to