pitrou commented on code in PR #13364: URL: https://github.com/apache/arrow/pull/13364#discussion_r896663585
########## cpp/src/arrow/array/data.h: ########## @@ -242,6 +245,131 @@ struct ARROW_EXPORT ArrayData { std::shared_ptr<ArrayData> dictionary; }; +/// \brief A non-owning Buffer reference +struct ARROW_EXPORT BufferSpan { + // It is the user of this class's responsibility to ensure that + // buffers that were const originally are not written to + // accidentally. + uint8_t* data = NULLPTR; + int64_t size = 0; + // Pointer back to buffer that owns this memory + const std::shared_ptr<Buffer>* owner = NULLPTR; +}; + +/// \brief EXPERIMENTAL: A non-owning ArrayData reference that is cheaply +/// copyable and does not contain any shared_ptr objects. Do not use in public +/// APIs aside from compute kernels for now +struct ARROW_EXPORT ArraySpan { + const DataType* type = NULLPTR; + int64_t length = 0; + mutable int64_t null_count = kUnknownNullCount; + int64_t offset = 0; + BufferSpan buffers[3]; + + ArraySpan() = default; + + explicit ArraySpan(const DataType* type, int64_t length) : type(type), length(length) {} + + ArraySpan(const ArrayData& data) { // NOLINT implicit conversion + SetMembers(data); + } + ArraySpan(const Scalar& data) { // NOLINT implicit converstion + FillFromScalar(data); + } + + /// If dictionary-encoded, put dictionary in the first entry + std::vector<ArraySpan> child_data; + + /// \brief Populate ArraySpan to look like an array of length 1 pointing at + /// the data members of a Scalar value + void FillFromScalar(const Scalar& value); + + void SetMembers(const ArrayData& data); + + void SetBuffer(int index, const std::shared_ptr<Buffer>& buffer) { + this->buffers[index].data = const_cast<uint8_t*>(buffer->data()); + this->buffers[index].size = buffer->size(); + this->buffers[index].owner = &buffer; + } + + void ClearBuffer(int index) { + this->buffers[index].data = NULLPTR; + this->buffers[index].size = 0; + this->buffers[index].owner = NULLPTR; + } + + const ArraySpan& dictionary() const { return child_data[0]; } + + /// \brief Return the number 
of buffers (out of 3) that are used to + /// constitute this array + int num_buffers() const; + + // Access a buffer's data as a typed C pointer + template <typename T> + inline T* GetValues(int i, int64_t absolute_offset) { + return reinterpret_cast<T*>(buffers[i].data) + absolute_offset; + } + + template <typename T> + inline T* GetValues(int i) { + return GetValues<T>(i, this->offset); + } + + // Access a buffer's data as a typed C pointer + template <typename T> + inline const T* GetValues(int i, int64_t absolute_offset) const { + return reinterpret_cast<const T*>(buffers[i].data) + absolute_offset; + } + + template <typename T> + inline const T* GetValues(int i) const { + return GetValues<T>(i, this->offset); + } + + bool IsNull(int64_t i) const { + return ((this->buffers[0].data != NULLPTR) + ? !bit_util::GetBit(this->buffers[0].data, i + this->offset) + : this->null_count == this->length); + } + + bool IsValid(int64_t i) const { + return ((this->buffers[0].data != NULLPTR) + ? bit_util::GetBit(this->buffers[0].data, i + this->offset) + : this->null_count != this->length); + } Review Comment: This is incorrect for union arrays, where the first buffer contains the union type codes. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org