Repository: arrow Updated Branches: refs/heads/master a68af9d16 -> a90b5f363
ARROW-512: C++: Add method to check for primitive types Also includes some documentation updates. Author: Uwe L. Korn <uw...@xhochy.com> Closes #304 from xhochy/ARROW-512 and squashes the following commits: cfe9205 [Uwe L. Korn] ARROW-512: C++: Add method to check for primitive types Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/a90b5f36 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/a90b5f36 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/a90b5f36 Branch: refs/heads/master Commit: a90b5f3634bdbd6af01967f288457d07d5f2e2eb Parents: a68af9d Author: Uwe L. Korn <uw...@xhochy.com> Authored: Thu Jan 26 13:20:17 2017 -0500 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Thu Jan 26 13:20:17 2017 -0500 ---------------------------------------------------------------------- cpp/apidoc/Doxyfile | 2 +- cpp/apidoc/index.md | 4 +- cpp/src/arrow/buffer.h | 54 +++++++------- cpp/src/arrow/builder.h | 150 ++++++++++++++++++++------------------- cpp/src/arrow/memory_pool.h | 21 ++++++ cpp/src/arrow/type.h | 37 ++++++++++ 6 files changed, 167 insertions(+), 101 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/a90b5f36/cpp/apidoc/Doxyfile ---------------------------------------------------------------------- diff --git a/cpp/apidoc/Doxyfile b/cpp/apidoc/Doxyfile index 7dc55fe..51f5543 100644 --- a/cpp/apidoc/Doxyfile +++ b/cpp/apidoc/Doxyfile @@ -204,7 +204,7 @@ SHORT_NAMES = NO # description.) # The default value is: NO. -JAVADOC_AUTOBRIEF = NO +JAVADOC_AUTOBRIEF = YES # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. 
If http://git-wip-us.apache.org/repos/asf/arrow/blob/a90b5f36/cpp/apidoc/index.md ---------------------------------------------------------------------- diff --git a/cpp/apidoc/index.md b/cpp/apidoc/index.md index 080f848..fdac496 100644 --- a/cpp/apidoc/index.md +++ b/cpp/apidoc/index.md @@ -38,8 +38,8 @@ this bitmap. As Arrow objects are immutable, there are classes provided that should help you build these objects. To build an array of `int64_t` elements, we can use the -`Int64Builder`. In the following example, we build an array of the range 1 to 8 -where the element that should hold the number 4 is nulled. +`arrow::Int64Builder`. In the following example, we build an array of the range +1 to 8 where the element that should hold the number 4 is nulled. Int64Builder builder(arrow::default_memory_pool(), arrow::int64()); builder.Append(1); http://git-wip-us.apache.org/repos/asf/arrow/blob/a90b5f36/cpp/src/arrow/buffer.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h index ac78808..d43ab03 100644 --- a/cpp/src/arrow/buffer.h +++ b/cpp/src/arrow/buffer.h @@ -35,33 +35,35 @@ class Status; // ---------------------------------------------------------------------- // Buffer classes -// Immutable API for a chunk of bytes which may or may not be owned by the -// class instance. Buffers have two related notions of length: size and -// capacity. Size is the number of bytes that might have valid data. -// Capacity is the number of bytes that where allocated for the buffer in -// total. -// The following invariant is always true: Size < Capacity +/// Immutable API for a chunk of bytes which may or may not be owned by the +/// class instance. +/// +/// Buffers have two related notions of length: size and capacity. Size is +/// the number of bytes that might have valid data. Capacity is the number +/// of bytes that where allocated for the buffer in total. 
+/// +/// The following invariant is always true: Size <= Capacity class ARROW_EXPORT Buffer : public std::enable_shared_from_this<Buffer> { public: Buffer(const uint8_t* data, int64_t size) : is_mutable_(false), data_(data), size_(size), capacity_(size) {} virtual ~Buffer(); - // An offset into data that is owned by another buffer, but we want to be - // able to retain a valid pointer to it even after other shared_ptr's to the - // parent buffer have been destroyed - // - // This method makes no assertions about alignment or padding of the buffer but - // in general we expected buffers to be aligned and padded to 64 bytes. In the future - // we might add utility methods to help determine if a buffer satisfies this contract. + /// An offset into data that is owned by another buffer, but we want to be + /// able to retain a valid pointer to it even after other shared_ptr's to the + /// parent buffer have been destroyed + /// + /// This method makes no assertions about alignment or padding of the buffer but + /// in general we expect buffers to be aligned and padded to 64 bytes. In the future + /// we might add utility methods to help determine if a buffer satisfies this contract. 
Buffer(const std::shared_ptr<Buffer>& parent, int64_t offset, int64_t size); std::shared_ptr<Buffer> get_shared_ptr() { return shared_from_this(); } bool is_mutable() const { return is_mutable_; } - // Return true if both buffers are the same size and contain the same bytes - // up to the number of compared bytes + /// Return true if both buffers are the same size and contain the same bytes + /// up to the number of compared bytes bool Equals(const Buffer& other, int64_t nbytes) const { return this == &other || (size_ >= nbytes && other.size_ >= nbytes && @@ -74,11 +76,11 @@ class ARROW_EXPORT Buffer : public std::enable_shared_from_this<Buffer> { (data_ == other.data_ || !memcmp(data_, other.data_, size_))); } - // Copy section of buffer into a new Buffer + /// Copy a section of the buffer into a new Buffer. Status Copy(int64_t start, int64_t nbytes, MemoryPool* pool, std::shared_ptr<Buffer>* out) const; - // Default memory pool + /// Copy a section of the buffer using the default memory pool into a new Buffer. Status Copy(int64_t start, int64_t nbytes, std::shared_ptr<Buffer>* out) const; int64_t capacity() const { return capacity_; } @@ -101,12 +103,12 @@ class ARROW_EXPORT Buffer : public std::enable_shared_from_this<Buffer> { DISALLOW_COPY_AND_ASSIGN(Buffer); }; -// Construct a view on passed buffer at the indicated offset and length. This -// function cannot fail and does not error checking (except in debug builds) +/// Construct a view on passed buffer at the indicated offset and length. This +/// function cannot fail and does not error checking (except in debug builds) ARROW_EXPORT std::shared_ptr<Buffer> SliceBuffer( const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length); -// A Buffer whose contents can be mutated. May or may not own its data. +/// A Buffer whose contents can be mutated. May or may not own its data. 
class ARROW_EXPORT MutableBuffer : public Buffer { public: MutableBuffer(uint8_t* data, int64_t size) : Buffer(data, size) { @@ -116,7 +118,7 @@ class ARROW_EXPORT MutableBuffer : public Buffer { uint8_t* mutable_data() { return mutable_data_; } - // Get a read-only view of this buffer + /// Get a read-only view of this buffer std::shared_ptr<Buffer> GetImmutableView(); protected: @@ -135,16 +137,16 @@ class ARROW_EXPORT ResizableBuffer : public MutableBuffer { /// decrease. virtual Status Resize(int64_t new_size, bool shrink_to_fit = true) = 0; - // Ensure that buffer has enough memory allocated to fit the indicated - // capacity (and meets the 64 byte padding requirement in Layout.md). - // It does not change buffer's reported size. + /// Ensure that buffer has enough memory allocated to fit the indicated + /// capacity (and meets the 64 byte padding requirement in Layout.md). + /// It does not change buffer's reported size. virtual Status Reserve(int64_t new_capacity) = 0; protected: ResizableBuffer(uint8_t* data, int64_t size) : MutableBuffer(data, size) {} }; -// A Buffer whose lifetime is tied to a particular MemoryPool +/// A Buffer whose lifetime is tied to a particular MemoryPool class ARROW_EXPORT PoolBuffer : public ResizableBuffer { public: explicit PoolBuffer(MemoryPool* pool = nullptr); @@ -162,7 +164,7 @@ class ARROW_EXPORT BufferBuilder { explicit BufferBuilder(MemoryPool* pool) : pool_(pool), data_(nullptr), capacity_(0), size_(0) {} - // Resizes the buffer to the nearest multiple of 64 bytes per Layout.md + /// Resizes the buffer to the nearest multiple of 64 bytes per Layout.md Status Resize(int32_t elements) { if (capacity_ == 0) { buffer_ = std::make_shared<PoolBuffer>(pool_); } RETURN_NOT_OK(buffer_->Resize(elements)); http://git-wip-us.apache.org/repos/asf/arrow/blob/a90b5f36/cpp/src/arrow/builder.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h index 
735bca1..747da7c 100644 --- a/cpp/src/arrow/builder.h +++ b/cpp/src/arrow/builder.h @@ -37,10 +37,11 @@ class Array; static constexpr int32_t kMinBuilderCapacity = 1 << 5; -// Base class for all data array builders. -// This class provides a facilities for incrementally building the null bitmap -// (see Append methods) and as a side effect the current number of slots and -// the null count. +/// Base class for all data array builders. +// +/// This class provides a facilities for incrementally building the null bitmap +/// (see Append methods) and as a side effect the current number of slots and +/// the null count. class ARROW_EXPORT ArrayBuilder { public: explicit ArrayBuilder(MemoryPool* pool, const TypePtr& type) @@ -54,8 +55,8 @@ class ARROW_EXPORT ArrayBuilder { virtual ~ArrayBuilder() = default; - // For nested types. Since the objects are owned by this class instance, we - // skip shared pointers and just return a raw pointer + /// For nested types. Since the objects are owned by this class instance, we + /// skip shared pointers and just return a raw pointer ArrayBuilder* child(int i) { return children_[i].get(); } int num_children() const { return children_.size(); } @@ -64,37 +65,37 @@ class ARROW_EXPORT ArrayBuilder { int32_t null_count() const { return null_count_; } int32_t capacity() const { return capacity_; } - // Append to null bitmap + /// Append to null bitmap Status AppendToBitmap(bool is_valid); - // Vector append. Treat each zero byte as a null. If valid_bytes is null - // assume all of length bits are valid. + /// Vector append. Treat each zero byte as a null. If valid_bytes is null + /// assume all of length bits are valid. Status AppendToBitmap(const uint8_t* valid_bytes, int32_t length); - // Set the next length bits to not null (i.e. valid). + /// Set the next length bits to not null (i.e. valid). Status SetNotNull(int32_t length); - // Allocates initial capacity requirements for the builder. 
In most - // cases subclasses should override and call there parent classes - // method as well. + /// Allocates initial capacity requirements for the builder. In most + /// cases subclasses should override and call there parent classes + /// method as well. virtual Status Init(int32_t capacity); - // Resizes the null_bitmap array. In most - // cases subclasses should override and call there parent classes - // method as well. + /// Resizes the null_bitmap array. In most + /// cases subclasses should override and call there parent classes + /// method as well. virtual Status Resize(int32_t new_bits); - // Ensures there is enough space for adding the number of elements by checking - // capacity and calling Resize if necessary. + /// Ensures there is enough space for adding the number of elements by checking + /// capacity and calling Resize if necessary. Status Reserve(int32_t elements); - // For cases where raw data was memcpy'd into the internal buffers, allows us - // to advance the length of the builder. It is your responsibility to use - // this function responsibly. + /// For cases where raw data was memcpy'd into the internal buffers, allows us + /// to advance the length of the builder. It is your responsibility to use + /// this function responsibly. Status Advance(int32_t elements); std::shared_ptr<PoolBuffer> null_bitmap() const { return null_bitmap_; } - // Creates new array object to hold the contents of the builder and transfers - // ownership of the data. This resets all variables on the builder. + /// Creates new Array object to hold the contents of the builder and transfers + /// ownership of the data. This resets all variables on the builder. 
virtual Status Finish(std::shared_ptr<Array>* out) = 0; std::shared_ptr<DataType> type() const { return type_; } @@ -144,7 +145,7 @@ class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder { using ArrayBuilder::Advance; - // Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory + /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory Status AppendNulls(const uint8_t* valid_bytes, int32_t length) { RETURN_NOT_OK(Reserve(length)); UnsafeAppendToBitmap(valid_bytes, length); @@ -159,18 +160,18 @@ class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder { std::shared_ptr<Buffer> data() const { return data_; } - // Vector append - // - // If passed, valid_bytes is of equal length to values, and any zero byte - // will be considered as a null for that slot + /// Vector append + /// + /// If passed, valid_bytes is of equal length to values, and any zero byte + /// will be considered as a null for that slot Status Append( const value_type* values, int32_t length, const uint8_t* valid_bytes = nullptr); Status Finish(std::shared_ptr<Array>* out) override; Status Init(int32_t capacity) override; - // Increase the capacity of the builder to accommodate at least the indicated - // number of elements + /// Increase the capacity of the builder to accommodate at least the indicated + /// number of elements Status Resize(int32_t capacity) override; protected: @@ -178,6 +179,7 @@ class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder { value_type* raw_data_; }; +/// Base class for all Builders that emit an Array of a scalar numerical type. template <typename T> class ARROW_EXPORT NumericBuilder : public PrimitiveBuilder<T> { public: @@ -189,14 +191,18 @@ class ARROW_EXPORT NumericBuilder : public PrimitiveBuilder<T> { using PrimitiveBuilder<T>::Resize; using PrimitiveBuilder<T>::Reserve; - // Scalar append. + /// Append a single scalar and increase the size if necessary. 
Status Append(value_type val) { RETURN_NOT_OK(ArrayBuilder::Reserve(1)); UnsafeAppend(val); return Status::OK(); } - // Does not capacity-check; make sure to call Reserve beforehand + /// Append a single scalar under the assumption that the underlying Buffer is + /// large enough. + /// + /// This method does not capacity-check; make sure to call Reserve + /// beforehand. void UnsafeAppend(value_type val) { BitUtil::SetBit(null_bitmap_data_, length_); raw_data_[length_++] = val; @@ -235,7 +241,7 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder { using ArrayBuilder::Advance; - // Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory + /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory Status AppendNulls(const uint8_t* valid_bytes, int32_t length) { RETURN_NOT_OK(Reserve(length)); UnsafeAppendToBitmap(valid_bytes, length); @@ -250,7 +256,7 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder { std::shared_ptr<Buffer> data() const { return data_; } - // Scalar append + /// Scalar append Status Append(bool val) { Reserve(1); BitUtil::SetBit(null_bitmap_data_, length_); @@ -263,18 +269,18 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder { return Status::OK(); } - // Vector append - // - // If passed, valid_bytes is of equal length to values, and any zero byte - // will be considered as a null for that slot + /// Vector append + /// + /// If passed, valid_bytes is of equal length to values, and any zero byte + /// will be considered as a null for that slot Status Append( const uint8_t* values, int32_t length, const uint8_t* valid_bytes = nullptr); Status Finish(std::shared_ptr<Array>* out) override; Status Init(int32_t capacity) override; - // Increase the capacity of the builder to accommodate at least the indicated - // number of elements + /// Increase the capacity of the builder to accommodate at least the indicated + /// number of elements Status Resize(int32_t capacity) override; protected: 
@@ -285,26 +291,26 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder { // ---------------------------------------------------------------------- // List builder -// Builder class for variable-length list array value types -// -// To use this class, you must append values to the child array builder and use -// the Append function to delimit each distinct list value (once the values -// have been appended to the child array) or use the bulk API to append -// a sequence of offests and null values. -// -// A note on types. Per arrow/type.h all types in the c++ implementation are -// logical so even though this class always builds list array, this can -// represent multiple different logical types. If no logical type is provided -// at construction time, the class defaults to List<T> where t is taken from the -// value_builder/values that the object is constructed with. +/// Builder class for variable-length list array value types +/// +/// To use this class, you must append values to the child array builder and use +/// the Append function to delimit each distinct list value (once the values +/// have been appended to the child array) or use the bulk API to append +/// a sequence of offests and null values. +/// +/// A note on types. Per arrow/type.h all types in the c++ implementation are +/// logical so even though this class always builds list array, this can +/// represent multiple different logical types. If no logical type is provided +/// at construction time, the class defaults to List<T> where t is taken from the +/// value_builder/values that the object is constructed with. class ARROW_EXPORT ListBuilder : public ArrayBuilder { public: - // Use this constructor to incrementally build the value array along with offsets and - // null bitmap. + /// Use this constructor to incrementally build the value array along with offsets and + /// null bitmap. 
ListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> value_builder, const TypePtr& type = nullptr); - // Use this constructor to build the list with a pre-existing values array + /// Use this constructor to build the list with a pre-existing values array ListBuilder( MemoryPool* pool, std::shared_ptr<Array> values, const TypePtr& type = nullptr); @@ -314,10 +320,10 @@ class ARROW_EXPORT ListBuilder : public ArrayBuilder { Status Resize(int32_t capacity) override; Status Finish(std::shared_ptr<Array>* out) override; - // Vector append - // - // If passed, valid_bytes is of equal length to values, and any zero byte - // will be considered as a null for that slot + /// Vector append + /// + /// If passed, valid_bytes is of equal length to values, and any zero byte + /// will be considered as a null for that slot Status Append( const int32_t* offsets, int32_t length, const uint8_t* valid_bytes = nullptr) { RETURN_NOT_OK(Reserve(length)); @@ -326,10 +332,10 @@ class ARROW_EXPORT ListBuilder : public ArrayBuilder { return Status::OK(); } - // Start a new variable-length list slot - // - // This function should be called before beginning to append elements to the - // value builder + /// Start a new variable-length list slot + /// + /// This function should be called before beginning to append elements to the + /// value builder Status Append(bool is_valid = true) { RETURN_NOT_OK(Reserve(1)); UnsafeAppendToBitmap(is_valid); @@ -396,9 +402,9 @@ class ARROW_EXPORT StringBuilder : public BinaryBuilder { // --------------------------------------------------------------------------------- // StructArray builder -// Append, Resize and Reserve methods are acting on StructBuilder. -// Please make sure all these methods of all child-builders' are consistently -// called to maintain data-structure consistency. +/// Append, Resize and Reserve methods are acting on StructBuilder. 
+/// Please make sure all these methods of all child-builders' are consistently +/// called to maintain data-structure consistency. class ARROW_EXPORT StructBuilder : public ArrayBuilder { public: StructBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type, @@ -409,18 +415,18 @@ class ARROW_EXPORT StructBuilder : public ArrayBuilder { Status Finish(std::shared_ptr<Array>* out) override; - // Null bitmap is of equal length to every child field, and any zero byte - // will be considered as a null for that field, but users must using app- - // end methods or advance methods of the child builders' independently to - // insert data. + /// Null bitmap is of equal length to every child field, and any zero byte + /// will be considered as a null for that field, but users must using app- + /// end methods or advance methods of the child builders' independently to + /// insert data. Status Append(int32_t length, const uint8_t* valid_bytes) { RETURN_NOT_OK(Reserve(length)); UnsafeAppendToBitmap(valid_bytes, length); return Status::OK(); } - // Append an element to the Struct. All child-builders' Append method must - // be called independently to maintain data-structure consistency. + /// Append an element to the Struct. All child-builders' Append method must + /// be called independently to maintain data-structure consistency. Status Append(bool is_valid = true) { RETURN_NOT_OK(Reserve(1)); UnsafeAppendToBitmap(is_valid); http://git-wip-us.apache.org/repos/asf/arrow/blob/a90b5f36/cpp/src/arrow/memory_pool.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/memory_pool.h b/cpp/src/arrow/memory_pool.h index 13a3f12..89477b6 100644 --- a/cpp/src/arrow/memory_pool.h +++ b/cpp/src/arrow/memory_pool.h @@ -26,14 +26,35 @@ namespace arrow { class Status; +/// Base class for memory allocation. +/// +/// Besides tracking the number of allocated bytes, the allocator also should +/// take care of the required 64-byte alignment. 
class ARROW_EXPORT MemoryPool { public: virtual ~MemoryPool(); + /// Allocate a new memory region of at least size bytes. + /// + /// The allocated region shall be 64-byte aligned. virtual Status Allocate(int64_t size, uint8_t** out) = 0; + + /// Resize an already allocated memory section. + /// + /// As most default allocators on a platform don't support aligned + /// reallocation, this function can involve a copy of the underlying data. virtual Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) = 0; + + /// Free an allocated region. + /// + /// @param buffer Pointer to the start of the allocated memory region + /// @param size Allocated size located at buffer. An allocator implementation + /// may use this for tracking the amount of allocated bytes as well as for + /// faster deallocation if supported by its backend. virtual void Free(uint8_t* buffer, int64_t size) = 0; + /// The number of bytes that were allocated and not yet freed through + /// this allocator. 
virtual int64_t bytes_allocated() const = 0; }; http://git-wip-us.apache.org/repos/asf/arrow/blob/a90b5f36/cpp/src/arrow/type.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index c2a762d..77a70d1 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -608,6 +608,43 @@ static inline bool is_floating(Type::type type_id) { return false; } +static inline bool is_primitive(Type::type type_id) { + switch (type_id) { + case Type::NA: + case Type::BOOL: + case Type::UINT8: + case Type::INT8: + case Type::UINT16: + case Type::INT16: + case Type::UINT32: + case Type::INT32: + case Type::UINT64: + case Type::INT64: + case Type::HALF_FLOAT: + case Type::FLOAT: + case Type::DOUBLE: + case Type::DATE: + case Type::TIMESTAMP: + case Type::TIME: + case Type::INTERVAL: + return true; + default: + break; + } + return false; +} + +static inline bool is_binary_like(Type::type type_id) { + switch (type_id) { + case Type::BINARY: + case Type::STRING: + return true; + default: + break; + } + return false; +} + } // namespace arrow #endif // ARROW_TYPE_H