Repository: arrow Updated Branches: refs/heads/master 841709627 -> ed6ec3b76
http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/type.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index 4fd50b7..589bdad 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -20,6 +20,8 @@ #include <sstream> #include <string> +#include "arrow/util/status.h" + namespace arrow { std::string Field::ToString() const { @@ -44,9 +46,24 @@ bool DataType::Equals(const DataType* other) const { return equals; } +std::string BooleanType::ToString() const { + return name(); +} + +FloatingPointMeta::Precision HalfFloatType::precision() const { + return FloatingPointMeta::HALF; +} + +FloatingPointMeta::Precision FloatType::precision() const { + return FloatingPointMeta::SINGLE; +} + +FloatingPointMeta::Precision DoubleType::precision() const { + return FloatingPointMeta::DOUBLE; +} + std::string StringType::ToString() const { - std::string result(name()); - return result; + return std::string("string"); } std::string ListType::ToString() const { @@ -56,7 +73,7 @@ std::string ListType::ToString() const { } std::string BinaryType::ToString() const { - return std::string(name()); + return std::string("binary"); } std::string StructType::ToString() const { @@ -71,4 +88,103 @@ std::string StructType::ToString() const { return s.str(); } +std::string UnionType::ToString() const { + std::stringstream s; + + if (mode == UnionMode::SPARSE) { + s << "union[sparse]<"; + } else { + s << "union[dense]<"; + } + + for (size_t i = 0; i < children_.size(); ++i) { + if (i) { s << ", "; } + s << children_[i]->ToString(); + } + s << ">"; + return s.str(); +} + +int NullType::bit_width() const { + return 0; +} + +std::string NullType::ToString() const { + return name(); +} + +// Visitors and template instantiation + +#define ACCEPT_VISITOR(TYPE) \ + Status TYPE::Accept(TypeVisitor* visitor) const { return visitor->Visit(*this); } + +ACCEPT_VISITOR(NullType); 
+ACCEPT_VISITOR(BooleanType); +ACCEPT_VISITOR(BinaryType); +ACCEPT_VISITOR(StringType); +ACCEPT_VISITOR(ListType); +ACCEPT_VISITOR(StructType); +ACCEPT_VISITOR(DecimalType); +ACCEPT_VISITOR(UnionType); +ACCEPT_VISITOR(DateType); +ACCEPT_VISITOR(TimeType); +ACCEPT_VISITOR(TimestampType); +ACCEPT_VISITOR(IntervalType); + +#define TYPE_FACTORY(NAME, KLASS) \ + std::shared_ptr<DataType> NAME() { \ + static std::shared_ptr<DataType> result = std::make_shared<KLASS>(); \ + return result; \ + } + +TYPE_FACTORY(null, NullType); +TYPE_FACTORY(boolean, BooleanType); +TYPE_FACTORY(int8, Int8Type); +TYPE_FACTORY(uint8, UInt8Type); +TYPE_FACTORY(int16, Int16Type); +TYPE_FACTORY(uint16, UInt16Type); +TYPE_FACTORY(int32, Int32Type); +TYPE_FACTORY(uint32, UInt32Type); +TYPE_FACTORY(int64, Int64Type); +TYPE_FACTORY(uint64, UInt64Type); +TYPE_FACTORY(float16, HalfFloatType); +TYPE_FACTORY(float32, FloatType); +TYPE_FACTORY(float64, DoubleType); +TYPE_FACTORY(utf8, StringType); +TYPE_FACTORY(binary, BinaryType); +TYPE_FACTORY(date, DateType); + +std::shared_ptr<DataType> timestamp(TimeUnit unit) { + static std::shared_ptr<DataType> result = std::make_shared<TimestampType>(); + return result; +} + +std::shared_ptr<DataType> time(TimeUnit unit) { + static std::shared_ptr<DataType> result = std::make_shared<TimeType>(); + return result; +} + +std::shared_ptr<DataType> list(const std::shared_ptr<DataType>& value_type) { + return std::make_shared<ListType>(value_type); +} + +std::shared_ptr<DataType> list(const std::shared_ptr<Field>& value_field) { + return std::make_shared<ListType>(value_field); +} + +std::shared_ptr<DataType> struct_(const std::vector<std::shared_ptr<Field>>& fields) { + return std::make_shared<StructType>(fields); +} + +std::shared_ptr<DataType> ARROW_EXPORT union_( + const std::vector<std::shared_ptr<Field>>& child_fields, + const std::vector<uint8_t>& type_ids, UnionMode mode) { + return std::make_shared<UnionType>(child_fields, type_ids, mode); +} + 
+std::shared_ptr<Field> field( + const std::string& name, const TypePtr& type, bool nullable, int64_t dictionary) { + return std::make_shared<Field>(name, type, nullable, dictionary); +} + } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/type.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index ea8516f..5b4d7bc 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -23,7 +23,9 @@ #include <string> #include <vector> +#include "arrow/type_fwd.h" #include "arrow/util/macros.h" +#include "arrow/util/status.h" #include "arrow/util/visibility.h" namespace arrow { @@ -50,17 +52,20 @@ struct Type { UINT64 = 8, INT64 = 9, + // 2-byte floating point value + HALF_FLOAT = 10, + // 4-byte floating point value - FLOAT = 10, + FLOAT = 11, // 8-byte floating point value - DOUBLE = 11, + DOUBLE = 12, // UTF8 variable-length string as List<Char> STRING = 13, // Variable-length bytes (no guarantee of UTF8-ness) - BINARY = 15, + BINARY = 14, // By default, int32 days since the UNIX epoch DATE = 16, @@ -69,19 +74,16 @@ struct Type { // Default unit millisecond TIMESTAMP = 17, - // Timestamp as double seconds since the UNIX epoch - TIMESTAMP_DOUBLE = 18, - // Exact time encoded with int64, default unit millisecond - TIME = 19, + TIME = 18, + + // YEAR_MONTH or DAY_TIME interval in SQL style + INTERVAL = 19, // Precision- and scale-based decimal type. Storage type depends on the // parameters. 
DECIMAL = 20, - // Decimal value encoded as a text string - DECIMAL_TEXT = 21, - // A list of some logical data type LIST = 30, @@ -89,19 +91,16 @@ struct Type { STRUCT = 31, // Unions of logical types - DENSE_UNION = 32, - SPARSE_UNION = 33, + UNION = 32, - // Union<Null, Int32, Double, String, Bool> - JSON_SCALAR = 50, + // Timestamp as double seconds since the UNIX epoch + TIMESTAMP_DOUBLE = 33, - // User-defined type - USER = 60 + // Decimal value encoded as a text string + DECIMAL_TEXT = 34, }; }; -struct Field; - struct ARROW_EXPORT DataType { Type::type type; @@ -123,15 +122,32 @@ struct ARROW_EXPORT DataType { const std::shared_ptr<Field>& child(int i) const { return children_[i]; } + const std::vector<std::shared_ptr<Field>>& children() const { return children_; } + int num_children() const { return children_.size(); } - virtual int value_size() const { return -1; } + virtual Status Accept(TypeVisitor* visitor) const = 0; virtual std::string ToString() const = 0; }; typedef std::shared_ptr<DataType> TypePtr; +struct ARROW_EXPORT FixedWidthMeta { + virtual int bit_width() const = 0; +}; + +struct ARROW_EXPORT IntegerMeta { + virtual bool is_signed() const = 0; +}; + +struct ARROW_EXPORT FloatingPointMeta { + enum Precision { HALF, SINGLE, DOUBLE }; + virtual Precision precision() const = 0; +}; + +struct NoExtraMeta {}; + // A field is a piece of metadata that includes (for now) a name and a data // type struct ARROW_EXPORT Field { @@ -139,7 +155,7 @@ struct ARROW_EXPORT Field { std::string name; // The field's data type - TypePtr type; + std::shared_ptr<DataType> type; // Fields can be nullable bool nullable; @@ -148,8 +164,8 @@ struct ARROW_EXPORT Field { // 0 means it's not dictionary encoded int64_t dictionary; - Field(const std::string& name, const TypePtr& type, bool nullable = true, - int64_t dictionary = 0) + Field(const std::string& name, const std::shared_ptr<DataType>& type, + bool nullable = true, int64_t dictionary = 0) : name(name), 
type(type), nullable(nullable), dictionary(dictionary) {} bool operator==(const Field& other) const { return this->Equals(other); } @@ -168,78 +184,112 @@ struct ARROW_EXPORT Field { }; typedef std::shared_ptr<Field> FieldPtr; -template <typename Derived> -struct ARROW_EXPORT PrimitiveType : public DataType { - PrimitiveType() : DataType(Derived::type_enum) {} +struct PrimitiveCType : public DataType { + using DataType::DataType; +}; + +template <typename DERIVED, Type::type TYPE_ID, typename C_TYPE> +struct ARROW_EXPORT CTypeImpl : public PrimitiveCType, public FixedWidthMeta { + using c_type = C_TYPE; + static constexpr Type::type type_id = TYPE_ID; + + CTypeImpl() : PrimitiveCType(TYPE_ID) {} + int bit_width() const override { return sizeof(C_TYPE) * 8; } + + Status Accept(TypeVisitor* visitor) const override { + return visitor->Visit(*static_cast<const DERIVED*>(this)); + } + + std::string ToString() const override { return std::string(DERIVED::name()); } +}; + +struct ARROW_EXPORT NullType : public DataType, public FixedWidthMeta { + static constexpr Type::type type_id = Type::NA; + + NullType() : DataType(Type::NA) {} + + int bit_width() const override; + Status Accept(TypeVisitor* visitor) const override; std::string ToString() const override; + + static std::string name() { return "null"; } +}; + +template <typename DERIVED, Type::type TYPE_ID, typename C_TYPE> +struct IntegerTypeImpl : public CTypeImpl<DERIVED, TYPE_ID, C_TYPE>, public IntegerMeta { + bool is_signed() const override { return std::is_signed<C_TYPE>::value; } }; -template <typename Derived> -inline std::string PrimitiveType<Derived>::ToString() const { - std::string result(static_cast<const Derived*>(this)->name()); - return result; -} +struct ARROW_EXPORT BooleanType : public DataType, FixedWidthMeta { + static constexpr Type::type type_id = Type::BOOL; -#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME) \ - typedef C_TYPE c_type; \ - static constexpr Type::type type_enum = 
Type::ENUM; \ - \ - TYPENAME() : PrimitiveType<TYPENAME>() {} \ - \ - virtual int value_size() const { return SIZE; } \ - \ - static const char* name() { return NAME; } + BooleanType() : DataType(Type::BOOL) {} -struct ARROW_EXPORT NullType : public PrimitiveType<NullType> { - PRIMITIVE_DECL(NullType, void, NA, 0, "null"); + Status Accept(TypeVisitor* visitor) const override; + std::string ToString() const override; + + int bit_width() const override { return 1; } + static std::string name() { return "bool"; } }; -struct ARROW_EXPORT BooleanType : public PrimitiveType<BooleanType> { - PRIMITIVE_DECL(BooleanType, uint8_t, BOOL, 1, "bool"); +struct ARROW_EXPORT UInt8Type : public IntegerTypeImpl<UInt8Type, Type::UINT8, uint8_t> { + static std::string name() { return "uint8"; } }; -struct ARROW_EXPORT UInt8Type : public PrimitiveType<UInt8Type> { - PRIMITIVE_DECL(UInt8Type, uint8_t, UINT8, 1, "uint8"); +struct ARROW_EXPORT Int8Type : public IntegerTypeImpl<Int8Type, Type::INT8, int8_t> { + static std::string name() { return "int8"; } }; -struct ARROW_EXPORT Int8Type : public PrimitiveType<Int8Type> { - PRIMITIVE_DECL(Int8Type, int8_t, INT8, 1, "int8"); +struct ARROW_EXPORT UInt16Type + : public IntegerTypeImpl<UInt16Type, Type::UINT16, uint16_t> { + static std::string name() { return "uint16"; } }; -struct ARROW_EXPORT UInt16Type : public PrimitiveType<UInt16Type> { - PRIMITIVE_DECL(UInt16Type, uint16_t, UINT16, 2, "uint16"); +struct ARROW_EXPORT Int16Type : public IntegerTypeImpl<Int16Type, Type::INT16, int16_t> { + static std::string name() { return "int16"; } }; -struct ARROW_EXPORT Int16Type : public PrimitiveType<Int16Type> { - PRIMITIVE_DECL(Int16Type, int16_t, INT16, 2, "int16"); +struct ARROW_EXPORT UInt32Type + : public IntegerTypeImpl<UInt32Type, Type::UINT32, uint32_t> { + static std::string name() { return "uint32"; } }; -struct ARROW_EXPORT UInt32Type : public PrimitiveType<UInt32Type> { - PRIMITIVE_DECL(UInt32Type, uint32_t, UINT32, 4, "uint32"); +struct 
ARROW_EXPORT Int32Type : public IntegerTypeImpl<Int32Type, Type::INT32, int32_t> { + static std::string name() { return "int32"; } }; -struct ARROW_EXPORT Int32Type : public PrimitiveType<Int32Type> { - PRIMITIVE_DECL(Int32Type, int32_t, INT32, 4, "int32"); +struct ARROW_EXPORT UInt64Type + : public IntegerTypeImpl<UInt64Type, Type::UINT64, uint64_t> { + static std::string name() { return "uint64"; } }; -struct ARROW_EXPORT UInt64Type : public PrimitiveType<UInt64Type> { - PRIMITIVE_DECL(UInt64Type, uint64_t, UINT64, 8, "uint64"); +struct ARROW_EXPORT Int64Type : public IntegerTypeImpl<Int64Type, Type::INT64, int64_t> { + static std::string name() { return "int64"; } }; -struct ARROW_EXPORT Int64Type : public PrimitiveType<Int64Type> { - PRIMITIVE_DECL(Int64Type, int64_t, INT64, 8, "int64"); +struct ARROW_EXPORT HalfFloatType + : public CTypeImpl<HalfFloatType, Type::HALF_FLOAT, uint16_t>, + public FloatingPointMeta { + Precision precision() const override; + static std::string name() { return "halffloat"; } }; -struct ARROW_EXPORT FloatType : public PrimitiveType<FloatType> { - PRIMITIVE_DECL(FloatType, float, FLOAT, 4, "float"); +struct ARROW_EXPORT FloatType : public CTypeImpl<FloatType, Type::FLOAT, float>, + public FloatingPointMeta { + Precision precision() const override; + static std::string name() { return "float"; } }; -struct ARROW_EXPORT DoubleType : public PrimitiveType<DoubleType> { - PRIMITIVE_DECL(DoubleType, double, DOUBLE, 8, "double"); +struct ARROW_EXPORT DoubleType : public CTypeImpl<DoubleType, Type::DOUBLE, double>, + public FloatingPointMeta { + Precision precision() const override; + static std::string name() { return "double"; } }; -struct ARROW_EXPORT ListType : public DataType { +struct ARROW_EXPORT ListType : public DataType, public NoExtraMeta { + static constexpr Type::type type_id = Type::LIST; + // List can contain any other logical value type explicit ListType(const std::shared_ptr<DataType>& value_type) : 
ListType(std::make_shared<Field>("item", value_type)) {} @@ -252,16 +302,21 @@ struct ARROW_EXPORT ListType : public DataType { const std::shared_ptr<DataType>& value_type() const { return children_[0]->type; } - static char const* name() { return "list"; } - + Status Accept(TypeVisitor* visitor) const override; std::string ToString() const override; + + static std::string name() { return "list"; } }; // BinaryType represents lists of 1-byte values. -struct ARROW_EXPORT BinaryType : public DataType { +struct ARROW_EXPORT BinaryType : public DataType, public NoExtraMeta { + static constexpr Type::type type_id = Type::BINARY; + BinaryType() : BinaryType(Type::BINARY) {} - static char const* name() { return "binary"; } + + Status Accept(TypeVisitor* visitor) const override; std::string ToString() const override; + static std::string name() { return "binary"; } protected: // Allow subclasses to change the logical type. @@ -270,25 +325,160 @@ struct ARROW_EXPORT BinaryType : public DataType { // UTF-8 encoded strings struct ARROW_EXPORT StringType : public BinaryType { - StringType() : BinaryType(Type::STRING) {} + static constexpr Type::type type_id = Type::STRING; - static char const* name() { return "string"; } + StringType() : BinaryType(Type::STRING) {} + Status Accept(TypeVisitor* visitor) const override; std::string ToString() const override; + static std::string name() { return "utf8"; } }; -struct ARROW_EXPORT StructType : public DataType { +struct ARROW_EXPORT StructType : public DataType, public NoExtraMeta { + static constexpr Type::type type_id = Type::STRUCT; + explicit StructType(const std::vector<std::shared_ptr<Field>>& fields) : DataType(Type::STRUCT) { children_ = fields; } + Status Accept(TypeVisitor* visitor) const override; std::string ToString() const override; + static std::string name() { return "struct"; } +}; + +struct ARROW_EXPORT DecimalType : public DataType { + static constexpr Type::type type_id = Type::DECIMAL; + + explicit 
DecimalType(int precision_, int scale_) + : DataType(Type::DECIMAL), precision(precision_), scale(scale_) {} + int precision; + int scale; + + Status Accept(TypeVisitor* visitor) const override; + std::string ToString() const override; + static std::string name() { return "decimal"; } +}; + +enum class UnionMode : char { SPARSE, DENSE }; + +struct ARROW_EXPORT UnionType : public DataType { + static constexpr Type::type type_id = Type::UNION; + + UnionType(const std::vector<std::shared_ptr<Field>>& child_fields, + const std::vector<uint8_t>& type_ids, UnionMode mode = UnionMode::SPARSE) + : DataType(Type::UNION), mode(mode), type_ids(type_ids) { + children_ = child_fields; + } + + std::string ToString() const override; + static std::string name() { return "union"; } + Status Accept(TypeVisitor* visitor) const override; + + UnionMode mode; + std::vector<uint8_t> type_ids; +}; + +struct ARROW_EXPORT DateType : public DataType, public NoExtraMeta { + static constexpr Type::type type_id = Type::DATE; + + DateType() : DataType(Type::DATE) {} + + Status Accept(TypeVisitor* visitor) const override; + std::string ToString() const override { return name(); } + static std::string name() { return "date"; } +}; + +enum class TimeUnit : char { SECOND = 0, MILLI = 1, MICRO = 2, NANO = 3 }; + +struct ARROW_EXPORT TimeType : public DataType { + static constexpr Type::type type_id = Type::TIME; + using Unit = TimeUnit; + + TimeUnit unit; + + explicit TimeType(TimeUnit unit = TimeUnit::MILLI) : DataType(Type::TIME), unit(unit) {} + TimeType(const TimeType& other) : TimeType(other.unit) {} + + Status Accept(TypeVisitor* visitor) const override; + std::string ToString() const override { return name(); } + static std::string name() { return "time"; } +}; + +struct ARROW_EXPORT TimestampType : public DataType, public FixedWidthMeta { + using Unit = TimeUnit; + + typedef int64_t c_type; + static constexpr Type::type type_id = Type::TIMESTAMP; + + int bit_width() const override { return 
sizeof(int64_t) * 8; } + + TimeUnit unit; + + explicit TimestampType(TimeUnit unit = TimeUnit::MILLI) + : DataType(Type::TIMESTAMP), unit(unit) {} + + TimestampType(const TimestampType& other) : TimestampType(other.unit) {} + + Status Accept(TypeVisitor* visitor) const override; + std::string ToString() const override { return name(); } + static std::string name() { return "timestamp"; } +}; + +// Interval type with a 64-bit c_type, measured in either YEAR_MONTH or +// DAY_TIME units (YEAR_MONTH by default). +struct ARROW_EXPORT IntervalType : public DataType, public FixedWidthMeta { + enum class Unit : char { YEAR_MONTH = 0, DAY_TIME = 1 }; + + typedef int64_t c_type; + static constexpr Type::type type_id = Type::INTERVAL; + + int bit_width() const override { return sizeof(int64_t) * 8; } + + Unit unit; + + explicit IntervalType(Unit unit = Unit::YEAR_MONTH) + : DataType(Type::INTERVAL), unit(unit) {} + + IntervalType(const IntervalType& other) : IntervalType(other.unit) {} + + Status Accept(TypeVisitor* visitor) const override; + // ToString() forwards to name(), so the name must identify this type + // rather than repeat DateType's "date". + std::string ToString() const override { return name(); } + static std::string name() { return "interval"; } }; -// These will be defined elsewhere -template <typename T> -struct TypeTraits {}; +// Factory functions + +std::shared_ptr<DataType> ARROW_EXPORT null(); +std::shared_ptr<DataType> ARROW_EXPORT boolean(); +std::shared_ptr<DataType> ARROW_EXPORT int8(); +std::shared_ptr<DataType> ARROW_EXPORT int16(); +std::shared_ptr<DataType> ARROW_EXPORT int32(); +std::shared_ptr<DataType> ARROW_EXPORT int64(); +std::shared_ptr<DataType> ARROW_EXPORT uint8(); +std::shared_ptr<DataType> ARROW_EXPORT uint16(); +std::shared_ptr<DataType> ARROW_EXPORT uint32(); +std::shared_ptr<DataType> ARROW_EXPORT uint64(); +std::shared_ptr<DataType> ARROW_EXPORT float16(); +std::shared_ptr<DataType> ARROW_EXPORT float32(); +std::shared_ptr<DataType> ARROW_EXPORT float64(); +std::shared_ptr<DataType> ARROW_EXPORT utf8(); +std::shared_ptr<DataType> ARROW_EXPORT binary(); + +std::shared_ptr<DataType> ARROW_EXPORT list(const std::shared_ptr<Field>& value_type); 
+std::shared_ptr<DataType> ARROW_EXPORT list(const std::shared_ptr<DataType>& value_type); + +std::shared_ptr<DataType> ARROW_EXPORT date(); +std::shared_ptr<DataType> ARROW_EXPORT timestamp(TimeUnit unit); +std::shared_ptr<DataType> ARROW_EXPORT time(TimeUnit unit); + +std::shared_ptr<DataType> ARROW_EXPORT struct_( + const std::vector<std::shared_ptr<Field>>& fields); + +std::shared_ptr<DataType> ARROW_EXPORT union_( + const std::vector<std::shared_ptr<Field>>& child_fields, + const std::vector<uint8_t>& type_ids, UnionMode mode = UnionMode::SPARSE); + +std::shared_ptr<Field> ARROW_EXPORT field(const std::string& name, + const std::shared_ptr<DataType>& type, bool nullable = true, int64_t dictionary = 0); } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/type_fwd.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h new file mode 100644 index 0000000..6d660f4 --- /dev/null +++ b/cpp/src/arrow/type_fwd.h @@ -0,0 +1,157 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef ARROW_TYPE_FWD_H +#define ARROW_TYPE_FWD_H + +namespace arrow { + +class Status; + +struct DataType; +class Array; +class ArrayBuilder; +struct Field; + +class Buffer; +class MemoryPool; +class RecordBatch; +class Schema; + +struct NullType; +class NullArray; + +struct BooleanType; +class BooleanArray; +class BooleanBuilder; + +struct BinaryType; +class BinaryArray; +class BinaryBuilder; + +struct StringType; +class StringArray; +class StringBuilder; + +struct ListType; +class ListArray; +class ListBuilder; + +struct StructType; +class StructArray; +class StructBuilder; + +struct DecimalType; +class DecimalArray; + +struct UnionType; +class UnionArray; + +template <typename TypeClass> +class NumericArray; + +template <typename TypeClass> +class NumericBuilder; + +#define _NUMERIC_TYPE_DECL(KLASS) \ + struct KLASS##Type; \ + using KLASS##Array = NumericArray<KLASS##Type>; \ + using KLASS##Builder = NumericBuilder<KLASS##Type>; + +_NUMERIC_TYPE_DECL(Int8); +_NUMERIC_TYPE_DECL(Int16); +_NUMERIC_TYPE_DECL(Int32); +_NUMERIC_TYPE_DECL(Int64); +_NUMERIC_TYPE_DECL(UInt8); +_NUMERIC_TYPE_DECL(UInt16); +_NUMERIC_TYPE_DECL(UInt32); +_NUMERIC_TYPE_DECL(UInt64); +_NUMERIC_TYPE_DECL(HalfFloat); +_NUMERIC_TYPE_DECL(Float); +_NUMERIC_TYPE_DECL(Double); + +#undef _NUMERIC_TYPE_DECL + +struct DateType; +class DateArray; + +struct TimeType; +class TimeArray; + +struct TimestampType; +using TimestampArray = NumericArray<TimestampType>; + +struct IntervalType; +using IntervalArray = NumericArray<IntervalType>; + +// Abstract visitor interface with one Visit overload per concrete type class. +class TypeVisitor { + public: + // Virtual so that implementations can be destroyed through a TypeVisitor*. + virtual ~TypeVisitor() = default; + + virtual Status Visit(const NullType& type) = 0; + virtual Status Visit(const BooleanType& type) = 0; + virtual Status Visit(const Int8Type& type) = 0; + virtual Status Visit(const Int16Type& type) = 0; + virtual Status Visit(const Int32Type& type) = 0; + virtual Status Visit(const Int64Type& type) = 0; + virtual Status Visit(const UInt8Type& type) = 0; + virtual Status Visit(const UInt16Type& type) = 0; + virtual Status Visit(const 
UInt32Type& type) = 0; + virtual Status Visit(const UInt64Type& type) = 0; + virtual Status Visit(const HalfFloatType& type) = 0; + virtual Status Visit(const FloatType& type) = 0; + virtual Status Visit(const DoubleType& type) = 0; + virtual Status Visit(const StringType& type) = 0; + virtual Status Visit(const BinaryType& type) = 0; + virtual Status Visit(const DateType& type) = 0; + virtual Status Visit(const TimeType& type) = 0; + virtual Status Visit(const TimestampType& type) = 0; + virtual Status Visit(const IntervalType& type) = 0; + virtual Status Visit(const DecimalType& type) = 0; + virtual Status Visit(const ListType& type) = 0; + virtual Status Visit(const StructType& type) = 0; + virtual Status Visit(const UnionType& type) = 0; +}; + +// Abstract visitor interface with one Visit overload per concrete array class. +class ArrayVisitor { + public: + // Virtual so that implementations can be destroyed through an ArrayVisitor*. + virtual ~ArrayVisitor() = default; + + virtual Status Visit(const NullArray& array) = 0; + virtual Status Visit(const BooleanArray& array) = 0; + virtual Status Visit(const Int8Array& array) = 0; + virtual Status Visit(const Int16Array& array) = 0; + virtual Status Visit(const Int32Array& array) = 0; + virtual Status Visit(const Int64Array& array) = 0; + virtual Status Visit(const UInt8Array& array) = 0; + virtual Status Visit(const UInt16Array& array) = 0; + virtual Status Visit(const UInt32Array& array) = 0; + virtual Status Visit(const UInt64Array& array) = 0; + virtual Status Visit(const HalfFloatArray& array) = 0; + virtual Status Visit(const FloatArray& array) = 0; + virtual Status Visit(const DoubleArray& array) = 0; + virtual Status Visit(const StringArray& array) = 0; + virtual Status Visit(const BinaryArray& array) = 0; + virtual Status Visit(const DateArray& array) = 0; + virtual Status Visit(const TimeArray& array) = 0; + virtual Status Visit(const TimestampArray& array) = 0; + virtual Status Visit(const IntervalArray& array) = 0; + virtual Status Visit(const DecimalArray& array) = 0; + virtual Status Visit(const ListArray& array) = 0; + virtual Status Visit(const StructArray& array) = 0; + virtual Status 
Visit(const UnionArray& array) = 0; +}; + +} // namespace arrow + +#endif // ARROW_TYPE_FWD_H http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/type_traits.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h new file mode 100644 index 0000000..bbb8074 --- /dev/null +++ b/cpp/src/arrow/type_traits.h @@ -0,0 +1,197 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef ARROW_TYPE_TRAITS_H +#define ARROW_TYPE_TRAITS_H + +#include <type_traits> + +#include "arrow/type_fwd.h" +#include "arrow/util/bit-util.h" + +namespace arrow { + +template <typename T> +struct TypeTraits {}; + +template <> +struct TypeTraits<UInt8Type> { + using ArrayType = UInt8Array; + using BuilderType = UInt8Builder; + static inline int bytes_required(int elements) { return elements; } +}; + +template <> +struct TypeTraits<Int8Type> { + using ArrayType = Int8Array; + using BuilderType = Int8Builder; + static inline int bytes_required(int elements) { return elements; } +}; + +template <> +struct TypeTraits<UInt16Type> { + using ArrayType = UInt16Array; + using BuilderType = UInt16Builder; + + static inline int bytes_required(int elements) { return elements * sizeof(uint16_t); } +}; + +template <> +struct TypeTraits<Int16Type> { + using ArrayType = Int16Array; + using BuilderType = Int16Builder; + + static inline int bytes_required(int elements) { return elements * sizeof(int16_t); } +}; + +template <> +struct TypeTraits<UInt32Type> { + using ArrayType = UInt32Array; + using BuilderType = UInt32Builder; + + static inline int bytes_required(int elements) { return elements * sizeof(uint32_t); } +}; + +template <> +struct TypeTraits<Int32Type> { + using ArrayType = Int32Array; + using BuilderType = Int32Builder; + + static inline int bytes_required(int elements) { return elements * sizeof(int32_t); } +}; + +template <> +struct TypeTraits<UInt64Type> { + using ArrayType = UInt64Array; + using BuilderType = UInt64Builder; + + static inline int bytes_required(int elements) { return elements * sizeof(uint64_t); } +}; + +template <> +struct TypeTraits<Int64Type> { + using ArrayType = Int64Array; + using BuilderType = Int64Builder; + + static inline int bytes_required(int elements) { return elements * sizeof(int64_t); } +}; + +template <> +struct TypeTraits<TimestampType> { + using ArrayType = TimestampArray; + // using BuilderType = TimestampBuilder; + + 
static inline int bytes_required(int elements) { return elements * sizeof(int64_t); } +}; + +template <> +struct TypeTraits<HalfFloatType> { + using ArrayType = HalfFloatArray; + using BuilderType = HalfFloatBuilder; + + static inline int bytes_required(int elements) { return elements * sizeof(uint16_t); } +}; + +template <> +struct TypeTraits<FloatType> { + using ArrayType = FloatArray; + using BuilderType = FloatBuilder; + + static inline int bytes_required(int elements) { return elements * sizeof(float); } +}; + +template <> +struct TypeTraits<DoubleType> { + using ArrayType = DoubleArray; + using BuilderType = DoubleBuilder; + + static inline int bytes_required(int elements) { return elements * sizeof(double); } +}; + +template <> +struct TypeTraits<BooleanType> { + using ArrayType = BooleanArray; + using BuilderType = BooleanBuilder; + + static inline int bytes_required(int elements) { + return BitUtil::BytesForBits(elements); + } +}; + +template <> +struct TypeTraits<StringType> { + using ArrayType = StringArray; + using BuilderType = StringBuilder; +}; + +template <> +struct TypeTraits<BinaryType> { + using ArrayType = BinaryArray; + using BuilderType = BinaryBuilder; +}; + +// Not all type classes have a c_type +template <typename T> +struct as_void { + using type = void; +}; + +// The partial specialization will match if T has the ATTR_NAME member +#define GET_ATTR(ATTR_NAME, DEFAULT) \ + template <typename T, typename Enable = void> \ + struct GetAttr_##ATTR_NAME { \ + using type = DEFAULT; \ + }; \ + \ + template <typename T> \ + struct GetAttr_##ATTR_NAME<T, typename as_void<typename T::ATTR_NAME>::type> { \ + using type = typename T::ATTR_NAME; \ + }; + +GET_ATTR(c_type, void); +GET_ATTR(TypeClass, void); + +#undef GET_ATTR + +#define PRIMITIVE_TRAITS(T) \ + using TypeClass = typename std::conditional<std::is_base_of<DataType, T>::value, T, \ + typename GetAttr_TypeClass<T>::type>::type; \ + using c_type = typename GetAttr_c_type<TypeClass>::type; + 
+template <typename T> +struct IsUnsignedInt { + PRIMITIVE_TRAITS(T); + static constexpr bool value = + std::is_integral<c_type>::value && std::is_unsigned<c_type>::value; +}; + +template <typename T> +struct IsSignedInt { + PRIMITIVE_TRAITS(T); + static constexpr bool value = + std::is_integral<c_type>::value && std::is_signed<c_type>::value; +}; + +template <typename T> +struct IsFloatingPoint { + PRIMITIVE_TRAITS(T); + static constexpr bool value = std::is_floating_point<c_type>::value; +}; + +} // namespace arrow + +#endif // ARROW_TYPE_TRAITS_H http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/CMakeLists.txt b/cpp/src/arrow/types/CMakeLists.txt index 9f78169..6d59acf 100644 --- a/cpp/src/arrow/types/CMakeLists.txt +++ b/cpp/src/arrow/types/CMakeLists.txt @@ -21,7 +21,6 @@ # Headers: top level install(FILES - collection.h construct.h datetime.h decimal.h http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/collection.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/collection.h b/cpp/src/arrow/types/collection.h deleted file mode 100644 index 1712030..0000000 --- a/cpp/src/arrow/types/collection.h +++ /dev/null @@ -1,41 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef ARROW_TYPES_COLLECTION_H -#define ARROW_TYPES_COLLECTION_H - -#include <string> -#include <vector> - -#include "arrow/type.h" - -namespace arrow { - -template <Type::type T> -struct CollectionType : public DataType { - std::vector<TypePtr> child_types_; - - CollectionType() : DataType(T) {} - - const TypePtr& child(int i) const { return child_types_[i]; } - - int num_children() const { return child_types_.size(); } -}; - -} // namespace arrow - -#endif // ARROW_TYPES_COLLECTION_H http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/datetime.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/datetime.h b/cpp/src/arrow/types/datetime.h index 241a126..a8f8639 100644 --- a/cpp/src/arrow/types/datetime.h +++ b/cpp/src/arrow/types/datetime.h @@ -22,41 +22,6 @@ #include "arrow/type.h" -namespace arrow { - -struct DateType : public DataType { - enum class Unit : char { DAY = 0, MONTH = 1, YEAR = 2 }; - - Unit unit; - - explicit DateType(Unit unit = Unit::DAY) : DataType(Type::DATE), unit(unit) {} - - DateType(const DateType& other) : DateType(other.unit) {} - - static char const* name() { return "date"; } -}; - -struct ARROW_EXPORT TimestampType : public DataType { - enum class Unit : char { SECOND = 0, MILLI = 1, MICRO = 2, NANO = 3 }; - - typedef int64_t c_type; - static constexpr Type::type type_enum = Type::TIMESTAMP; - - int value_size() const override { return sizeof(int64_t); } - - Unit unit; - - explicit TimestampType(Unit unit = Unit::MILLI) - : 
DataType(Type::TIMESTAMP), unit(unit) {} - - TimestampType(const TimestampType& other) : TimestampType(other.unit) {} - virtual ~TimestampType() {} - - std::string ToString() const override { return "timestamp"; } - - static char const* name() { return "timestamp"; } -}; - -} // namespace arrow +namespace arrow {} // namespace arrow #endif // ARROW_TYPES_DATETIME_H http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/decimal.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/decimal.h b/cpp/src/arrow/types/decimal.h index 6c497c5..b3ea3a5 100644 --- a/cpp/src/arrow/types/decimal.h +++ b/cpp/src/arrow/types/decimal.h @@ -23,18 +23,6 @@ #include "arrow/type.h" #include "arrow/util/visibility.h" -namespace arrow { - -struct ARROW_EXPORT DecimalType : public DataType { - explicit DecimalType(int precision_, int scale_) - : DataType(Type::DECIMAL), precision(precision_), scale(scale_) {} - int precision; - int scale; - static char const* name() { return "decimal"; } - - std::string ToString() const override; -}; - -} // namespace arrow +namespace arrow {} // namespace arrow #endif // ARROW_TYPES_DECIMAL_H http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/list-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc index 12c5394..cb9a8c1 100644 --- a/cpp/src/arrow/types/list-test.cc +++ b/cpp/src/arrow/types/list-test.cc @@ -141,7 +141,7 @@ TEST_F(TestListBuilder, TestAppendNull) { ASSERT_TRUE(result_->IsNull(0)); ASSERT_TRUE(result_->IsNull(1)); - ASSERT_EQ(0, result_->offsets()[0]); + ASSERT_EQ(0, result_->raw_offsets()[0]); ASSERT_EQ(0, result_->offset(1)); ASSERT_EQ(0, result_->offset(2)); http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/list.cc ---------------------------------------------------------------------- diff --git 
a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc index 4b1e821..d865632 100644 --- a/cpp/src/arrow/types/list.cc +++ b/cpp/src/arrow/types/list.cc @@ -155,4 +155,8 @@ void ListBuilder::Reset() { null_bitmap_ = nullptr; } +Status ListArray::Accept(ArrayVisitor* visitor) const { + return visitor->Visit(*this); +} + } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/list.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h index 9440ffe..bd93e8f 100644 --- a/cpp/src/arrow/types/list.h +++ b/cpp/src/arrow/types/list.h @@ -39,6 +39,8 @@ class MemoryPool; class ARROW_EXPORT ListArray : public Array { public: + using TypeClass = ListType; + ListArray(const TypePtr& type, int32_t length, std::shared_ptr<Buffer> offsets, const ArrayPtr& values, int32_t null_count = 0, std::shared_ptr<Buffer> null_bitmap = nullptr) @@ -56,13 +58,13 @@ class ARROW_EXPORT ListArray : public Array { // Return a shared pointer in case the requestor desires to share ownership // with this array. 
const std::shared_ptr<Array>& values() const { return values_; } - const std::shared_ptr<Buffer> offset_buffer() const { + std::shared_ptr<Buffer> offsets() const { return std::static_pointer_cast<Buffer>(offset_buffer_); } const std::shared_ptr<DataType>& value_type() const { return values_->type(); } - const int32_t* offsets() const { return offsets_; } + const int32_t* raw_offsets() const { return offsets_; } int32_t offset(int i) const { return offsets_[i]; } @@ -76,6 +78,8 @@ class ARROW_EXPORT ListArray : public Array { bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, const ArrayPtr& arr) const override; + Status Accept(ArrayVisitor* visitor) const override; + protected: std::shared_ptr<Buffer> offset_buffer_; const int32_t* offsets_; http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/primitive-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc index e47f6dc..bdc8ec0 100644 --- a/cpp/src/arrow/types/primitive-test.cc +++ b/cpp/src/arrow/types/primitive-test.cc @@ -25,6 +25,7 @@ #include "arrow/builder.h" #include "arrow/test-util.h" #include "arrow/type.h" +#include "arrow/type_traits.h" #include "arrow/types/construct.h" #include "arrow/types/primitive.h" #include "arrow/types/test-common.h" @@ -41,15 +42,15 @@ namespace arrow { class Array; -#define PRIMITIVE_TEST(KLASS, ENUM, NAME) \ - TEST(TypesTest, TestPrimitive_##ENUM) { \ - KLASS tp; \ - \ - ASSERT_EQ(tp.type, Type::ENUM); \ - ASSERT_EQ(tp.name(), string(NAME)); \ - \ - KLASS tp_copy = tp; \ - ASSERT_EQ(tp_copy.type, Type::ENUM); \ +#define PRIMITIVE_TEST(KLASS, ENUM, NAME) \ + TEST(TypesTest, TestPrimitive_##ENUM) { \ + KLASS tp; \ + \ + ASSERT_EQ(tp.type, Type::ENUM); \ + ASSERT_EQ(tp.ToString(), string(NAME)); \ + \ + KLASS tp_copy = tp; \ + ASSERT_EQ(tp_copy.type, Type::ENUM); \ } PRIMITIVE_TEST(Int8Type, INT8, "int8"); @@ 
-243,7 +244,8 @@ void TestPrimitiveBuilder<PBoolean>::Check( } typedef ::testing::Types<PBoolean, PUInt8, PUInt16, PUInt32, PUInt64, PInt8, PInt16, - PInt32, PInt64, PFloat, PDouble> Primitives; + PInt32, PInt64, PFloat, PDouble> + Primitives; TYPED_TEST_CASE(TestPrimitiveBuilder, Primitives); @@ -311,20 +313,6 @@ TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) { ASSERT_EQ(memory_before, this->pool_->bytes_allocated()); } -template <class T, class Builder> -Status MakeArray(const vector<uint8_t>& valid_bytes, const vector<T>& draws, int size, - Builder* builder, ArrayPtr* out) { - // Append the first 1000 - for (int i = 0; i < size; ++i) { - if (valid_bytes[i] > 0) { - RETURN_NOT_OK(builder->Append(draws[i])); - } else { - RETURN_NOT_OK(builder->AppendNull()); - } - } - return builder->Finish(out); -} - TYPED_TEST(TestPrimitiveBuilder, Equality) { DECL_T(); http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/primitive.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc index d2288ba..14667ee 100644 --- a/cpp/src/arrow/types/primitive.cc +++ b/cpp/src/arrow/types/primitive.cc @@ -19,6 +19,7 @@ #include <memory> +#include "arrow/type_traits.h" #include "arrow/util/bit-util.h" #include "arrow/util/buffer.h" #include "arrow/util/logging.h" @@ -48,13 +49,14 @@ bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const { const uint8_t* this_data = raw_data_; const uint8_t* other_data = other.raw_data_; - int value_size = type_->value_size(); - DCHECK_GT(value_size, 0); + auto size_meta = dynamic_cast<const FixedWidthMeta*>(type_.get()); + int value_byte_size = size_meta->bit_width() / 8; + DCHECK_GT(value_byte_size, 0); for (int i = 0; i < length_; ++i) { - if (!IsNull(i) && memcmp(this_data, other_data, value_size)) { return false; } - this_data += value_size; - other_data += value_size; + if (!IsNull(i) && memcmp(this_data, 
other_data, value_byte_size)) { return false; } + this_data += value_byte_size; + other_data += value_byte_size; } return true; } else { @@ -70,6 +72,11 @@ bool PrimitiveArray::Equals(const std::shared_ptr<Array>& arr) const { return EqualsExact(*static_cast<const PrimitiveArray*>(arr.get())); } +template <typename T> +Status NumericArray<T>::Accept(ArrayVisitor* visitor) const { + return visitor->Visit(*this); +} + template class NumericArray<UInt8Type>; template class NumericArray<UInt16Type>; template class NumericArray<UInt32Type>; @@ -79,9 +86,9 @@ template class NumericArray<Int16Type>; template class NumericArray<Int32Type>; template class NumericArray<Int64Type>; template class NumericArray<TimestampType>; +template class NumericArray<HalfFloatType>; template class NumericArray<FloatType>; template class NumericArray<DoubleType>; -template class NumericArray<BooleanType>; template <typename T> Status PrimitiveBuilder<T>::Init(int32_t capacity) { @@ -145,8 +152,65 @@ Status PrimitiveBuilder<T>::Finish(std::shared_ptr<Array>* out) { return Status::OK(); } -template <> -Status PrimitiveBuilder<BooleanType>::Append( +template class PrimitiveBuilder<UInt8Type>; +template class PrimitiveBuilder<UInt16Type>; +template class PrimitiveBuilder<UInt32Type>; +template class PrimitiveBuilder<UInt64Type>; +template class PrimitiveBuilder<Int8Type>; +template class PrimitiveBuilder<Int16Type>; +template class PrimitiveBuilder<Int32Type>; +template class PrimitiveBuilder<Int64Type>; +template class PrimitiveBuilder<TimestampType>; +template class PrimitiveBuilder<HalfFloatType>; +template class PrimitiveBuilder<FloatType>; +template class PrimitiveBuilder<DoubleType>; + +Status BooleanBuilder::Init(int32_t capacity) { + RETURN_NOT_OK(ArrayBuilder::Init(capacity)); + data_ = std::make_shared<PoolBuffer>(pool_); + + int64_t nbytes = BitUtil::BytesForBits(capacity); + RETURN_NOT_OK(data_->Resize(nbytes)); + // TODO(emkornfield) valgrind complains without this + 
memset(data_->mutable_data(), 0, nbytes); + + raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data()); + return Status::OK(); +} + +Status BooleanBuilder::Resize(int32_t capacity) { + // XXX: Set floor size for now + if (capacity < kMinBuilderCapacity) { capacity = kMinBuilderCapacity; } + + if (capacity_ == 0) { + RETURN_NOT_OK(Init(capacity)); + } else { + RETURN_NOT_OK(ArrayBuilder::Resize(capacity)); + const int64_t old_bytes = data_->size(); + const int64_t new_bytes = BitUtil::BytesForBits(capacity); + + RETURN_NOT_OK(data_->Resize(new_bytes)); + raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data()); + memset(data_->mutable_data() + old_bytes, 0, new_bytes - old_bytes); + } + return Status::OK(); +} + +Status BooleanBuilder::Finish(std::shared_ptr<Array>* out) { + const int64_t bytes_required = BitUtil::BytesForBits(length_); + + if (bytes_required > 0 && bytes_required < data_->size()) { + // Trim buffers + RETURN_NOT_OK(data_->Resize(bytes_required)); + } + *out = std::make_shared<BooleanArray>(type_, length_, data_, null_count_, null_bitmap_); + + data_ = null_bitmap_ = nullptr; + capacity_ = length_ = null_count_ = 0; + return Status::OK(); +} + +Status BooleanBuilder::Append( const uint8_t* values, int32_t length, const uint8_t* valid_bytes) { RETURN_NOT_OK(Reserve(length)); @@ -168,19 +232,6 @@ Status PrimitiveBuilder<BooleanType>::Append( return Status::OK(); } -template class PrimitiveBuilder<UInt8Type>; -template class PrimitiveBuilder<UInt16Type>; -template class PrimitiveBuilder<UInt32Type>; -template class PrimitiveBuilder<UInt64Type>; -template class PrimitiveBuilder<Int8Type>; -template class PrimitiveBuilder<Int16Type>; -template class PrimitiveBuilder<Int32Type>; -template class PrimitiveBuilder<Int64Type>; -template class PrimitiveBuilder<TimestampType>; -template class PrimitiveBuilder<FloatType>; -template class PrimitiveBuilder<DoubleType>; -template class PrimitiveBuilder<BooleanType>; - BooleanArray::BooleanArray(int32_t 
length, const std::shared_ptr<Buffer>& data, int32_t null_count, const std::shared_ptr<Buffer>& null_bitmap) : PrimitiveArray( @@ -235,4 +286,8 @@ bool BooleanArray::RangeEquals(int32_t start_idx, int32_t end_idx, return true; } +Status BooleanArray::Accept(ArrayVisitor* visitor) const { + return visitor->Visit(*this); +} + } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/primitive.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h index c71df58..a5a3704 100644 --- a/cpp/src/arrow/types/primitive.h +++ b/cpp/src/arrow/types/primitive.h @@ -26,6 +26,7 @@ #include "arrow/array.h" #include "arrow/builder.h" #include "arrow/type.h" +#include "arrow/type_fwd.h" #include "arrow/types/datetime.h" #include "arrow/util/bit-util.h" #include "arrow/util/buffer.h" @@ -54,9 +55,10 @@ class ARROW_EXPORT PrimitiveArray : public Array { const uint8_t* raw_data_; }; -template <class TypeClass> +template <class TYPE> class ARROW_EXPORT NumericArray : public PrimitiveArray { public: + using TypeClass = TYPE; using value_type = typename TypeClass::c_type; NumericArray(int32_t length, const std::shared_ptr<Buffer>& data, int32_t null_count = 0, const std::shared_ptr<Buffer>& null_bitmap = nullptr) @@ -88,29 +90,15 @@ class ARROW_EXPORT NumericArray : public PrimitiveArray { return reinterpret_cast<const value_type*>(raw_data_); } + Status Accept(ArrayVisitor* visitor) const override; + value_type Value(int i) const { return raw_data()[i]; } }; -#define NUMERIC_ARRAY_DECL(NAME, TypeClass) \ - using NAME = NumericArray<TypeClass>; \ - extern template class ARROW_EXPORT NumericArray<TypeClass>; - -NUMERIC_ARRAY_DECL(UInt8Array, UInt8Type); -NUMERIC_ARRAY_DECL(Int8Array, Int8Type); -NUMERIC_ARRAY_DECL(UInt16Array, UInt16Type); -NUMERIC_ARRAY_DECL(Int16Array, Int16Type); -NUMERIC_ARRAY_DECL(UInt32Array, UInt32Type); 
-NUMERIC_ARRAY_DECL(Int32Array, Int32Type); -NUMERIC_ARRAY_DECL(UInt64Array, UInt64Type); -NUMERIC_ARRAY_DECL(Int64Array, Int64Type); -NUMERIC_ARRAY_DECL(TimestampArray, TimestampType); -NUMERIC_ARRAY_DECL(FloatArray, FloatType); -NUMERIC_ARRAY_DECL(DoubleArray, DoubleType); - template <typename Type> class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder { public: - typedef typename Type::c_type value_type; + using value_type = typename Type::c_type; explicit PrimitiveBuilder(MemoryPool* pool, const TypePtr& type) : ArrayBuilder(pool, type), data_(nullptr) {} @@ -183,101 +171,27 @@ class ARROW_EXPORT NumericBuilder : public PrimitiveBuilder<T> { using PrimitiveBuilder<T>::raw_data_; }; -template <> -struct TypeTraits<UInt8Type> { - typedef UInt8Array ArrayType; - - static inline int bytes_required(int elements) { return elements; } -}; - -template <> -struct TypeTraits<Int8Type> { - typedef Int8Array ArrayType; - - static inline int bytes_required(int elements) { return elements; } -}; - -template <> -struct TypeTraits<UInt16Type> { - typedef UInt16Array ArrayType; - - static inline int bytes_required(int elements) { return elements * sizeof(uint16_t); } -}; - -template <> -struct TypeTraits<Int16Type> { - typedef Int16Array ArrayType; - - static inline int bytes_required(int elements) { return elements * sizeof(int16_t); } -}; - -template <> -struct TypeTraits<UInt32Type> { - typedef UInt32Array ArrayType; - - static inline int bytes_required(int elements) { return elements * sizeof(uint32_t); } -}; - -template <> -struct TypeTraits<Int32Type> { - typedef Int32Array ArrayType; - - static inline int bytes_required(int elements) { return elements * sizeof(int32_t); } -}; - -template <> -struct TypeTraits<UInt64Type> { - typedef UInt64Array ArrayType; - - static inline int bytes_required(int elements) { return elements * sizeof(uint64_t); } -}; - -template <> -struct TypeTraits<Int64Type> { - typedef Int64Array ArrayType; - - static inline int bytes_required(int 
elements) { return elements * sizeof(int64_t); } -}; - -template <> -struct TypeTraits<TimestampType> { - typedef TimestampArray ArrayType; - - static inline int bytes_required(int elements) { return elements * sizeof(int64_t); } -}; -template <> - -struct TypeTraits<FloatType> { - typedef FloatArray ArrayType; - - static inline int bytes_required(int elements) { return elements * sizeof(float); } -}; - -template <> -struct TypeTraits<DoubleType> { - typedef DoubleArray ArrayType; - - static inline int bytes_required(int elements) { return elements * sizeof(double); } -}; - // Builders -typedef NumericBuilder<UInt8Type> UInt8Builder; -typedef NumericBuilder<UInt16Type> UInt16Builder; -typedef NumericBuilder<UInt32Type> UInt32Builder; -typedef NumericBuilder<UInt64Type> UInt64Builder; +using UInt8Builder = NumericBuilder<UInt8Type>; +using UInt16Builder = NumericBuilder<UInt16Type>; +using UInt32Builder = NumericBuilder<UInt32Type>; +using UInt64Builder = NumericBuilder<UInt64Type>; -typedef NumericBuilder<Int8Type> Int8Builder; -typedef NumericBuilder<Int16Type> Int16Builder; -typedef NumericBuilder<Int32Type> Int32Builder; -typedef NumericBuilder<Int64Type> Int64Builder; -typedef NumericBuilder<TimestampType> TimestampBuilder; +using Int8Builder = NumericBuilder<Int8Type>; +using Int16Builder = NumericBuilder<Int16Type>; +using Int32Builder = NumericBuilder<Int32Type>; +using Int64Builder = NumericBuilder<Int64Type>; +using TimestampBuilder = NumericBuilder<TimestampType>; -typedef NumericBuilder<FloatType> FloatBuilder; -typedef NumericBuilder<DoubleType> DoubleBuilder; +using HalfFloatBuilder = NumericBuilder<HalfFloatType>; +using FloatBuilder = NumericBuilder<FloatType>; +using DoubleBuilder = NumericBuilder<DoubleType>; class ARROW_EXPORT BooleanArray : public PrimitiveArray { public: + using TypeClass = BooleanType; + BooleanArray(int32_t length, const std::shared_ptr<Buffer>& data, int32_t null_count = 0, const std::shared_ptr<Buffer>& null_bitmap = 
nullptr); BooleanArray(const TypePtr& type, int32_t length, const std::shared_ptr<Buffer>& data, @@ -288,28 +202,36 @@ class ARROW_EXPORT BooleanArray : public PrimitiveArray { bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, const ArrayPtr& arr) const override; + Status Accept(ArrayVisitor* visitor) const override; + const uint8_t* raw_data() const { return reinterpret_cast<const uint8_t*>(raw_data_); } bool Value(int i) const { return BitUtil::GetBit(raw_data(), i); } }; -template <> -struct TypeTraits<BooleanType> { - typedef BooleanArray ArrayType; - - static inline int bytes_required(int elements) { - return BitUtil::BytesForBits(elements); - } -}; - -class ARROW_EXPORT BooleanBuilder : public PrimitiveBuilder<BooleanType> { +class ARROW_EXPORT BooleanBuilder : public ArrayBuilder { public: explicit BooleanBuilder(MemoryPool* pool, const TypePtr& type) - : PrimitiveBuilder<BooleanType>(pool, type) {} + : ArrayBuilder(pool, type), data_(nullptr) {} virtual ~BooleanBuilder() {} - using PrimitiveBuilder<BooleanType>::Append; + using ArrayBuilder::Advance; + + // Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory + Status AppendNulls(const uint8_t* valid_bytes, int32_t length) { + RETURN_NOT_OK(Reserve(length)); + UnsafeAppendToBitmap(valid_bytes, length); + return Status::OK(); + } + + Status AppendNull() { + RETURN_NOT_OK(Reserve(1)); + UnsafeAppendToBitmap(false); + return Status::OK(); + } + + std::shared_ptr<Buffer> data() const { return data_; } // Scalar append Status Append(bool val) { @@ -324,9 +246,39 @@ class ARROW_EXPORT BooleanBuilder : public PrimitiveBuilder<BooleanType> { return Status::OK(); } - Status Append(uint8_t val) { return Append(static_cast<bool>(val)); } + // Vector append + // + // If passed, valid_bytes is of equal length to values, and any zero byte + // will be considered as a null for that slot + Status Append( + const uint8_t* values, int32_t length, const uint8_t* valid_bytes = 
nullptr); + + Status Finish(std::shared_ptr<Array>* out) override; + Status Init(int32_t capacity) override; + + // Increase the capacity of the builder to accommodate at least the indicated + // number of elements + Status Resize(int32_t capacity) override; + + protected: + std::shared_ptr<PoolBuffer> data_; + uint8_t* raw_data_; }; +// Only instantiate these templates once +extern template class ARROW_EXPORT NumericArray<Int8Type>; +extern template class ARROW_EXPORT NumericArray<UInt8Type>; +extern template class ARROW_EXPORT NumericArray<Int16Type>; +extern template class ARROW_EXPORT NumericArray<UInt16Type>; +extern template class ARROW_EXPORT NumericArray<Int32Type>; +extern template class ARROW_EXPORT NumericArray<UInt32Type>; +extern template class ARROW_EXPORT NumericArray<Int64Type>; +extern template class ARROW_EXPORT NumericArray<UInt64Type>; +extern template class ARROW_EXPORT NumericArray<HalfFloatType>; +extern template class ARROW_EXPORT NumericArray<FloatType>; +extern template class ARROW_EXPORT NumericArray<DoubleType>; +extern template class ARROW_EXPORT NumericArray<TimestampType>; + } // namespace arrow #endif // ARROW_TYPES_PRIMITIVE_H http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/string-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/types/string-test.cc index af87a14..3c4b12b 100644 --- a/cpp/src/arrow/types/string-test.cc +++ b/cpp/src/arrow/types/string-test.cc @@ -47,7 +47,7 @@ TEST(TypesTest, BinaryType) { TEST(TypesTest, TestStringType) { StringType str; ASSERT_EQ(str.type, Type::STRING); - ASSERT_EQ(str.name(), std::string("string")); + ASSERT_EQ(str.ToString(), std::string("string")); } // ---------------------------------------------------------------------- @@ -66,8 +66,8 @@ class TestStringContainer : public ::testing::Test { void MakeArray() { length_ = offsets_.size() - 1; - value_buf_ = 
test::to_buffer(chars_); - offsets_buf_ = test::to_buffer(offsets_); + value_buf_ = test::GetBufferFromVector(chars_); + offsets_buf_ = test::GetBufferFromVector(offsets_); null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_); null_count_ = test::null_count(valid_bytes_); @@ -131,7 +131,7 @@ TEST_F(TestStringContainer, TestGetString) { TEST_F(TestStringContainer, TestEmptyStringComparison) { offsets_ = {0, 0, 0, 0, 0, 0}; - offsets_buf_ = test::to_buffer(offsets_); + offsets_buf_ = test::GetBufferFromVector(offsets_); length_ = offsets_.size() - 1; auto strings_a = std::make_shared<StringArray>( @@ -227,8 +227,8 @@ class TestBinaryContainer : public ::testing::Test { void MakeArray() { length_ = offsets_.size() - 1; - value_buf_ = test::to_buffer(chars_); - offsets_buf_ = test::to_buffer(offsets_); + value_buf_ = test::GetBufferFromVector(chars_); + offsets_buf_ = test::GetBufferFromVector(offsets_); null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_); null_count_ = test::null_count(valid_bytes_); http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/string.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/string.cc b/cpp/src/arrow/types/string.cc index f6d26df..db963df 100644 --- a/cpp/src/arrow/types/string.cc +++ b/cpp/src/arrow/types/string.cc @@ -94,6 +94,10 @@ bool BinaryArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_ return true; } +Status BinaryArray::Accept(ArrayVisitor* visitor) const { + return visitor->Visit(*this); +} + StringArray::StringArray(int32_t length, const std::shared_ptr<Buffer>& offsets, const std::shared_ptr<Buffer>& data, int32_t null_count, const std::shared_ptr<Buffer>& null_bitmap) @@ -104,6 +108,10 @@ Status StringArray::Validate() const { return BinaryArray::Validate(); } +Status StringArray::Accept(ArrayVisitor* visitor) const { + return visitor->Visit(*this); +} + // This used to be a static member variable of 
BinaryBuilder, but it can cause // valgrind to report a (spurious?) memory leak when needed in other shared // libraries. The problem came up while adding explicit visibility to libarrow @@ -122,8 +130,8 @@ Status BinaryBuilder::Finish(std::shared_ptr<Array>* out) { const auto list = std::dynamic_pointer_cast<ListArray>(result); auto values = std::dynamic_pointer_cast<UInt8Array>(list->values()); - *out = std::make_shared<BinaryArray>(list->length(), list->offset_buffer(), - values->data(), list->null_count(), list->null_bitmap()); + *out = std::make_shared<BinaryArray>(list->length(), list->offsets(), values->data(), + list->null_count(), list->null_bitmap()); return Status::OK(); } @@ -134,8 +142,8 @@ Status StringBuilder::Finish(std::shared_ptr<Array>* out) { const auto list = std::dynamic_pointer_cast<ListArray>(result); auto values = std::dynamic_pointer_cast<UInt8Array>(list->values()); - *out = std::make_shared<StringArray>(list->length(), list->offset_buffer(), - values->data(), list->null_count(), list->null_bitmap()); + *out = std::make_shared<StringArray>(list->length(), list->offsets(), values->data(), + list->null_count(), list->null_bitmap()); return Status::OK(); } http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/string.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/string.h b/cpp/src/arrow/types/string.h index aaba49c..c875243 100644 --- a/cpp/src/arrow/types/string.h +++ b/cpp/src/arrow/types/string.h @@ -37,6 +37,8 @@ class MemoryPool; class ARROW_EXPORT BinaryArray : public Array { public: + using TypeClass = BinaryType; + BinaryArray(int32_t length, const std::shared_ptr<Buffer>& offsets, const std::shared_ptr<Buffer>& data, int32_t null_count = 0, const std::shared_ptr<Buffer>& null_bitmap = nullptr); @@ -60,6 +62,8 @@ class ARROW_EXPORT BinaryArray : public Array { std::shared_ptr<Buffer> data() const { return data_buffer_; } std::shared_ptr<Buffer> 
offsets() const { return offset_buffer_; } + const int32_t* raw_offsets() const { return offsets_; } + int32_t offset(int i) const { return offsets_[i]; } // Neither of these functions will perform boundschecking @@ -73,6 +77,8 @@ class ARROW_EXPORT BinaryArray : public Array { Status Validate() const override; + Status Accept(ArrayVisitor* visitor) const override; + private: std::shared_ptr<Buffer> offset_buffer_; const int32_t* offsets_; @@ -83,6 +89,8 @@ class ARROW_EXPORT BinaryArray : public Array { class ARROW_EXPORT StringArray : public BinaryArray { public: + using TypeClass = StringType; + StringArray(int32_t length, const std::shared_ptr<Buffer>& offsets, const std::shared_ptr<Buffer>& data, int32_t null_count = 0, const std::shared_ptr<Buffer>& null_bitmap = nullptr); @@ -96,6 +104,8 @@ class ARROW_EXPORT StringArray : public BinaryArray { } Status Validate() const override; + + Status Accept(ArrayVisitor* visitor) const override; }; // BinaryBuilder : public ListBuilder @@ -109,6 +119,12 @@ class ARROW_EXPORT BinaryBuilder : public ListBuilder { return byte_builder_->Append(value, length); } + Status Append(const char* value, int32_t length) { + return Append(reinterpret_cast<const uint8_t*>(value), length); + } + + Status Append(const std::string& value) { return Append(value.c_str(), value.size()); } + Status Finish(std::shared_ptr<Array>* out) override; protected: @@ -121,13 +137,9 @@ class ARROW_EXPORT StringBuilder : public BinaryBuilder { explicit StringBuilder(MemoryPool* pool, const TypePtr& type) : BinaryBuilder(pool, type) {} - Status Finish(std::shared_ptr<Array>* out) override; - - Status Append(const std::string& value) { return Append(value.c_str(), value.size()); } + using BinaryBuilder::Append; - Status Append(const char* value, int32_t length) { - return BinaryBuilder::Append(reinterpret_cast<const uint8_t*>(value), length); - } + Status Finish(std::shared_ptr<Array>* out) override; Status Append(const std::vector<std::string>& values, 
uint8_t* null_bytes); }; http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/struct-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/struct-test.cc b/cpp/src/arrow/types/struct-test.cc index 8e82c38..197d7d4 100644 --- a/cpp/src/arrow/types/struct-test.cc +++ b/cpp/src/arrow/types/struct-test.cc @@ -80,7 +80,7 @@ void ValidateBasicStructArray(const StructArray* result, ASSERT_EQ(4, list_char_arr->length()); ASSERT_EQ(10, list_char_arr->values()->length()); for (size_t i = 0; i < list_offsets.size(); ++i) { - ASSERT_EQ(list_offsets[i], list_char_arr->offsets()[i]); + ASSERT_EQ(list_offsets[i], list_char_arr->raw_offsets()[i]); } for (size_t i = 0; i < list_values.size(); ++i) { ASSERT_EQ(list_values[i], char_arr->Value(i)); http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/struct.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/struct.cc b/cpp/src/arrow/types/struct.cc index 369c29d..0e0db23 100644 --- a/cpp/src/arrow/types/struct.cc +++ b/cpp/src/arrow/types/struct.cc @@ -87,6 +87,10 @@ Status StructArray::Validate() const { return Status::OK(); } +Status StructArray::Accept(ArrayVisitor* visitor) const { + return visitor->Visit(*this); +} + Status StructBuilder::Finish(std::shared_ptr<Array>* out) { std::vector<std::shared_ptr<Array>> fields(field_builders_.size()); for (size_t i = 0; i < field_builders_.size(); ++i) { http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/struct.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/struct.h b/cpp/src/arrow/types/struct.h index 65b8daf..035af05 100644 --- a/cpp/src/arrow/types/struct.h +++ b/cpp/src/arrow/types/struct.h @@ -31,6 +31,8 @@ namespace arrow { class ARROW_EXPORT StructArray : public Array { public: + using TypeClass = StructType; + StructArray(const 
TypePtr& type, int32_t length, std::vector<ArrayPtr>& field_arrays, int32_t null_count = 0, std::shared_ptr<Buffer> null_bitmap = nullptr) : Array(type, length, null_count, null_bitmap) { @@ -55,6 +57,8 @@ class ARROW_EXPORT StructArray : public Array { bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, const std::shared_ptr<Array>& arr) const override; + Status Accept(ArrayVisitor* visitor) const override; + protected: // The child arrays corresponding to each field of the struct data type. std::vector<ArrayPtr> field_arrays_; http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/test-common.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/test-common.h b/cpp/src/arrow/types/test-common.h index 1957636..6e6ab85 100644 --- a/cpp/src/arrow/types/test-common.h +++ b/cpp/src/arrow/types/test-common.h @@ -24,6 +24,8 @@ #include "gtest/gtest.h" +#include "arrow/array.h" +#include "arrow/builder.h" #include "arrow/test-util.h" #include "arrow/type.h" #include "arrow/util/memory-pool.h" @@ -49,6 +51,20 @@ class TestBuilder : public ::testing::Test { unique_ptr<ArrayBuilder> builder_nn_; }; +template <class T, class Builder> +Status MakeArray(const std::vector<uint8_t>& valid_bytes, const std::vector<T>& values, + int size, Builder* builder, ArrayPtr* out) { + // Append the first 1000 + for (int i = 0; i < size; ++i) { + if (valid_bytes[i] > 0) { + RETURN_NOT_OK(builder->Append(values[i])); + } else { + RETURN_NOT_OK(builder->AppendNull()); + } + } + return builder->Finish(out); +} + } // namespace arrow #endif // ARROW_TYPES_TEST_COMMON_H http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/union.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/union.cc b/cpp/src/arrow/types/union.cc index c891b4a..cc2934b 100644 --- a/cpp/src/arrow/types/union.cc +++ b/cpp/src/arrow/types/union.cc 
@@ -24,25 +24,4 @@ #include "arrow/type.h" -namespace arrow { - -static inline std::string format_union(const std::vector<TypePtr>& child_types) { - std::stringstream s; - s << "union<"; - for (size_t i = 0; i < child_types.size(); ++i) { - if (i) { s << ", "; } - s << child_types[i]->ToString(); - } - s << ">"; - return s.str(); -} - -std::string DenseUnionType::ToString() const { - return format_union(child_types_); -} - -std::string SparseUnionType::ToString() const { - return format_union(child_types_); -} - -} // namespace arrow +namespace arrow {} // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/union.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/union.h b/cpp/src/arrow/types/union.h index d2ee9bd..44f39cc 100644 --- a/cpp/src/arrow/types/union.h +++ b/cpp/src/arrow/types/union.h @@ -24,32 +24,11 @@ #include "arrow/array.h" #include "arrow/type.h" -#include "arrow/types/collection.h" namespace arrow { class Buffer; -struct DenseUnionType : public CollectionType<Type::DENSE_UNION> { - typedef CollectionType<Type::DENSE_UNION> Base; - - explicit DenseUnionType(const std::vector<TypePtr>& child_types) : Base() { - child_types_ = child_types; - } - - virtual std::string ToString() const; -}; - -struct SparseUnionType : public CollectionType<Type::SPARSE_UNION> { - typedef CollectionType<Type::SPARSE_UNION> Base; - - explicit SparseUnionType(const std::vector<TypePtr>& child_types) : Base() { - child_types_ = child_types; - } - - virtual std::string ToString() const; -}; - class UnionArray : public Array { protected: // The data are types encoded as int16 http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/util/logging.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h index 06ee841..b22f07d 100644 --- a/cpp/src/arrow/util/logging.h +++ 
b/cpp/src/arrow/util/logging.h @@ -118,9 +118,9 @@ class CerrLog { class FatalLog : public CerrLog { public: explicit FatalLog(int /* severity */) // NOLINT - : CerrLog(ARROW_FATAL) {} // NOLINT + : CerrLog(ARROW_FATAL){} // NOLINT - [[noreturn]] ~FatalLog() { + [[noreturn]] ~FatalLog() { if (has_logged_) { std::cerr << std::endl; } std::exit(1); } http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/format/Metadata.md ---------------------------------------------------------------------- diff --git a/format/Metadata.md b/format/Metadata.md index 653a4c7..a4878f3 100644 --- a/format/Metadata.md +++ b/format/Metadata.md @@ -98,6 +98,11 @@ Union: "typeIds" : [ /* integer */ ] } ``` + +The `typeIds` field in the Union holds the codes used to denote each type, which +may be different from the index of the child array. This is so that the union +type ids do not have to be enumerated from 0. + Int: ``` {
