Repository: arrow
Updated Branches:
  refs/heads/master 841709627 -> ed6ec3b76


http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/type.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 4fd50b7..589bdad 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -20,6 +20,8 @@
 #include <sstream>
 #include <string>
 
+#include "arrow/util/status.h"
+
 namespace arrow {
 
 std::string Field::ToString() const {
@@ -44,9 +46,24 @@ bool DataType::Equals(const DataType* other) const {
   return equals;
 }
 
+std::string BooleanType::ToString() const {
+  return name();
+}
+
+FloatingPointMeta::Precision HalfFloatType::precision() const {
+  return FloatingPointMeta::HALF;
+}
+
+FloatingPointMeta::Precision FloatType::precision() const {
+  return FloatingPointMeta::SINGLE;
+}
+
+FloatingPointMeta::Precision DoubleType::precision() const {
+  return FloatingPointMeta::DOUBLE;
+}
+
 std::string StringType::ToString() const {
-  std::string result(name());
-  return result;
+  return std::string("string");
 }
 
 std::string ListType::ToString() const {
@@ -56,7 +73,7 @@ std::string ListType::ToString() const {
 }
 
 std::string BinaryType::ToString() const {
-  return std::string(name());
+  return std::string("binary");
 }
 
 std::string StructType::ToString() const {
@@ -71,4 +88,103 @@ std::string StructType::ToString() const {
   return s.str();
 }
 
+std::string UnionType::ToString() const {
+  std::stringstream s;
+
+  if (mode == UnionMode::SPARSE) {
+    s << "union[sparse]<";
+  } else {
+    s << "union[dense]<";
+  }
+
+  for (size_t i = 0; i < children_.size(); ++i) {
+    if (i) { s << ", "; }
+    s << children_[i]->ToString();
+  }
+  s << ">";
+  return s.str();
+}
+
+int NullType::bit_width() const {
+  return 0;
+}
+
+std::string NullType::ToString() const {
+  return name();
+}
+
+// Visitors and template instantiation
+
+// Defines TYPE::Accept so that it forwards to the TypeVisitor::Visit overload
+// selected by the static type of *this (i.e. const TYPE&).
+#define ACCEPT_VISITOR(TYPE) \
+  Status TYPE::Accept(TypeVisitor* visitor) const { return visitor->Visit(*this); }
+
+ACCEPT_VISITOR(NullType);
+ACCEPT_VISITOR(BooleanType);
+ACCEPT_VISITOR(BinaryType);
+ACCEPT_VISITOR(StringType);
+ACCEPT_VISITOR(ListType);
+ACCEPT_VISITOR(StructType);
+ACCEPT_VISITOR(DecimalType);
+ACCEPT_VISITOR(UnionType);
+ACCEPT_VISITOR(DateType);
+ACCEPT_VISITOR(TimeType);
+ACCEPT_VISITOR(TimestampType);
+ACCEPT_VISITOR(IntervalType);
+
+#define TYPE_FACTORY(NAME, KLASS)                                        \
+  std::shared_ptr<DataType> NAME() {                                     \
+    static std::shared_ptr<DataType> result = std::make_shared<KLASS>(); \
+    return result;                                                       \
+  }
+
+TYPE_FACTORY(null, NullType);
+TYPE_FACTORY(boolean, BooleanType);
+TYPE_FACTORY(int8, Int8Type);
+TYPE_FACTORY(uint8, UInt8Type);
+TYPE_FACTORY(int16, Int16Type);
+TYPE_FACTORY(uint16, UInt16Type);
+TYPE_FACTORY(int32, Int32Type);
+TYPE_FACTORY(uint32, UInt32Type);
+TYPE_FACTORY(int64, Int64Type);
+TYPE_FACTORY(uint64, UInt64Type);
+TYPE_FACTORY(float16, HalfFloatType);
+TYPE_FACTORY(float32, FloatType);
+TYPE_FACTORY(float64, DoubleType);
+TYPE_FACTORY(utf8, StringType);
+TYPE_FACTORY(binary, BinaryType);
+TYPE_FACTORY(date, DateType);
+
+// Returns a new TimestampType that carries the requested unit.
+std::shared_ptr<DataType> timestamp(TimeUnit unit) {
+  return std::make_shared<TimestampType>(unit);
+}
+
+// Returns a new TimeType that carries the requested unit.
+std::shared_ptr<DataType> time(TimeUnit unit) {
+  return std::make_shared<TimeType>(unit);
+}
+
+std::shared_ptr<DataType> list(const std::shared_ptr<DataType>& value_type) {
+  return std::make_shared<ListType>(value_type);
+}
+
+std::shared_ptr<DataType> list(const std::shared_ptr<Field>& value_field) {
+  return std::make_shared<ListType>(value_field);
+}
+
+std::shared_ptr<DataType> struct_(const std::vector<std::shared_ptr<Field>>& 
fields) {
+  return std::make_shared<StructType>(fields);
+}
+
+std::shared_ptr<DataType> ARROW_EXPORT union_(
+    const std::vector<std::shared_ptr<Field>>& child_fields,
+    const std::vector<uint8_t>& type_ids, UnionMode mode) {
+  return std::make_shared<UnionType>(child_fields, type_ids, mode);
+}
+
+std::shared_ptr<Field> field(
+    const std::string& name, const TypePtr& type, bool nullable, int64_t 
dictionary) {
+  return std::make_shared<Field>(name, type, nullable, dictionary);
+}
+
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index ea8516f..5b4d7bc 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -23,7 +23,9 @@
 #include <string>
 #include <vector>
 
+#include "arrow/type_fwd.h"
 #include "arrow/util/macros.h"
+#include "arrow/util/status.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
@@ -50,17 +52,20 @@ struct Type {
     UINT64 = 8,
     INT64 = 9,
 
+    // 2-byte floating point value
+    HALF_FLOAT = 10,
+
     // 4-byte floating point value
-    FLOAT = 10,
+    FLOAT = 11,
 
     // 8-byte floating point value
-    DOUBLE = 11,
+    DOUBLE = 12,
 
     // UTF8 variable-length string as List<Char>
     STRING = 13,
 
     // Variable-length bytes (no guarantee of UTF8-ness)
-    BINARY = 15,
+    BINARY = 14,
 
     // By default, int32 days since the UNIX epoch
     DATE = 16,
@@ -69,19 +74,16 @@ struct Type {
     // Default unit millisecond
     TIMESTAMP = 17,
 
-    // Timestamp as double seconds since the UNIX epoch
-    TIMESTAMP_DOUBLE = 18,
-
     // Exact time encoded with int64, default unit millisecond
-    TIME = 19,
+    TIME = 18,
+
+    // YEAR_MONTH or DAY_TIME interval in SQL style
+    INTERVAL = 19,
 
     // Precision- and scale-based decimal type. Storage type depends on the
     // parameters.
     DECIMAL = 20,
 
-    // Decimal value encoded as a text string
-    DECIMAL_TEXT = 21,
-
     // A list of some logical data type
     LIST = 30,
 
@@ -89,19 +91,16 @@ struct Type {
     STRUCT = 31,
 
     // Unions of logical types
-    DENSE_UNION = 32,
-    SPARSE_UNION = 33,
+    UNION = 32,
 
-    // Union<Null, Int32, Double, String, Bool>
-    JSON_SCALAR = 50,
+    // Timestamp as double seconds since the UNIX epoch
+    TIMESTAMP_DOUBLE = 33,
 
-    // User-defined type
-    USER = 60
+    // Decimal value encoded as a text string
+    DECIMAL_TEXT = 34,
   };
 };
 
-struct Field;
-
 struct ARROW_EXPORT DataType {
   Type::type type;
 
@@ -123,15 +122,32 @@ struct ARROW_EXPORT DataType {
 
   const std::shared_ptr<Field>& child(int i) const { return children_[i]; }
 
+  const std::vector<std::shared_ptr<Field>>& children() const { return 
children_; }
+
   int num_children() const { return children_.size(); }
 
-  virtual int value_size() const { return -1; }
+  virtual Status Accept(TypeVisitor* visitor) const = 0;
 
   virtual std::string ToString() const = 0;
 };
 
 typedef std::shared_ptr<DataType> TypePtr;
 
+struct ARROW_EXPORT FixedWidthMeta {
+  virtual int bit_width() const = 0;
+};
+
+struct ARROW_EXPORT IntegerMeta {
+  virtual bool is_signed() const = 0;
+};
+
+struct ARROW_EXPORT FloatingPointMeta {
+  enum Precision { HALF, SINGLE, DOUBLE };
+  virtual Precision precision() const = 0;
+};
+
+struct NoExtraMeta {};
+
 // A field is a piece of metadata that includes (for now) a name and a data
 // type
 struct ARROW_EXPORT Field {
@@ -139,7 +155,7 @@ struct ARROW_EXPORT Field {
   std::string name;
 
   // The field's data type
-  TypePtr type;
+  std::shared_ptr<DataType> type;
 
   // Fields can be nullable
   bool nullable;
@@ -148,8 +164,8 @@ struct ARROW_EXPORT Field {
   // 0 means it's not dictionary encoded
   int64_t dictionary;
 
-  Field(const std::string& name, const TypePtr& type, bool nullable = true,
-      int64_t dictionary = 0)
+  Field(const std::string& name, const std::shared_ptr<DataType>& type,
+      bool nullable = true, int64_t dictionary = 0)
       : name(name), type(type), nullable(nullable), dictionary(dictionary) {}
 
   bool operator==(const Field& other) const { return this->Equals(other); }
@@ -168,78 +184,112 @@ struct ARROW_EXPORT Field {
 };
 typedef std::shared_ptr<Field> FieldPtr;
 
-template <typename Derived>
-struct ARROW_EXPORT PrimitiveType : public DataType {
-  PrimitiveType() : DataType(Derived::type_enum) {}
+struct PrimitiveCType : public DataType {
+  using DataType::DataType;
+};
+
+template <typename DERIVED, Type::type TYPE_ID, typename C_TYPE>
+struct ARROW_EXPORT CTypeImpl : public PrimitiveCType, public FixedWidthMeta {
+  using c_type = C_TYPE;
+  static constexpr Type::type type_id = TYPE_ID;
+
+  CTypeImpl() : PrimitiveCType(TYPE_ID) {}
 
+  int bit_width() const override { return sizeof(C_TYPE) * 8; }
+
+  Status Accept(TypeVisitor* visitor) const override {
+    return visitor->Visit(*static_cast<const DERIVED*>(this));
+  }
+
+  std::string ToString() const override { return std::string(DERIVED::name()); 
}
+};
+
+struct ARROW_EXPORT NullType : public DataType, public FixedWidthMeta {
+  static constexpr Type::type type_id = Type::NA;
+
+  NullType() : DataType(Type::NA) {}
+
+  int bit_width() const override;
+  Status Accept(TypeVisitor* visitor) const override;
   std::string ToString() const override;
+
+  static std::string name() { return "null"; }
+};
+
+template <typename DERIVED, Type::type TYPE_ID, typename C_TYPE>
+struct IntegerTypeImpl : public CTypeImpl<DERIVED, TYPE_ID, C_TYPE>, public 
IntegerMeta {
+  bool is_signed() const override { return std::is_signed<C_TYPE>::value; }
 };
 
-template <typename Derived>
-inline std::string PrimitiveType<Derived>::ToString() const {
-  std::string result(static_cast<const Derived*>(this)->name());
-  return result;
-}
+struct ARROW_EXPORT BooleanType : public DataType, FixedWidthMeta {
+  static constexpr Type::type type_id = Type::BOOL;
 
-#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME) \
-  typedef C_TYPE c_type;                                   \
-  static constexpr Type::type type_enum = Type::ENUM;      \
-                                                           \
-  TYPENAME() : PrimitiveType<TYPENAME>() {}                \
-                                                           \
-  virtual int value_size() const { return SIZE; }          \
-                                                           \
-  static const char* name() { return NAME; }
+  BooleanType() : DataType(Type::BOOL) {}
 
-struct ARROW_EXPORT NullType : public PrimitiveType<NullType> {
-  PRIMITIVE_DECL(NullType, void, NA, 0, "null");
+  Status Accept(TypeVisitor* visitor) const override;
+  std::string ToString() const override;
+
+  int bit_width() const override { return 1; }
+  static std::string name() { return "bool"; }
 };
 
-struct ARROW_EXPORT BooleanType : public PrimitiveType<BooleanType> {
-  PRIMITIVE_DECL(BooleanType, uint8_t, BOOL, 1, "bool");
+struct ARROW_EXPORT UInt8Type : public IntegerTypeImpl<UInt8Type, Type::UINT8, 
uint8_t> {
+  static std::string name() { return "uint8"; }
 };
 
-struct ARROW_EXPORT UInt8Type : public PrimitiveType<UInt8Type> {
-  PRIMITIVE_DECL(UInt8Type, uint8_t, UINT8, 1, "uint8");
+struct ARROW_EXPORT Int8Type : public IntegerTypeImpl<Int8Type, Type::INT8, 
int8_t> {
+  static std::string name() { return "int8"; }
 };
 
-struct ARROW_EXPORT Int8Type : public PrimitiveType<Int8Type> {
-  PRIMITIVE_DECL(Int8Type, int8_t, INT8, 1, "int8");
+struct ARROW_EXPORT UInt16Type
+    : public IntegerTypeImpl<UInt16Type, Type::UINT16, uint16_t> {
+  static std::string name() { return "uint16"; }
 };
 
-struct ARROW_EXPORT UInt16Type : public PrimitiveType<UInt16Type> {
-  PRIMITIVE_DECL(UInt16Type, uint16_t, UINT16, 2, "uint16");
+struct ARROW_EXPORT Int16Type : public IntegerTypeImpl<Int16Type, Type::INT16, 
int16_t> {
+  static std::string name() { return "int16"; }
 };
 
-struct ARROW_EXPORT Int16Type : public PrimitiveType<Int16Type> {
-  PRIMITIVE_DECL(Int16Type, int16_t, INT16, 2, "int16");
+struct ARROW_EXPORT UInt32Type
+    : public IntegerTypeImpl<UInt32Type, Type::UINT32, uint32_t> {
+  static std::string name() { return "uint32"; }
 };
 
-struct ARROW_EXPORT UInt32Type : public PrimitiveType<UInt32Type> {
-  PRIMITIVE_DECL(UInt32Type, uint32_t, UINT32, 4, "uint32");
+struct ARROW_EXPORT Int32Type : public IntegerTypeImpl<Int32Type, Type::INT32, 
int32_t> {
+  static std::string name() { return "int32"; }
 };
 
-struct ARROW_EXPORT Int32Type : public PrimitiveType<Int32Type> {
-  PRIMITIVE_DECL(Int32Type, int32_t, INT32, 4, "int32");
+struct ARROW_EXPORT UInt64Type
+    : public IntegerTypeImpl<UInt64Type, Type::UINT64, uint64_t> {
+  static std::string name() { return "uint64"; }
 };
 
-struct ARROW_EXPORT UInt64Type : public PrimitiveType<UInt64Type> {
-  PRIMITIVE_DECL(UInt64Type, uint64_t, UINT64, 8, "uint64");
+struct ARROW_EXPORT Int64Type : public IntegerTypeImpl<Int64Type, Type::INT64, 
int64_t> {
+  static std::string name() { return "int64"; }
 };
 
-struct ARROW_EXPORT Int64Type : public PrimitiveType<Int64Type> {
-  PRIMITIVE_DECL(Int64Type, int64_t, INT64, 8, "int64");
+struct ARROW_EXPORT HalfFloatType
+    : public CTypeImpl<HalfFloatType, Type::HALF_FLOAT, uint16_t>,
+      public FloatingPointMeta {
+  Precision precision() const override;
+  static std::string name() { return "halffloat"; }
 };
 
-struct ARROW_EXPORT FloatType : public PrimitiveType<FloatType> {
-  PRIMITIVE_DECL(FloatType, float, FLOAT, 4, "float");
+struct ARROW_EXPORT FloatType : public CTypeImpl<FloatType, Type::FLOAT, 
float>,
+                                public FloatingPointMeta {
+  Precision precision() const override;
+  static std::string name() { return "float"; }
 };
 
-struct ARROW_EXPORT DoubleType : public PrimitiveType<DoubleType> {
-  PRIMITIVE_DECL(DoubleType, double, DOUBLE, 8, "double");
+struct ARROW_EXPORT DoubleType : public CTypeImpl<DoubleType, Type::DOUBLE, 
double>,
+                                 public FloatingPointMeta {
+  Precision precision() const override;
+  static std::string name() { return "double"; }
 };
 
-struct ARROW_EXPORT ListType : public DataType {
+struct ARROW_EXPORT ListType : public DataType, public NoExtraMeta {
+  static constexpr Type::type type_id = Type::LIST;
+
   // List can contain any other logical value type
   explicit ListType(const std::shared_ptr<DataType>& value_type)
       : ListType(std::make_shared<Field>("item", value_type)) {}
@@ -252,16 +302,21 @@ struct ARROW_EXPORT ListType : public DataType {
 
   const std::shared_ptr<DataType>& value_type() const { return 
children_[0]->type; }
 
-  static char const* name() { return "list"; }
-
+  Status Accept(TypeVisitor* visitor) const override;
   std::string ToString() const override;
+
+  static std::string name() { return "list"; }
 };
 
 // BinaryType represents lists of 1-byte values.
-struct ARROW_EXPORT BinaryType : public DataType {
+struct ARROW_EXPORT BinaryType : public DataType, public NoExtraMeta {
+  static constexpr Type::type type_id = Type::BINARY;
+
   BinaryType() : BinaryType(Type::BINARY) {}
-  static char const* name() { return "binary"; }
+
+  Status Accept(TypeVisitor* visitor) const override;
   std::string ToString() const override;
+  static std::string name() { return "binary"; }
 
  protected:
   // Allow subclasses to change the logical type.
@@ -270,25 +325,160 @@ struct ARROW_EXPORT BinaryType : public DataType {
 
 // UTF encoded strings
 struct ARROW_EXPORT StringType : public BinaryType {
-  StringType() : BinaryType(Type::STRING) {}
+  static constexpr Type::type type_id = Type::STRING;
 
-  static char const* name() { return "string"; }
+  StringType() : BinaryType(Type::STRING) {}
 
+  Status Accept(TypeVisitor* visitor) const override;
   std::string ToString() const override;
+  static std::string name() { return "utf8"; }
 };
 
-struct ARROW_EXPORT StructType : public DataType {
+struct ARROW_EXPORT StructType : public DataType, public NoExtraMeta {
+  static constexpr Type::type type_id = Type::STRUCT;
+
   explicit StructType(const std::vector<std::shared_ptr<Field>>& fields)
       : DataType(Type::STRUCT) {
     children_ = fields;
   }
 
+  Status Accept(TypeVisitor* visitor) const override;
   std::string ToString() const override;
+  static std::string name() { return "struct"; }
+};
+
+struct ARROW_EXPORT DecimalType : public DataType {
+  static constexpr Type::type type_id = Type::DECIMAL;
+
+  explicit DecimalType(int precision_, int scale_)
+      : DataType(Type::DECIMAL), precision(precision_), scale(scale_) {}
+  int precision;
+  int scale;
+
+  Status Accept(TypeVisitor* visitor) const override;
+  std::string ToString() const override;
+  static std::string name() { return "decimal"; }
+};
+
+enum class UnionMode : char { SPARSE, DENSE };
+
+struct ARROW_EXPORT UnionType : public DataType {
+  static constexpr Type::type type_id = Type::UNION;
+
+  UnionType(const std::vector<std::shared_ptr<Field>>& child_fields,
+      const std::vector<uint8_t>& type_ids, UnionMode mode = UnionMode::SPARSE)
+      : DataType(Type::UNION), mode(mode), type_ids(type_ids) {
+    children_ = child_fields;
+  }
+
+  std::string ToString() const override;
+  static std::string name() { return "union"; }
+  Status Accept(TypeVisitor* visitor) const override;
+
+  UnionMode mode;
+  std::vector<uint8_t> type_ids;
+};
+
+struct ARROW_EXPORT DateType : public DataType, public NoExtraMeta {
+  static constexpr Type::type type_id = Type::DATE;
+
+  DateType() : DataType(Type::DATE) {}
+
+  Status Accept(TypeVisitor* visitor) const override;
+  std::string ToString() const override { return name(); }
+  static std::string name() { return "date"; }
+};
+
+enum class TimeUnit : char { SECOND = 0, MILLI = 1, MICRO = 2, NANO = 3 };
+
+struct ARROW_EXPORT TimeType : public DataType {
+  static constexpr Type::type type_id = Type::TIME;
+  using Unit = TimeUnit;
+
+  TimeUnit unit;
+
+  explicit TimeType(TimeUnit unit = TimeUnit::MILLI) : DataType(Type::TIME), 
unit(unit) {}
+  TimeType(const TimeType& other) : TimeType(other.unit) {}
+
+  Status Accept(TypeVisitor* visitor) const override;
+  std::string ToString() const override { return name(); }
+  static std::string name() { return "time"; }
+};
+
+struct ARROW_EXPORT TimestampType : public DataType, public FixedWidthMeta {
+  using Unit = TimeUnit;
+
+  typedef int64_t c_type;
+  static constexpr Type::type type_id = Type::TIMESTAMP;
+
+  int bit_width() const override { return sizeof(int64_t) * 8; }
+
+  TimeUnit unit;
+
+  explicit TimestampType(TimeUnit unit = TimeUnit::MILLI)
+      : DataType(Type::TIMESTAMP), unit(unit) {}
+
+  TimestampType(const TimestampType& other) : TimestampType(other.unit) {}
+
+  Status Accept(TypeVisitor* visitor) const override;
+  std::string ToString() const override { return name(); }
+  static std::string name() { return "timestamp"; }
+};
+
+// Logical type for SQL-style intervals, tagged Type::INTERVAL. Each value is
+// reported as 64 bits wide (c_type is int64_t); `unit` selects between the
+// YEAR_MONTH and DAY_TIME flavours and defaults to YEAR_MONTH.
+struct ARROW_EXPORT IntervalType : public DataType, public FixedWidthMeta {
+  enum class Unit : char { YEAR_MONTH = 0, DAY_TIME = 1 };
+
+  typedef int64_t c_type;
+  static constexpr Type::type type_id = Type::INTERVAL;
+
+  int bit_width() const override { return sizeof(int64_t) * 8; }
+
+  Unit unit;
+
+  explicit IntervalType(Unit unit = Unit::YEAR_MONTH)
+      : DataType(Type::INTERVAL), unit(unit) {}
+
+  // Copies only the unit; the base DataType is rebuilt by the delegated ctor.
+  IntervalType(const IntervalType& other) : IntervalType(other.unit) {}
+
+  Status Accept(TypeVisitor* visitor) const override;
+  std::string ToString() const override { return name(); }
+  // Must not collide with DateType::name(), which is "date".
+  static std::string name() { return "interval"; }
 };
 
-// These will be defined elsewhere
-template <typename T>
-struct TypeTraits {};
+// Factory functions
+
+std::shared_ptr<DataType> ARROW_EXPORT null();
+std::shared_ptr<DataType> ARROW_EXPORT boolean();
+std::shared_ptr<DataType> ARROW_EXPORT int8();
+std::shared_ptr<DataType> ARROW_EXPORT int16();
+std::shared_ptr<DataType> ARROW_EXPORT int32();
+std::shared_ptr<DataType> ARROW_EXPORT int64();
+std::shared_ptr<DataType> ARROW_EXPORT uint8();
+std::shared_ptr<DataType> ARROW_EXPORT uint16();
+std::shared_ptr<DataType> ARROW_EXPORT uint32();
+std::shared_ptr<DataType> ARROW_EXPORT uint64();
+std::shared_ptr<DataType> ARROW_EXPORT float16();
+std::shared_ptr<DataType> ARROW_EXPORT float32();
+std::shared_ptr<DataType> ARROW_EXPORT float64();
+std::shared_ptr<DataType> ARROW_EXPORT utf8();
+std::shared_ptr<DataType> ARROW_EXPORT binary();
+
+std::shared_ptr<DataType> ARROW_EXPORT list(const std::shared_ptr<Field>& 
value_type);
+std::shared_ptr<DataType> ARROW_EXPORT list(const std::shared_ptr<DataType>& 
value_type);
+
+std::shared_ptr<DataType> ARROW_EXPORT date();
+std::shared_ptr<DataType> ARROW_EXPORT timestamp(TimeUnit unit);
+std::shared_ptr<DataType> ARROW_EXPORT time(TimeUnit unit);
+
+std::shared_ptr<DataType> ARROW_EXPORT struct_(
+    const std::vector<std::shared_ptr<Field>>& fields);
+
+std::shared_ptr<DataType> ARROW_EXPORT union_(
+    const std::vector<std::shared_ptr<Field>>& child_fields,
+    const std::vector<uint8_t>& type_ids, UnionMode mode = UnionMode::SPARSE);
+
+std::shared_ptr<Field> ARROW_EXPORT field(const std::string& name,
+    const std::shared_ptr<DataType>& type, bool nullable = true, int64_t 
dictionary = 0);
 
 }  // namespace arrow
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/type_fwd.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
new file mode 100644
index 0000000..6d660f4
--- /dev/null
+++ b/cpp/src/arrow/type_fwd.h
@@ -0,0 +1,157 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_TYPE_FWD_H
+#define ARROW_TYPE_FWD_H
+
+namespace arrow {
+
+class Status;
+
+struct DataType;
+class Array;
+class ArrayBuilder;
+struct Field;
+
+class Buffer;
+class MemoryPool;
+class RecordBatch;
+class Schema;
+
+struct NullType;
+class NullArray;
+
+struct BooleanType;
+class BooleanArray;
+class BooleanBuilder;
+
+struct BinaryType;
+class BinaryArray;
+class BinaryBuilder;
+
+struct StringType;
+class StringArray;
+class StringBuilder;
+
+struct ListType;
+class ListArray;
+class ListBuilder;
+
+struct StructType;
+class StructArray;
+class StructBuilder;
+
+struct DecimalType;
+class DecimalArray;
+
+struct UnionType;
+class UnionArray;
+
+template <typename TypeClass>
+class NumericArray;
+
+template <typename TypeClass>
+class NumericBuilder;
+
+#define _NUMERIC_TYPE_DECL(KLASS)                 \
+  struct KLASS##Type;                             \
+  using KLASS##Array = NumericArray<KLASS##Type>; \
+  using KLASS##Builder = NumericBuilder<KLASS##Type>;
+
+_NUMERIC_TYPE_DECL(Int8);
+_NUMERIC_TYPE_DECL(Int16);
+_NUMERIC_TYPE_DECL(Int32);
+_NUMERIC_TYPE_DECL(Int64);
+_NUMERIC_TYPE_DECL(UInt8);
+_NUMERIC_TYPE_DECL(UInt16);
+_NUMERIC_TYPE_DECL(UInt32);
+_NUMERIC_TYPE_DECL(UInt64);
+_NUMERIC_TYPE_DECL(HalfFloat);
+_NUMERIC_TYPE_DECL(Float);
+_NUMERIC_TYPE_DECL(Double);
+
+#undef _NUMERIC_TYPE_DECL
+
+struct DateType;
+class DateArray;
+
+struct TimeType;
+class TimeArray;
+
+struct TimestampType;
+using TimestampArray = NumericArray<TimestampType>;
+
+struct IntervalType;
+using IntervalArray = NumericArray<IntervalType>;
+
+// Visitor interface over the concrete DataType subclasses. Each Visit overload
+// returns a Status; implementations must override every overload because all
+// of them are pure virtual.
+class TypeVisitor {
+ public:
+  // Virtual so that implementations can be destroyed through a TypeVisitor*.
+  virtual ~TypeVisitor() = default;
+
+  virtual Status Visit(const NullType& type) = 0;
+  virtual Status Visit(const BooleanType& type) = 0;
+  virtual Status Visit(const Int8Type& type) = 0;
+  virtual Status Visit(const Int16Type& type) = 0;
+  virtual Status Visit(const Int32Type& type) = 0;
+  virtual Status Visit(const Int64Type& type) = 0;
+  virtual Status Visit(const UInt8Type& type) = 0;
+  virtual Status Visit(const UInt16Type& type) = 0;
+  virtual Status Visit(const UInt32Type& type) = 0;
+  virtual Status Visit(const UInt64Type& type) = 0;
+  virtual Status Visit(const HalfFloatType& type) = 0;
+  virtual Status Visit(const FloatType& type) = 0;
+  virtual Status Visit(const DoubleType& type) = 0;
+  virtual Status Visit(const StringType& type) = 0;
+  virtual Status Visit(const BinaryType& type) = 0;
+  virtual Status Visit(const DateType& type) = 0;
+  virtual Status Visit(const TimeType& type) = 0;
+  virtual Status Visit(const TimestampType& type) = 0;
+  virtual Status Visit(const IntervalType& type) = 0;
+  virtual Status Visit(const DecimalType& type) = 0;
+  virtual Status Visit(const ListType& type) = 0;
+  virtual Status Visit(const StructType& type) = 0;
+  virtual Status Visit(const UnionType& type) = 0;
+};
+
+// Visitor interface over the concrete array classes. Each Visit overload
+// returns a Status; implementations must override every overload because all
+// of them are pure virtual.
+class ArrayVisitor {
+ public:
+  // Virtual so that implementations can be destroyed through an ArrayVisitor*.
+  virtual ~ArrayVisitor() = default;
+
+  virtual Status Visit(const NullArray& array) = 0;
+  virtual Status Visit(const BooleanArray& array) = 0;
+  virtual Status Visit(const Int8Array& array) = 0;
+  virtual Status Visit(const Int16Array& array) = 0;
+  virtual Status Visit(const Int32Array& array) = 0;
+  virtual Status Visit(const Int64Array& array) = 0;
+  virtual Status Visit(const UInt8Array& array) = 0;
+  virtual Status Visit(const UInt16Array& array) = 0;
+  virtual Status Visit(const UInt32Array& array) = 0;
+  virtual Status Visit(const UInt64Array& array) = 0;
+  virtual Status Visit(const HalfFloatArray& array) = 0;
+  virtual Status Visit(const FloatArray& array) = 0;
+  virtual Status Visit(const DoubleArray& array) = 0;
+  virtual Status Visit(const StringArray& array) = 0;
+  virtual Status Visit(const BinaryArray& array) = 0;
+  virtual Status Visit(const DateArray& array) = 0;
+  virtual Status Visit(const TimeArray& array) = 0;
+  virtual Status Visit(const TimestampArray& array) = 0;
+  virtual Status Visit(const IntervalArray& array) = 0;
+  virtual Status Visit(const DecimalArray& array) = 0;
+  virtual Status Visit(const ListArray& array) = 0;
+  virtual Status Visit(const StructArray& array) = 0;
+  virtual Status Visit(const UnionArray& array) = 0;
+};
+
+}  // namespace arrow
+
+#endif  // ARROW_TYPE_FWD_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/type_traits.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h
new file mode 100644
index 0000000..bbb8074
--- /dev/null
+++ b/cpp/src/arrow/type_traits.h
@@ -0,0 +1,197 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_TYPE_TRAITS_H
+#define ARROW_TYPE_TRAITS_H
+
+#include <type_traits>
+
+#include "arrow/type_fwd.h"
+#include "arrow/util/bit-util.h"
+
+namespace arrow {
+
+template <typename T>
+struct TypeTraits {};
+
+template <>
+struct TypeTraits<UInt8Type> {
+  using ArrayType = UInt8Array;
+  using BuilderType = UInt8Builder;
+  static inline int bytes_required(int elements) { return elements; }
+};
+
+template <>
+struct TypeTraits<Int8Type> {
+  using ArrayType = Int8Array;
+  using BuilderType = Int8Builder;
+  static inline int bytes_required(int elements) { return elements; }
+};
+
+template <>
+struct TypeTraits<UInt16Type> {
+  using ArrayType = UInt16Array;
+  using BuilderType = UInt16Builder;
+
+  static inline int bytes_required(int elements) { return elements * 
sizeof(uint16_t); }
+};
+
+template <>
+struct TypeTraits<Int16Type> {
+  using ArrayType = Int16Array;
+  using BuilderType = Int16Builder;
+
+  static inline int bytes_required(int elements) { return elements * 
sizeof(int16_t); }
+};
+
+template <>
+struct TypeTraits<UInt32Type> {
+  using ArrayType = UInt32Array;
+  using BuilderType = UInt32Builder;
+
+  static inline int bytes_required(int elements) { return elements * 
sizeof(uint32_t); }
+};
+
+template <>
+struct TypeTraits<Int32Type> {
+  using ArrayType = Int32Array;
+  using BuilderType = Int32Builder;
+
+  static inline int bytes_required(int elements) { return elements * 
sizeof(int32_t); }
+};
+
+template <>
+struct TypeTraits<UInt64Type> {
+  using ArrayType = UInt64Array;
+  using BuilderType = UInt64Builder;
+
+  static inline int bytes_required(int elements) { return elements * 
sizeof(uint64_t); }
+};
+
+template <>
+struct TypeTraits<Int64Type> {
+  using ArrayType = Int64Array;
+  using BuilderType = Int64Builder;
+
+  static inline int bytes_required(int elements) { return elements * 
sizeof(int64_t); }
+};
+
+template <>
+struct TypeTraits<TimestampType> {
+  using ArrayType = TimestampArray;
+  // using BuilderType = TimestampBuilder;
+
+  static inline int bytes_required(int elements) { return elements * 
sizeof(int64_t); }
+};
+
+template <>
+struct TypeTraits<HalfFloatType> {
+  using ArrayType = HalfFloatArray;
+  using BuilderType = HalfFloatBuilder;
+
+  static inline int bytes_required(int elements) { return elements * 
sizeof(uint16_t); }
+};
+
+template <>
+struct TypeTraits<FloatType> {
+  using ArrayType = FloatArray;
+  using BuilderType = FloatBuilder;
+
+  static inline int bytes_required(int elements) { return elements * 
sizeof(float); }
+};
+
+template <>
+struct TypeTraits<DoubleType> {
+  using ArrayType = DoubleArray;
+  using BuilderType = DoubleBuilder;
+
+  static inline int bytes_required(int elements) { return elements * 
sizeof(double); }
+};
+
+template <>
+struct TypeTraits<BooleanType> {
+  using ArrayType = BooleanArray;
+  using BuilderType = BooleanBuilder;
+
+  static inline int bytes_required(int elements) {
+    return BitUtil::BytesForBits(elements);
+  }
+};
+
+template <>
+struct TypeTraits<StringType> {
+  using ArrayType = StringArray;
+  using BuilderType = StringBuilder;
+};
+
+template <>
+struct TypeTraits<BinaryType> {
+  using ArrayType = BinaryArray;
+  using BuilderType = BinaryBuilder;
+};
+
+// Not all type classes have a c_type
+template <typename T>
+struct as_void {
+  using type = void;
+};
+
+// Generates GetAttr_<ATTR_NAME><T>, whose nested `type` is T::ATTR_NAME when T
+// declares that member type and DEFAULT otherwise. The partial specialization
+// will match if T has the ATTR_NAME member.
+#define GET_ATTR(ATTR_NAME, DEFAULT)                                   \
+  template <typename T, typename Enable = void>                        \
+  struct GetAttr_##ATTR_NAME {                                         \
+    using type = DEFAULT;                                              \
+  };                                                                   \
+                                                                       \
+  template <typename T>                                                \
+  struct GetAttr_##ATTR_NAME<T,                                        \
+      typename as_void<typename T::ATTR_NAME>::type> {                 \
+    using type = typename T::ATTR_NAME;                                \
+  };
+
+GET_ATTR(c_type, void);
+GET_ATTR(TypeClass, void);
+
+#undef GET_ATTR
+
+// Declares two aliases inside a trait struct:
+//   TypeClass - T itself when T derives from DataType, otherwise whatever
+//               GetAttr_TypeClass<T> yields
+//   c_type    - whatever GetAttr_c_type<TypeClass> yields
+#define PRIMITIVE_TRAITS(T)                                            \
+  using TypeClass =                                                    \
+      typename std::conditional<std::is_base_of<DataType, T>::value,   \
+          T, typename GetAttr_TypeClass<T>::type>::type;               \
+  using c_type = typename GetAttr_c_type<TypeClass>::type;
+
+template <typename T>
+struct IsUnsignedInt {
+  PRIMITIVE_TRAITS(T);
+  static constexpr bool value =
+      std::is_integral<c_type>::value && std::is_unsigned<c_type>::value;
+};
+
+template <typename T>
+struct IsSignedInt {
+  PRIMITIVE_TRAITS(T);
+  static constexpr bool value =
+      std::is_integral<c_type>::value && std::is_signed<c_type>::value;
+};
+
+template <typename T>
+struct IsFloatingPoint {
+  PRIMITIVE_TRAITS(T);
+  static constexpr bool value = std::is_floating_point<c_type>::value;
+};
+
+}  // namespace arrow
+
+#endif  // ARROW_TYPE_TRAITS_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/CMakeLists.txt 
b/cpp/src/arrow/types/CMakeLists.txt
index 9f78169..6d59acf 100644
--- a/cpp/src/arrow/types/CMakeLists.txt
+++ b/cpp/src/arrow/types/CMakeLists.txt
@@ -21,7 +21,6 @@
 
 # Headers: top level
 install(FILES
-  collection.h
   construct.h
   datetime.h
   decimal.h

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/collection.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/collection.h b/cpp/src/arrow/types/collection.h
deleted file mode 100644
index 1712030..0000000
--- a/cpp/src/arrow/types/collection.h
+++ /dev/null
@@ -1,41 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TYPES_COLLECTION_H
-#define ARROW_TYPES_COLLECTION_H
-
-#include <string>
-#include <vector>
-
-#include "arrow/type.h"
-
-namespace arrow {
-
-template <Type::type T>
-struct CollectionType : public DataType {
-  std::vector<TypePtr> child_types_;
-
-  CollectionType() : DataType(T) {}
-
-  const TypePtr& child(int i) const { return child_types_[i]; }
-
-  int num_children() const { return child_types_.size(); }
-};
-
-}  // namespace arrow
-
-#endif  // ARROW_TYPES_COLLECTION_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/datetime.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/datetime.h b/cpp/src/arrow/types/datetime.h
index 241a126..a8f8639 100644
--- a/cpp/src/arrow/types/datetime.h
+++ b/cpp/src/arrow/types/datetime.h
@@ -22,41 +22,6 @@
 
 #include "arrow/type.h"
 
-namespace arrow {
-
-struct DateType : public DataType {
-  enum class Unit : char { DAY = 0, MONTH = 1, YEAR = 2 };
-
-  Unit unit;
-
-  explicit DateType(Unit unit = Unit::DAY) : DataType(Type::DATE), unit(unit) 
{}
-
-  DateType(const DateType& other) : DateType(other.unit) {}
-
-  static char const* name() { return "date"; }
-};
-
-struct ARROW_EXPORT TimestampType : public DataType {
-  enum class Unit : char { SECOND = 0, MILLI = 1, MICRO = 2, NANO = 3 };
-
-  typedef int64_t c_type;
-  static constexpr Type::type type_enum = Type::TIMESTAMP;
-
-  int value_size() const override { return sizeof(int64_t); }
-
-  Unit unit;
-
-  explicit TimestampType(Unit unit = Unit::MILLI)
-      : DataType(Type::TIMESTAMP), unit(unit) {}
-
-  TimestampType(const TimestampType& other) : TimestampType(other.unit) {}
-  virtual ~TimestampType() {}
-
-  std::string ToString() const override { return "timestamp"; }
-
-  static char const* name() { return "timestamp"; }
-};
-
-}  // namespace arrow
+namespace arrow {}  // namespace arrow
 
 #endif  // ARROW_TYPES_DATETIME_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/decimal.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/decimal.h b/cpp/src/arrow/types/decimal.h
index 6c497c5..b3ea3a5 100644
--- a/cpp/src/arrow/types/decimal.h
+++ b/cpp/src/arrow/types/decimal.h
@@ -23,18 +23,6 @@
 #include "arrow/type.h"
 #include "arrow/util/visibility.h"
 
-namespace arrow {
-
-struct ARROW_EXPORT DecimalType : public DataType {
-  explicit DecimalType(int precision_, int scale_)
-      : DataType(Type::DECIMAL), precision(precision_), scale(scale_) {}
-  int precision;
-  int scale;
-  static char const* name() { return "decimal"; }
-
-  std::string ToString() const override;
-};
-
-}  // namespace arrow
+namespace arrow {}  // namespace arrow
 
 #endif  // ARROW_TYPES_DECIMAL_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/list-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc
index 12c5394..cb9a8c1 100644
--- a/cpp/src/arrow/types/list-test.cc
+++ b/cpp/src/arrow/types/list-test.cc
@@ -141,7 +141,7 @@ TEST_F(TestListBuilder, TestAppendNull) {
   ASSERT_TRUE(result_->IsNull(0));
   ASSERT_TRUE(result_->IsNull(1));
 
-  ASSERT_EQ(0, result_->offsets()[0]);
+  ASSERT_EQ(0, result_->raw_offsets()[0]);
   ASSERT_EQ(0, result_->offset(1));
   ASSERT_EQ(0, result_->offset(2));
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/list.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc
index 4b1e821..d865632 100644
--- a/cpp/src/arrow/types/list.cc
+++ b/cpp/src/arrow/types/list.cc
@@ -155,4 +155,8 @@ void ListBuilder::Reset() {
   null_bitmap_ = nullptr;
 }
 
+Status ListArray::Accept(ArrayVisitor* visitor) const {  // visitor entry point for list arrays
+  return visitor->Visit(*this);  // hands this array to the visitor and returns the visitor's Status
+}
+
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/list.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h
index 9440ffe..bd93e8f 100644
--- a/cpp/src/arrow/types/list.h
+++ b/cpp/src/arrow/types/list.h
@@ -39,6 +39,8 @@ class MemoryPool;
 
 class ARROW_EXPORT ListArray : public Array {
  public:
+  using TypeClass = ListType;
+
   ListArray(const TypePtr& type, int32_t length, std::shared_ptr<Buffer> 
offsets,
       const ArrayPtr& values, int32_t null_count = 0,
       std::shared_ptr<Buffer> null_bitmap = nullptr)
@@ -56,13 +58,13 @@ class ARROW_EXPORT ListArray : public Array {
   // Return a shared pointer in case the requestor desires to share ownership
   // with this array.
   const std::shared_ptr<Array>& values() const { return values_; }
-  const std::shared_ptr<Buffer> offset_buffer() const {
+  std::shared_ptr<Buffer> offsets() const {
     return std::static_pointer_cast<Buffer>(offset_buffer_);
   }
 
   const std::shared_ptr<DataType>& value_type() const { return 
values_->type(); }
 
-  const int32_t* offsets() const { return offsets_; }
+  const int32_t* raw_offsets() const { return offsets_; }
 
   int32_t offset(int i) const { return offsets_[i]; }
 
@@ -76,6 +78,8 @@ class ARROW_EXPORT ListArray : public Array {
   bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
       const ArrayPtr& arr) const override;
 
+  Status Accept(ArrayVisitor* visitor) const override;
+
  protected:
   std::shared_ptr<Buffer> offset_buffer_;
   const int32_t* offsets_;

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/primitive-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive-test.cc 
b/cpp/src/arrow/types/primitive-test.cc
index e47f6dc..bdc8ec0 100644
--- a/cpp/src/arrow/types/primitive-test.cc
+++ b/cpp/src/arrow/types/primitive-test.cc
@@ -25,6 +25,7 @@
 #include "arrow/builder.h"
 #include "arrow/test-util.h"
 #include "arrow/type.h"
+#include "arrow/type_traits.h"
 #include "arrow/types/construct.h"
 #include "arrow/types/primitive.h"
 #include "arrow/types/test-common.h"
@@ -41,15 +42,15 @@ namespace arrow {
 
 class Array;
 
-#define PRIMITIVE_TEST(KLASS, ENUM, NAME) \
-  TEST(TypesTest, TestPrimitive_##ENUM) { \
-    KLASS tp;                             \
-                                          \
-    ASSERT_EQ(tp.type, Type::ENUM);       \
-    ASSERT_EQ(tp.name(), string(NAME));   \
-                                          \
-    KLASS tp_copy = tp;                   \
-    ASSERT_EQ(tp_copy.type, Type::ENUM);  \
+#define PRIMITIVE_TEST(KLASS, ENUM, NAME)   \
+  TEST(TypesTest, TestPrimitive_##ENUM) {   \
+    KLASS tp;                               \
+                                            \
+    ASSERT_EQ(tp.type, Type::ENUM);         \
+    ASSERT_EQ(tp.ToString(), string(NAME)); \
+                                            \
+    KLASS tp_copy = tp;                     \
+    ASSERT_EQ(tp_copy.type, Type::ENUM);    \
   }
 
 PRIMITIVE_TEST(Int8Type, INT8, "int8");
@@ -243,7 +244,8 @@ void TestPrimitiveBuilder<PBoolean>::Check(
 }
 
 typedef ::testing::Types<PBoolean, PUInt8, PUInt16, PUInt32, PUInt64, PInt8, 
PInt16,
-    PInt32, PInt64, PFloat, PDouble> Primitives;
+    PInt32, PInt64, PFloat, PDouble>
+    Primitives;
 
 TYPED_TEST_CASE(TestPrimitiveBuilder, Primitives);
 
@@ -311,20 +313,6 @@ TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) {
   ASSERT_EQ(memory_before, this->pool_->bytes_allocated());
 }
 
-template <class T, class Builder>
-Status MakeArray(const vector<uint8_t>& valid_bytes, const vector<T>& draws, 
int size,
-    Builder* builder, ArrayPtr* out) {
-  // Append the first 1000
-  for (int i = 0; i < size; ++i) {
-    if (valid_bytes[i] > 0) {
-      RETURN_NOT_OK(builder->Append(draws[i]));
-    } else {
-      RETURN_NOT_OK(builder->AppendNull());
-    }
-  }
-  return builder->Finish(out);
-}
-
 TYPED_TEST(TestPrimitiveBuilder, Equality) {
   DECL_T();
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/primitive.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc
index d2288ba..14667ee 100644
--- a/cpp/src/arrow/types/primitive.cc
+++ b/cpp/src/arrow/types/primitive.cc
@@ -19,6 +19,7 @@
 
 #include <memory>
 
+#include "arrow/type_traits.h"
 #include "arrow/util/bit-util.h"
 #include "arrow/util/buffer.h"
 #include "arrow/util/logging.h"
@@ -48,13 +49,14 @@ bool PrimitiveArray::EqualsExact(const PrimitiveArray& 
other) const {
     const uint8_t* this_data = raw_data_;
     const uint8_t* other_data = other.raw_data_;
 
-    int value_size = type_->value_size();
-    DCHECK_GT(value_size, 0);
+    auto size_meta = dynamic_cast<const FixedWidthMeta*>(type_.get());
+    int value_byte_size = size_meta->bit_width() / 8;
+    DCHECK_GT(value_byte_size, 0);
 
     for (int i = 0; i < length_; ++i) {
-      if (!IsNull(i) && memcmp(this_data, other_data, value_size)) { return 
false; }
-      this_data += value_size;
-      other_data += value_size;
+      if (!IsNull(i) && memcmp(this_data, other_data, value_byte_size)) { 
return false; }
+      this_data += value_byte_size;
+      other_data += value_byte_size;
     }
     return true;
   } else {
@@ -70,6 +72,11 @@ bool PrimitiveArray::Equals(const std::shared_ptr<Array>& 
arr) const {
   return EqualsExact(*static_cast<const PrimitiveArray*>(arr.get()));
 }
 
+template <typename T>
+Status NumericArray<T>::Accept(ArrayVisitor* visitor) const {  // visitor entry point shared by every NumericArray<T>
+  return visitor->Visit(*this);  // hands this array to the visitor and returns the visitor's Status
+}
+
 template class NumericArray<UInt8Type>;
 template class NumericArray<UInt16Type>;
 template class NumericArray<UInt32Type>;
@@ -79,9 +86,9 @@ template class NumericArray<Int16Type>;
 template class NumericArray<Int32Type>;
 template class NumericArray<Int64Type>;
 template class NumericArray<TimestampType>;
+template class NumericArray<HalfFloatType>;
 template class NumericArray<FloatType>;
 template class NumericArray<DoubleType>;
-template class NumericArray<BooleanType>;
 
 template <typename T>
 Status PrimitiveBuilder<T>::Init(int32_t capacity) {
@@ -145,8 +152,65 @@ Status PrimitiveBuilder<T>::Finish(std::shared_ptr<Array>* 
out) {
   return Status::OK();
 }
 
-template <>
-Status PrimitiveBuilder<BooleanType>::Append(
+template class PrimitiveBuilder<UInt8Type>;  // explicit instantiations of PrimitiveBuilder for the fixed-width value types
+template class PrimitiveBuilder<UInt16Type>;
+template class PrimitiveBuilder<UInt32Type>;
+template class PrimitiveBuilder<UInt64Type>;
+template class PrimitiveBuilder<Int8Type>;
+template class PrimitiveBuilder<Int16Type>;
+template class PrimitiveBuilder<Int32Type>;
+template class PrimitiveBuilder<Int64Type>;
+template class PrimitiveBuilder<TimestampType>;
+template class PrimitiveBuilder<HalfFloatType>;
+template class PrimitiveBuilder<FloatType>;
+template class PrimitiveBuilder<DoubleType>;  // BooleanType is absent: booleans use the standalone BooleanBuilder
+
+Status BooleanBuilder::Init(int32_t capacity) {  // allocates and zero-fills the value buffer for `capacity` slots
+  RETURN_NOT_OK(ArrayBuilder::Init(capacity));  // base-class initialization runs first; any error is returned as-is
+  data_ = std::make_shared<PoolBuffer>(pool_);
+
+  int64_t nbytes = BitUtil::BytesForBits(capacity);  // buffer size is computed from a bit count, not a byte count
+  RETURN_NOT_OK(data_->Resize(nbytes));
+  // TODO(emkornfield) valgrind complains without this
+  memset(data_->mutable_data(), 0, nbytes);
+
+  raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());  // cache the raw pointer to the freshly sized buffer
+  return Status::OK();
+}
+
+Status BooleanBuilder::Resize(int32_t capacity) {  // resizes the value buffer to hold `capacity` slots
+  // XXX: Set floor size for now
+  if (capacity < kMinBuilderCapacity) { capacity = kMinBuilderCapacity; }
+
+  if (capacity_ == 0) {
+    RETURN_NOT_OK(Init(capacity));  // nothing allocated yet: Init sizes and zero-fills the buffer
+  } else {
+    RETURN_NOT_OK(ArrayBuilder::Resize(capacity));
+    const int64_t old_bytes = data_->size();
+    const int64_t new_bytes = BitUtil::BytesForBits(capacity);
+
+    RETURN_NOT_OK(data_->Resize(new_bytes));
+    raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());  // the buffer may have moved; refresh the pointer
+    // Zero only the bytes that were added. If the buffer did not grow,
+    // new_bytes - old_bytes is <= 0; a negative value would convert to a huge
+    // size_t length and the start pointer would lie past the end of the buffer.
+    if (new_bytes > old_bytes) {
+      memset(data_->mutable_data() + old_bytes, 0, new_bytes - old_bytes);
+    }
+  }
+  return Status::OK();
+}
+
+Status BooleanBuilder::Finish(std::shared_ptr<Array>* out) {  // stores a new BooleanArray in *out, then resets the builder
+  const int64_t bytes_required = BitUtil::BytesForBits(length_);
+
+  if (bytes_required > 0 && bytes_required < data_->size()) {  // data_ is not dereferenced when length_ yields 0 bytes
+    // Trim the value buffer down to the bytes needed for length_ values
+    RETURN_NOT_OK(data_->Resize(bytes_required));
+  }
+  *out = std::make_shared<BooleanArray>(type_, length_, data_, null_count_, 
null_bitmap_);
+
+  data_ = null_bitmap_ = nullptr;  // release the builder's references; both buffers were passed to the array above
+  capacity_ = length_ = null_count_ = 0;  // capacity_ == 0 makes the next Resize() take the Init() path
+  return Status::OK();
+}
+
+Status BooleanBuilder::Append(
     const uint8_t* values, int32_t length, const uint8_t* valid_bytes) {
   RETURN_NOT_OK(Reserve(length));
 
@@ -168,19 +232,6 @@ Status PrimitiveBuilder<BooleanType>::Append(
   return Status::OK();
 }
 
-template class PrimitiveBuilder<UInt8Type>;
-template class PrimitiveBuilder<UInt16Type>;
-template class PrimitiveBuilder<UInt32Type>;
-template class PrimitiveBuilder<UInt64Type>;
-template class PrimitiveBuilder<Int8Type>;
-template class PrimitiveBuilder<Int16Type>;
-template class PrimitiveBuilder<Int32Type>;
-template class PrimitiveBuilder<Int64Type>;
-template class PrimitiveBuilder<TimestampType>;
-template class PrimitiveBuilder<FloatType>;
-template class PrimitiveBuilder<DoubleType>;
-template class PrimitiveBuilder<BooleanType>;
-
 BooleanArray::BooleanArray(int32_t length, const std::shared_ptr<Buffer>& data,
     int32_t null_count, const std::shared_ptr<Buffer>& null_bitmap)
     : PrimitiveArray(
@@ -235,4 +286,8 @@ bool BooleanArray::RangeEquals(int32_t start_idx, int32_t 
end_idx,
   return true;
 }
 
+Status BooleanArray::Accept(ArrayVisitor* visitor) const {  // visitor entry point for boolean arrays
+  return visitor->Visit(*this);  // hands this array to the visitor and returns the visitor's Status
+}
+
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/primitive.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h
index c71df58..a5a3704 100644
--- a/cpp/src/arrow/types/primitive.h
+++ b/cpp/src/arrow/types/primitive.h
@@ -26,6 +26,7 @@
 #include "arrow/array.h"
 #include "arrow/builder.h"
 #include "arrow/type.h"
+#include "arrow/type_fwd.h"
 #include "arrow/types/datetime.h"
 #include "arrow/util/bit-util.h"
 #include "arrow/util/buffer.h"
@@ -54,9 +55,10 @@ class ARROW_EXPORT PrimitiveArray : public Array {
   const uint8_t* raw_data_;
 };
 
-template <class TypeClass>
+template <class TYPE>
 class ARROW_EXPORT NumericArray : public PrimitiveArray {
  public:
+  using TypeClass = TYPE;
   using value_type = typename TypeClass::c_type;
   NumericArray(int32_t length, const std::shared_ptr<Buffer>& data,
       int32_t null_count = 0, const std::shared_ptr<Buffer>& null_bitmap = 
nullptr)
@@ -88,29 +90,15 @@ class ARROW_EXPORT NumericArray : public PrimitiveArray {
     return reinterpret_cast<const value_type*>(raw_data_);
   }
 
+  Status Accept(ArrayVisitor* visitor) const override;
+
   value_type Value(int i) const { return raw_data()[i]; }
 };
 
-#define NUMERIC_ARRAY_DECL(NAME, TypeClass) \
-  using NAME = NumericArray<TypeClass>;     \
-  extern template class ARROW_EXPORT NumericArray<TypeClass>;
-
-NUMERIC_ARRAY_DECL(UInt8Array, UInt8Type);
-NUMERIC_ARRAY_DECL(Int8Array, Int8Type);
-NUMERIC_ARRAY_DECL(UInt16Array, UInt16Type);
-NUMERIC_ARRAY_DECL(Int16Array, Int16Type);
-NUMERIC_ARRAY_DECL(UInt32Array, UInt32Type);
-NUMERIC_ARRAY_DECL(Int32Array, Int32Type);
-NUMERIC_ARRAY_DECL(UInt64Array, UInt64Type);
-NUMERIC_ARRAY_DECL(Int64Array, Int64Type);
-NUMERIC_ARRAY_DECL(TimestampArray, TimestampType);
-NUMERIC_ARRAY_DECL(FloatArray, FloatType);
-NUMERIC_ARRAY_DECL(DoubleArray, DoubleType);
-
 template <typename Type>
 class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder {
  public:
-  typedef typename Type::c_type value_type;
+  using value_type = typename Type::c_type;
 
   explicit PrimitiveBuilder(MemoryPool* pool, const TypePtr& type)
       : ArrayBuilder(pool, type), data_(nullptr) {}
@@ -183,101 +171,27 @@ class ARROW_EXPORT NumericBuilder : public 
PrimitiveBuilder<T> {
   using PrimitiveBuilder<T>::raw_data_;
 };
 
-template <>
-struct TypeTraits<UInt8Type> {
-  typedef UInt8Array ArrayType;
-
-  static inline int bytes_required(int elements) { return elements; }
-};
-
-template <>
-struct TypeTraits<Int8Type> {
-  typedef Int8Array ArrayType;
-
-  static inline int bytes_required(int elements) { return elements; }
-};
-
-template <>
-struct TypeTraits<UInt16Type> {
-  typedef UInt16Array ArrayType;
-
-  static inline int bytes_required(int elements) { return elements * 
sizeof(uint16_t); }
-};
-
-template <>
-struct TypeTraits<Int16Type> {
-  typedef Int16Array ArrayType;
-
-  static inline int bytes_required(int elements) { return elements * 
sizeof(int16_t); }
-};
-
-template <>
-struct TypeTraits<UInt32Type> {
-  typedef UInt32Array ArrayType;
-
-  static inline int bytes_required(int elements) { return elements * 
sizeof(uint32_t); }
-};
-
-template <>
-struct TypeTraits<Int32Type> {
-  typedef Int32Array ArrayType;
-
-  static inline int bytes_required(int elements) { return elements * 
sizeof(int32_t); }
-};
-
-template <>
-struct TypeTraits<UInt64Type> {
-  typedef UInt64Array ArrayType;
-
-  static inline int bytes_required(int elements) { return elements * 
sizeof(uint64_t); }
-};
-
-template <>
-struct TypeTraits<Int64Type> {
-  typedef Int64Array ArrayType;
-
-  static inline int bytes_required(int elements) { return elements * 
sizeof(int64_t); }
-};
-
-template <>
-struct TypeTraits<TimestampType> {
-  typedef TimestampArray ArrayType;
-
-  static inline int bytes_required(int elements) { return elements * 
sizeof(int64_t); }
-};
-template <>
-
-struct TypeTraits<FloatType> {
-  typedef FloatArray ArrayType;
-
-  static inline int bytes_required(int elements) { return elements * 
sizeof(float); }
-};
-
-template <>
-struct TypeTraits<DoubleType> {
-  typedef DoubleArray ArrayType;
-
-  static inline int bytes_required(int elements) { return elements * 
sizeof(double); }
-};
-
 // Builders
 
-typedef NumericBuilder<UInt8Type> UInt8Builder;
-typedef NumericBuilder<UInt16Type> UInt16Builder;
-typedef NumericBuilder<UInt32Type> UInt32Builder;
-typedef NumericBuilder<UInt64Type> UInt64Builder;
+using UInt8Builder = NumericBuilder<UInt8Type>;
+using UInt16Builder = NumericBuilder<UInt16Type>;
+using UInt32Builder = NumericBuilder<UInt32Type>;
+using UInt64Builder = NumericBuilder<UInt64Type>;
 
-typedef NumericBuilder<Int8Type> Int8Builder;
-typedef NumericBuilder<Int16Type> Int16Builder;
-typedef NumericBuilder<Int32Type> Int32Builder;
-typedef NumericBuilder<Int64Type> Int64Builder;
-typedef NumericBuilder<TimestampType> TimestampBuilder;
+using Int8Builder = NumericBuilder<Int8Type>;
+using Int16Builder = NumericBuilder<Int16Type>;
+using Int32Builder = NumericBuilder<Int32Type>;
+using Int64Builder = NumericBuilder<Int64Type>;
+using TimestampBuilder = NumericBuilder<TimestampType>;
 
-typedef NumericBuilder<FloatType> FloatBuilder;
-typedef NumericBuilder<DoubleType> DoubleBuilder;
+using HalfFloatBuilder = NumericBuilder<HalfFloatType>;
+using FloatBuilder = NumericBuilder<FloatType>;
+using DoubleBuilder = NumericBuilder<DoubleType>;
 
 class ARROW_EXPORT BooleanArray : public PrimitiveArray {
  public:
+  using TypeClass = BooleanType;
+
   BooleanArray(int32_t length, const std::shared_ptr<Buffer>& data,
       int32_t null_count = 0, const std::shared_ptr<Buffer>& null_bitmap = 
nullptr);
   BooleanArray(const TypePtr& type, int32_t length, const 
std::shared_ptr<Buffer>& data,
@@ -288,28 +202,36 @@ class ARROW_EXPORT BooleanArray : public PrimitiveArray {
   bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
       const ArrayPtr& arr) const override;
 
+  Status Accept(ArrayVisitor* visitor) const override;
+
   const uint8_t* raw_data() const { return reinterpret_cast<const 
uint8_t*>(raw_data_); }
 
   bool Value(int i) const { return BitUtil::GetBit(raw_data(), i); }
 };
 
-template <>
-struct TypeTraits<BooleanType> {
-  typedef BooleanArray ArrayType;
-
-  static inline int bytes_required(int elements) {
-    return BitUtil::BytesForBits(elements);
-  }
-};
-
-class ARROW_EXPORT BooleanBuilder : public PrimitiveBuilder<BooleanType> {
+class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
  public:
   explicit BooleanBuilder(MemoryPool* pool, const TypePtr& type)
-      : PrimitiveBuilder<BooleanType>(pool, type) {}
+      : ArrayBuilder(pool, type), data_(nullptr) {}
 
   virtual ~BooleanBuilder() {}
 
-  using PrimitiveBuilder<BooleanType>::Append;
+  using ArrayBuilder::Advance;
+
+  // Append `length` validity entries read from valid_bytes (a 0 byte indicates
+  Status AppendNulls(const uint8_t* valid_bytes, int32_t length) {  // null); space is reserved before writing
+    RETURN_NOT_OK(Reserve(length));  // a failed reservation is returned before the bitmap is touched
+    UnsafeAppendToBitmap(valid_bytes, length);
+    return Status::OK();
+  }
+
+  Status AppendNull() {  // appends a single slot whose validity entry is false
+    RETURN_NOT_OK(Reserve(1));  // make room for one more slot first
+    UnsafeAppendToBitmap(false);
+    return Status::OK();
+  }
+
+  std::shared_ptr<Buffer> data() const { return data_; }  // shared handle to the builder's current value buffer (may be null)
 
   // Scalar append
   Status Append(bool val) {
@@ -324,9 +246,39 @@ class ARROW_EXPORT BooleanBuilder : public 
PrimitiveBuilder<BooleanType> {
     return Status::OK();
   }
 
-  Status Append(uint8_t val) { return Append(static_cast<bool>(val)); }
+  // Vector append
+  //
+  // If passed, valid_bytes is of equal length to values, and any zero byte
+  // will be considered as a null for that slot
+  Status Append(
+      const uint8_t* values, int32_t length, const uint8_t* valid_bytes = 
nullptr);
+
+  Status Finish(std::shared_ptr<Array>* out) override;
+  Status Init(int32_t capacity) override;
+
+  // Increase the capacity of the builder to accommodate at least the indicated
+  // number of elements
+  Status Resize(int32_t capacity) override;
+
+ protected:
+  std::shared_ptr<PoolBuffer> data_;
+  uint8_t* raw_data_;
 };
 
+// Only instantiate these templates once: these declarations suppress implicit
+extern template class ARROW_EXPORT NumericArray<Int8Type>;  // instantiation in translation units that include this header
+extern template class ARROW_EXPORT NumericArray<UInt8Type>;
+extern template class ARROW_EXPORT NumericArray<Int16Type>;
+extern template class ARROW_EXPORT NumericArray<UInt16Type>;
+extern template class ARROW_EXPORT NumericArray<Int32Type>;
+extern template class ARROW_EXPORT NumericArray<UInt32Type>;
+extern template class ARROW_EXPORT NumericArray<Int64Type>;
+extern template class ARROW_EXPORT NumericArray<UInt64Type>;
+extern template class ARROW_EXPORT NumericArray<HalfFloatType>;
+extern template class ARROW_EXPORT NumericArray<FloatType>;
+extern template class ARROW_EXPORT NumericArray<DoubleType>;
+extern template class ARROW_EXPORT NumericArray<TimestampType>;
+
 }  // namespace arrow
 
 #endif  // ARROW_TYPES_PRIMITIVE_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string-test.cc 
b/cpp/src/arrow/types/string-test.cc
index af87a14..3c4b12b 100644
--- a/cpp/src/arrow/types/string-test.cc
+++ b/cpp/src/arrow/types/string-test.cc
@@ -47,7 +47,7 @@ TEST(TypesTest, BinaryType) {
 TEST(TypesTest, TestStringType) {
   StringType str;
   ASSERT_EQ(str.type, Type::STRING);
-  ASSERT_EQ(str.name(), std::string("string"));
+  ASSERT_EQ(str.ToString(), std::string("string"));
 }
 
 // ----------------------------------------------------------------------
@@ -66,8 +66,8 @@ class TestStringContainer : public ::testing::Test {
 
   void MakeArray() {
     length_ = offsets_.size() - 1;
-    value_buf_ = test::to_buffer(chars_);
-    offsets_buf_ = test::to_buffer(offsets_);
+    value_buf_ = test::GetBufferFromVector(chars_);
+    offsets_buf_ = test::GetBufferFromVector(offsets_);
     null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_);
     null_count_ = test::null_count(valid_bytes_);
 
@@ -131,7 +131,7 @@ TEST_F(TestStringContainer, TestGetString) {
 
 TEST_F(TestStringContainer, TestEmptyStringComparison) {
   offsets_ = {0, 0, 0, 0, 0, 0};
-  offsets_buf_ = test::to_buffer(offsets_);
+  offsets_buf_ = test::GetBufferFromVector(offsets_);
   length_ = offsets_.size() - 1;
 
   auto strings_a = std::make_shared<StringArray>(
@@ -227,8 +227,8 @@ class TestBinaryContainer : public ::testing::Test {
 
   void MakeArray() {
     length_ = offsets_.size() - 1;
-    value_buf_ = test::to_buffer(chars_);
-    offsets_buf_ = test::to_buffer(offsets_);
+    value_buf_ = test::GetBufferFromVector(chars_);
+    offsets_buf_ = test::GetBufferFromVector(offsets_);
 
     null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_);
     null_count_ = test::null_count(valid_bytes_);

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/string.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string.cc b/cpp/src/arrow/types/string.cc
index f6d26df..db963df 100644
--- a/cpp/src/arrow/types/string.cc
+++ b/cpp/src/arrow/types/string.cc
@@ -94,6 +94,10 @@ bool BinaryArray::RangeEquals(int32_t start_idx, int32_t 
end_idx, int32_t other_
   return true;
 }
 
+Status BinaryArray::Accept(ArrayVisitor* visitor) const {  // visitor entry point for binary arrays
+  return visitor->Visit(*this);  // hands this array to the visitor and returns the visitor's Status
+}
+
 StringArray::StringArray(int32_t length, const std::shared_ptr<Buffer>& 
offsets,
     const std::shared_ptr<Buffer>& data, int32_t null_count,
     const std::shared_ptr<Buffer>& null_bitmap)
@@ -104,6 +108,10 @@ Status StringArray::Validate() const {
   return BinaryArray::Validate();
 }
 
+Status StringArray::Accept(ArrayVisitor* visitor) const {  // overrides BinaryArray::Accept so *this is passed as a StringArray
+  return visitor->Visit(*this);  // hands this array to the visitor and returns the visitor's Status
+}
+
 // This used to be a static member variable of BinaryBuilder, but it can cause
 // valgrind to report a (spurious?) memory leak when needed in other shared
 // libraries. The problem came up while adding explicit visibility to libarrow
@@ -122,8 +130,8 @@ Status BinaryBuilder::Finish(std::shared_ptr<Array>* out) {
   const auto list = std::dynamic_pointer_cast<ListArray>(result);
   auto values = std::dynamic_pointer_cast<UInt8Array>(list->values());
 
-  *out = std::make_shared<BinaryArray>(list->length(), list->offset_buffer(),
-      values->data(), list->null_count(), list->null_bitmap());
+  *out = std::make_shared<BinaryArray>(list->length(), list->offsets(), 
values->data(),
+      list->null_count(), list->null_bitmap());
   return Status::OK();
 }
 
@@ -134,8 +142,8 @@ Status StringBuilder::Finish(std::shared_ptr<Array>* out) {
   const auto list = std::dynamic_pointer_cast<ListArray>(result);
   auto values = std::dynamic_pointer_cast<UInt8Array>(list->values());
 
-  *out = std::make_shared<StringArray>(list->length(), list->offset_buffer(),
-      values->data(), list->null_count(), list->null_bitmap());
+  *out = std::make_shared<StringArray>(list->length(), list->offsets(), 
values->data(),
+      list->null_count(), list->null_bitmap());
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/string.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string.h b/cpp/src/arrow/types/string.h
index aaba49c..c875243 100644
--- a/cpp/src/arrow/types/string.h
+++ b/cpp/src/arrow/types/string.h
@@ -37,6 +37,8 @@ class MemoryPool;
 
 class ARROW_EXPORT BinaryArray : public Array {
  public:
+  using TypeClass = BinaryType;
+
   BinaryArray(int32_t length, const std::shared_ptr<Buffer>& offsets,
       const std::shared_ptr<Buffer>& data, int32_t null_count = 0,
       const std::shared_ptr<Buffer>& null_bitmap = nullptr);
@@ -60,6 +62,8 @@ class ARROW_EXPORT BinaryArray : public Array {
   std::shared_ptr<Buffer> data() const { return data_buffer_; }
   std::shared_ptr<Buffer> offsets() const { return offset_buffer_; }
 
+  const int32_t* raw_offsets() const { return offsets_; }
+
   int32_t offset(int i) const { return offsets_[i]; }
 
   // Neither of these functions will perform boundschecking
@@ -73,6 +77,8 @@ class ARROW_EXPORT BinaryArray : public Array {
 
   Status Validate() const override;
 
+  Status Accept(ArrayVisitor* visitor) const override;
+
  private:
   std::shared_ptr<Buffer> offset_buffer_;
   const int32_t* offsets_;
@@ -83,6 +89,8 @@ class ARROW_EXPORT BinaryArray : public Array {
 
 class ARROW_EXPORT StringArray : public BinaryArray {
  public:
+  using TypeClass = StringType;
+
   StringArray(int32_t length, const std::shared_ptr<Buffer>& offsets,
       const std::shared_ptr<Buffer>& data, int32_t null_count = 0,
       const std::shared_ptr<Buffer>& null_bitmap = nullptr);
@@ -96,6 +104,8 @@ class ARROW_EXPORT StringArray : public BinaryArray {
   }
 
   Status Validate() const override;
+
+  Status Accept(ArrayVisitor* visitor) const override;
 };
 
 // BinaryBuilder : public ListBuilder
@@ -109,6 +119,12 @@ class ARROW_EXPORT BinaryBuilder : public ListBuilder {
     return byte_builder_->Append(value, length);
   }
 
+  Status Append(const char* value, int32_t length) {  // char* convenience overload
+    return Append(reinterpret_cast<const uint8_t*>(value), length);  // forwards the same bytes to the uint8_t* overload
+  }
+
+  Status Append(const std::string& value) { return Append(value.c_str(), 
value.size()); }  // std::string overload; NOTE(review): value.size() is implicitly narrowed to int32_t
+
   Status Finish(std::shared_ptr<Array>* out) override;
 
  protected:
@@ -121,13 +137,9 @@ class ARROW_EXPORT StringBuilder : public BinaryBuilder {
   explicit StringBuilder(MemoryPool* pool, const TypePtr& type)
       : BinaryBuilder(pool, type) {}
 
-  Status Finish(std::shared_ptr<Array>* out) override;
-
-  Status Append(const std::string& value) { return Append(value.c_str(), 
value.size()); }
+  using BinaryBuilder::Append;
 
-  Status Append(const char* value, int32_t length) {
-    return BinaryBuilder::Append(reinterpret_cast<const uint8_t*>(value), 
length);
-  }
+  Status Finish(std::shared_ptr<Array>* out) override;
 
   Status Append(const std::vector<std::string>& values, uint8_t* null_bytes);
 };

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/struct-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct-test.cc 
b/cpp/src/arrow/types/struct-test.cc
index 8e82c38..197d7d4 100644
--- a/cpp/src/arrow/types/struct-test.cc
+++ b/cpp/src/arrow/types/struct-test.cc
@@ -80,7 +80,7 @@ void ValidateBasicStructArray(const StructArray* result,
   ASSERT_EQ(4, list_char_arr->length());
   ASSERT_EQ(10, list_char_arr->values()->length());
   for (size_t i = 0; i < list_offsets.size(); ++i) {
-    ASSERT_EQ(list_offsets[i], list_char_arr->offsets()[i]);
+    ASSERT_EQ(list_offsets[i], list_char_arr->raw_offsets()[i]);
   }
   for (size_t i = 0; i < list_values.size(); ++i) {
     ASSERT_EQ(list_values[i], char_arr->Value(i));

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/struct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct.cc b/cpp/src/arrow/types/struct.cc
index 369c29d..0e0db23 100644
--- a/cpp/src/arrow/types/struct.cc
+++ b/cpp/src/arrow/types/struct.cc
@@ -87,6 +87,10 @@ Status StructArray::Validate() const {
   return Status::OK();
 }
 
+Status StructArray::Accept(ArrayVisitor* visitor) const {  // returns visitor's Status
+  return visitor->Visit(*this);
+}
+
 Status StructBuilder::Finish(std::shared_ptr<Array>* out) {
   std::vector<std::shared_ptr<Array>> fields(field_builders_.size());
   for (size_t i = 0; i < field_builders_.size(); ++i) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/struct.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct.h b/cpp/src/arrow/types/struct.h
index 65b8daf..035af05 100644
--- a/cpp/src/arrow/types/struct.h
+++ b/cpp/src/arrow/types/struct.h
@@ -31,6 +31,8 @@ namespace arrow {
 
 class ARROW_EXPORT StructArray : public Array {
  public:
+  using TypeClass = StructType;
+
   StructArray(const TypePtr& type, int32_t length, std::vector<ArrayPtr>& field_arrays,
       int32_t null_count = 0, std::shared_ptr<Buffer> null_bitmap = nullptr)
       : Array(type, length, null_count, null_bitmap) {
@@ -55,6 +57,8 @@ class ARROW_EXPORT StructArray : public Array {
   bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
       const std::shared_ptr<Array>& arr) const override;
 
+  Status Accept(ArrayVisitor* visitor) const override;
+
  protected:
   // The child arrays corresponding to each field of the struct data type.
   std::vector<ArrayPtr> field_arrays_;

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/test-common.h b/cpp/src/arrow/types/test-common.h
index 1957636..6e6ab85 100644
--- a/cpp/src/arrow/types/test-common.h
+++ b/cpp/src/arrow/types/test-common.h
@@ -24,6 +24,8 @@
 
 #include "gtest/gtest.h"
 
+#include "arrow/array.h"
+#include "arrow/builder.h"
 #include "arrow/test-util.h"
 #include "arrow/type.h"
 #include "arrow/util/memory-pool.h"
@@ -49,6 +51,20 @@ class TestBuilder : public ::testing::Test {
   unique_ptr<ArrayBuilder> builder_nn_;
 };
 
+template <class T, class Builder>  // Builder needs Append/AppendNull/Finish
+Status MakeArray(const std::vector<uint8_t>& valid_bytes, const std::vector<T>& values,
+    int size, Builder* builder, ArrayPtr* out) {
+  // Append `size` slots: values[i] where valid_bytes[i] > 0, otherwise a null
+  for (int i = 0; i < size; ++i) {
+    if (valid_bytes[i] > 0) {
+      RETURN_NOT_OK(builder->Append(values[i]));
+    } else {
+      RETURN_NOT_OK(builder->AppendNull());
+    }
+  }
+  return builder->Finish(out);
+}
+
 }  // namespace arrow
 
 #endif  // ARROW_TYPES_TEST_COMMON_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/union.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/union.cc b/cpp/src/arrow/types/union.cc
index c891b4a..cc2934b 100644
--- a/cpp/src/arrow/types/union.cc
+++ b/cpp/src/arrow/types/union.cc
@@ -24,25 +24,4 @@
 
 #include "arrow/type.h"
 
-namespace arrow {
-
-static inline std::string format_union(const std::vector<TypePtr>& child_types) {
-  std::stringstream s;
-  s << "union<";
-  for (size_t i = 0; i < child_types.size(); ++i) {
-    if (i) { s << ", "; }
-    s << child_types[i]->ToString();
-  }
-  s << ">";
-  return s.str();
-}
-
-std::string DenseUnionType::ToString() const {
-  return format_union(child_types_);
-}
-
-std::string SparseUnionType::ToString() const {
-  return format_union(child_types_);
-}
-
-}  // namespace arrow
+namespace arrow {}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/union.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/union.h b/cpp/src/arrow/types/union.h
index d2ee9bd..44f39cc 100644
--- a/cpp/src/arrow/types/union.h
+++ b/cpp/src/arrow/types/union.h
@@ -24,32 +24,11 @@
 
 #include "arrow/array.h"
 #include "arrow/type.h"
-#include "arrow/types/collection.h"
 
 namespace arrow {
 
 class Buffer;
 
-struct DenseUnionType : public CollectionType<Type::DENSE_UNION> {
-  typedef CollectionType<Type::DENSE_UNION> Base;
-
-  explicit DenseUnionType(const std::vector<TypePtr>& child_types) : Base() {
-    child_types_ = child_types;
-  }
-
-  virtual std::string ToString() const;
-};
-
-struct SparseUnionType : public CollectionType<Type::SPARSE_UNION> {
-  typedef CollectionType<Type::SPARSE_UNION> Base;
-
-  explicit SparseUnionType(const std::vector<TypePtr>& child_types) : Base() {
-    child_types_ = child_types;
-  }
-
-  virtual std::string ToString() const;
-};
-
 class UnionArray : public Array {
  protected:
   // The data are types encoded as int16

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/util/logging.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h
index 06ee841..b22f07d 100644
--- a/cpp/src/arrow/util/logging.h
+++ b/cpp/src/arrow/util/logging.h
@@ -118,9 +118,9 @@ class CerrLog {
 class FatalLog : public CerrLog {
  public:
   explicit FatalLog(int /* severity */)  // NOLINT
-      : CerrLog(ARROW_FATAL) {}          // NOLINT
+      : CerrLog(ARROW_FATAL){}           // NOLINT
 
-  [[noreturn]] ~FatalLog() {
+            [[noreturn]] ~FatalLog() {
     if (has_logged_) { std::cerr << std::endl; }
     std::exit(1);
   }

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/format/Metadata.md
----------------------------------------------------------------------
diff --git a/format/Metadata.md b/format/Metadata.md
index 653a4c7..a4878f3 100644
--- a/format/Metadata.md
+++ b/format/Metadata.md
@@ -98,6 +98,11 @@ Union:
   "typeIds" : [ /* integer */ ]
 }
 ```
+
+The `typeIds` field in the Union lists the codes used to denote each type, which
+may be different from the index of the child array. This is so that the union
+type ids do not have to be enumerated from 0.
+
 Int:
 ```
 {

Reply via email to