Repository: parquet-cpp
Updated Branches:
  refs/heads/master 8ef68b17a -> 4a7bf1174


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/4a7bf117/src/parquet/arrow/schema.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/schema.cc b/src/parquet/arrow/schema.cc
new file mode 100644
index 0000000..5a38a28
--- /dev/null
+++ b/src/parquet/arrow/schema.cc
@@ -0,0 +1,351 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/arrow/schema.h"
+
+#include <string>
+#include <vector>
+
+#include "parquet/api/schema.h"
+#include "parquet/arrow/utils.h"
+
+#include "arrow/types/decimal.h"
+#include "arrow/types/string.h"
+#include "arrow/util/status.h"
+
+using arrow::Field;
+using arrow::Status;
+using arrow::TypePtr;
+
+using ArrowType = arrow::Type;
+
+using parquet::Repetition;
+using parquet::schema::Node;
+using parquet::schema::NodePtr;
+using parquet::schema::GroupNode;
+using parquet::schema::PrimitiveNode;
+
+using ParquetType = parquet::Type;
+using parquet::LogicalType;
+
+namespace parquet {
+
+namespace arrow {
+
// Shared singleton instances of the Arrow types that Parquet physical and
// logical types are mapped onto.  The type objects are immutable, so one
// instance of each can be reused across all converted schemas.
const auto BOOL = std::make_shared<::arrow::BooleanType>();
const auto UINT8 = std::make_shared<::arrow::UInt8Type>();
const auto INT8 = std::make_shared<::arrow::Int8Type>();
const auto UINT16 = std::make_shared<::arrow::UInt16Type>();
const auto INT16 = std::make_shared<::arrow::Int16Type>();
const auto UINT32 = std::make_shared<::arrow::UInt32Type>();
const auto INT32 = std::make_shared<::arrow::Int32Type>();
const auto UINT64 = std::make_shared<::arrow::UInt64Type>();
const auto INT64 = std::make_shared<::arrow::Int64Type>();
const auto FLOAT = std::make_shared<::arrow::FloatType>();
const auto DOUBLE = std::make_shared<::arrow::DoubleType>();
const auto UTF8 = std::make_shared<::arrow::StringType>();
const auto TIMESTAMP_MS =
    std::make_shared<::arrow::TimestampType>(::arrow::TimestampType::Unit::MILLI);
// Parquet BINARY is represented as an Arrow list of uint8 (this Arrow
// version has no dedicated binary type).
const auto BINARY =
    std::make_shared<::arrow::ListType>(std::make_shared<::arrow::Field>("", UINT8));
+
+TypePtr MakeDecimalType(const PrimitiveNode* node) {
+  int precision = node->decimal_metadata().precision;
+  int scale = node->decimal_metadata().scale;
+  return std::make_shared<::arrow::DecimalType>(precision, scale);
+}
+
+static Status FromByteArray(const PrimitiveNode* node, TypePtr* out) {
+  switch (node->logical_type()) {
+    case LogicalType::UTF8:
+      *out = UTF8;
+      break;
+    case LogicalType::DECIMAL:
+      *out = MakeDecimalType(node);
+      break;
+    default:
+      // BINARY
+      *out = BINARY;
+      break;
+  }
+  return Status::OK();
+}
+
+static Status FromFLBA(const PrimitiveNode* node, TypePtr* out) {
+  switch (node->logical_type()) {
+    case LogicalType::NONE:
+      *out = BINARY;
+      break;
+    case LogicalType::DECIMAL:
+      *out = MakeDecimalType(node);
+      break;
+    default:
+      return Status::NotImplemented("unhandled type");
+      break;
+  }
+
+  return Status::OK();
+}
+
+static Status FromInt32(const PrimitiveNode* node, TypePtr* out) {
+  switch (node->logical_type()) {
+    case LogicalType::NONE:
+      *out = INT32;
+      break;
+    case LogicalType::UINT_8:
+      *out = UINT8;
+      break;
+    case LogicalType::INT_8:
+      *out = INT8;
+      break;
+    case LogicalType::UINT_16:
+      *out = UINT16;
+      break;
+    case LogicalType::INT_16:
+      *out = INT16;
+      break;
+    case LogicalType::UINT_32:
+      *out = UINT32;
+      break;
+    case LogicalType::DECIMAL:
+      *out = MakeDecimalType(node);
+      break;
+    default:
+      return Status::NotImplemented("Unhandled logical type for int32");
+      break;
+  }
+  return Status::OK();
+}
+
+static Status FromInt64(const PrimitiveNode* node, TypePtr* out) {
+  switch (node->logical_type()) {
+    case LogicalType::NONE:
+      *out = INT64;
+      break;
+    case LogicalType::UINT_64:
+      *out = UINT64;
+      break;
+    case LogicalType::DECIMAL:
+      *out = MakeDecimalType(node);
+      break;
+    case LogicalType::TIMESTAMP_MILLIS:
+      *out = TIMESTAMP_MS;
+      break;
+    default:
+      return Status::NotImplemented("Unhandled logical type for int64");
+      break;
+  }
+  return Status::OK();
+}
+
+// TODO: Logical Type Handling
+Status NodeToField(const NodePtr& node, std::shared_ptr<Field>* out) {
+  std::shared_ptr<::arrow::DataType> type;
+
+  if (node->is_repeated()) {
+    return Status::NotImplemented("No support yet for repeated node types");
+  }
+
+  if (node->is_group()) {
+    const GroupNode* group = static_cast<const GroupNode*>(node.get());
+    std::vector<std::shared_ptr<Field>> fields(group->field_count());
+    for (int i = 0; i < group->field_count(); i++) {
+      RETURN_NOT_OK(NodeToField(group->field(i), &fields[i]));
+    }
+    type = std::make_shared<::arrow::StructType>(fields);
+  } else {
+    // Primitive (leaf) node
+    const PrimitiveNode* primitive = static_cast<const 
PrimitiveNode*>(node.get());
+
+    switch (primitive->physical_type()) {
+      case ParquetType::BOOLEAN:
+        type = BOOL;
+        break;
+      case ParquetType::INT32:
+        RETURN_NOT_OK(FromInt32(primitive, &type));
+        break;
+      case ParquetType::INT64:
+        RETURN_NOT_OK(FromInt64(primitive, &type));
+        break;
+      case ParquetType::INT96:
+        // TODO: Do we have that type in Arrow?
+        // type = TypePtr(new Int96Type());
+        return Status::NotImplemented("int96");
+      case ParquetType::FLOAT:
+        type = FLOAT;
+        break;
+      case ParquetType::DOUBLE:
+        type = DOUBLE;
+        break;
+      case ParquetType::BYTE_ARRAY:
+        // TODO: Do we have that type in Arrow?
+        RETURN_NOT_OK(FromByteArray(primitive, &type));
+        break;
+      case ParquetType::FIXED_LEN_BYTE_ARRAY:
+        RETURN_NOT_OK(FromFLBA(primitive, &type));
+        break;
+    }
+  }
+
+  *out = std::make_shared<Field>(node->name(), type, !node->is_required());
+  return Status::OK();
+}
+
+Status FromParquetSchema(
+    const SchemaDescriptor* parquet_schema, std::shared_ptr<::arrow::Schema>* 
out) {
+  // TODO(wesm): Consider adding an arrow::Schema name attribute, which comes
+  // from the root Parquet node
+  const GroupNode* schema_node = parquet_schema->group_node();
+
+  std::vector<std::shared_ptr<Field>> fields(schema_node->field_count());
+  for (int i = 0; i < schema_node->field_count(); i++) {
+    RETURN_NOT_OK(NodeToField(schema_node->field(i), &fields[i]));
+  }
+
+  *out = std::make_shared<::arrow::Schema>(fields);
+  return Status::OK();
+}
+
+Status StructToNode(const std::shared_ptr<::arrow::StructType>& type,
+    const std::string& name, bool nullable, const WriterProperties& properties,
+    NodePtr* out) {
+  Repetition::type repetition = Repetition::REQUIRED;
+  if (nullable) { repetition = Repetition::OPTIONAL; }
+
+  std::vector<NodePtr> children(type->num_children());
+  for (int i = 0; i < type->num_children(); i++) {
+    RETURN_NOT_OK(FieldToNode(type->child(i), properties, &children[i]));
+  }
+
+  *out = GroupNode::Make(name, repetition, children);
+  return Status::OK();
+}
+
// Converts one Arrow field into a Parquet primitive (or, for structs, group)
// schema node, mapping the Arrow type to a Parquet physical type plus --
// where needed -- a logical type annotation.  Unsupported Arrow types return
// NotImplemented.
Status FieldToNode(const std::shared_ptr<Field>& field,
    const WriterProperties& properties, NodePtr* out) {
  LogicalType::type logical_type = LogicalType::NONE;
  ParquetType::type type;
  // A nullable Arrow field becomes an OPTIONAL Parquet node.
  Repetition::type repetition = Repetition::REQUIRED;
  if (field->nullable) { repetition = Repetition::OPTIONAL; }
  // FLBA byte width; -1 means "not a fixed-length type".
  int length = -1;

  switch (field->type->type) {
    // TODO:
    // case ArrowType::NA:
    // break;
    case ArrowType::BOOL:
      type = ParquetType::BOOLEAN;
      break;
    // Integers narrower than 32 bits are widened to INT32 storage, with a
    // logical annotation recording the original width and signedness.
    case ArrowType::UINT8:
      type = ParquetType::INT32;
      logical_type = LogicalType::UINT_8;
      break;
    case ArrowType::INT8:
      type = ParquetType::INT32;
      logical_type = LogicalType::INT_8;
      break;
    case ArrowType::UINT16:
      type = ParquetType::INT32;
      logical_type = LogicalType::UINT_16;
      break;
    case ArrowType::INT16:
      type = ParquetType::INT32;
      logical_type = LogicalType::INT_16;
      break;
    case ArrowType::UINT32:
      // Parquet 1.0 readers may not understand the UINT_32 annotation, so
      // store uint32 values losslessly in a signed 64-bit column instead.
      if (properties.version() == ::parquet::ParquetVersion::PARQUET_1_0) {
        type = ParquetType::INT64;
      } else {
        type = ParquetType::INT32;
        logical_type = LogicalType::UINT_32;
      }
      break;
    case ArrowType::INT32:
      type = ParquetType::INT32;
      break;
    case ArrowType::UINT64:
      type = ParquetType::INT64;
      logical_type = LogicalType::UINT_64;
      break;
    case ArrowType::INT64:
      type = ParquetType::INT64;
      break;
    case ArrowType::FLOAT:
      type = ParquetType::FLOAT;
      break;
    case ArrowType::DOUBLE:
      type = ParquetType::DOUBLE;
      break;
    case ArrowType::STRING:
      type = ParquetType::BYTE_ARRAY;
      logical_type = LogicalType::UTF8;
      break;
    case ArrowType::BINARY:
      type = ParquetType::BYTE_ARRAY;
      break;
    case ArrowType::DATE:
      type = ParquetType::INT32;
      logical_type = LogicalType::DATE;
      break;
    case ArrowType::TIMESTAMP: {
      // Only millisecond-resolution timestamps can be annotated so far.
      auto timestamp_type = static_cast<::arrow::TimestampType*>(field->type.get());
      if (timestamp_type->unit != ::arrow::TimestampType::Unit::MILLI) {
        return Status::NotImplemented(
            "Other timestamp units than millisecond are not yet support with parquet.");
      }
      type = ParquetType::INT64;
      logical_type = LogicalType::TIMESTAMP_MILLIS;
    } break;
    case ArrowType::TIMESTAMP_DOUBLE:
      type = ParquetType::INT64;
      // This is specified as seconds since the UNIX epoch
      // TODO: Converted type in Parquet?
      // logical_type = LogicalType::TIMESTAMP_MILLIS;
      break;
    case ArrowType::TIME:
      type = ParquetType::INT64;
      logical_type = LogicalType::TIME_MILLIS;
      break;
    case ArrowType::STRUCT: {
      // Structs become group nodes; delegate to StructToNode.
      auto struct_type = std::static_pointer_cast<::arrow::StructType>(field->type);
      return StructToNode(struct_type, field->name, field->nullable, properties, out);
    } break;
    default:
      // TODO: LIST, DENSE_UNION, SPARE_UNION, JSON_SCALAR, DECIMAL, DECIMAL_TEXT, VARCHAR
      return Status::NotImplemented("unhandled type");
  }
  *out = PrimitiveNode::Make(field->name, repetition, type, logical_type, length);
  return Status::OK();
}
+
+Status ToParquetSchema(const ::arrow::Schema* arrow_schema,
+    const WriterProperties& properties, std::shared_ptr<SchemaDescriptor>* 
out) {
+  std::vector<NodePtr> nodes(arrow_schema->num_fields());
+  for (int i = 0; i < arrow_schema->num_fields(); i++) {
+    RETURN_NOT_OK(FieldToNode(arrow_schema->field(i), properties, &nodes[i]));
+  }
+
+  NodePtr schema = GroupNode::Make("schema", Repetition::REPEATED, nodes);
+  *out = std::make_shared<::parquet::SchemaDescriptor>();
+  PARQUET_CATCH_NOT_OK((*out)->Init(schema));
+
+  return Status::OK();
+}
+
+}  // namespace arrow
+
+}  // namespace parquet

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/4a7bf117/src/parquet/arrow/schema.h
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/schema.h b/src/parquet/arrow/schema.h
new file mode 100644
index 0000000..6917b90
--- /dev/null
+++ b/src/parquet/arrow/schema.h
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef PARQUET_ARROW_SCHEMA_H
+#define PARQUET_ARROW_SCHEMA_H
+
+#include <memory>
+
+#include "arrow/schema.h"
+#include "arrow/type.h"
+#include "arrow/util/visibility.h"
+
+#include "parquet/api/schema.h"
+#include "parquet/api/writer.h"
+
+namespace arrow {
+
+class Status;
+
+}  // namespace arrow
+
+namespace parquet {
+
+namespace arrow {
+
// Converts a single Parquet schema node into an Arrow field.
::arrow::Status PARQUET_EXPORT NodeToField(
    const schema::NodePtr& node, std::shared_ptr<::arrow::Field>* out);

// Converts a complete Parquet schema into an Arrow schema.
::arrow::Status PARQUET_EXPORT FromParquetSchema(
    const SchemaDescriptor* parquet_schema, std::shared_ptr<::arrow::Schema>* out);

// Converts a single Arrow field into a Parquet schema node; writer
// properties select version-dependent type mappings.
::arrow::Status PARQUET_EXPORT FieldToNode(const std::shared_ptr<::arrow::Field>& field,
    const WriterProperties& properties, schema::NodePtr* out);

// Converts an Arrow schema into a Parquet schema descriptor.
::arrow::Status PARQUET_EXPORT ToParquetSchema(const ::arrow::Schema* arrow_schema,
    const WriterProperties& properties, std::shared_ptr<SchemaDescriptor>* out);
+
+}  // namespace arrow
+
+}  // namespace parquet
+
+#endif  // PARQUET_ARROW_SCHEMA_H

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/4a7bf117/src/parquet/arrow/test-util.h
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/test-util.h b/src/parquet/arrow/test-util.h
new file mode 100644
index 0000000..deac9f7
--- /dev/null
+++ b/src/parquet/arrow/test-util.h
@@ -0,0 +1,202 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <string>
+#include <vector>
+
+#include "arrow/test-util.h"
+#include "arrow/types/primitive.h"
+#include "arrow/types/string.h"
+
+namespace parquet {
+
+namespace arrow {
+
// Type traits used to select the appropriate NonNullArray/NullableArray
// overload for a given Arrow type via enable_if.
template <typename ArrowType>
using is_arrow_float = std::is_floating_point<typename ArrowType::c_type>;

template <typename ArrowType>
using is_arrow_int = std::is_integral<typename ArrowType::c_type>;

template <typename ArrowType>
using is_arrow_string = std::is_same<ArrowType, ::arrow::StringType>;
+
// Builds a PrimitiveArray of `size` random floating-point values with no
// null entries.
template <class ArrowType>
typename std::enable_if<is_arrow_float<ArrowType>::value,
    std::shared_ptr<::arrow::PrimitiveArray>>::type
NonNullArray(size_t size) {
  std::vector<typename ArrowType::c_type> values;
  // NOTE(review): argument meaning (seed/min/max) assumed from
  // arrow/test-util.h's random_real signature -- confirm against that header.
  ::arrow::test::random_real<typename ArrowType::c_type>(size, 0, 0, 1, &values);
  ::arrow::NumericBuilder<ArrowType> builder(
      ::arrow::default_memory_pool(), std::make_shared<ArrowType>());
  builder.Append(values.data(), values.size());
  return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish());
}
+
// Builds a PrimitiveArray of `size` random integers with no null entries.
template <class ArrowType>
typename std::enable_if<is_arrow_int<ArrowType>::value,
    std::shared_ptr<::arrow::PrimitiveArray>>::type
NonNullArray(size_t size) {
  std::vector<typename ArrowType::c_type> values;
  // Values drawn from [0, 64) -- assumed from randint(size, lo, hi, out);
  // confirm against arrow/test-util.h.
  ::arrow::test::randint<typename ArrowType::c_type>(size, 0, 64, &values);
  ::arrow::NumericBuilder<ArrowType> builder(
      ::arrow::default_memory_pool(), std::make_shared<ArrowType>());
  builder.Append(values.data(), values.size());
  return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish());
}
+
// Builds a StringArray of `size` copies of "test-string" with no nulls.
template <class ArrowType>
typename std::enable_if<is_arrow_string<ArrowType>::value,
    std::shared_ptr<::arrow::StringArray>>::type
NonNullArray(size_t size) {
  ::arrow::StringBuilder builder(
      ::arrow::default_memory_pool(), std::make_shared<::arrow::StringType>());
  for (size_t i = 0; i < size; i++) {
    builder.Append("test-string");
  }
  return std::static_pointer_cast<::arrow::StringArray>(builder.Finish());
}
+
+template <>
+std::shared_ptr<::arrow::PrimitiveArray> 
NonNullArray<::arrow::BooleanType>(size_t size) {
+  std::vector<uint8_t> values;
+  ::arrow::test::randint<uint8_t>(size, 0, 1, &values);
+  ::arrow::BooleanBuilder builder(
+      ::arrow::default_memory_pool(), 
std::make_shared<::arrow::BooleanType>());
+  builder.Append(values.data(), values.size());
+  return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish());
+}
+
// This helper function only supports (size/2) nulls: it nulls every even
// index among the first 2*num_nulls slots, so num_nulls must be <= size/2
// or valid_bytes is indexed out of bounds.
template <typename ArrowType>
typename std::enable_if<is_arrow_float<ArrowType>::value,
    std::shared_ptr<::arrow::PrimitiveArray>>::type
NullableArray(size_t size, size_t num_nulls) {
  std::vector<typename ArrowType::c_type> values;
  ::arrow::test::random_real<typename ArrowType::c_type>(size, 0, 0, 1, &values);
  std::vector<uint8_t> valid_bytes(size, 1);

  for (size_t i = 0; i < num_nulls; i++) {
    valid_bytes[i * 2] = 0;
  }

  ::arrow::NumericBuilder<ArrowType> builder(
      ::arrow::default_memory_pool(), std::make_shared<ArrowType>());
  builder.Append(values.data(), values.size(), valid_bytes.data());
  return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish());
}
+
// This helper function only supports (size/2) nulls: it nulls every even
// index among the first 2*num_nulls slots, so num_nulls must be <= size/2
// or valid_bytes is indexed out of bounds.
template <typename ArrowType>
typename std::enable_if<is_arrow_int<ArrowType>::value,
    std::shared_ptr<::arrow::PrimitiveArray>>::type
NullableArray(size_t size, size_t num_nulls) {
  std::vector<typename ArrowType::c_type> values;
  ::arrow::test::randint<typename ArrowType::c_type>(size, 0, 64, &values);
  std::vector<uint8_t> valid_bytes(size, 1);

  for (size_t i = 0; i < num_nulls; i++) {
    valid_bytes[i * 2] = 0;
  }

  ::arrow::NumericBuilder<ArrowType> builder(
      ::arrow::default_memory_pool(), std::make_shared<ArrowType>());
  builder.Append(values.data(), values.size(), valid_bytes.data());
  return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish());
}
+
// This helper function only supports (size/2) nulls yet.
// NOTE(review): `valid_bytes` is computed but never passed to the builder,
// so the returned array currently contains no nulls regardless of
// num_nulls. This looks like a bug -- confirm against the intended
// StringBuilder API before relying on null positions in tests.
template <typename ArrowType>
typename std::enable_if<is_arrow_string<ArrowType>::value,
    std::shared_ptr<::arrow::StringArray>>::type
NullableArray(size_t size, size_t num_nulls) {
  std::vector<uint8_t> valid_bytes(size, 1);

  for (size_t i = 0; i < num_nulls; i++) {
    valid_bytes[i * 2] = 0;
  }

  ::arrow::StringBuilder builder(
      ::arrow::default_memory_pool(), std::make_shared<::arrow::StringType>());
  for (size_t i = 0; i < size; i++) {
    builder.Append("test-string");
  }
  return std::static_pointer_cast<::arrow::StringArray>(builder.Finish());
}
+
+// This helper function only supports (size/2) nulls yet.
+template <>
+std::shared_ptr<::arrow::PrimitiveArray> NullableArray<::arrow::BooleanType>(
+    size_t size, size_t num_nulls) {
+  std::vector<uint8_t> values;
+  ::arrow::test::randint<uint8_t>(size, 0, 1, &values);
+  std::vector<uint8_t> valid_bytes(size, 1);
+
+  for (size_t i = 0; i < num_nulls; i++) {
+    valid_bytes[i * 2] = 0;
+  }
+
+  ::arrow::BooleanBuilder builder(
+      ::arrow::default_memory_pool(), 
std::make_shared<::arrow::BooleanType>());
+  builder.Append(values.data(), values.size(), valid_bytes.data());
+  return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish());
+}
+
+std::shared_ptr<::arrow::Column> MakeColumn(const std::string& name,
+    const std::shared_ptr<::arrow::Array>& array, bool nullable) {
+  auto field = std::make_shared<::arrow::Field>(name, array->type(), nullable);
+  return std::make_shared<::arrow::Column>(field, array);
+}
+
+std::shared_ptr<::arrow::Table> MakeSimpleTable(
+    const std::shared_ptr<::arrow::Array>& values, bool nullable) {
+  std::shared_ptr<::arrow::Column> column = MakeColumn("col", values, 
nullable);
+  std::vector<std::shared_ptr<::arrow::Column>> columns({column});
+  std::vector<std::shared_ptr<::arrow::Field>> fields({column->field()});
+  auto schema = std::make_shared<::arrow::Schema>(fields);
+  return std::make_shared<::arrow::Table>("table", schema, columns);
+}
+
// Compares `expected` element-wise against the raw value buffer of `result`,
// reinterpreted as T.  Does not check validity bitmaps.
template <typename T>
void ExpectArray(T* expected, ::arrow::Array* result) {
  auto p_array = static_cast<::arrow::PrimitiveArray*>(result);
  for (int i = 0; i < result->length(); i++) {
    EXPECT_EQ(expected[i], reinterpret_cast<const T*>(p_array->data()->data())[i]);
  }
}
+
// Variant keyed on the Arrow type rather than the C value type.  ArrowType
// appears only in a non-deduced context, so callers must spell the template
// argument explicitly: ExpectArray<SomeArrowType>(...).
template <typename ArrowType>
void ExpectArray(typename ArrowType::c_type* expected, ::arrow::Array* result) {
  ::arrow::PrimitiveArray* p_array = static_cast<::arrow::PrimitiveArray*>(result);
  for (int64_t i = 0; i < result->length(); i++) {
    EXPECT_EQ(expected[i],
        reinterpret_cast<const typename ArrowType::c_type*>(p_array->data()->data())[i]);
  }
}
+
+template <>
+void ExpectArray<::arrow::BooleanType>(uint8_t* expected, ::arrow::Array* 
result) {
+  ::arrow::BooleanBuilder builder(
+      ::arrow::default_memory_pool(), 
std::make_shared<::arrow::BooleanType>());
+  builder.Append(expected, result->length());
+  std::shared_ptr<::arrow::Array> expected_array = builder.Finish();
+  EXPECT_TRUE(result->Equals(expected_array));
+}
+
+}  // namespace arrow
+
+}  // namespace parquet

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/4a7bf117/src/parquet/arrow/utils.h
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/utils.h b/src/parquet/arrow/utils.h
new file mode 100644
index 0000000..b443c99
--- /dev/null
+++ b/src/parquet/arrow/utils.h
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef PARQUET_ARROW_UTILS_H
+#define PARQUET_ARROW_UTILS_H
+
+#include <sstream>
+
+#include "arrow/util/status.h"
+#include "parquet/exception.h"
+
+namespace parquet {
+namespace arrow {
+
// Runs statement `s` and converts a thrown ParquetException into a returned
// ::arrow::Status::Invalid.
#define PARQUET_CATCH_NOT_OK(s)                    \
  try {                                            \
    (s);                                           \
  } catch (const ::parquet::ParquetException& e) { \
    return ::arrow::Status::Invalid(e.what());     \
  }

// Runs statement `s`, deliberately swallowing any ParquetException
// (best-effort semantics).
#define PARQUET_IGNORE_NOT_OK(s) \
  try {                          \
    (s);                         \
  } catch (const ::parquet::ParquetException& e) {}

// Evaluates an ::arrow::Status expression and converts a failure into a
// thrown ParquetException.
// Fix: removed the trailing semicolon after `while (0)` -- it defeated the
// do/while(0) idiom and made the macro expand to two statements, breaking
// usage in unbraced if/else bodies.
#define PARQUET_THROW_NOT_OK(s)                    \
  do {                                             \
    ::arrow::Status _s = (s);                      \
    if (!_s.ok()) {                                \
      std::stringstream ss;                        \
      ss << "Arrow error: " << _s.ToString();      \
      throw ::parquet::ParquetException(ss.str()); \
    }                                              \
  } while (0)
+
+}  // namespace arrow
+}  // namespace parquet
+
+#endif  // PARQUET_ARROW_UTILS_H

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/4a7bf117/src/parquet/arrow/writer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc
new file mode 100644
index 0000000..5b5f41f
--- /dev/null
+++ b/src/parquet/arrow/writer.cc
@@ -0,0 +1,373 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/arrow/writer.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "parquet/arrow/schema.h"
+#include "parquet/arrow/utils.h"
+
+#include "arrow/array.h"
+#include "arrow/column.h"
+#include "arrow/table.h"
+#include "arrow/types/construct.h"
+#include "arrow/types/primitive.h"
+#include "arrow/types/string.h"
+#include "arrow/util/status.h"
+
+using arrow::MemoryPool;
+using arrow::PoolBuffer;
+using arrow::PrimitiveArray;
+using arrow::Status;
+using arrow::StringArray;
+using arrow::Table;
+
+using parquet::ParquetFileWriter;
+using parquet::ParquetVersion;
+using parquet::schema::GroupNode;
+
+namespace parquet {
+namespace arrow {
+
// Pimpl implementation of FileWriter: drives a parquet-cpp
// ParquetFileWriter from Arrow arrays, converting Arrow memory layouts into
// the physical representation parquet-cpp expects.
class FileWriter::Impl {
 public:
  Impl(MemoryPool* pool, std::unique_ptr<ParquetFileWriter> writer);

  // Closes the current row group (if any) and appends a new one.
  Status NewRowGroup(int64_t chunk_size);
  // Writes `length` values of `data` starting at `offset` to `writer`,
  // translating the Arrow validity bitmap into Parquet definition levels.
  template <typename ParquetType, typename ArrowType>
  Status TypedWriteBatch(ColumnWriter* writer, const PrimitiveArray* data,
      int64_t offset, int64_t length);

  // TODO(uwe): Same code as in reader.cc the only difference is the name of the temporary
  // buffer
  // True when the Arrow in-memory value layout can be handed to parquet-cpp
  // unchanged: identical types, or integral types of the same width.
  template <typename InType, typename OutType>
  struct can_copy_ptr {
    static constexpr bool value =
        std::is_same<InType, OutType>::value ||
        (std::is_integral<InType>{} && std::is_integral<OutType>{} &&
            (sizeof(InType) == sizeof(OutType)));
  };

  // Zero-copy path: reinterpret the input pointer directly.
  template <typename InType, typename OutType,
      typename std::enable_if<can_copy_ptr<InType, OutType>::value>::type* = nullptr>
  Status ConvertPhysicalType(const InType* in_ptr, int64_t, const OutType** out_ptr) {
    *out_ptr = reinterpret_cast<const OutType*>(in_ptr);
    return Status::OK();
  }

  // Conversion path: element-wise copy into data_buffer_ with a value cast.
  template <typename InType, typename OutType,
      typename std::enable_if<not can_copy_ptr<InType, OutType>::value>::type* = nullptr>
  Status ConvertPhysicalType(
      const InType* in_ptr, int64_t length, const OutType** out_ptr) {
    RETURN_NOT_OK(data_buffer_.Resize(length * sizeof(OutType)));
    OutType* mutable_out_ptr = reinterpret_cast<OutType*>(data_buffer_.mutable_data());
    std::copy(in_ptr, in_ptr + length, mutable_out_ptr);
    *out_ptr = mutable_out_ptr;
    return Status::OK();
  }

  Status WriteFlatColumnChunk(const PrimitiveArray* data, int64_t offset, int64_t length);
  Status WriteFlatColumnChunk(const StringArray* data, int64_t offset, int64_t length);
  Status Close();

  virtual ~Impl() {}

 private:
  friend class FileWriter;

  MemoryPool* pool_;
  // Buffer used for storing the data of an array converted to the physical type
  // as expected by parquet-cpp.
  PoolBuffer data_buffer_;
  // Scratch space for Parquet definition levels.
  // NOTE(review): unlike data_buffer_, the constructor does not pass pool_
  // to this buffer, so it presumably allocates from the default pool --
  // verify whether that is intended.
  PoolBuffer def_levels_buffer_;
  std::unique_ptr<ParquetFileWriter> writer_;
  // Owned by writer_; nullptr until the first NewRowGroup call.
  RowGroupWriter* row_group_writer_;
};
+
+FileWriter::Impl::Impl(
+    MemoryPool* pool, std::unique_ptr<ParquetFileWriter> writer)
+    : pool_(pool),
+      data_buffer_(pool),
+      writer_(std::move(writer)),
+      row_group_writer_(nullptr) {}
+
+Status FileWriter::Impl::NewRowGroup(int64_t chunk_size) {
+  if (row_group_writer_ != nullptr) { 
PARQUET_CATCH_NOT_OK(row_group_writer_->Close()); }
+  PARQUET_CATCH_NOT_OK(row_group_writer_ = 
writer_->AppendRowGroup(chunk_size));
+  return Status::OK();
+}
+
// Writes `length` values of `data` starting at `offset` to the given
// parquet-cpp column writer.  The Arrow validity bitmap is translated into
// Parquet definition levels (0 = null, 1 = present), and the value buffer
// is converted to the Parquet physical type via ConvertPhysicalType when
// the two representations differ.  Nested (max definition level > 1)
// columns are not supported.
template <typename ParquetType, typename ArrowType>
Status FileWriter::Impl::TypedWriteBatch(ColumnWriter* column_writer,
    const PrimitiveArray* data, int64_t offset, int64_t length) {
  using ArrowCType = typename ArrowType::c_type;
  using ParquetCType = typename ParquetType::c_type;

  DCHECK((offset + length) <= data->length());
  auto data_ptr = reinterpret_cast<const ArrowCType*>(data->data()->data()) + offset;
  auto writer =
      reinterpret_cast<TypedColumnWriter<ParquetType>*>(column_writer);
  if (writer->descr()->max_definition_level() == 0) {
    // no nulls, just dump the data
    const ParquetCType* data_writer_ptr = nullptr;
    RETURN_NOT_OK((ConvertPhysicalType<ArrowCType, ParquetCType>(
        data_ptr, length, &data_writer_ptr)));
    PARQUET_CATCH_NOT_OK(writer->WriteBatch(length, nullptr, nullptr, data_writer_ptr));
  } else if (writer->descr()->max_definition_level() == 1) {
    RETURN_NOT_OK(def_levels_buffer_.Resize(length * sizeof(int16_t)));
    int16_t* def_levels_ptr =
        reinterpret_cast<int16_t*>(def_levels_buffer_.mutable_data());
    if (data->null_count() == 0) {
      // Optional column but no actual nulls: every definition level is 1.
      std::fill(def_levels_ptr, def_levels_ptr + length, 1);
      const ParquetCType* data_writer_ptr = nullptr;
      RETURN_NOT_OK((ConvertPhysicalType<ArrowCType, ParquetCType>(
          data_ptr, length, &data_writer_ptr)));
      PARQUET_CATCH_NOT_OK(
          writer->WriteBatch(length, def_levels_ptr, nullptr, data_writer_ptr));
    } else {
      // Nulls present: compact the non-null values into data_buffer_ while
      // emitting a 0/1 definition level for every slot.
      RETURN_NOT_OK(data_buffer_.Resize(length * sizeof(ParquetCType)));
      auto buffer_ptr = reinterpret_cast<ParquetCType*>(data_buffer_.mutable_data());
      int buffer_idx = 0;
      for (int i = 0; i < length; i++) {
        if (data->IsNull(offset + i)) {
          def_levels_ptr[i] = 0;
        } else {
          def_levels_ptr[i] = 1;
          buffer_ptr[buffer_idx++] = static_cast<ParquetCType>(data_ptr[i]);
        }
      }
      PARQUET_CATCH_NOT_OK(
          writer->WriteBatch(length, def_levels_ptr, nullptr, buffer_ptr));
    }
  } else {
    return Status::NotImplemented("no support for max definition level > 1 yet");
  }
  // NOTE(review): the column writer is closed after one batch, implying a
  // single TypedWriteBatch call per column chunk -- confirm callers rely on
  // this.
  PARQUET_CATCH_NOT_OK(writer->Close());
  return Status::OK();
}
+
// This specialization seems quite similar but it significantly differs in two points:
// * offset is added at the most latest time to the pointer as we have sub-byte access
// * Arrow data is stored bitwise thus we cannot use std::copy to transform from
//   ArrowType::c_type to ParquetType::c_type
template <>
Status FileWriter::Impl::TypedWriteBatch<BooleanType, ::arrow::BooleanType>(
    ColumnWriter* column_writer, const PrimitiveArray* data, int64_t offset,
    int64_t length) {
  DCHECK((offset + length) <= data->length());
  // Unpack the Arrow bit-packed booleans into one bool per byte, which is
  // what parquet-cpp's WriteBatch expects.
  RETURN_NOT_OK(data_buffer_.Resize(length));
  auto data_ptr = reinterpret_cast<const uint8_t*>(data->data()->data());
  auto buffer_ptr = reinterpret_cast<bool*>(data_buffer_.mutable_data());
  auto writer = reinterpret_cast<TypedColumnWriter<BooleanType>*>(
      column_writer);
  if (writer->descr()->max_definition_level() == 0) {
    // no nulls, just dump the data
    for (int64_t i = 0; i < length; i++) {
      buffer_ptr[i] = ::arrow::util::get_bit(data_ptr, offset + i);
    }
    PARQUET_CATCH_NOT_OK(writer->WriteBatch(length, nullptr, nullptr, buffer_ptr));
  } else if (writer->descr()->max_definition_level() == 1) {
    RETURN_NOT_OK(def_levels_buffer_.Resize(length * sizeof(int16_t)));
    int16_t* def_levels_ptr =
        reinterpret_cast<int16_t*>(def_levels_buffer_.mutable_data());
    if (data->null_count() == 0) {
      // Optional column but no actual nulls: every definition level is 1.
      std::fill(def_levels_ptr, def_levels_ptr + length, 1);
      for (int64_t i = 0; i < length; i++) {
        buffer_ptr[i] = ::arrow::util::get_bit(data_ptr, offset + i);
      }
      // TODO(PARQUET-644): write boolean values as a packed bitmap
      PARQUET_CATCH_NOT_OK(
          writer->WriteBatch(length, def_levels_ptr, nullptr, buffer_ptr));
    } else {
      // Nulls present: compact the non-null bits while emitting a 0/1
      // definition level per slot.
      int buffer_idx = 0;
      for (int i = 0; i < length; i++) {
        if (data->IsNull(offset + i)) {
          def_levels_ptr[i] = 0;
        } else {
          def_levels_ptr[i] = 1;
          buffer_ptr[buffer_idx++] = ::arrow::util::get_bit(data_ptr, offset + i);
        }
      }
      PARQUET_CATCH_NOT_OK(
          writer->WriteBatch(length, def_levels_ptr, nullptr, buffer_ptr));
    }
  } else {
    return Status::NotImplemented("no support for max definition level > 1 yet");
  }
  PARQUET_CATCH_NOT_OK(writer->Close());
  return Status::OK();
}
+
// Close the currently open row group (if any), then the underlying Parquet
// file writer. Exceptions thrown by parquet-cpp are converted to Status.
Status FileWriter::Impl::Close() {
  if (row_group_writer_ != nullptr) { PARQUET_CATCH_NOT_OK(row_group_writer_->Close()); }
  PARQUET_CATCH_NOT_OK(writer_->Close());
  return Status::OK();
}
+
+#define TYPED_BATCH_CASE(ENUM, ArrowType, ParquetType)                         
   \
+  case ::arrow::Type::ENUM:                                                    
   \
+    return TypedWriteBatch<ParquetType, ArrowType>(writer, data, offset, 
length); \
+    break;
+
+Status FileWriter::Impl::WriteFlatColumnChunk(
+    const PrimitiveArray* data, int64_t offset, int64_t length) {
+  ColumnWriter* writer;
+  PARQUET_CATCH_NOT_OK(writer = row_group_writer_->NextColumn());
+  switch (data->type_enum()) {
+    TYPED_BATCH_CASE(BOOL, ::arrow::BooleanType, BooleanType)
+    TYPED_BATCH_CASE(UINT8, ::arrow::UInt8Type, Int32Type)
+    TYPED_BATCH_CASE(INT8, ::arrow::Int8Type, Int32Type)
+    TYPED_BATCH_CASE(UINT16, ::arrow::UInt16Type, Int32Type)
+    TYPED_BATCH_CASE(INT16, ::arrow::Int16Type, Int32Type)
+    case ::arrow::Type::UINT32:
+      if (writer_->properties()->version() == ParquetVersion::PARQUET_1_0) {
+        // Parquet 1.0 reader cannot read the UINT_32 logical type. Thus we 
need
+        // to use the larger Int64Type to store them lossless.
+        return TypedWriteBatch<Int64Type, ::arrow::UInt32Type>(
+            writer, data, offset, length);
+      } else {
+        return TypedWriteBatch<Int32Type, ::arrow::UInt32Type>(
+            writer, data, offset, length);
+      }
+      TYPED_BATCH_CASE(INT32, ::arrow::Int32Type, Int32Type)
+      TYPED_BATCH_CASE(UINT64, ::arrow::UInt64Type, Int64Type)
+      TYPED_BATCH_CASE(INT64, ::arrow::Int64Type, Int64Type)
+      TYPED_BATCH_CASE(TIMESTAMP, ::arrow::TimestampType, Int64Type)
+      TYPED_BATCH_CASE(FLOAT, ::arrow::FloatType, FloatType)
+      TYPED_BATCH_CASE(DOUBLE, ::arrow::DoubleType, DoubleType)
+    default:
+      return Status::NotImplemented(data->type()->ToString());
+  }
+}
+
+Status FileWriter::Impl::WriteFlatColumnChunk(
+    const StringArray* data, int64_t offset, int64_t length) {
+  ColumnWriter* column_writer;
+  PARQUET_CATCH_NOT_OK(column_writer = row_group_writer_->NextColumn());
+  DCHECK((offset + length) <= data->length());
+  RETURN_NOT_OK(data_buffer_.Resize(length * sizeof(ByteArray)));
+  auto buffer_ptr = reinterpret_cast<ByteArray*>(data_buffer_.mutable_data());
+  auto values = std::dynamic_pointer_cast<PrimitiveArray>(data->values());
+  auto data_ptr = reinterpret_cast<const uint8_t*>(values->data()->data());
+  DCHECK(values != nullptr);
+  auto writer = reinterpret_cast<TypedColumnWriter<ByteArrayType>*>(
+      column_writer);
+  if (writer->descr()->max_definition_level() > 0) {
+    RETURN_NOT_OK(def_levels_buffer_.Resize(length * sizeof(int16_t)));
+  }
+  int16_t* def_levels_ptr = 
reinterpret_cast<int16_t*>(def_levels_buffer_.mutable_data());
+  if (writer->descr()->max_definition_level() == 0 || data->null_count() == 0) 
{
+    // no nulls, just dump the data
+    for (int64_t i = 0; i < length; i++) {
+      buffer_ptr[i] = ByteArray(
+          data->value_length(i + offset), data_ptr + data->value_offset(i));
+    }
+    if (writer->descr()->max_definition_level() > 0) {
+      std::fill(def_levels_ptr, def_levels_ptr + length, 1);
+    }
+    PARQUET_CATCH_NOT_OK(writer->WriteBatch(length, def_levels_ptr, nullptr, 
buffer_ptr));
+  } else if (writer->descr()->max_definition_level() == 1) {
+    int buffer_idx = 0;
+    for (int64_t i = 0; i < length; i++) {
+      if (data->IsNull(offset + i)) {
+        def_levels_ptr[i] = 0;
+      } else {
+        def_levels_ptr[i] = 1;
+        buffer_ptr[buffer_idx++] = ByteArray(
+            data->value_length(i + offset), data_ptr + data->value_offset(i + 
offset));
+      }
+    }
+    PARQUET_CATCH_NOT_OK(writer->WriteBatch(length, def_levels_ptr, nullptr, 
buffer_ptr));
+  } else {
+    return Status::NotImplemented("no support for max definition level > 1 
yet");
+  }
+  PARQUET_CATCH_NOT_OK(writer->Close());
+  return Status::OK();
+}
+
// Construct a FileWriter that takes ownership of an already-opened
// ParquetFileWriter; `pool` is used for the intermediate conversion buffers.
FileWriter::FileWriter(
    MemoryPool* pool, std::unique_ptr<ParquetFileWriter> writer)
    : impl_(new FileWriter::Impl(pool, std::move(writer))) {}
+
// Start a new row group; subsequent WriteFlatColumnChunk calls write its
// columns in order.
Status FileWriter::NewRowGroup(int64_t chunk_size) {
  return impl_->NewRowGroup(chunk_size);
}
+
+Status FileWriter::WriteFlatColumnChunk(
+    const ::arrow::Array* array, int64_t offset, int64_t length) {
+  int64_t real_length = length;
+  if (length == -1) { real_length = array->length(); }
+  if (array->type_enum() == ::arrow::Type::STRING) {
+    auto string_array = dynamic_cast<const ::arrow::StringArray*>(array);
+    DCHECK(string_array);
+    return impl_->WriteFlatColumnChunk(string_array, offset, real_length);
+  } else {
+    auto primitive_array = dynamic_cast<const PrimitiveArray*>(array);
+    if (!primitive_array) {
+      return Status::NotImplemented("Table must consist of PrimitiveArray 
instances");
+    }
+    return impl_->WriteFlatColumnChunk(primitive_array, offset, real_length);
+  }
+}
+
// Close any open row group and the underlying Parquet file.
Status FileWriter::Close() {
  return impl_->Close();
}
+
// Pool used by the implementation for intermediate conversion buffers.
MemoryPool* FileWriter::memory_pool() const {
  return impl_->pool_;
}
+
+FileWriter::~FileWriter() {}
+
+Status WriteFlatTable(const Table* table, MemoryPool* pool,
+    const std::shared_ptr<OutputStream>& sink, int64_t chunk_size,
+    const std::shared_ptr<WriterProperties>& properties) {
+  std::shared_ptr<SchemaDescriptor> parquet_schema;
+  RETURN_NOT_OK(
+      ToParquetSchema(table->schema().get(), *properties.get(), 
&parquet_schema));
+  auto schema_node = 
std::static_pointer_cast<GroupNode>(parquet_schema->schema_root());
+  std::unique_ptr<ParquetFileWriter> parquet_writer =
+      ParquetFileWriter::Open(sink, schema_node, properties);
+  FileWriter writer(pool, std::move(parquet_writer));
+
+  // TODO(ARROW-232) Support writing chunked arrays.
+  for (int i = 0; i < table->num_columns(); i++) {
+    if (table->column(i)->data()->num_chunks() != 1) {
+      return Status::NotImplemented("No support for writing chunked arrays 
yet.");
+    }
+  }
+
+  for (int chunk = 0; chunk * chunk_size < table->num_rows(); chunk++) {
+    int64_t offset = chunk * chunk_size;
+    int64_t size = std::min(chunk_size, table->num_rows() - offset);
+    RETURN_NOT_OK_ELSE(writer.NewRowGroup(size), 
PARQUET_IGNORE_NOT_OK(writer.Close()));
+    for (int i = 0; i < table->num_columns(); i++) {
+      std::shared_ptr<::arrow::Array> array = 
table->column(i)->data()->chunk(0);
+      RETURN_NOT_OK_ELSE(writer.WriteFlatColumnChunk(array.get(), offset, 
size),
+          PARQUET_IGNORE_NOT_OK(writer.Close()));
+    }
+  }
+
+  return writer.Close();
+}
+
+}  // namespace arrow
+
+}  // namespace parquet

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/4a7bf117/src/parquet/arrow/writer.h
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/writer.h b/src/parquet/arrow/writer.h
new file mode 100644
index 0000000..92524d8
--- /dev/null
+++ b/src/parquet/arrow/writer.h
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef PARQUET_ARROW_WRITER_H
+#define PARQUET_ARROW_WRITER_H
+
+#include <memory>
+
+#include "parquet/api/schema.h"
+#include "parquet/api/writer.h"
+
+namespace arrow {
+
+class Array;
+class MemoryPool;
+class PrimitiveArray;
+class RowBatch;
+class Status;
+class StringArray;
+class Table;
+}
+
+namespace parquet {
+
+namespace arrow {
+
/**
 * Iterative API:
 *  Start a new RowGroup/Chunk with NewRowGroup
 *  Write column-by-column the whole column chunk
 */
class PARQUET_EXPORT FileWriter {
 public:
  /// Takes ownership of an already-opened ParquetFileWriter; `pool` is used
  /// for intermediate conversion buffers.
  FileWriter(::arrow::MemoryPool* pool, std::unique_ptr<ParquetFileWriter> writer);

  /// Begin a new row group; columns are then written one by one.
  ::arrow::Status NewRowGroup(int64_t chunk_size);
  /// Write the next column of the current row group from `data`.
  /// `length == -1` means "until the end of the array".
  ::arrow::Status WriteFlatColumnChunk(
      const ::arrow::Array* data, int64_t offset = 0, int64_t length = -1);
  /// Close the current row group (if any) and the underlying file.
  ::arrow::Status Close();

  virtual ~FileWriter();

  /// Pool used for intermediate conversion buffers.
  ::arrow::MemoryPool* memory_pool() const;

 private:
  // PIMPL: keeps parquet-cpp internals out of this public header.
  class PARQUET_NO_EXPORT Impl;
  std::unique_ptr<Impl> impl_;
};
+
/**
 * Write a flat Table to Parquet.
 *
 * The table shall only consist of nullable, non-repeated columns of primitive type.
 *
 * @param table      flat Arrow table to write
 * @param pool       memory pool for intermediate conversion buffers
 * @param sink       output stream that receives the Parquet file bytes
 * @param chunk_size maximum number of rows per Parquet row group
 * @param properties Parquet writer options (format version, encodings, ...)
 */
::arrow::Status PARQUET_EXPORT WriteFlatTable(const ::arrow::Table* table,
    ::arrow::MemoryPool* pool, const std::shared_ptr<OutputStream>& sink,
    int64_t chunk_size,
    const std::shared_ptr<WriterProperties>& properties = default_writer_properties());
+
+}  // namespace arrow
+
+}  // namespace parquet
+
+#endif  // PARQUET_ARROW_WRITER_H

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/4a7bf117/thirdparty/build_thirdparty.sh
----------------------------------------------------------------------
diff --git a/thirdparty/build_thirdparty.sh b/thirdparty/build_thirdparty.sh
index dca586c..8229484 100755
--- a/thirdparty/build_thirdparty.sh
+++ b/thirdparty/build_thirdparty.sh
@@ -15,6 +15,7 @@ else
   # Allow passing specific libs to build on the command line
   for arg in "$*"; do
     case $arg in
+      "arrow")      F_ARROW=1 ;;
       "zlib")       F_ZLIB=1 ;;
       "gbenchmark") F_GBENCHMARK=1 ;;
       "gtest")      F_GTEST=1 ;;
@@ -57,6 +58,15 @@ fi
 
 STANDARD_DARWIN_FLAGS="-std=c++11 -stdlib=libc++"
 
+# build arrow
+if [ -n "$F_ALL" -o -n "$F_ARROW" ]; then
+    cd $TP_DIR/$ARROW_BASEDIR/cpp
+    source ./setup_build_env.sh
+    cmake . -DARROW_PARQUET=OFF -DARROW_HDFS=ON -DCMAKE_INSTALL_PREFIX=$PREFIX
+    make -j$PARALLEL install
+    # :
+fi
+
 # build googletest
 GOOGLETEST_ERROR="failed for googletest!"
 if [ -n "$F_ALL" -o -n "$F_GTEST" ]; then

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/4a7bf117/thirdparty/download_thirdparty.sh
----------------------------------------------------------------------
diff --git a/thirdparty/download_thirdparty.sh 
b/thirdparty/download_thirdparty.sh
index fae6c9c..23bdb96 100755
--- a/thirdparty/download_thirdparty.sh
+++ b/thirdparty/download_thirdparty.sh
@@ -20,6 +20,11 @@ download_extract_and_cleanup() {
     rm $filename
 }
 
+if [ ! -d ${ARROW_BASEDIR} ]; then
+  echo "Fetching arrow"
+  download_extract_and_cleanup $ARROW_URL
+fi
+
 if [ ! -d ${SNAPPY_BASEDIR} ]; then
   echo "Fetching snappy"
   download_extract_and_cleanup $SNAPPY_URL

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/4a7bf117/thirdparty/set_thirdparty_env.sh
----------------------------------------------------------------------
diff --git a/thirdparty/set_thirdparty_env.sh b/thirdparty/set_thirdparty_env.sh
index 80715ef..3733d08 100644
--- a/thirdparty/set_thirdparty_env.sh
+++ b/thirdparty/set_thirdparty_env.sh
@@ -7,6 +7,7 @@ if [ -z "$THIRDPARTY_DIR" ]; then
        THIRDPARTY_DIR=$SOURCE_DIR
 fi
 
+export ARROW_HOME=$THIRDPARTY_DIR/installed
 export SNAPPY_HOME=$THIRDPARTY_DIR/installed
 export ZLIB_HOME=$THIRDPARTY_DIR/installed
 # build script doesn't support building thrift on OSX

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/4a7bf117/thirdparty/versions.sh
----------------------------------------------------------------------
diff --git a/thirdparty/versions.sh b/thirdparty/versions.sh
index b56262a..05f5cc2 100755
--- a/thirdparty/versions.sh
+++ b/thirdparty/versions.sh
@@ -1,3 +1,7 @@
+ARROW_VERSION="6b8abb4402ff1f39fc5944a7df6e3b4755691d87"
+ARROW_URL="https://github.com/apache/arrow/archive/${ARROW_VERSION}.tar.gz";
+ARROW_BASEDIR="arrow-${ARROW_VERSION}"
+
 SNAPPY_VERSION=1.1.3
 
SNAPPY_URL="https://github.com/google/snappy/releases/download/${SNAPPY_VERSION}/snappy-${SNAPPY_VERSION}.tar.gz";
 SNAPPY_BASEDIR=snappy-$SNAPPY_VERSION

Reply via email to