arrow git commit: ARROW-339: Python 3 compatibility in merge_arrow_pr.py

2016-10-29 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 6178bf7b0 -> da24c1a0a


ARROW-339: Python 3 compatibility in merge_arrow_pr.py

Author: Wes McKinney 

Closes #188 from wesm/ARROW-339 and squashes the following commits:

1f3617f [Wes McKinney] Remove cherry-picking cruft
6b99632 [Wes McKinney] Python 3 compatibility in merge_arrow_pr.py


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/da24c1a0
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/da24c1a0
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/da24c1a0

Branch: refs/heads/master
Commit: da24c1a0a2aba7ccd42cc3cbcf240eeb22d7ffb6
Parents: 6178bf7
Author: Wes McKinney 
Authored: Sat Oct 29 10:02:15 2016 +0200
Committer: Uwe L. Korn 
Committed: Sat Oct 29 10:02:15 2016 +0200

--
 dev/merge_arrow_pr.py | 193 +
 1 file changed, 88 insertions(+), 105 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/da24c1a0/dev/merge_arrow_pr.py
--
diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py
index 8f47f93..aa899ed 100755
--- a/dev/merge_arrow_pr.py
+++ b/dev/merge_arrow_pr.py
@@ -17,22 +17,24 @@
 # limitations under the License.
 #
 
-# Utility for creating well-formed pull request merges and pushing them to 
Apache.
+# Utility for creating well-formed pull request merges and pushing them to
+# Apache.
 #   usage: ./apache-pr-merge.py(see config env vars below)
 #
 # This utility assumes you already have a local Arrow git clone and that you
 # have added remotes corresponding to both (i) the Github Apache Arrow mirror
 # and (ii) the apache git repo.
 
-import json
 import os
 import re
 import subprocess
 import sys
-import tempfile
-import urllib2
+import requests
 import getpass
 
+from six.moves import input
+import six
+
 try:
 import jira.client
 JIRA_IMPORTED = True
@@ -42,8 +44,8 @@ except ImportError:
 # Location of your Arrow git clone
 ARROW_HOME = os.path.abspath(__file__).rsplit("/", 2)[0]
 PROJECT_NAME = ARROW_HOME.rsplit("/", 1)[1]
-print "ARROW_HOME = " + ARROW_HOME
-print "PROJECT_NAME = " + PROJECT_NAME
+print("ARROW_HOME = " + ARROW_HOME)
+print("PROJECT_NAME = " + PROJECT_NAME)
 
 # Remote name which points to the Gihub site
 PR_REMOTE_NAME = os.environ.get("PR_REMOTE_NAME", "apache-github")
@@ -65,46 +67,38 @@ os.chdir(ARROW_HOME)
 
 
 def get_json(url):
-try:
-from urllib2 import urlopen, Request
-env_var = 'ARROW_GITHUB_API_TOKEN'
-
-if env_var in os.environ:
-token = os.environ[env_var]
-request = Request(url)
-request.add_header('Authorization', 'token %s' % token)
-response = urlopen(request)
-else:
-response = urlopen(url)
-return json.load(response)
-except urllib2.HTTPError as e:
-print "Unable to fetch URL, exiting: %s" % url
-sys.exit(-1)
+req = requests.get(url)
+return req.json()
 
 
 def fail(msg):
-print msg
+print(msg)
 clean_up()
 sys.exit(-1)
 
 
 def run_cmd(cmd):
+if isinstance(cmd, six.string_types):
+cmd = cmd.split(' ')
+
 try:
-if isinstance(cmd, list):
-return subprocess.check_output(cmd)
-else:
-return subprocess.check_output(cmd.split(" "))
+output = subprocess.check_output(cmd)
 except subprocess.CalledProcessError as e:
 # this avoids hiding the stdout / stderr of failed processes
-print 'Command failed: %s' % cmd
-print 'With output:'
-print '--'
-print e.output
-print '--'
+print('Command failed: %s' % cmd)
+print('With output:')
+print('--')
+print(e.output)
+print('--')
 raise e
 
+if isinstance(output, six.binary_type):
+output = output.decode('utf-8')
+return output
+
+
 def continue_maybe(prompt):
-result = raw_input("\n%s (y/n): " % prompt)
+result = input("\n%s (y/n): " % prompt)
 if result.lower() != "y":
 fail("Okay, exiting")
 
@@ -113,38 +107,44 @@ original_head = run_cmd("git rev-parse HEAD")[:8]
 
 
 def clean_up():
-print "Restoring head pointer to %s" % original_head
+print("Restoring head pointer to %s" % original_head)
 run_cmd("git checkout %s" % original_head)
 
 branches = run_cmd("git branch").repl

arrow git commit: ARROW-349: Add six as a requirement

2016-10-30 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master d946e7917 -> 772bc6ea6


ARROW-349: Add six as a requirement

fixes https://issues.apache.org/jira/browse/ARROW-349

Author: Peter Hoffmann 

Closes #184 from hoffmann/patch-1 and squashes the following commits:

1bffc69 [Peter Hoffmann] Add six as a requirement


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/772bc6ea
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/772bc6ea
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/772bc6ea

Branch: refs/heads/master
Commit: 772bc6ea6e5d452ccff1df8d5e83299e434c0d04
Parents: d946e79
Author: Peter Hoffmann 
Authored: Sun Oct 30 11:11:28 2016 +0100
Committer: Uwe L. Korn 
Committed: Sun Oct 30 11:11:28 2016 +0100

--
 python/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/772bc6ea/python/setup.py
--
diff --git a/python/setup.py b/python/setup.py
index 9904977..cdfdc24 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -271,7 +271,7 @@ setup(
 'clean': clean,
 'build_ext': build_ext
 },
-install_requires=['cython >= 0.23', 'numpy >= 1.9'],
+install_requires=['cython >= 0.23', 'numpy >= 1.9', 'six >= 1.0.0'],
 description=DESC,
 license='Apache License, Version 2.0',
 maintainer="Apache Arrow Developers",



arrow git commit: ARROW-358: Add explicit environment variable to locate libhdfs in one's environment

2016-11-02 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master c7db80e72 -> e70d97dbc


ARROW-358: Add explicit environment variable to locate libhdfs in one's 
environment

Author: Wes McKinney 

Closes #195 from wesm/ARROW-358 and squashes the following commits:

c00d251 [Wes McKinney] Add explicit environment variable to locate libhdfs in 
one's environment


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/e70d97db
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/e70d97db
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/e70d97db

Branch: refs/heads/master
Commit: e70d97dbc8dc86161083e94c45d5828f79211f6b
Parents: c7db80e
Author: Wes McKinney 
Authored: Wed Nov 2 08:06:29 2016 +0100
Committer: Uwe L. Korn 
Committed: Wed Nov 2 08:06:29 2016 +0100

--
 cpp/src/arrow/io/libhdfs_shim.cc | 5 +
 1 file changed, 5 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/e70d97db/cpp/src/arrow/io/libhdfs_shim.cc
--
diff --git a/cpp/src/arrow/io/libhdfs_shim.cc b/cpp/src/arrow/io/libhdfs_shim.cc
index 07eb625..1fee595 100644
--- a/cpp/src/arrow/io/libhdfs_shim.cc
+++ b/cpp/src/arrow/io/libhdfs_shim.cc
@@ -386,6 +386,11 @@ static std::vector get_potential_libhdfs_paths() 
{
 search_paths.push_back(path);
   }
 
+  const char* libhdfs_dir = std::getenv("ARROW_LIBHDFS_DIR");
+  if (libhdfs_dir != nullptr) {
+search_paths.push_back(fs::path(libhdfs_dir));
+  }
+
   // All paths with file name
   for (auto& path : search_paths) {
 libhdfs_potential_paths.push_back(path / file_name);



arrow git commit: ARROW-368: Added note for LD_LIBRARY_PATH in Python README

2016-11-06 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 25e010607 -> e8bc1fe3b


ARROW-368:  Added note for LD_LIBRARY_PATH in Python README

Added note to use LD_LIBRARY_PATH env var to add $ARROW_HOME/lib path so 
PyArrow can locate Arrow-Cpp shared libs.

Author: Bryan Cutler 

Closes #199 from BryanCutler/pyarrow-README-note-LD_LIBRARY_PATH-ARROW-368 and 
squashes the following commits:

15861c4 [Bryan Cutler] Added note for LD_LIBRARY_PATH in Python README


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/e8bc1fe3
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/e8bc1fe3
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/e8bc1fe3

Branch: refs/heads/master
Commit: e8bc1fe3ba7f94b39f38571a435f93f387e67d37
Parents: 25e0106
Author: Bryan Cutler 
Authored: Sun Nov 6 12:10:06 2016 +0100
Committer: Uwe L. Korn 
Committed: Sun Nov 6 12:10:06 2016 +0100

--
 python/README.md | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/e8bc1fe3/python/README.md
--
diff --git a/python/README.md b/python/README.md
index 4fce0d2..88ab17e 100644
--- a/python/README.md
+++ b/python/README.md
@@ -33,12 +33,19 @@ These are the various projects that PyArrow depends on.
 1. **g++ and gcc Version >= 4.8**
 2. **cmake > 2.8.6**
 3. **boost**
-4. **Arrow-cpp and its dependencies***
+4. **Arrow-cpp and its dependencies**
 
 The Arrow C++ library must be built with all options enabled and installed with
 ``ARROW_HOME`` environment variable set to the installation location. Look at
 (https://github.com/apache/arrow/blob/master/cpp/README.md) for instructions.
 
+Ensure PyArrow can locate the Arrow-cpp shared libraries by setting the
+LD_LIBRARY_PATH environment variable.
+
+```bash
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ARROW_HOME/lib
+```
+
 5. **Python dependencies: numpy, pandas, cython, pytest**
 
  Build pyarrow and run the unit tests



arrow git commit: ARROW-396: [Python] Add pyarrow.schema.Schema.equals

2016-12-01 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 072b7d671 -> 3b946b822


ARROW-396: [Python] Add pyarrow.schema.Schema.equals

Added pyarrow api for `Schema.equals` to check if 2 schema's are equal and 
corresponding test case.

Author: Bryan Cutler 

Closes #221 from BryanCutler/add-pyarrow-schema_equals-ARROW-396 and squashes 
the following commits:

910e943 [Bryan Cutler] added test case for pyarrow Schema equals
24cf982 [Bryan Cutler] added pyarrow Schema equals, and related def for CSchema


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/3b946b82
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/3b946b82
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/3b946b82

Branch: refs/heads/master
Commit: 3b946b822445f21872c7cb42563c8d0c7bc84b80
Parents: 072b7d6
Author: Bryan Cutler 
Authored: Thu Dec 1 13:26:43 2016 +0100
Committer: Uwe L. Korn 
Committed: Thu Dec 1 13:26:43 2016 +0100

--
 python/pyarrow/includes/libarrow.pxd |  3 +++
 python/pyarrow/schema.pyx|  9 +
 python/pyarrow/tests/test_schema.py  | 17 +
 3 files changed, 29 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/3b946b82/python/pyarrow/includes/libarrow.pxd
--
diff --git a/python/pyarrow/includes/libarrow.pxd 
b/python/pyarrow/includes/libarrow.pxd
index 3ae1789..19da408 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -88,6 +88,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
 
 cdef cppclass CSchema" arrow::Schema":
 CSchema(const vector[shared_ptr[CField]]& fields)
+
+c_bool Equals(const shared_ptr[CSchema]& other)
+
 const shared_ptr[CField]& field(int i)
 int num_fields()
 c_string ToString()

http://git-wip-us.apache.org/repos/asf/arrow/blob/3b946b82/python/pyarrow/schema.pyx
--
diff --git a/python/pyarrow/schema.pyx b/python/pyarrow/schema.pyx
index 084c304..e0badb9 100644
--- a/python/pyarrow/schema.pyx
+++ b/python/pyarrow/schema.pyx
@@ -110,6 +110,15 @@ cdef class Schema:
 self.schema = schema.get()
 self.sp_schema = schema
 
+def equals(self, other):
+"""
+Test if this schema is equal to the other
+"""
+cdef Schema _other
+_other = other
+
+return self.sp_schema.get().Equals(_other.sp_schema)
+
 @classmethod
 def from_fields(cls, fields):
 cdef:

http://git-wip-us.apache.org/repos/asf/arrow/blob/3b946b82/python/pyarrow/tests/test_schema.py
--
diff --git a/python/pyarrow/tests/test_schema.py 
b/python/pyarrow/tests/test_schema.py
index 2894ea8..4aa8112 100644
--- a/python/pyarrow/tests/test_schema.py
+++ b/python/pyarrow/tests/test_schema.py
@@ -69,3 +69,20 @@ class TestTypes(unittest.TestCase):
 foo: int32
 bar: string
 baz: list"""
+
+def test_schema_equals(self):
+fields = [
+A.field('foo', A.int32()),
+A.field('bar', A.string()),
+A.field('baz', A.list_(A.int8()))
+]
+
+sch1 = A.schema(fields)
+print(dir(sch1))
+sch2 = A.schema(fields)
+assert sch1.equals(sch2)
+
+del fields[-1]
+sch3 = A.schema(fields)
+assert not sch1.equals(sch3)
+



arrow git commit: ARROW-409: [Python] Change record batches conversion to Table

2016-12-07 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 4b72329fe -> 72f80d450


ARROW-409: [Python] Change record batches conversion to Table

From discussion in ARROW-369, it is more consistent and flexible for the
pyarrow.Table API to convert a RecordBatch list first to a Table, then Table to
pandas.DataFrame.  For example:
```
table = pa.Table.from_batches(batches)
df = table.to_pandas()
```
Also updated conversion to print schemas in exception message if not equal.

Author: Bryan Cutler 

Closes #229 from BryanCutler/pyarrow-table-from_batches-ARROW-409 and squashes 
the following commits:

f5751e0 [Bryan Cutler] fixed schema check to print out if not equal
72ea875 [Bryan Cutler] changed batches conversion to Table instead


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/72f80d45
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/72f80d45
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/72f80d45

Branch: refs/heads/master
Commit: 72f80d450e0e8e20812fd80571b0c1d18e88114a
Parents: 4b72329
Author: Bryan Cutler 
Authored: Wed Dec 7 15:00:18 2016 +0100
Committer: Uwe L. Korn 
Committed: Wed Dec 7 15:00:18 2016 +0100

--
 python/pyarrow/__init__.py |  3 +-
 python/pyarrow/table.pyx   | 94 +
 python/pyarrow/tests/test_table.py |  5 +-
 3 files changed, 52 insertions(+), 50 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/72f80d45/python/pyarrow/__init__.py
--
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index f366317..5af93fb 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -49,5 +49,4 @@ from pyarrow.schema import (null, bool_,
 list_, struct, field,
 DataType, Field, Schema, schema)
 
-from pyarrow.table import (Column, RecordBatch, dataframe_from_batches, Table,
-   from_pandas_dataframe)
+from pyarrow.table import Column, RecordBatch, Table, from_pandas_dataframe

http://git-wip-us.apache.org/repos/asf/arrow/blob/72f80d45/python/pyarrow/table.pyx
--
diff --git a/python/pyarrow/table.pyx b/python/pyarrow/table.pyx
index 45cf7be..0a9805c 100644
--- a/python/pyarrow/table.pyx
+++ b/python/pyarrow/table.pyx
@@ -415,52 +415,6 @@ cdef class RecordBatch:
 return result
 
 
-def dataframe_from_batches(batches):
-"""
-Convert a list of Arrow RecordBatches to a pandas.DataFrame
-
-Parameters
---
-
-batches: list of RecordBatch
-RecordBatch list to be converted, schemas must be equal
-"""
-
-cdef:
-vector[shared_ptr[CArray]] c_array_chunks
-vector[shared_ptr[CColumn]] c_columns
-shared_ptr[CTable] c_table
-Array arr
-Schema schema
-
-import pandas as pd
-
-schema = batches[0].schema
-
-# check schemas are equal
-if any((not schema.equals(other.schema) for other in batches[1:])):
-raise ArrowException("Error converting list of RecordBatches to "
-"DataFrame, not all schemas are equal")
-
-cdef int K = batches[0].num_columns
-
-# create chunked columns from the batches
-c_columns.resize(K)
-for i in range(K):
-for batch in batches:
-arr = batch[i]
-c_array_chunks.push_back(arr.sp_array)
-c_columns[i].reset(new CColumn(schema.sp_schema.get().field(i),
-   c_array_chunks))
-c_array_chunks.clear()
-
-# create a Table from columns and convert to DataFrame
-c_table.reset(new CTable('', schema.sp_schema, c_columns))
-table = Table()
-table.init(c_table)
-return table.to_pandas()
-
-
 cdef class Table:
 """
 A collection of top-level named, equal length Arrow arrays.
@@ -567,6 +521,54 @@ cdef class Table:
 
 return result
 
+@staticmethod
+def from_batches(batches):
+"""
+Construct a Table from a list of Arrow RecordBatches
+
+Parameters
+--
+
+batches: list of RecordBatch
+RecordBatch list to be converted, schemas must be equal
+"""
+
+cdef:
+vector[shared_ptr[CArray]] c_array_chunks
+vector[shared_ptr[CColumn]] c_columns
+shared_ptr[CTable] c_table
+Array arr
+Schema schema
+
+import pandas as pd
+
+schema = batches[0].schema
+
+# check schemas are equal
+for other in batches[1:]:
+if not schema.equals(other.schema):
+ 

arrow git commit: ARROW-408: Remove defunct conda recipes

2016-12-08 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 72f80d450 -> c8eb49e41


ARROW-408: Remove defunct conda recipes

These are better maintained on conda-forge since there's also an associated 
cross-platform build environment

Author: Wes McKinney 

Closes #231 from wesm/ARROW-408 and squashes the following commits:

8c58b75 [Wes McKinney] Remove defunct conda recipes


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/c8eb49e4
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/c8eb49e4
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/c8eb49e4

Branch: refs/heads/master
Commit: c8eb49e4136365f8056e09c36746b6dbb02d2814
Parents: 72f80d4
Author: Wes McKinney 
Authored: Thu Dec 8 20:58:48 2016 +0100
Committer: Uwe L. Korn 
Committed: Thu Dec 8 20:58:48 2016 +0100

--
 cpp/conda.recipe/build.sh | 62 --
 cpp/conda.recipe/meta.yaml| 43 --
 python/conda.recipe/build.sh  | 45 ---
 python/conda.recipe/meta.yaml | 54 -
 4 files changed, 204 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/c8eb49e4/cpp/conda.recipe/build.sh
--
diff --git a/cpp/conda.recipe/build.sh b/cpp/conda.recipe/build.sh
deleted file mode 100644
index 0536fd9..000
--- a/cpp/conda.recipe/build.sh
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/bin/bash
-
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License. See accompanying LICENSE file.
-
-set -e
-set -x
-
-cd $RECIPE_DIR
-
-# Build dependencies
-export FLATBUFFERS_HOME=$PREFIX
-export PARQUET_HOME=$PREFIX
-
-if [ "$(uname)" == "Darwin" ]; then
-  # C++11 finagling for Mac OSX
-  export CC=clang
-  export CXX=clang++
-  export MACOSX_VERSION_MIN="10.7"
-  CXXFLAGS="${CXXFLAGS} -mmacosx-version-min=${MACOSX_VERSION_MIN}"
-  CXXFLAGS="${CXXFLAGS} -stdlib=libc++ -std=c++11"
-  export LDFLAGS="${LDFLAGS} -mmacosx-version-min=${MACOSX_VERSION_MIN}"
-  export LDFLAGS="${LDFLAGS} -stdlib=libc++ -std=c++11"
-  export LINKFLAGS="${LDFLAGS}"
-  export MACOSX_DEPLOYMENT_TARGET=10.7
-fi
-
-cd ..
-
-rm -rf conda-build
-mkdir conda-build
-cd conda-build
-pwd
-
-# if [ `uname` == Linux ]; then
-# SHARED_LINKER_FLAGS='-static-libstdc++'
-# elif [ `uname` == Darwin ]; then
-# SHARED_LINKER_FLAGS=''
-# fi
-
-# -DCMAKE_SHARED_LINKER_FLAGS=$SHARED_LINKER_FLAGS \
-
-cmake \
--DCMAKE_BUILD_TYPE=release \
--DCMAKE_INSTALL_PREFIX=$PREFIX \
--DARROW_HDFS=on \
--DARROW_IPC=on \
--DARROW_PARQUET=on \
-..
-
-make
-ctest -L unittest
-make install

http://git-wip-us.apache.org/repos/asf/arrow/blob/c8eb49e4/cpp/conda.recipe/meta.yaml
--
diff --git a/cpp/conda.recipe/meta.yaml b/cpp/conda.recipe/meta.yaml
deleted file mode 100644
index 31f150c..000
--- a/cpp/conda.recipe/meta.yaml
+++ /dev/null
@@ -1,43 +0,0 @@
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License. See accompanying LICENSE file.
-
-package:
-  name: arrow-cpp
-  version: "0.1"
-
-build:
-  number: {{environ.get('TRAVIS_BUILD_NUMBER', 0)}}# [unix]
-  skip: true  # [win]
-  script_env:
-- CC [linux]
-- CXX [linux]
-- LD_LIBRARY_PATH [linux]
-
-requirements:
-  build:
-- cmake
-- flatbuffers
-- parquet-cpp
-
-  run:
-- parquet-cpp
-
-test:
-  commands:
-- test -f $PREFIX/lib/libarrow.so# [linux]
-- test -f $PREFIX/lib/libarrow_parquet.so# [linux]
-- test -f $PREFIX/include/arrow/api.h
-
-about:
-  home: http://github.com/a

arrow git commit: ARROW-404: [Python] Fix segfault caused by HdfsClient getting closed before an HdfsFile

2016-12-08 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master c8eb49e41 -> e139b8b7c


ARROW-404: [Python] Fix segfault caused by HdfsClient getting closed before an 
HdfsFile

The one downside of this patch is that HdfsFile handles don't get 
garbage-collected until the cyclic GC runs -- I tried to fix this but couldn't 
get it working. So bytes don't always get flushed to HDFS until `close()` is 
called. The flush issue should be addressed on the C++ side

Author: Wes McKinney 

Closes #230 from wesm/ARROW-404 and squashes the following commits:

3a8e641 [Wes McKinney] Use weakref in _HdfsFileNanny to avoid cyclic gc
274d0c5 [Wes McKinney] amend comment
1539a2c [Wes McKinney] Ensure that HdfsClient does not get closed before an 
open file does when the last user-accessible client reference goes out of scope


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/e139b8b7
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/e139b8b7
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/e139b8b7

Branch: refs/heads/master
Commit: e139b8b7c11b7f36fa57a625a39d9c8779d033f4
Parents: c8eb49e
Author: Wes McKinney 
Authored: Fri Dec 9 06:49:49 2016 +0100
Committer: Uwe L. Korn 
Committed: Fri Dec 9 06:49:49 2016 +0100

--
 python/pyarrow/io.pyx | 86 ++
 python/pyarrow/tests/test_hdfs.py | 23 +
 2 files changed, 79 insertions(+), 30 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/e139b8b7/python/pyarrow/io.pyx
--
diff --git a/python/pyarrow/io.pyx b/python/pyarrow/io.pyx
index 0e6b81e..2fa5fb6 100644
--- a/python/pyarrow/io.pyx
+++ b/python/pyarrow/io.pyx
@@ -504,7 +504,7 @@ cdef class HdfsClient:
 
 out.mode = mode
 out.buffer_size = c_buffer_size
-out.parent = self
+out.parent = _HdfsFileNanny(self, out)
 out.is_open = True
 out.own_file = True
 
@@ -516,48 +516,69 @@ cdef class HdfsClient:
 """
 write_queue = Queue(50)
 
-f = self.open(path, 'wb')
+with self.open(path, 'wb') as f:
+done = False
+exc_info = None
+def bg_write():
+try:
+while not done or write_queue.qsize() > 0:
+try:
+buf = write_queue.get(timeout=0.01)
+except QueueEmpty:
+continue
 
-done = False
-exc_info = None
-def bg_write():
-try:
-while not done or write_queue.qsize() > 0:
-try:
-buf = write_queue.get(timeout=0.01)
-except QueueEmpty:
-continue
+f.write(buf)
 
-f.write(buf)
+except Exception as e:
+exc_info = sys.exc_info()
 
-except Exception as e:
-exc_info = sys.exc_info()
-
-writer_thread = threading.Thread(target=bg_write)
-writer_thread.start()
+writer_thread = threading.Thread(target=bg_write)
+writer_thread.start()
 
-try:
-while True:
-buf = stream.read(buffer_size)
-if not buf:
-break
+try:
+while True:
+buf = stream.read(buffer_size)
+if not buf:
+break
 
-write_queue.put_nowait(buf)
-finally:
-done = True
+write_queue.put_nowait(buf)
+finally:
+done = True
 
-writer_thread.join()
-if exc_info is not None:
-raise exc_info[0], exc_info[1], exc_info[2]
+writer_thread.join()
+if exc_info is not None:
+raise exc_info[0], exc_info[1], exc_info[2]
 
 def download(self, path, stream, buffer_size=None):
-f = self.open(path, 'rb', buffer_size=buffer_size)
-f.download(stream)
+with self.open(path, 'rb', buffer_size=buffer_size) as f:
+f.download(stream)
 
 
 # --
 # Specialization for HDFS
 
+# ARROW-404: Helper class to ensure that files are closed before the
+# client. During deallocation of the extension class, the attributes are
+# decref'd which can cause the client to get closed first if the file has the
+# last remaining reference
+cdef class _HdfsFileNanny:
+cdef:
+object client
+object file_handle_ref
+
+def __cinit

arrow git commit: ARROW-328: Return shared_ptr by value instead of const-ref

2016-12-10 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 45ed7e7a3 -> 73fe55683


ARROW-328: Return shared_ptr by value instead of const-ref

Author: Wes McKinney 

Closes #235 from wesm/ARROW-328 and squashes the following commits:

f71decc [Wes McKinney] Return shared_ptr by value instead of const-ref


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/73fe5568
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/73fe5568
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/73fe5568

Branch: refs/heads/master
Commit: 73fe55683c36465972e21bef01b377c3b66579f9
Parents: 45ed7e7
Author: Wes McKinney 
Authored: Sat Dec 10 09:05:48 2016 +0100
Committer: Uwe L. Korn 
Committed: Sat Dec 10 09:05:48 2016 +0100

--
 cpp/src/arrow/array.h|  4 ++--
 cpp/src/arrow/builder.h  |  4 ++--
 cpp/src/arrow/column.cc  |  2 +-
 cpp/src/arrow/column.h   |  8 
 cpp/src/arrow/ipc/file.cc|  2 +-
 cpp/src/arrow/ipc/file.h |  2 +-
 cpp/src/arrow/ipc/metadata-internal.cc   |  2 +-
 cpp/src/arrow/table.h|  8 
 cpp/src/arrow/type.cc|  2 +-
 cpp/src/arrow/type.h |  6 +++---
 cpp/src/arrow/types/construct.cc |  2 +-
 cpp/src/arrow/types/list.h   |  6 +++---
 cpp/src/arrow/types/primitive.h  |  2 +-
 cpp/src/arrow/types/struct.h |  4 ++--
 cpp/src/arrow/util/buffer.h  |  2 +-
 python/pyarrow/includes/libarrow.pxd | 22 +++---
 python/pyarrow/includes/libarrow_ipc.pxd |  2 +-
 17 files changed, 40 insertions(+), 40 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/73fe5568/cpp/src/arrow/array.h
--
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index 78aa2b8..91fb93e 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -53,10 +53,10 @@ class ARROW_EXPORT Array {
   int32_t length() const { return length_; }
   int32_t null_count() const { return null_count_; }
 
-  const std::shared_ptr& type() const { return type_; }
+  std::shared_ptr type() const { return type_; }
   Type::type type_enum() const { return type_->type; }
 
-  const std::shared_ptr& null_bitmap() const { return null_bitmap_; }
+  std::shared_ptr null_bitmap() const { return null_bitmap_; }
 
   const uint8_t* null_bitmap_data() const { return null_bitmap_data_; }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/73fe5568/cpp/src/arrow/builder.h
--
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index cef17e5..73e49c0 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -89,13 +89,13 @@ class ARROW_EXPORT ArrayBuilder {
   // this function responsibly.
   Status Advance(int32_t elements);
 
-  const std::shared_ptr& null_bitmap() const { return 
null_bitmap_; }
+  std::shared_ptr null_bitmap() const { return null_bitmap_; }
 
   // Creates new array object to hold the contents of the builder and transfers
   // ownership of the data.  This resets all variables on the builder.
   virtual Status Finish(std::shared_ptr* out) = 0;
 
-  const std::shared_ptr& type() const { return type_; }
+  std::shared_ptr type() const { return type_; }
 
  protected:
   MemoryPool* pool_;

http://git-wip-us.apache.org/repos/asf/arrow/blob/73fe5568/cpp/src/arrow/column.cc
--
diff --git a/cpp/src/arrow/column.cc b/cpp/src/arrow/column.cc
index 52e4c58..eca5f4d 100644
--- a/cpp/src/arrow/column.cc
+++ b/cpp/src/arrow/column.cc
@@ -51,7 +51,7 @@ Column::Column(
 
 Status Column::ValidateData() {
   for (int i = 0; i < data_->num_chunks(); ++i) {
-const std::shared_ptr& type = data_->chunk(i)->type();
+std::shared_ptr type = data_->chunk(i)->type();
 if (!this->type()->Equals(type)) {
   std::stringstream ss;
   ss << "In chunk " << i << " expected type " << this->type()->ToString()

http://git-wip-us.apache.org/repos/asf/arrow/blob/73fe5568/cpp/src/arrow/column.h
--
diff --git a/cpp/src/arrow/column.h b/cpp/src/arrow/column.h
index d5168cb..1caafec 100644
--- a/cpp/src/arrow/column.h
+++ b/cpp/src/arrow/column.h
@@ -46,7 +46,7 @@ class ARROW_EXPORT ChunkedArray {
 
   int num_chunks() const { return chunks_.size(); }
 
-  const std::shared_ptr& chunk(int i) const { return chunks_[i]; }
+  std::shared_ptr chunk(int i) const { return chunks_[i]; }
 
  protected:
   ArrayVector

arrow git commit: ARROW-405: Use vendored hdfs.h if not found in include/ in $HADOOP_HOME

2016-12-12 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 2c10d7cce -> 7e93075cd


ARROW-405: Use vendored hdfs.h if not found in include/ in $HADOOP_HOME

Not all Hadoop distributions have their files arranged in the same way.

Author: Wes McKinney 

Closes #237 from wesm/ARROW-405 and squashes the following commits:

3a266d3 [Wes McKinney] Use vendored hdfs.h if not found in include/ in 
$HADOOP_HOME


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/7e93075c
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/7e93075c
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/7e93075c

Branch: refs/heads/master
Commit: 7e93075cd48c5f6b1b75f9adc43ba53c831046e7
Parents: 2c10d7c
Author: Wes McKinney 
Authored: Tue Dec 13 06:50:25 2016 +0100
Committer: Uwe L. Korn 
Committed: Tue Dec 13 06:50:25 2016 +0100

--
 cpp/src/arrow/io/CMakeLists.txt | 4 
 1 file changed, 4 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/7e93075c/cpp/src/arrow/io/CMakeLists.txt
--
diff --git a/cpp/src/arrow/io/CMakeLists.txt b/cpp/src/arrow/io/CMakeLists.txt
index a1892a9..f285180 100644
--- a/cpp/src/arrow/io/CMakeLists.txt
+++ b/cpp/src/arrow/io/CMakeLists.txt
@@ -56,6 +56,10 @@ if(ARROW_HDFS)
 
   if (DEFINED ENV{HADOOP_HOME})
 set(HADOOP_HOME $ENV{HADOOP_HOME})
+if (NOT EXISTS "${HADOOP_HOME}/include/hdfs.h")
+  message(STATUS "Did not find hdfs.h in expected location, using vendored 
one")
+  set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop")
+endif()
   else()
 set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop")
   endif()



arrow git commit: ARROW-429: Revert ARROW-379 until git-archive issues are resolved

2016-12-19 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master c369709c4 -> 68e39c686


ARROW-429: Revert ARROW-379 until git-archive issues are resolved

These changes are resulting in GitHub producing archive tarballs with 
non-deterministic contents.

Author: Wes McKinney 

Closes #243 from wesm/ARROW-429 and squashes the following commits:

49f6edb [Wes McKinney] Revert "ARROW-379: Use setuptools_scm for Python 
versioning"


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/68e39c68
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/68e39c68
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/68e39c68

Branch: refs/heads/master
Commit: 68e39c6868d449f10243707ca1a7513aaa29761f
Parents: c369709
Author: Wes McKinney 
Authored: Mon Dec 19 21:11:25 2016 +0100
Committer: Uwe L. Korn 
Committed: Mon Dec 19 21:11:25 2016 +0100

--
 dev/release/00-prepare.sh  |  5 +
 python/.git_archival.txt   |  1 -
 python/.gitattributes  |  1 -
 python/pyarrow/__init__.py | 10 ++
 python/setup.cfg   | 20 
 python/setup.py| 23 +--
 6 files changed, 28 insertions(+), 32 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/68e39c68/dev/release/00-prepare.sh
--
diff --git a/dev/release/00-prepare.sh b/dev/release/00-prepare.sh
index 00af5e7..3423a3e 100644
--- a/dev/release/00-prepare.sh
+++ b/dev/release/00-prepare.sh
@@ -43,4 +43,9 @@ mvn release:prepare -Dtag=${tag} -DreleaseVersion=${version} 
-DautoVersionSubmod
 
 cd -
 
+cd "${SOURCE_DIR}/../../python"
+sed -i "s/VERSION = '[^']*'/VERSION = '${version}'/g" setup.py
+sed -i "s/ISRELEASED = False/ISRELEASED = True/g" setup.py
+cd -
+
 echo "Finish staging binary artifacts by running: sh dev/release/01-perform.sh"

http://git-wip-us.apache.org/repos/asf/arrow/blob/68e39c68/python/.git_archival.txt
--
diff --git a/python/.git_archival.txt b/python/.git_archival.txt
deleted file mode 100644
index 95cb3ee..000
--- a/python/.git_archival.txt
+++ /dev/null
@@ -1 +0,0 @@
-ref-names: $Format:%D$

http://git-wip-us.apache.org/repos/asf/arrow/blob/68e39c68/python/.gitattributes
--
diff --git a/python/.gitattributes b/python/.gitattributes
deleted file mode 100644
index 00a7b00..000
--- a/python/.gitattributes
+++ /dev/null
@@ -1 +0,0 @@
-.git_archival.txt  export-subst

http://git-wip-us.apache.org/repos/asf/arrow/blob/68e39c68/python/pyarrow/__init__.py
--
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 5af93fb..b9d3861 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -17,14 +17,6 @@
 
 # flake8: noqa
 
-from pkg_resources import get_distribution, DistributionNotFound
-try:
-__version__ = get_distribution(__name__).version
-except DistributionNotFound:
-   # package is not installed
-   pass
-
-
 import pyarrow.config
 
 from pyarrow.array import (Array,
@@ -50,3 +42,5 @@ from pyarrow.schema import (null, bool_,
 DataType, Field, Schema, schema)
 
 from pyarrow.table import Column, RecordBatch, Table, from_pandas_dataframe
+
+from pyarrow.version import version as __version__

http://git-wip-us.apache.org/repos/asf/arrow/blob/68e39c68/python/setup.cfg
--
diff --git a/python/setup.cfg b/python/setup.cfg
deleted file mode 100644
index caae3e0..000
--- a/python/setup.cfg
+++ /dev/null
@@ -1,20 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[build_sphinx]
-source-dir = doc/
-build-dir  = doc/_build

http://git-wip-us.apache.org/repos/asf/arrow/blob/68e39c68/python/setup.py
--

arrow git commit: ARROW-374: More precise handling of bytes vs unicode in Python API

2016-12-21 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 73455b56f -> 268ffbeff


ARROW-374: More precise handling of bytes vs unicode in Python API

Python built-in types that are not all unicode become `arrow::BinaryArray` 
instead of `arrow::StringArray`, since we cannot be sure that the PyBytes 
objects are UTF-8-encoded strings.

Author: Wes McKinney 

Closes #249 from wesm/ARROW-374 and squashes the following commits:

1371a30 [Wes McKinney] py3 fixes
8ac3a49 [Wes McKinney] Consistently convert PyBytes to BinaryArray with pandas, 
too
83d1c05 [Wes McKinney] Remove print statement
c8df606 [Wes McKinney] Timestamp and time cannot be static
4a9aaf4 [Wes McKinney] Add Python interface to BinaryArray, convert PyBytes to 
binary instead of assuming utf8 unicode


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/268ffbef
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/268ffbef
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/268ffbef

Branch: refs/heads/master
Commit: 268ffbeffb1cd0617e52d381d500a2d10f61124c
Parents: 73455b5
Author: Wes McKinney 
Authored: Wed Dec 21 09:31:56 2016 +0100
Committer: Uwe L. Korn 
Committed: Wed Dec 21 09:31:56 2016 +0100

--
 cpp/src/arrow/type.cc|  6 +-
 python/pyarrow/__init__.py   |  5 +-
 python/pyarrow/array.pyx |  5 ++
 python/pyarrow/includes/libarrow.pxd |  6 +-
 python/pyarrow/scalar.pyx| 16 -
 python/pyarrow/schema.pyx|  6 ++
 python/pyarrow/tests/test_convert_builtin.py | 31 ++---
 python/pyarrow/tests/test_convert_pandas.py  | 18 +++--
 python/pyarrow/tests/test_scalars.py | 22 +--
 python/src/pyarrow/adapters/builtin.cc   | 80 ---
 python/src/pyarrow/adapters/pandas.cc| 65 +-
 python/src/pyarrow/helpers.cc| 50 +-
 python/src/pyarrow/helpers.h | 16 -
 13 files changed, 227 insertions(+), 99 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/268ffbef/cpp/src/arrow/type.cc
--
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 4748cc3..8ff9eea 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -155,13 +155,11 @@ TYPE_FACTORY(binary, BinaryType);
 TYPE_FACTORY(date, DateType);
 
 std::shared_ptr timestamp(TimeUnit unit) {
-  static std::shared_ptr result = std::make_shared();
-  return result;
+  return std::make_shared(unit);
 }
 
 std::shared_ptr time(TimeUnit unit) {
-  static std::shared_ptr result = std::make_shared();
-  return result;
+  return std::make_shared(unit);
 }
 
 std::shared_ptr list(const std::shared_ptr& value_type) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/268ffbef/python/pyarrow/__init__.py
--
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 39ba4c7..9ede934 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -40,13 +40,14 @@ from pyarrow.scalar import (ArrayValue, Scalar, NA, NAType,
 BooleanValue,
 Int8Value, Int16Value, Int32Value, Int64Value,
 UInt8Value, UInt16Value, UInt32Value, UInt64Value,
-FloatValue, DoubleValue, ListValue, StringValue)
+FloatValue, DoubleValue, ListValue,
+BinaryValue, StringValue)
 
 from pyarrow.schema import (null, bool_,
 int8, int16, int32, int64,
 uint8, uint16, uint32, uint64,
 timestamp, date,
-float_, double, string,
+float_, double, binary, string,
 list_, struct, field,
 DataType, Field, Schema, schema)
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/268ffbef/python/pyarrow/array.pyx
--
diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx
index 84f1705..c178d5c 100644
--- a/python/pyarrow/array.pyx
+++ b/python/pyarrow/array.pyx
@@ -238,6 +238,10 @@ cdef class StringArray(Array):
 pass
 
 
+cdef class BinaryArray(Array):
+pass
+
+
 cdef dict _array_classes = {
 Type_NA: NullArray,
 Type_BOOL: BooleanArray,
@@ -253,6 +257,7 @@ cdef dict _array_classes = {
 Type_FLOAT: FloatArray,
 Type_DOUBLE: DoubleArray,
 Type_LIST: ListArray,
+Type_BINARY: BinaryArray,
 Type_STRING: StringArray,
 Type_TIMESTAMP: Int64Array,
 }

http://git-wip-us.apache.org/re

arrow git commit: ARROW-440: [C++] Support pkg-config

2016-12-21 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 268ffbeff -> fd4eb98af


ARROW-440: [C++] Support pkg-config

pkg-config is a tool to get build flags.

If Arrow supports pkg-config, users can set build flags easily.

For example, CMake supports pkg-config.

To support pkg-config, we just install .pc file that includes build
flags information.

Author: Kouhei Sutou 

Closes #250 from kou/ARROW-440-support-pkg-config and squashes the following 
commits:

f35fc44 [Kouhei Sutou] ARROW-440: [C++] Support pkg-config


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/fd4eb98a
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/fd4eb98a
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/fd4eb98a

Branch: refs/heads/master
Commit: fd4eb98af9bbf19b7a640b55e2d8ed5ad87b6af1
Parents: 268ffbe
Author: Kouhei Sutou 
Authored: Wed Dec 21 16:50:55 2016 +0100
Committer: Uwe L. Korn 
Committed: Wed Dec 21 16:50:55 2016 +0100

--
 cpp/src/arrow/CMakeLists.txt  |  8 
 cpp/src/arrow/arrow.pc.in | 26 ++
 cpp/src/arrow/io/CMakeLists.txt   |  8 
 cpp/src/arrow/io/arrow-io.pc.in   | 27 +++
 cpp/src/arrow/ipc/CMakeLists.txt  |  8 
 cpp/src/arrow/ipc/arrow-ipc.pc.in | 27 +++
 6 files changed, 104 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/fd4eb98a/cpp/src/arrow/CMakeLists.txt
--
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index b8500ab..f8c5051 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -33,6 +33,14 @@ install(FILES
   test-util.h
   DESTINATION include/arrow)
 
+# pkg-config support
+configure_file(arrow.pc.in
+  "${CMAKE_CURRENT_BINARY_DIR}/arrow.pc"
+  @ONLY)
+install(
+  FILES "${CMAKE_CURRENT_BINARY_DIR}/arrow.pc"
+  DESTINATION "lib/pkgconfig/")
+
 ###
 # Unit tests
 ###

http://git-wip-us.apache.org/repos/asf/arrow/blob/fd4eb98a/cpp/src/arrow/arrow.pc.in
--
diff --git a/cpp/src/arrow/arrow.pc.in b/cpp/src/arrow/arrow.pc.in
new file mode 100644
index 000..5ad429b
--- /dev/null
+++ b/cpp/src/arrow/arrow.pc.in
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+prefix=@CMAKE_INSTALL_PREFIX@
+libdir=${prefix}/lib
+includedir=${prefix}/include
+
+Name: Apache Arrow
+Description: Arrow is a set of technologies that enable big-data systems to 
process and move data fast.
+Version: @ARROW_VERSION@
+Libs: -L${libdir} -larrow
+Cflags: -I${includedir}

http://git-wip-us.apache.org/repos/asf/arrow/blob/fd4eb98a/cpp/src/arrow/io/CMakeLists.txt
--
diff --git a/cpp/src/arrow/io/CMakeLists.txt b/cpp/src/arrow/io/CMakeLists.txt
index e2b6496..2062cd4 100644
--- a/cpp/src/arrow/io/CMakeLists.txt
+++ b/cpp/src/arrow/io/CMakeLists.txt
@@ -134,3 +134,11 @@ install(FILES
 install(TARGETS arrow_io
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib)
+
+# pkg-config support
+configure_file(arrow-io.pc.in
+  "${CMAKE_CURRENT_BINARY_DIR}/arrow-io.pc"
+  @ONLY)
+install(
+  FILES "${CMAKE_CURRENT_BINARY_DIR}/arrow-io.pc"
+  DESTINATION "lib/pkgconfig/")

http://git-wip-us.apache.org/repos/asf/arrow/blob/fd4eb98a/cpp/src/arrow/io/arrow-io.pc.in
--
diff --git a/cpp/src/arrow/io/arrow-io.pc.in b/cpp/src/arrow/io/arrow-io.pc.in
new file mode 100644
index 000..4b4abdd
--- /dev/null
+++ b/cpp/src/arrow/io/arrow-io.pc.in
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF 

arrow git commit: ARROW-322: [C++] Remove ARROW_HDFS option, always build the module

2016-12-28 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master ab5f66a2e -> cfbdb6800


ARROW-322: [C++] Remove ARROW_HDFS option, always build the module

Author: Wes McKinney 

Closes #253 from wesm/ARROW-322 and squashes the following commits:

e793fd1 [Wes McKinney] Use string() instead of native() for file paths because 
windows uses utf16 native encoding
d0cc376 [Wes McKinney] Add NOMINMAX windows workaround
5e53ddb [Wes McKinney] Visibility fix
ea8fb9d [Wes McKinney] Various Win32 compilation fixes
82c4d2d [Wes McKinney] Remove ARROW_HDFS option, always build the module


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/cfbdb680
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/cfbdb680
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/cfbdb680

Branch: refs/heads/master
Commit: cfbdb680063b15b5068d99175fe2f042d16abf52
Parents: ab5f66a
Author: Wes McKinney 
Authored: Wed Dec 28 14:52:43 2016 +0100
Committer: Uwe L. Korn 
Committed: Wed Dec 28 14:52:43 2016 +0100

--
 ci/travis_before_script_cpp.sh |  2 -
 cpp/CMakeLists.txt |  4 --
 cpp/src/arrow/io/CMakeLists.txt| 56 +
 cpp/src/arrow/io/hdfs-internal.cc  | 26 +++-
 cpp/src/arrow/io/hdfs-internal.h   | 22 +-
 cpp/src/arrow/io/io-hdfs-test.cc   |  2 +-
 cpp/src/arrow/ipc/json-integration-test.cc |  2 +-
 7 files changed, 47 insertions(+), 67 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/cfbdb680/ci/travis_before_script_cpp.sh
--
diff --git a/ci/travis_before_script_cpp.sh b/ci/travis_before_script_cpp.sh
index 2030773..73bdaeb 100755
--- a/ci/travis_before_script_cpp.sh
+++ b/ci/travis_before_script_cpp.sh
@@ -26,8 +26,6 @@ CPP_DIR=$TRAVIS_BUILD_DIR/cpp
 
 CMAKE_COMMON_FLAGS="\
 -DARROW_BUILD_BENCHMARKS=ON \
--DARROW_PARQUET=OFF \
--DARROW_HDFS=ON \
 -DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL"
 
 if [ $TRAVIS_OS_NAME == "linux" ]; then

http://git-wip-us.apache.org/repos/asf/arrow/blob/cfbdb680/cpp/CMakeLists.txt
--
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 4507e67..47b7671 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -74,10 +74,6 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL 
"${CMAKE_CURRENT_SOURCE_DIR}")
 "Build the Arrow IPC extensions"
 ON)
 
-  option(ARROW_HDFS
-"Build the Arrow IO extensions for the Hadoop file system"
-OFF)
-
   option(ARROW_BOOST_USE_SHARED
 "Rely on boost shared libraries where relevant"
 ON)

http://git-wip-us.apache.org/repos/asf/arrow/blob/cfbdb680/cpp/src/arrow/io/CMakeLists.txt
--
diff --git a/cpp/src/arrow/io/CMakeLists.txt b/cpp/src/arrow/io/CMakeLists.txt
index 2062cd4..1e65a1a 100644
--- a/cpp/src/arrow/io/CMakeLists.txt
+++ b/cpp/src/arrow/io/CMakeLists.txt
@@ -45,50 +45,30 @@ set(ARROW_IO_TEST_LINK_LIBS
 
 set(ARROW_IO_SRCS
   file.cc
+  hdfs.cc
+  hdfs-internal.cc
   interfaces.cc
   memory.cc
 )
 
-if(ARROW_HDFS)
-  if(NOT THIRDPARTY_DIR)
-message(FATAL_ERROR "THIRDPARTY_DIR not set")
-  endif()
-
-  if (DEFINED ENV{HADOOP_HOME})
-set(HADOOP_HOME $ENV{HADOOP_HOME})
-if (NOT EXISTS "${HADOOP_HOME}/include/hdfs.h")
-  message(STATUS "Did not find hdfs.h in expected location, using vendored 
one")
-  set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop")
-endif()
-  else()
+# HDFS thirdparty setup
+if (DEFINED ENV{HADOOP_HOME})
+  set(HADOOP_HOME $ENV{HADOOP_HOME})
+  if (NOT EXISTS "${HADOOP_HOME}/include/hdfs.h")
+message(STATUS "Did not find hdfs.h in expected location, using vendored 
one")
 set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop")
   endif()
+else()
+  set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop")
+endif()
 
-  set(HDFS_H_PATH "${HADOOP_HOME}/include/hdfs.h")
-  if (NOT EXISTS ${HDFS_H_PATH})
-message(FATAL_ERROR "Did not find hdfs.h at ${HDFS_H_PATH}")
-  endif()
-  message(STATUS "Found hdfs.h at: " ${HDFS_H_PATH})
-  message(STATUS "Building libhdfs shim component")
-
-  include_directories(SYSTEM "${HADOOP_HOME}/include")
-
-  set(ARROW_HDFS_SRCS
-hdfs.cc
-hdfs-internal.cc)
-
-  set_property(SOURCE ${ARROW_HDFS_SRCS}
-APPEND_STRING PROPERTY
-COMPILE_FLAGS "-DHAS_HADOOP")
-
-  set(ARROW_IO_SRCS
-${ARROW_HDFS_SRCS}
-${ARROW_IO_SRCS})
-
-  ADD_ARROW_TEST(io-hdfs-test)
-  ARROW_TEST_LINK_LIBRARIES(io-hdfs-test
-${ARROW_IO_TEST_LINK_LIBS})
+set(HDFS_H_PATH "

arrow git commit: ARROW-444: [Python] Native file reads into pre-allocated memory. Some IO API cleanup / niceness

2016-12-28 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 8aab00ee1 -> 3095f2cb7


ARROW-444: [Python] Native file reads into pre-allocated memory. Some IO API 
cleanup / niceness

This yields slightly better performance and less memory use. Also deleted some 
duplicated code

Author: Wes McKinney 

Closes #257 from wesm/ARROW-444 and squashes the following commits:

30e480d [Wes McKinney] Rename PyBytes_Empty to something more mundane
9db0d81 [Wes McKinney] Native file reads into pre-allocated memory. Deprecated 
HdfsClient.connect API. Promote pyarrow.io classes into pyarrow namespace


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/3095f2cb
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/3095f2cb
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/3095f2cb

Branch: refs/heads/master
Commit: 3095f2cb7bc19954d0dfba02486b7ec48d8fef0f
Parents: 8aab00e
Author: Wes McKinney 
Authored: Wed Dec 28 23:05:50 2016 +0100
Committer: Uwe L. Korn 
Committed: Wed Dec 28 23:05:50 2016 +0100

--
 python/pyarrow/__init__.py|   4 ++
 python/pyarrow/io.pyx | 109 +
 python/pyarrow/tests/test_hdfs.py |   2 +-
 python/pyarrow/tests/test_io.py   |   4 +-
 4 files changed, 49 insertions(+), 70 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/3095f2cb/python/pyarrow/__init__.py
--
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 6f81ef4..02b2b06 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -37,6 +37,10 @@ from pyarrow.array import (Array,
 
 from pyarrow.error import ArrowException
 
+from pyarrow.io import (HdfsClient, HdfsFile, NativeFile, PythonFileInterface,
+BytesReader, Buffer, InMemoryOutputStream,
+BufferReader)
+
 from pyarrow.scalar import (ArrayValue, Scalar, NA, NAType,
 BooleanValue,
 Int8Value, Int16Value, Int32Value, Int64Value,

http://git-wip-us.apache.org/repos/asf/arrow/blob/3095f2cb/python/pyarrow/io.pyx
--
diff --git a/python/pyarrow/io.pyx b/python/pyarrow/io.pyx
index 8491aa8..cab6ccb 100644
--- a/python/pyarrow/io.pyx
+++ b/python/pyarrow/io.pyx
@@ -37,6 +37,10 @@ import sys
 import threading
 import time
 
+# To let us get a PyObject* and avoid Cython auto-ref-counting
+cdef extern from "Python.h":
+PyObject* PyBytes_FromStringAndSizeNative" PyBytes_FromStringAndSize"(
+char *v, Py_ssize_t len) except NULL
 
 cdef class NativeFile:
 
@@ -119,21 +123,24 @@ cdef class NativeFile:
 with nogil:
 check_status(self.wr_file.get().Write(buf, bufsize))
 
-def read(self, int nbytes):
+def read(self, int64_t nbytes):
 cdef:
 int64_t bytes_read = 0
-uint8_t* buf
-shared_ptr[CBuffer] out
+PyObject* obj
 
 self._assert_readable()
 
+# Allocate empty write space
+obj = PyBytes_FromStringAndSizeNative(NULL, nbytes)
+
+cdef uint8_t* buf =  cp.PyBytes_AS_STRING( obj)
 with nogil:
-check_status(self.rd_file.get().ReadB(nbytes, &out))
+check_status(self.rd_file.get().Read(nbytes, &bytes_read, buf))
 
-result = cp.PyBytes_FromStringAndSize(
-out.get().data(), out.get().size())
+if bytes_read < nbytes:
+cp._PyBytes_Resize(&obj,  bytes_read)
 
-return result
+return PyObject_to_object(obj)
 
 
 # --
@@ -339,31 +346,8 @@ cdef class HdfsClient:
 cdef readonly:
 bint is_open
 
-def __cinit__(self):
-self.is_open = False
-
-def __dealloc__(self):
-if self.is_open:
-self.close()
-
-def close(self):
-"""
-Disconnect from the HDFS cluster
-"""
-self._ensure_client()
-with nogil:
-check_status(self.client.get().Disconnect())
-self.is_open = False
-
-cdef _ensure_client(self):
-if self.client.get() == NULL:
-raise IOError('HDFS client improperly initialized')
-elif not self.is_open:
-raise IOError('HDFS client is closed')
-
-@classmethod
-def connect(cls, host="default", port=0, user=None, kerb_ticket=None,
-driver='libhdfs'):
+def __cinit__(self, host="default", port=0, user=None, kerb_ticket=None,
+  driver='libhdfs'):
 """
 

arrow git commit: ARROW-338: Implement visitor pattern for IPC loading/unloading

2016-12-29 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 4733ee876 -> 23fe6ae02


ARROW-338: Implement visitor pattern for IPC loading/unloading

This is a first cut at getting rid of the if-then-else statements and using the 
visitor pattern. This also has the benefit of forcing us to provide 
implementations should we add new types to Arrow.

Author: Wes McKinney 

Closes #256 from wesm/ARROW-338 and squashes the following commits:

59bac66 [Wes McKinney] Fix accidental copy
17214c4 [Wes McKinney] Fix comment
6b00da4 [Wes McKinney] Implement visitor pattern for IPC loading/unloading


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/23fe6ae0
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/23fe6ae0
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/23fe6ae0

Branch: refs/heads/master
Commit: 23fe6ae02a6fa6ff912986c45079e25b3e5e4deb
Parents: 4733ee8
Author: Wes McKinney 
Authored: Thu Dec 29 10:22:40 2016 +0100
Committer: Uwe L. Korn 
Committed: Thu Dec 29 10:22:40 2016 +0100

--
 cpp/src/arrow/array.h|   1 +
 cpp/src/arrow/ipc/adapter.cc | 477 --
 cpp/src/arrow/type_fwd.h |   3 +-
 3 files changed, 306 insertions(+), 175 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/23fe6ae0/cpp/src/arrow/array.h
--
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index 5cd56d6..6239ccc 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -471,6 +471,7 @@ extern template class ARROW_EXPORT NumericArray;
 extern template class ARROW_EXPORT NumericArray;
 extern template class ARROW_EXPORT NumericArray;
 extern template class ARROW_EXPORT NumericArray;
+extern template class ARROW_EXPORT NumericArray;
 
 #if defined(__GNUC__) && !defined(__clang__)
 #pragma GCC diagnostic pop

http://git-wip-us.apache.org/repos/asf/arrow/blob/23fe6ae0/cpp/src/arrow/ipc/adapter.cc
--
diff --git a/cpp/src/arrow/ipc/adapter.cc b/cpp/src/arrow/ipc/adapter.cc
index f813c1d..ac4054b 100644
--- a/cpp/src/arrow/ipc/adapter.cc
+++ b/cpp/src/arrow/ipc/adapter.cc
@@ -34,6 +34,7 @@
 #include "arrow/status.h"
 #include "arrow/table.h"
 #include "arrow/type.h"
+#include "arrow/type_fwd.h"
 #include "arrow/util/bit-util.h"
 #include "arrow/util/logging.h"
 
@@ -43,80 +44,34 @@ namespace flatbuf = org::apache::arrow::flatbuf;
 
 namespace ipc {
 
-static bool IsPrimitive(const DataType* type) {
-  DCHECK(type != nullptr);
-  switch (type->type) {
-// NA is null type or "no type", considered primitive for now
-case Type::NA:
-case Type::BOOL:
-case Type::UINT8:
-case Type::INT8:
-case Type::UINT16:
-case Type::INT16:
-case Type::UINT32:
-case Type::INT32:
-case Type::UINT64:
-case Type::INT64:
-case Type::FLOAT:
-case Type::DOUBLE:
-  return true;
-default:
-  return false;
-  }
-}
-
 // --
 // Record batch write path
 
-Status VisitArray(const Array* arr, std::vector* 
field_nodes,
-std::vector>* buffers, int max_recursion_depth) {
-  if (max_recursion_depth <= 0) { return Status::Invalid("Max recursion depth 
reached"); }
-  DCHECK(arr);
-  DCHECK(field_nodes);
-  // push back all common elements
-  field_nodes->push_back(flatbuf::FieldNode(arr->length(), arr->null_count()));
-  if (arr->null_count() > 0) {
-buffers->push_back(arr->null_bitmap());
-  } else {
-// Push a dummy zero-length buffer, not to be copied
-buffers->push_back(std::make_shared(nullptr, 0));
-  }
-
-  const DataType* arr_type = arr->type().get();
-  if (IsPrimitive(arr_type)) {
-const auto prim_arr = static_cast(arr);
-buffers->push_back(prim_arr->data());
-  } else if (arr->type_enum() == Type::STRING || arr->type_enum() == 
Type::BINARY) {
-const auto binary_arr = static_cast(arr);
-buffers->push_back(binary_arr->offsets());
-buffers->push_back(binary_arr->data());
-  } else if (arr->type_enum() == Type::LIST) {
-const auto list_arr = static_cast(arr);
-buffers->push_back(list_arr->offsets());
-RETURN_NOT_OK(VisitArray(
-list_arr->values().get(), field_nodes, buffers, max_recursion_depth - 
1));
-  } else if (arr->type_enum() == Type::STRUCT) {
-const auto struct_arr = static_cast(arr);
-for (auto& field : struct_arr->fields()) {
-  RETURN_NOT_OK(
-  VisitArray(field.get(), field_nodes, buffers, max_recursion_depth - 
1));
-}
-  } else {
-return Status::NotI

arrow git commit: ARROW-108: [C++] Add Union implementation and IPC/JSON serialization tests

2017-01-02 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 806239fdd -> 9f7d4ae6d


ARROW-108: [C++] Add Union implementation and IPC/JSON serialization tests

Closes #206.

Still need to add test cases for JSON read/write and dense union IPC. 
Integration tests can happen in a subsequent PR (but the Java library does not 
support dense unions yet, so sparse only -- i.e. no offsets vector)

Author: Wes McKinney 

Closes #264 from wesm/ARROW-108 and squashes the following commits:

86c4191 [Wes McKinney] Fix valgrind error
cdfc61d [Wes McKinney] Export UnionArray
3edca1e [Wes McKinney] Implement basic JSON roundtrip for unions
30b7188 [Wes McKinney] Add test case for dense union, implement RangeEquals for 
it
4887fd2 [Wes McKinney] Move Windows stuff into a compatibility header, exclude 
from clang-format because of include order sensitivity
5ca9c57 [Wes McKinney] Implement IPC/JSON serializationf or unions. Test 
UnionMode::SPARSE example in IPC


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/9f7d4ae6
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/9f7d4ae6
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/9f7d4ae6

Branch: refs/heads/master
Commit: 9f7d4ae6da04d9339dfa2811d750ccf616568bc8
Parents: 806239f
Author: Wes McKinney 
Authored: Tue Jan 3 08:27:36 2017 +0100
Committer: Uwe L. Korn 
Committed: Tue Jan 3 08:27:36 2017 +0100

--
 cpp/CMakeLists.txt   |   4 +-
 cpp/src/arrow/array-list-test.cc |   2 +-
 cpp/src/arrow/array-primitive-test.cc|   2 +-
 cpp/src/arrow/array-struct-test.cc   |   5 +-
 cpp/src/arrow/array-test.cc  |   6 +-
 cpp/src/arrow/array.cc   | 120 +++---
 cpp/src/arrow/array.h|  90 ---
 cpp/src/arrow/builder.h  |   2 +-
 cpp/src/arrow/io/hdfs-internal.h |  12 +--
 cpp/src/arrow/io/windows_compatibility.h |  36 
 cpp/src/arrow/ipc/adapter.cc |  56 +---
 cpp/src/arrow/ipc/ipc-adapter-test.cc|   6 +-
 cpp/src/arrow/ipc/ipc-json-test.cc   |  18 +++-
 cpp/src/arrow/ipc/json-internal.cc   |  90 ++-
 cpp/src/arrow/ipc/test-common.h  |  83 --
 cpp/src/arrow/pretty_print.cc|  44 +++---
 cpp/src/arrow/test-util.h|  14 ++-
 cpp/src/arrow/type.cc|   2 +-
 cpp/src/arrow/type.h |   8 +-
 19 files changed, 476 insertions(+), 124 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/9f7d4ae6/cpp/CMakeLists.txt
--
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index bf30543..13f0354 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -668,7 +668,9 @@ endif (UNIX)
 if (${CLANG_FORMAT_FOUND})
   # runs clang format and updates files in place.
   add_custom_target(format ${BUILD_SUPPORT_DIR}/run-clang-format.sh 
${CMAKE_CURRENT_SOURCE_DIR} ${CLANG_FORMAT_BIN} 1
-`find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h | sed 
-e '/_generated/g'`
+`find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h |
+sed -e '/_generated/g' |
+sed -e '/windows_compatibility.h/g'`
 `find ${CMAKE_CURRENT_SOURCE_DIR}/../python -name \\*.cc -or -name \\*.h`)
 
   # runs clang format and exits with a non-zero exit code if any files need to 
be reformatted

http://git-wip-us.apache.org/repos/asf/arrow/blob/9f7d4ae6/cpp/src/arrow/array-list-test.cc
--
diff --git a/cpp/src/arrow/array-list-test.cc b/cpp/src/arrow/array-list-test.cc
index 8baaf06..8e4d319 100644
--- a/cpp/src/arrow/array-list-test.cc
+++ b/cpp/src/arrow/array-list-test.cc
@@ -89,7 +89,7 @@ class TestListBuilder : public TestBuilder {
 TEST_F(TestListBuilder, Equality) {
   Int32Builder* vb = 
static_cast(builder_->value_builder().get());
 
-  ArrayPtr array, equal_array, unequal_array;
+  std::shared_ptr array, equal_array, unequal_array;
   vector equal_offsets = {0, 1, 2, 5};
   vector equal_values = {1, 2, 3, 4, 5, 2, 2, 2};
   vector unequal_offsets = {0, 1, 4};

http://git-wip-us.apache.org/repos/asf/arrow/blob/9f7d4ae6/cpp/src/arrow/array-primitive-test.cc
--
diff --git a/cpp/src/arrow/array-primitive-test.cc 
b/cpp/src/arrow/array-primitive-test.cc
index a10e240..443abac 100644
--- a/cpp/src/arrow/array-primitive-test.cc
+++ b/cpp/src/arrow/array-primitive-test.cc
@@ -318,7 +318,7 @@ TYPED_TEST(TestPrimitiveBuilder, Equality) {
   this->RandomData(size);
   vector& draws = this->draws_;
   vector& valid_bytes = this->valid_

arrow git commit: ARROW-294: [C++] Do not use platform-dependent fopen/fclose functions for MemoryMappedFile

2017-01-02 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 9f7d4ae6d -> d9df55679


ARROW-294: [C++] Do not use platform-dependent fopen/fclose functions for 
MemoryMappedFile

Also adds a test case for ARROW-340.

Author: Wes McKinney 

Closes #265 from wesm/ARROW-294 and squashes the following commits:

42a83a4 [Wes McKinney] Remove duplicated includes
3928ab0 [Wes McKinney] Base MemoryMappedFile implementation on common OSFile 
interface. Add test case for ARROW-340.


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/d9df5567
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/d9df5567
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/d9df5567

Branch: refs/heads/master
Commit: d9df556791fc6051b2c8582668df9c256f675116
Parents: 9f7d4ae
Author: Wes McKinney 
Authored: Tue Jan 3 08:28:46 2017 +0100
Committer: Uwe L. Korn 
Committed: Tue Jan 3 08:28:46 2017 +0100

--
 cpp/src/arrow/io/file.cc   | 208 +---
 cpp/src/arrow/io/file.h|  49 
 cpp/src/arrow/io/io-file-test.cc   | 116 +-
 cpp/src/arrow/io/io-memory-test.cc |  91 --
 cpp/src/arrow/io/memory.cc | 178 ---
 cpp/src/arrow/io/memory.h  |  39 --
 cpp/src/arrow/io/test-common.h |   1 +
 7 files changed, 359 insertions(+), 323 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/d9df5567/cpp/src/arrow/io/file.cc
--
diff --git a/cpp/src/arrow/io/file.cc b/cpp/src/arrow/io/file.cc
index c50a9bb..3182f2d 100644
--- a/cpp/src/arrow/io/file.cc
+++ b/cpp/src/arrow/io/file.cc
@@ -60,7 +60,7 @@
 
 #endif  // _MSC_VER
 
-// defines that
+// defines that don't exist in MinGW
 #if defined(__MINGW32__)
 #define ARROW_WRITE_SHMODE S_IRUSR | S_IWUSR
 #elif defined(_MSC_VER)  // Visual Studio
@@ -174,7 +174,8 @@ static inline Status FileOpenReadable(const std::string& 
filename, int* fd) {
   return CheckOpenResult(ret, errno_actual, filename.c_str(), filename.size());
 }
 
-static inline Status FileOpenWriteable(const std::string& filename, int* fd) {
+static inline Status FileOpenWriteable(
+const std::string& filename, bool write_only, bool truncate, int* fd) {
   int ret;
   errno_t errno_actual = 0;
 
@@ -186,13 +187,31 @@ static inline Status FileOpenWriteable(const std::string& 
filename, int* fd) {
   memcpy(wpath.data(), filename.data(), filename.size());
   memcpy(wpath.data() + nwchars, L"\0", sizeof(wchar_t));
 
-  errno_actual = _wsopen_s(fd, wpath.data(), _O_WRONLY | _O_CREAT | _O_BINARY 
| _O_TRUNC,
-  _SH_DENYNO, _S_IWRITE);
+  int oflag = _O_CREAT | _O_BINARY;
+
+  if (truncate) { oflag |= _O_TRUNC; }
+
+  if (write_only) {
+oflag |= _O_WRONLY;
+  } else {
+oflag |= _O_RDWR;
+  }
+
+  errno_actual = _wsopen_s(fd, wpath.data(), oflag, _SH_DENYNO, _S_IWRITE);
   ret = *fd;
 
 #else
-  ret = *fd =
-  open(filename.c_str(), O_WRONLY | O_CREAT | O_BINARY | O_TRUNC, 
ARROW_WRITE_SHMODE);
+  int oflag = O_CREAT | O_BINARY;
+
+  if (truncate) { oflag |= O_TRUNC; }
+
+  if (write_only) {
+oflag |= O_WRONLY;
+  } else {
+oflag |= O_RDWR;
+  }
+
+  ret = *fd = open(filename.c_str(), oflag, ARROW_WRITE_SHMODE);
 #endif
   return CheckOpenResult(ret, errno_actual, filename.c_str(), filename.size());
 }
@@ -296,10 +315,17 @@ class OSFile {
 
   ~OSFile() {}
 
-  Status OpenWritable(const std::string& path) {
-RETURN_NOT_OK(FileOpenWriteable(path, &fd_));
+  Status OpenWriteable(const std::string& path, bool append, bool write_only) {
+RETURN_NOT_OK(FileOpenWriteable(path, write_only, !append, &fd_));
 path_ = path;
 is_open_ = true;
+mode_ = write_only ? FileMode::READ : FileMode::READWRITE;
+
+if (append) {
+  RETURN_NOT_OK(FileGetSize(fd_, &size_));
+} else {
+  size_ = 0;
+}
 return Status::OK();
   }
 
@@ -307,11 +333,9 @@ class OSFile {
 RETURN_NOT_OK(FileOpenReadable(path, &fd_));
 RETURN_NOT_OK(FileGetSize(fd_, &size_));
 
-// The position should be 0 after GetSize
-// RETURN_NOT_OK(Seek(0));
-
 path_ = path;
 is_open_ = true;
+mode_ = FileMode::READ;
 return Status::OK();
   }
 
@@ -346,12 +370,14 @@ class OSFile {
 
   int64_t size() const { return size_; }
 
- private:
+ protected:
   std::string path_;
 
   // File descriptor
   int fd_;
 
+  FileMode::type mode_;
+
   bool is_open_;
   int64_t size_;
 };
@@ -440,7 +466,9 @@ int ReadableFile::file_descriptor() const {
 
 class FileOutputStream::FileOutputStreamImpl : public OSFile {
  public:
-  Status Open(const std::string& path) { return OpenWritable(path); }
+  Status Open(const std::string&

arrow git commit: ARROW-387: [C++] Verify zero-copy Buffer slices from BufferReader retain reference to parent Buffer

2017-01-02 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master d9df55679 -> 26140dca8


ARROW-387: [C++] Verify zero-copy Buffer slices from BufferReader retain 
reference to parent Buffer

This is stacked on top of the patch for ARROW-294, will rebase.

Author: Wes McKinney 

Closes #266 from wesm/ARROW-387 and squashes the following commits:

061ef8b [Wes McKinney] Verify BufferReader passes on ownership of parent buffer 
to zero-copy slices
42a83a4 [Wes McKinney] Remove duplicated includes
3928ab0 [Wes McKinney] Base MemoryMappedFile implementation on common OSFile 
interface. Add test case for ARROW-340.


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/26140dca
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/26140dca
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/26140dca

Branch: refs/heads/master
Commit: 26140dca893296d84cea3b76c97c62fbc4052e3f
Parents: d9df556
Author: Wes McKinney 
Authored: Tue Jan 3 08:31:37 2017 +0100
Committer: Uwe L. Korn 
Committed: Tue Jan 3 08:31:37 2017 +0100

--
 cpp/src/arrow/io/interfaces.cc |  5 +
 cpp/src/arrow/io/interfaces.h  |  5 -
 cpp/src/arrow/io/io-memory-test.cc | 23 ++-
 3 files changed, 31 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/26140dca/cpp/src/arrow/io/interfaces.cc
--
diff --git a/cpp/src/arrow/io/interfaces.cc b/cpp/src/arrow/io/interfaces.cc
index 68c1ac3..23bef28 100644
--- a/cpp/src/arrow/io/interfaces.cc
+++ b/cpp/src/arrow/io/interfaces.cc
@@ -44,5 +44,10 @@ Status ReadableFileInterface::ReadAt(
   return Read(nbytes, out);
 }
 
+Status Writeable::Write(const std::string& data) {
+  return Write(reinterpret_cast(data.c_str()),
+  static_cast(data.size()));
+}
+
 }  // namespace io
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/26140dca/cpp/src/arrow/io/interfaces.h
--
diff --git a/cpp/src/arrow/io/interfaces.h b/cpp/src/arrow/io/interfaces.h
index db0c059..8fe2849 100644
--- a/cpp/src/arrow/io/interfaces.h
+++ b/cpp/src/arrow/io/interfaces.h
@@ -20,6 +20,7 @@
 
 #include 
 #include 
+#include 
 
 #include "arrow/util/macros.h"
 #include "arrow/util/visibility.h"
@@ -67,9 +68,11 @@ class Seekable {
   virtual Status Seek(int64_t position) = 0;
 };
 
-class Writeable {
+class ARROW_EXPORT Writeable {
  public:
   virtual Status Write(const uint8_t* data, int64_t nbytes) = 0;
+
+  Status Write(const std::string& data);
 };
 
 class Readable {

http://git-wip-us.apache.org/repos/asf/arrow/blob/26140dca/cpp/src/arrow/io/io-memory-test.cc
--
diff --git a/cpp/src/arrow/io/io-memory-test.cc 
b/cpp/src/arrow/io/io-memory-test.cc
index 2463102..95d788c 100644
--- a/cpp/src/arrow/io/io-memory-test.cc
+++ b/cpp/src/arrow/io/io-memory-test.cc
@@ -48,12 +48,33 @@ TEST_F(TestBufferOutputStream, CloseResizes) {
   const int64_t nbytes = static_cast(data.size());
   const int K = 100;
   for (int i = 0; i < K; ++i) {
-EXPECT_OK(stream_->Write(reinterpret_cast(data.c_str()), 
nbytes));
+EXPECT_OK(stream_->Write(data));
   }
 
   ASSERT_OK(stream_->Close());
   ASSERT_EQ(K * nbytes, buffer_->size());
 }
 
+TEST(TestBufferReader, RetainParentReference) {
+  // ARROW-387
+  std::string data = "data123456";
+
+  std::shared_ptr slice1;
+  std::shared_ptr slice2;
+  {
+auto buffer = std::make_shared();
+ASSERT_OK(buffer->Resize(static_cast(data.size(;
+std::memcpy(buffer->mutable_data(), data.c_str(), data.size());
+BufferReader reader(buffer);
+ASSERT_OK(reader.Read(4, &slice1));
+ASSERT_OK(reader.Read(6, &slice2));
+  }
+
+  ASSERT_TRUE(slice1->parent() != nullptr);
+
+  ASSERT_EQ(0, std::memcmp(slice1->data(), data.c_str(), 4));
+  ASSERT_EQ(0, std::memcmp(slice2->data(), data.c_str() + 4, 6));
+}
+
 }  // namespace io
 }  // namespace arrow



arrow git commit: ARROW-456: Add jemalloc based MemoryPool

2017-01-06 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 320f5875e -> 5bf6ae49e


ARROW-456: Add jemalloc based MemoryPool

Runtimes of the `builder-benchmark`:

```
BM_BuildPrimitiveArrayNoNulls/repeats:3   901 ms889 ms  
1   576.196MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3   833 ms829 ms  
1 617.6MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3   825 ms821 ms  
1   623.855MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3_mean  853 ms846 ms  
1   605.884MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3_stddev 34 ms 30 ms  
021.147MB/s
BM_BuildVectorNoNulls/repeats:3   712 ms701 ms  
1   729.866MB/s
BM_BuildVectorNoNulls/repeats:3   671 ms670 ms  
1   764.464MB/s
BM_BuildVectorNoNulls/repeats:3   688 ms681 ms  
1   751.285MB/s
BM_BuildVectorNoNulls/repeats:3_mean  690 ms684 ms  
1   748.538MB/s
BM_BuildVectorNoNulls/repeats:3_stddev 17 ms 13 ms  
0   14.2578MB/s
```

With an aligned `Reallocate`, the jemalloc version is 50% faster and even 
outperforms `std::vector`:

```
BM_BuildPrimitiveArrayNoNulls/repeats:3   565 ms559 ms  
1   916.516MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3   540 ms537 ms  
1   952.727MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3   544 ms543 ms  
1   942.948MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3_mean  550 ms546 ms  
1   937.397MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3_stddev 11 ms  9 ms  
0   15.2949MB/s
```

Author: Uwe L. Korn 

Closes #270 from xhochy/ARROW-456 and squashes the following commits:

d3ce3bf [Uwe L. Korn] Zero arrays for now
831399d [Uwe L. Korn] cpplint #2
e6e251b [Uwe L. Korn] cpplint
52b3c76 [Uwe L. Korn] Add Reallocate implementation to PyArrowMemoryPool
113e650 [Uwe L. Korn] Add missing file
d331cd9 [Uwe L. Korn] Add tests for Reallocate
c2be086 [Uwe L. Korn] Add JEMALLOC_HOME to the Readme
bd47f51 [Uwe L. Korn] Add missing return value
5142ac3 [Uwe L. Korn] Don't use deprecated GBenchmark interfaces
b6bff98 [Uwe L. Korn] Add missing (win) include
6f08e19 [Uwe L. Korn] Don't build jemalloc on AppVeyor
834c3b2 [Uwe L. Korn] Add jemalloc to Travis builds
10c6839 [Uwe L. Korn] Implement Reallocate function
a17b313 [Uwe L. Korn] ARROW-456: C++: Add jemalloc based MemoryPool


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/5bf6ae49
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/5bf6ae49
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/5bf6ae49

Branch: refs/heads/master
Commit: 5bf6ae49ec561eaaef823f0eb16ccca2d2ba7cf3
Parents: 320f587
Author: Uwe L. Korn 
Authored: Fri Jan 6 15:57:20 2017 +0100
Committer: Uwe L. Korn 
Committed: Fri Jan 6 15:57:20 2017 +0100

--
 .travis.yml |  1 +
 appveyor.yml|  2 +-
 ci/travis_before_script_cpp.sh  |  5 ++
 cpp/CMakeLists.txt  | 30 ++-
 cpp/README.md   |  1 +
 cpp/cmake_modules/Findjemalloc.cmake| 86 
 cpp/src/arrow/CMakeLists.txt|  1 +
 cpp/src/arrow/buffer.cc |  6 +-
 cpp/src/arrow/builder-benchmark.cc  | 64 +++
 cpp/src/arrow/builder.cc|  1 +
 cpp/src/arrow/column-benchmark.cc   |  2 +-
 cpp/src/arrow/io/interfaces.cc  |  4 +-
 cpp/src/arrow/io/io-file-test.cc| 13 +++
 cpp/src/arrow/jemalloc/CMakeLists.txt   | 80 ++
 cpp/src/arrow/jemalloc/arrow-jemalloc.pc.in | 27 ++
 .../jemalloc/jemalloc-builder-benchmark.cc  | 47 +++
 .../arrow/jemalloc/jemalloc-memory_pool-test.cc | 51 
 cpp/src/arrow/jemalloc/memory_pool.cc   | 74 +
 cpp/src/arrow/jemalloc/memory_pool.h| 57 +
 cpp/src/arrow/jemalloc/symbols.map  | 30 +++
 cpp/src/arrow/memory_pool-test.cc   | 33 +++-
 cpp/src/arrow/memory_pool-test.h| 79 ++
 cpp/src/arrow/memory_pool.cc| 24 ++
 cpp/src/arrow/memory_pool.h |  1 +
 python/src/pyarrow/common.cc| 14 
 25 files changed, 704 insertions(+), 29 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/.travis.yml
--
diff --git a/

arrow git commit: ARROW-442: [Python] Inspect Parquet file metadata from Python

2017-01-09 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 3195948f6 -> f44b6a3b9


ARROW-442: [Python] Inspect Parquet file metadata from Python

I also made the Cython parquet extension "private" so that higher level logic 
(e.g. upcoming handling of multiple files) can be handled in pure Python (which 
doesn't need to be compiled)

Requires PARQUET-828 for the test suite to pass.

Author: Wes McKinney 

Closes #275 from wesm/ARROW-442 and squashes the following commits:

a4255a2 [Wes McKinney] Add row group metadata accessor, add smoke tests
75a11cf [Wes McKinney] Add more metadata accessor scaffolding, to be tested
e59ca40 [Wes McKinney] Move parquet Cython wrapper to a private import, add 
parquet.py for high level logic


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/f44b6a3b
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/f44b6a3b
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/f44b6a3b

Branch: refs/heads/master
Commit: f44b6a3b91a15461804dd7877840a557caa52e4e
Parents: 3195948
Author: Wes McKinney 
Authored: Tue Jan 10 08:44:01 2017 +0100
Committer: Uwe L. Korn 
Committed: Tue Jan 10 08:44:01 2017 +0100

--
 python/CMakeLists.txt|   2 +-
 python/pyarrow/_parquet.pxd  | 217 +
 python/pyarrow/_parquet.pyx  | 520 ++
 python/pyarrow/includes/parquet.pxd  | 147 -
 python/pyarrow/parquet.py| 116 +++
 python/pyarrow/parquet.pyx   | 244 --
 python/pyarrow/tests/test_parquet.py |  71 +++-
 python/setup.py  |   4 +-
 8 files changed, 922 insertions(+), 399 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/f44b6a3b/python/CMakeLists.txt
--
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index e42c45d..45115d4 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -428,7 +428,7 @@ if (PYARROW_BUILD_PARQUET)
 parquet_arrow)
   set(CYTHON_EXTENSIONS
 ${CYTHON_EXTENSIONS}
-parquet)
+_parquet)
 endif()
 
 add_library(pyarrow SHARED

http://git-wip-us.apache.org/repos/asf/arrow/blob/f44b6a3b/python/pyarrow/_parquet.pxd
--
diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd
new file mode 100644
index 000..faca845
--- /dev/null
+++ b/python/pyarrow/_parquet.pxd
@@ -0,0 +1,217 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport (CArray, CSchema, CStatus,
+CTable, MemoryPool)
+from pyarrow.includes.libarrow_io cimport ReadableFileInterface, OutputStream
+
+
+cdef extern from "parquet/api/schema.h" namespace "parquet::schema" nogil:
+  cdef cppclass Node:
+pass
+
+  cdef cppclass GroupNode(Node):
+pass
+
+  cdef cppclass PrimitiveNode(Node):
+pass
+
+  cdef cppclass ColumnPath:
+c_string ToDotString()
+
+cdef extern from "parquet/api/schema.h" namespace "parquet" nogil:
+enum ParquetType" parquet::Type::type":
+ParquetType_BOOLEAN" parquet::Type::BOOLEAN"
+ParquetType_INT32" parquet::Type::INT32"
+ParquetType_INT64" parquet::Type::INT64"
+ParquetType_INT96" parquet::Type::INT96"
+ParquetType_FLOAT" parquet::Type::FLOAT"
+ParquetType_DOUBLE" parquet::Type::DOUBLE"
+ParquetType_BYTE_ARRAY" parquet::Type::BYTE_ARRAY"
+ParquetType_FIXED_LEN_BYTE_ARRAY" parquet::Type::FIXED_LEN_BYTE_ARRAY"
+
+enum ParquetLogicalType" parquet::LogicalType::type":
+ParquetLogicalType_NONE" parquet::LogicalType::NONE"
+ParquetLogicalType_UTF8" parquet::LogicalType::UTF8"
+ 

arrow git commit: ARROW-515: [Python] Add read_all methods to FileReader, StreamReader

2017-01-27 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 30bb0d97d -> 4226adfbc


ARROW-515: [Python] Add read_all methods to FileReader, StreamReader

Stacked on top of ARROW-514

Author: Wes McKinney 

Closes #307 from wesm/ARROW-515 and squashes the following commits:

6f2185c [Wes McKinney] Add read_all method to StreamReader, FileReader


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/4226adfb
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/4226adfb
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/4226adfb

Branch: refs/heads/master
Commit: 4226adfbc6b3dff10b3fe7a6691b30bcc94140bd
Parents: 30bb0d9
Author: Wes McKinney 
Authored: Fri Jan 27 10:46:34 2017 +0100
Committer: Uwe L. Korn 
Committed: Fri Jan 27 10:46:34 2017 +0100

--
 python/pyarrow/io.pyx| 44 ++-
 python/pyarrow/table.pyx |  4 +---
 python/pyarrow/tests/test_ipc.py | 19 +++
 3 files changed, 63 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/4226adfb/python/pyarrow/io.pyx
--
diff --git a/python/pyarrow/io.pyx b/python/pyarrow/io.pyx
index e5f8b7a..8b56508 100644
--- a/python/pyarrow/io.pyx
+++ b/python/pyarrow/io.pyx
@@ -34,7 +34,8 @@ cimport pyarrow.includes.pyarrow as pyarrow
 from pyarrow.compat import frombytes, tobytes, encode_file_path
 from pyarrow.error cimport check_status
 from pyarrow.schema cimport Schema
-from pyarrow.table cimport RecordBatch, batch_from_cbatch
+from pyarrow.table cimport (RecordBatch, batch_from_cbatch,
+table_from_ctable)
 
 cimport cpython as cp
 
@@ -936,6 +937,27 @@ cdef class _StreamReader:
 
 return batch_from_cbatch(batch)
 
+def read_all(self):
+"""
+Read all record batches as a pyarrow.Table
+"""
+cdef:
+vector[shared_ptr[CRecordBatch]] batches
+shared_ptr[CRecordBatch] batch
+shared_ptr[CTable] table
+c_string name = b''
+
+with nogil:
+while True:
+check_status(self.reader.get().GetNextRecordBatch(&batch))
+if batch.get() == NULL:
+break
+batches.push_back(batch)
+
+check_status(CTable.FromRecordBatches(name, batches, &table))
+
+return table_from_ctable(table)
+
 
 cdef class _FileWriter(_StreamWriter):
 
@@ -997,3 +1019,23 @@ cdef class _FileReader:
 # TODO(wesm): ARROW-503: Function was renamed. Remove after a period of
 # time has passed
 get_record_batch = get_batch
+
+def read_all(self):
+"""
+Read all record batches as a pyarrow.Table
+"""
+cdef:
+vector[shared_ptr[CRecordBatch]] batches
+shared_ptr[CTable] table
+c_string name = b''
+int i, nbatches
+
+nbatches = self.num_record_batches
+
+batches.resize(nbatches)
+with nogil:
+for i in range(nbatches):
+check_status(self.reader.get().GetRecordBatch(i, &batches[i]))
+check_status(CTable.FromRecordBatches(name, batches, &table))
+
+return table_from_ctable(table)

http://git-wip-us.apache.org/repos/asf/arrow/blob/4226adfb/python/pyarrow/table.pyx
--
diff --git a/python/pyarrow/table.pyx b/python/pyarrow/table.pyx
index 9242330..1707210 100644
--- a/python/pyarrow/table.pyx
+++ b/python/pyarrow/table.pyx
@@ -690,9 +690,7 @@ cdef class Table:
 with nogil:
 check_status(CTable.FromRecordBatches(c_name, c_batches, &c_table))
 
-table = Table()
-table.init(c_table)
-return table
+return table_from_ctable(c_table)
 
 def to_pandas(self, nthreads=None):
 """

http://git-wip-us.apache.org/repos/asf/arrow/blob/4226adfb/python/pyarrow/tests/test_ipc.py
--
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index 8ca464f..665a63b 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -83,6 +83,16 @@ class TestFile(MessagingTest, unittest.TestCase):
 batch = reader.get_batch(i)
 assert batches[i].equals(batch)
 
+def test_read_all(self):
+batches = self.write_batches()
+file_contents = self._get_source()
+
+reader = pa.FileReader(file_contents)
+
+result = reader.read_all()
+expected = pa.Table.from_batches(batches)
+assert re

arrow git commit: ARROW-497: Integration harness for streaming file format

2017-02-02 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master be5d73f2c -> 0ae4d86e5


ARROW-497: Integration harness for streaming file format

These tests pass locally for me. Thanks @nongli for this!

Author: Nong Li 
Author: Wes McKinney 

Closes #312 from wesm/streaming-integration and squashes the following commits:

8b9ad76 [Wes McKinney] Hook stream<->file tools together and get integration 
tests working. Quiet test output in TestArrowStreamPipe
c7f0483 [Nong Li] ARROW-XXX: [Java] Add file <=> stream utility tools.


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/0ae4d86e
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/0ae4d86e
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/0ae4d86e

Branch: refs/heads/master
Commit: 0ae4d86e5ef8ee53a8810f4324dce80ec6a9d422
Parents: be5d73f
Author: Nong Li 
Authored: Thu Feb 2 14:36:23 2017 +0100
Committer: Uwe L. Korn 
Committed: Thu Feb 2 14:36:23 2017 +0100

--
 ci/travis_script_integration.sh |  3 +
 integration/integration_test.py | 76 
 .../org/apache/arrow/tools/FileToStream.java| 68 ++
 .../org/apache/arrow/tools/StreamToFile.java| 61 
 .../arrow/vector/stream/MessageSerializer.java  |  2 +-
 .../vector/stream/TestArrowStreamPipe.java  |  2 +-
 6 files changed, 198 insertions(+), 14 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/0ae4d86e/ci/travis_script_integration.sh
--
diff --git a/ci/travis_script_integration.sh b/ci/travis_script_integration.sh
index d93411b..c019a4b 100755
--- a/ci/travis_script_integration.sh
+++ b/ci/travis_script_integration.sh
@@ -28,7 +28,10 @@ pushd $TRAVIS_BUILD_DIR/integration
 
 VERSION=0.1.1-SNAPSHOT
 export 
ARROW_JAVA_INTEGRATION_JAR=$JAVA_DIR/tools/target/arrow-tools-$VERSION-jar-with-dependencies.jar
+
 export ARROW_CPP_TESTER=$CPP_BUILD_DIR/debug/json-integration-test
+export ARROW_CPP_STREAM_TO_FILE=$CPP_BUILD_DIR/debug/stream-to-file
+export ARROW_CPP_FILE_TO_STREAM=$CPP_BUILD_DIR/debug/file-to-stream
 
 source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh
 export MINICONDA=$HOME/miniconda

http://git-wip-us.apache.org/repos/asf/arrow/blob/0ae4d86e/integration/integration_test.py
--
diff --git a/integration/integration_test.py b/integration/integration_test.py
index 77510da..a622bf2 100644
--- a/integration/integration_test.py
+++ b/integration/integration_test.py
@@ -556,12 +556,25 @@ class IntegrationRunner(object):
consumer.name))
 
 for json_path in self.json_files:
-print('Testing with {0}'.format(json_path))
+print('Testing file {0}'.format(json_path))
 
-arrow_path = os.path.join(self.temp_dir, guid())
+# Make the random access file
+print('-- Creating binary inputs')
+producer_file_path = os.path.join(self.temp_dir, guid())
+producer.json_to_file(json_path, producer_file_path)
 
-producer.json_to_arrow(json_path, arrow_path)
-consumer.validate(json_path, arrow_path)
+# Validate the file
+print('-- Validating file')
+consumer.validate(json_path, producer_file_path)
+
+print('-- Validating stream')
+producer_stream_path = os.path.join(self.temp_dir, guid())
+consumer_file_path = os.path.join(self.temp_dir, guid())
+producer.file_to_stream(producer_file_path,
+producer_stream_path)
+consumer.stream_to_file(producer_stream_path,
+consumer_file_path)
+consumer.validate(json_path, consumer_file_path)
 
 
 class Tester(object):
@@ -569,7 +582,13 @@ class Tester(object):
 def __init__(self, debug=False):
 self.debug = debug
 
-def json_to_arrow(self, json_path, arrow_path):
+def json_to_file(self, json_path, arrow_path):
+raise NotImplementedError
+
+def stream_to_file(self, stream_path, file_path):
+raise NotImplementedError
+
+def file_to_stream(self, file_path, stream_path):
 raise NotImplementedError
 
 def validate(self, json_path, arrow_path):
@@ -601,21 +620,40 @@ class JavaTester(Tester):
 if self.debug:
 print(' '.join(cmd))
 
-return run_cmd(cmd)
+run_cmd(cmd)
 
 def validate(self, json_path, arrow_path):
 return self._run(arrow_path, 

arrow git commit: ARROW-467: [Python] Run Python parquet-cpp unit tests in Travis CI

2017-02-03 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master c05292faf -> 720d422fa


ARROW-467: [Python] Run Python parquet-cpp unit tests in Travis CI

This means we'll have to tolerate broken builds whenever APIs change (a good 
incentive to avoid changing them as much as possible)

Author: Wes McKinney 

Closes #311 from wesm/ARROW-467 and squashes the following commits:

a9c285d [Wes McKinney] parquet-cpp build tweaks
661671c [Wes McKinney] Build parquet-cpp from source and run PyArrow Parquet 
unit tests in Travis CI


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/720d422f
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/720d422f
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/720d422f

Branch: refs/heads/master
Commit: 720d422fa761e2beab1b412b1b42c041ac2db1a4
Parents: c05292f
Author: Wes McKinney 
Authored: Fri Feb 3 09:08:14 2017 +0100
Committer: Uwe L. Korn 
Committed: Fri Feb 3 09:08:14 2017 +0100

--
 ci/travis_script_python.sh | 50 +
 1 file changed, 46 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/720d422f/ci/travis_script_python.sh
--
diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh
index 179567b..c186fd4 100755
--- a/ci/travis_script_python.sh
+++ b/ci/travis_script_python.sh
@@ -26,12 +26,52 @@ export ARROW_HOME=$ARROW_CPP_INSTALL
 export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ARROW_CPP_INSTALL/lib
 
 pushd $PYTHON_DIR
+export PARQUET_HOME=$TRAVIS_BUILD_DIR/parquet-env
+
+build_parquet_cpp() {
+  conda create -y -q -p $PARQUET_HOME thrift-cpp snappy zlib brotli boost
+  source activate $PARQUET_HOME
+
+  export BOOST_ROOT=$PARQUET_HOME
+  export SNAPPY_HOME=$PARQUET_HOME
+  export THRIFT_HOME=$PARQUET_HOME
+  export ZLIB_HOME=$PARQUET_HOME
+  export BROTLI_HOME=$PARQUET_HOME
+
+  PARQUET_DIR=$TRAVIS_BUILD_DIR/parquet
+  mkdir -p $PARQUET_DIR
+
+  git clone https://github.com/apache/parquet-cpp.git $PARQUET_DIR
+
+  pushd $PARQUET_DIR
+  mkdir build-dir
+  cd build-dir
+
+  cmake \
+  -DCMAKE_BUILD_TYPE=debug \
+  -DCMAKE_INSTALL_PREFIX=$PARQUET_HOME \
+  -DPARQUET_ARROW=on \
+  -DPARQUET_BUILD_BENCHMARKS=off \
+  -DPARQUET_BUILD_EXECUTABLES=off \
+  -DPARQUET_ZLIB_VENDORED=off \
+  -DPARQUET_BUILD_TESTS=off \
+  ..
+
+  make -j${CPU_COUNT}
+  make install
+
+  popd
+}
+
+build_parquet_cpp
+
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PARQUET_HOME/lib
 
 python_version_tests() {
   PYTHON_VERSION=$1
-  CONDA_ENV_NAME="pyarrow-test-${PYTHON_VERSION}"
-  conda create -y -q -n $CONDA_ENV_NAME python=$PYTHON_VERSION
-  source activate $CONDA_ENV_NAME
+  CONDA_ENV_DIR=$TRAVIS_BUILD_DIR/pyarrow-test-$PYTHON_VERSION
+  conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION
+  source activate $CONDA_ENV_DIR
 
   python --version
   which python
@@ -45,7 +85,9 @@ python_version_tests() {
   # Other stuff pip install
   pip install -r requirements.txt
 
-  python setup.py build_ext --inplace
+  python setup.py build_ext --inplace --with-parquet
+
+  python -c "import pyarrow.parquet"
 
   python -m pytest -vv -r sxX pyarrow
 



arrow git commit: ARROW-524: provide apis to access nested vectors and buffers

2017-02-05 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master c45c3b3e1 -> 70c05be21


ARROW-524: provide apis to access nested vectors and buffers

Author: Julien Le Dem 

Closes #314 from julienledem/setRangeToOne and squashes the following commits:

0d526bd [Julien Le Dem] ARROW-524: provide apis to access nested vectors and 
buffers


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/70c05be2
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/70c05be2
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/70c05be2

Branch: refs/heads/master
Commit: 70c05be2130bdbb650a83bc46f7c4f8fc8a231df
Parents: c45c3b3
Author: Julien Le Dem 
Authored: Sun Feb 5 14:06:37 2017 +0100
Committer: Uwe L. Korn 
Committed: Sun Feb 5 14:06:37 2017 +0100

--
 .../codegen/templates/NullableValueVectors.java | 21 +++--
 .../java/org/apache/arrow/vector/BitVector.java | 88 +++-
 .../org/apache/arrow/vector/NullableVector.java |  2 +
 .../apache/arrow/vector/TestValueVector.java| 36 
 4 files changed, 137 insertions(+), 10 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/70c05be2/java/vector/src/main/codegen/templates/NullableValueVectors.java
--
diff --git a/java/vector/src/main/codegen/templates/NullableValueVectors.java 
b/java/vector/src/main/codegen/templates/NullableValueVectors.java
index ce63710..6b25fb3 100644
--- a/java/vector/src/main/codegen/templates/NullableValueVectors.java
+++ b/java/vector/src/main/codegen/templates/NullableValueVectors.java
@@ -132,6 +132,11 @@ public final class ${className} extends 
BaseDataValueVector implements <#if type
   
 
   @Override
+  public BitVector getValidityVector() {
+return bits;
+  }
+
+  @Override
   public List getFieldInnerVectors() {
 return innerVectors;
   }
@@ -426,7 +431,7 @@ public final class ${className} extends BaseDataValueVector 
implements <#if type
 mutator.fillEmpties(thisIndex);
 
 values.copyFromSafe(fromIndex, thisIndex, from);
-bits.getMutator().setSafe(thisIndex, 1);
+bits.getMutator().setSafeToOne(thisIndex);
   }
 
   public void copyFromSafe(int fromIndex, int thisIndex, ${className} from){
@@ -525,7 +530,7 @@ public final class ${className} extends BaseDataValueVector 
implements <#if type
 
 @Override
 public void setIndexDefined(int index){
-  bits.getMutator().set(index, 1);
+  bits.getMutator().setToOne(index);
 }
 
 /**
@@ -543,7 +548,7 @@ public final class ${className} extends BaseDataValueVector 
implements <#if type
 valuesMutator.set(i, emptyByteArray);
   }
   
-  bitsMutator.set(index, 1);
+  bitsMutator.setToOne(index);
   valuesMutator.set(index, value);
   <#if type.major == "VarLen">lastSet = index;
 }
@@ -574,7 +579,7 @@ public final class ${className} extends BaseDataValueVector 
implements <#if type
   <#else>
   fillEmpties(index);
 
-  bits.getMutator().setSafe(index, 1);
+  bits.getMutator().setSafeToOne(index);
   values.getMutator().setSafe(index, value, start, length);
   setCount++;
   <#if type.major == "VarLen">lastSet = index;
@@ -587,7 +592,7 @@ public final class ${className} extends BaseDataValueVector 
implements <#if type
   <#else>
   fillEmpties(index);
 
-  bits.getMutator().setSafe(index, 1);
+  bits.getMutator().setSafeToOne(index);
   values.getMutator().setSafe(index, value, start, length);
   setCount++;
   <#if type.major == "VarLen">lastSet = index;
@@ -626,7 +631,7 @@ public final class ${className} extends BaseDataValueVector 
implements <#if type
 valuesMutator.set(i, emptyByteArray);
   }
   
-  bits.getMutator().set(index, 1);
+  bits.getMutator().setToOne(index);
   valuesMutator.set(index, holder);
   <#if type.major == "VarLen">lastSet = index;
 }
@@ -676,7 +681,7 @@ public final class ${className} extends BaseDataValueVector 
implements <#if type
   <#if type.major == "VarLen">
   fillEmpties(index);
   
-  bits.getMutator().setSafe(index, 1);
+  bits.getMutator().setSafeToOne(index);
   values.getMutator().setSafe(index, value);
   setCount++;
   <#if type.major == "VarLen">lastSet = index;
@@ -687,7 +692,7 @@ public final class ${className} extends BaseDataValueVector 
implements <#if type
 <#if type.major == "VarLen">
 fillEmpties(index);
 
-bits.getMutator().setSafe(index, 1);
+bits.getMutator().setSafeToOne(index);
 values.getMutator().setSafe(index, value);

arrow git commit: ARROW-540: [C++] Build fixes after ARROW-33, PARQUET-866

2017-02-07 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 5439b7158 -> f268e927a


ARROW-540: [C++] Build fixes after ARROW-33, PARQUET-866

Author: Wes McKinney 
Author: Wes McKinney 

Closes #325 from wesm/ARROW-540 and squashes the following commits:

9070baf [Wes McKinney] Change DCHECK_LT to DCHECK_LE. Not sure why it fixes bug 
on OS X
eb80701 [Wes McKinney] Fix API change


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/f268e927
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/f268e927
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/f268e927

Branch: refs/heads/master
Commit: f268e927ada5cb637404769a136506c600582061
Parents: 5439b71
Author: Wes McKinney 
Authored: Tue Feb 7 15:37:33 2017 +0100
Committer: Uwe L. Korn 
Committed: Tue Feb 7 15:37:33 2017 +0100

--
 cpp/src/arrow/buffer.cc   | 2 +-
 cpp/src/arrow/column-benchmark.cc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/f268e927/cpp/src/arrow/buffer.cc
--
diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc
index fb5a010..18e9ed2 100644
--- a/cpp/src/arrow/buffer.cc
+++ b/cpp/src/arrow/buffer.cc
@@ -57,7 +57,7 @@ Status Buffer::Copy(int64_t start, int64_t nbytes, 
std::shared_ptr* out)
 
 std::shared_ptr SliceBuffer(
 const std::shared_ptr& buffer, int64_t offset, int64_t length) {
-  DCHECK_LT(offset, buffer->size());
+  DCHECK_LE(offset, buffer->size());
   DCHECK_LE(length, buffer->size() - offset);
   return std::make_shared(buffer, offset, length);
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/f268e927/cpp/src/arrow/column-benchmark.cc
--
diff --git a/cpp/src/arrow/column-benchmark.cc 
b/cpp/src/arrow/column-benchmark.cc
index 8a1c775..1bab5a8 100644
--- a/cpp/src/arrow/column-benchmark.cc
+++ b/cpp/src/arrow/column-benchmark.cc
@@ -30,7 +30,7 @@ std::shared_ptr MakePrimitive(int32_t length, int32_t 
null_count = 0) {
   auto null_bitmap = std::make_shared(pool);
   data->Resize(length * sizeof(typename ArrayType::value_type));
   null_bitmap->Resize(BitUtil::BytesForBits(length));
-  return std::make_shared(length, data, 10, null_bitmap);
+  return std::make_shared(length, data, null_bitmap, 10);
 }
 }  // anonymous namespace
 



arrow git commit: ARROW-537: [C++] Do not compare String/Binary data in null slots when comparing arrays

2017-02-08 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master c322cbf22 -> 1407abfc9


ARROW-537: [C++] Do not compare String/Binary data in null slots when comparing 
arrays

Author: Wes McKinney 

Closes #327 from wesm/ARROW-537 and squashes the following commits:

66b1961 [Wes McKinney] Do not compare String/Binary data in null slots when 
comparing arrays


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/1407abfc
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/1407abfc
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/1407abfc

Branch: refs/heads/master
Commit: 1407abfc90c03e133f198b59fed48469d171c0a9
Parents: c322cbf
Author: Wes McKinney 
Authored: Wed Feb 8 09:16:57 2017 +0100
Committer: Uwe L. Korn 
Committed: Wed Feb 8 09:16:57 2017 +0100

--
 cpp/src/arrow/array-string-test.cc| 41 ++
 cpp/src/arrow/array.cc| 11 --
 cpp/src/arrow/array.h |  9 +++--
 cpp/src/arrow/compare.cc  | 55 +++---
 python/src/pyarrow/adapters/pandas.cc | 12 +++
 5 files changed, 95 insertions(+), 33 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/1407abfc/cpp/src/arrow/array-string-test.cc
--
diff --git a/cpp/src/arrow/array-string-test.cc 
b/cpp/src/arrow/array-string-test.cc
index 8b7eb41..c4d9bf4 100644
--- a/cpp/src/arrow/array-string-test.cc
+++ b/cpp/src/arrow/array-string-test.cc
@@ -140,6 +140,47 @@ TEST_F(TestStringArray, TestEmptyStringComparison) {
   ASSERT_TRUE(strings_a->Equals(strings_b));
 }
 
+TEST_F(TestStringArray, CompareNullByteSlots) {
+  StringBuilder builder(default_memory_pool());
+  StringBuilder builder2(default_memory_pool());
+  StringBuilder builder3(default_memory_pool());
+
+  builder.Append("foo");
+  builder2.Append("foo");
+  builder3.Append("foo");
+
+  builder.Append("bar");
+  builder2.AppendNull();
+
+  // same length, but different
+  builder3.Append("xyz");
+
+  builder.Append("baz");
+  builder2.Append("baz");
+  builder3.Append("baz");
+
+  std::shared_ptr array, array2, array3;
+  ASSERT_OK(builder.Finish(&array));
+  ASSERT_OK(builder2.Finish(&array2));
+  ASSERT_OK(builder3.Finish(&array3));
+
+  const auto& a1 = static_cast(*array);
+  const auto& a2 = static_cast(*array2);
+  const auto& a3 = static_cast(*array3);
+
+  // The validity bitmaps are the same, the data is different, but the unequal
+  // portion is masked out
+  StringArray equal_array(3, a1.value_offsets(), a1.data(), a2.null_bitmap(), 
1);
+  StringArray equal_array2(3, a3.value_offsets(), a3.data(), a2.null_bitmap(), 
1);
+
+  ASSERT_TRUE(equal_array.Equals(equal_array2));
+  ASSERT_TRUE(a2.RangeEquals(equal_array2, 0, 3, 0));
+
+  
ASSERT_TRUE(equal_array.Array::Slice(1)->Equals(equal_array2.Array::Slice(1)));
+  ASSERT_TRUE(
+  equal_array.Array::Slice(1)->RangeEquals(0, 2, 0, 
equal_array2.Array::Slice(1)));
+}
+
 // --
 // String builder tests
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/1407abfc/cpp/src/arrow/array.cc
--
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index f84023e..39459a0 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -87,11 +87,16 @@ bool Array::ApproxEquals(const std::shared_ptr& arr) 
const {
 }
 
 bool Array::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t 
other_start_idx,
-const std::shared_ptr& arr) const {
-  if (!arr) { return false; }
+const std::shared_ptr& other) const {
+  if (!other) { return false; }
+  return RangeEquals(*other, start_idx, end_idx, other_start_idx);
+}
+
+bool Array::RangeEquals(const Array& other, int32_t start_idx, int32_t end_idx,
+int32_t other_start_idx) const {
   bool are_equal = false;
   Status error =
-  ArrayRangeEquals(*this, *arr, start_idx, end_idx, other_start_idx, 
&are_equal);
+  ArrayRangeEquals(*this, other, start_idx, end_idx, other_start_idx, 
&are_equal);
   if (!error.ok()) { DCHECK(false) << "Arrays not comparable: " << 
error.ToString(); }
   return are_equal;
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/1407abfc/cpp/src/arrow/array.h
--
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index f3e8f9a..32d156b 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -127,7 +127,10 @@ class ARROW_EXPORT Array {
   /// Compare if the range of slots specified are equal f

arrow git commit: ARROW-545: [Python] Ignore non .parq/.parquet files when reading directories as Parquet datasets

2017-02-09 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 0bdfd5efb -> 31f145dc5


ARROW-545: [Python] Ignore non .parq/.parquet files when reading directories as 
Parquet datasets

Author: Wes McKinney 

Closes #331 from wesm/ARROW-545 and squashes the following commits:

5494167 [Wes McKinney] Docstring typo
92b274c [Wes McKinney] Ignore non .parq/.parquet files when reading 
directories-as-Parquet-datasets


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/31f145dc
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/31f145dc
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/31f145dc

Branch: refs/heads/master
Commit: 31f145dc5296d27cc8010a4cd17ca5b4ae461dff
Parents: 0bdfd5e
Author: Wes McKinney 
Authored: Thu Feb 9 13:47:09 2017 +0100
Committer: Uwe L. Korn 
Committed: Thu Feb 9 13:47:09 2017 +0100

--
 python/pyarrow/__init__.py   |  2 +-
 python/pyarrow/filesystem.py | 23 +--
 python/pyarrow/parquet.py| 18 --
 python/pyarrow/tests/test_parquet.py |  4 
 4 files changed, 38 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/31f145dc/python/pyarrow/__init__.py
--
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index ea4710d..6724b52 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -67,4 +67,4 @@ from pyarrow.schema import (null, bool_,
 from pyarrow.table import Column, RecordBatch, Table, concat_tables
 
 
-localfs = LocalFilesystem()
+localfs = LocalFilesystem.get_instance()

http://git-wip-us.apache.org/repos/asf/arrow/blob/31f145dc/python/pyarrow/filesystem.py
--
diff --git a/python/pyarrow/filesystem.py b/python/pyarrow/filesystem.py
index 82409b7..55bcad0 100644
--- a/python/pyarrow/filesystem.py
+++ b/python/pyarrow/filesystem.py
@@ -62,7 +62,7 @@ class Filesystem(object):
 """
 raise NotImplementedError
 
-def read_parquet(self, path, columns=None, schema=None):
+def read_parquet(self, path, columns=None, metadata=None, schema=None):
 """
 Read Parquet data from path in file system. Can read from a single file
 or a directory of files
@@ -73,8 +73,11 @@ class Filesystem(object):
 Single file path or directory
 columns : List[str], optional
 Subset of columns to read
+metadata : pyarrow.parquet.FileMetaData
+Known metadata to validate files against
 schema : pyarrow.parquet.Schema
-Known schema to validate files against
+Known schema to validate files against. Alternative to metadata
+argument
 
 Returns
 ---
@@ -85,18 +88,26 @@ class Filesystem(object):
 if self.isdir(path):
 paths_to_read = []
 for path in self.ls(path):
-if path == '_metadata' or path == '_common_metadata':
-raise ValueError('No support yet for common metadata file')
-paths_to_read.append(path)
+if path.endswith('parq') or path.endswith('parquet'):
+paths_to_read.append(path)
 else:
 paths_to_read = [path]
 
 return read_multiple_files(paths_to_read, columns=columns,
-   filesystem=self, schema=schema)
+   filesystem=self, schema=schema,
+   metadata=metadata)
 
 
 class LocalFilesystem(Filesystem):
 
+_instance = None
+
+@classmethod
+def get_instance(cls):
+if cls._instance is None:
+cls._instance = LocalFilesystem()
+return cls._instance
+
 @implements(Filesystem.ls)
 def ls(self, path):
 return sorted(pjoin(path, x) for x in os.listdir(path))

http://git-wip-us.apache.org/repos/asf/arrow/blob/31f145dc/python/pyarrow/parquet.py
--
diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index 6654b77..9766ff6 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -15,12 +15,17 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import six
+
 from pyarrow._parquet import (ParquetReader, FileMetaData,  # noqa
   RowGroupMetaData, Schema, ParquetWriter)
 import pyarrow._parquet as _parquet  # noqa
 from pyarrow.table import concat_tables
 
 
+EXCLUDED_PARQUET_PATHS = {'_metadata', '_common_metadata', '_SUCCESS&

arrow git commit: ARROW-548: [Python] Add nthreads to Filesystem.read_parquet and pass through

2017-02-13 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master e4845c447 -> 1f26040f5


ARROW-548: [Python] Add nthreads to Filesystem.read_parquet and pass through

Author: Wes McKinney 

Closes #337 from wesm/ARROW-548 and squashes the following commits:

b9aeaeb [Wes McKinney] Add nthreads to Filesystem.read_parquet and pass through


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/1f26040f
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/1f26040f
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/1f26040f

Branch: refs/heads/master
Commit: 1f26040f55eb54e00dc5e67ce0c1df64e51a1567
Parents: e4845c4
Author: Wes McKinney 
Authored: Mon Feb 13 09:52:59 2017 +0100
Committer: Uwe L. Korn 
Committed: Mon Feb 13 09:52:59 2017 +0100

--
 python/pyarrow/filesystem.py | 9 +++--
 python/pyarrow/parquet.py| 4 ++--
 python/pyarrow/tests/test_parquet.py | 8 +++-
 3 files changed, 16 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/1f26040f/python/pyarrow/filesystem.py
--
diff --git a/python/pyarrow/filesystem.py b/python/pyarrow/filesystem.py
index 55bcad0..e820806 100644
--- a/python/pyarrow/filesystem.py
+++ b/python/pyarrow/filesystem.py
@@ -62,7 +62,8 @@ class Filesystem(object):
 """
 raise NotImplementedError
 
-def read_parquet(self, path, columns=None, metadata=None, schema=None):
+def read_parquet(self, path, columns=None, metadata=None, schema=None,
+ nthreads=1):
 """
 Read Parquet data from path in file system. Can read from a single file
 or a directory of files
@@ -78,6 +79,9 @@ class Filesystem(object):
 schema : pyarrow.parquet.Schema
 Known schema to validate files against. Alternative to metadata
 argument
+nthreads : int, default 1
+Number of columns to read in parallel. If > 1, requires that the
+underlying file source is threadsafe
 
 Returns
 ---
@@ -95,7 +99,8 @@ class Filesystem(object):
 
 return read_multiple_files(paths_to_read, columns=columns,
filesystem=self, schema=schema,
-   metadata=metadata)
+   metadata=metadata,
+   nthreads=nthreads)
 
 
 class LocalFilesystem(Filesystem):

http://git-wip-us.apache.org/repos/asf/arrow/blob/1f26040f/python/pyarrow/parquet.py
--
diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index 9766ff6..fa96f95 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -59,8 +59,8 @@ class ParquetFile(object):
 columns: list
 If not None, only these columns will be read from the file.
 nthreads : int, default 1
-Number of columns to read in parallel. Requires that the underlying
-file source is threadsafe
+Number of columns to read in parallel. If > 1, requires that the
+underlying file source is threadsafe
 
 Returns
 ---

http://git-wip-us.apache.org/repos/asf/arrow/blob/1f26040f/python/pyarrow/tests/test_parquet.py
--
diff --git a/python/pyarrow/tests/test_parquet.py 
b/python/pyarrow/tests/test_parquet.py
index 969f68b..96f2d15 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -320,17 +320,20 @@ def test_compare_schemas():
 assert fileh.schema[0].equals(fileh.schema[0])
 assert not fileh.schema[0].equals(fileh.schema[1])
 
+
 @parquet
 def test_column_of_lists(tmpdir):
 df, schema = dataframe_with_arrays()
 
 filename = tmpdir.join('pandas_rountrip.parquet')
-arrow_table = pa.Table.from_pandas(df, timestamps_to_ms=True, 
schema=schema)
+arrow_table = pa.Table.from_pandas(df, timestamps_to_ms=True,
+   schema=schema)
 pq.write_table(arrow_table, filename.strpath, version="2.0")
 table_read = pq.read_table(filename.strpath)
 df_read = table_read.to_pandas()
 pdt.assert_frame_equal(df, df_read)
 
+
 @parquet
 def test_multithreaded_read():
 df = alltypes_sample(size=1)
@@ -418,6 +421,9 @@ def test_read_multiple_files(tmpdir):
 expected = pa.Table.from_arrays(to_read)
 assert result.equals(expected)
 
+# Read with multiple threads
+pa.localfs.read_parquet(dirpath, nthreads=2)
+
 # Test failure modes with non-uniform metadata
 bad_apple = _test_dataframe(size, seed=i)

svn commit: r18303 - in /dev/arrow/apache-arrow-0.2.0-rc1: ./ apache-arrow-0.2.0.tar.gz apache-arrow-0.2.0.tar.gz.asc apache-arrow-0.2.0.tar.gz.md5 apache-arrow-0.2.0.tar.gz.sha

2017-02-13 Thread uwe
Author: uwe
Date: Mon Feb 13 16:42:36 2017
New Revision: 18303

Log:
Apache Arrow $version RC${rc}

Added:
dev/arrow/apache-arrow-0.2.0-rc1/
dev/arrow/apache-arrow-0.2.0-rc1/apache-arrow-0.2.0.tar.gz   (with props)
dev/arrow/apache-arrow-0.2.0-rc1/apache-arrow-0.2.0.tar.gz.asc
dev/arrow/apache-arrow-0.2.0-rc1/apache-arrow-0.2.0.tar.gz.md5
dev/arrow/apache-arrow-0.2.0-rc1/apache-arrow-0.2.0.tar.gz.sha

Added: dev/arrow/apache-arrow-0.2.0-rc1/apache-arrow-0.2.0.tar.gz
==
Binary file - no diff available.

Propchange: dev/arrow/apache-arrow-0.2.0-rc1/apache-arrow-0.2.0.tar.gz
--
svn:mime-type = application/octet-stream

Added: dev/arrow/apache-arrow-0.2.0-rc1/apache-arrow-0.2.0.tar.gz.asc
==
--- dev/arrow/apache-arrow-0.2.0-rc1/apache-arrow-0.2.0.tar.gz.asc (added)
+++ dev/arrow/apache-arrow-0.2.0-rc1/apache-arrow-0.2.0.tar.gz.asc Mon Feb 13 
16:42:36 2017
@@ -0,0 +1,17 @@
+-BEGIN PGP SIGNATURE-
+Version: GnuPG v1
+
+iQIcBAABAgAGBQJYoeGGAAoJECnZTiKMqtYCVpoP/3Ma2XeOVABt4n0xey4J3GYO
+EcBIfC3/2iHxr5VP2Riok0aXUVgiLwVdzTCPO8DvAOeYOGseFr8cycBsgM5kn4ZH
+n/OiqMztGXWvPS19A7uT3ylQ87BKX64nPi5oDy2fG2ePLxOOS6rt43DEdqLNKrTn
+97uGhm/hjfDdJI9C/XXwZ1RES+/UqlcIIHzOFiHnVja1HRQgg4FAY8AXZrZ3Yngr
+JYNvN/xu19fUjHKGOA7u8UpnhC31xKPlFPgMWX7U+4UaV7UjhxLRPnzrylpBKDJK
+8N2BdHagubIR0I4urFTf52GFb6INnuKaPepFSfn0OXwhX76vC9bopnluCvbqQIsa
+O+ctevsx196dOs62Mui+FVxVOvhRqstrwa2nmVJDQRVTFNoB0OjikoRouiDGot5L
+NACbMfUAhPL94G/T2N7rRFS1at17V56wCwxubH6FLcLLyA2HZS9/0VAgEuMLfR91
+kPZj5wLb/GZ06AHQGxMkPfeqkIL7WT88UxGWO5Cn3bFpynj7lEBD95g9pQCPsKjv
+SBcZ1b+lhdvNzL5BRNKfd6eEt/tXFz45McXMRGrh9dJAJ3W/SQV8ptt1hajZu5Xx
++WzAxPvHowLiIasdy5VvgRx5N51NRGHV5y510oBgWYH5Y4lyq6CNLNjo5d8K2fUR
+z/68Ki04eCjqzCfGRHwA
+=iFc+
+-END PGP SIGNATURE-

Added: dev/arrow/apache-arrow-0.2.0-rc1/apache-arrow-0.2.0.tar.gz.md5
==
--- dev/arrow/apache-arrow-0.2.0-rc1/apache-arrow-0.2.0.tar.gz.md5 (added)
+++ dev/arrow/apache-arrow-0.2.0-rc1/apache-arrow-0.2.0.tar.gz.md5 Mon Feb 13 
16:42:36 2017
@@ -0,0 +1 @@
+apache-arrow-0.2.0.tar.gz: D1 C7 45 A1 29 99 C5 30  A7 4F 3B C6 5F 55 A6 C9

Added: dev/arrow/apache-arrow-0.2.0-rc1/apache-arrow-0.2.0.tar.gz.sha
==
--- dev/arrow/apache-arrow-0.2.0-rc1/apache-arrow-0.2.0.tar.gz.sha (added)
+++ dev/arrow/apache-arrow-0.2.0-rc1/apache-arrow-0.2.0.tar.gz.sha Mon Feb 13 
16:42:36 2017
@@ -0,0 +1 @@
+481e416b787ea4d1b1d807abf3355f340204e402  apache-arrow-0.2.0.tar.gz




svn commit: r18343 - in /dev/arrow/apache-arrow-0.2.0-rc2: ./ apache-arrow-0.2.0.tar.gz apache-arrow-0.2.0.tar.gz.asc apache-arrow-0.2.0.tar.gz.md5 apache-arrow-0.2.0.tar.gz.sha

2017-02-15 Thread uwe
Author: uwe
Date: Wed Feb 15 15:00:40 2017
New Revision: 18343

Log:
Apache Arrow $version RC${rc}

Added:
dev/arrow/apache-arrow-0.2.0-rc2/
dev/arrow/apache-arrow-0.2.0-rc2/apache-arrow-0.2.0.tar.gz   (with props)
dev/arrow/apache-arrow-0.2.0-rc2/apache-arrow-0.2.0.tar.gz.asc
dev/arrow/apache-arrow-0.2.0-rc2/apache-arrow-0.2.0.tar.gz.md5
dev/arrow/apache-arrow-0.2.0-rc2/apache-arrow-0.2.0.tar.gz.sha

Added: dev/arrow/apache-arrow-0.2.0-rc2/apache-arrow-0.2.0.tar.gz
==
Binary file - no diff available.

Propchange: dev/arrow/apache-arrow-0.2.0-rc2/apache-arrow-0.2.0.tar.gz
--
svn:mime-type = application/octet-stream

Added: dev/arrow/apache-arrow-0.2.0-rc2/apache-arrow-0.2.0.tar.gz.asc
==
--- dev/arrow/apache-arrow-0.2.0-rc2/apache-arrow-0.2.0.tar.gz.asc (added)
+++ dev/arrow/apache-arrow-0.2.0-rc2/apache-arrow-0.2.0.tar.gz.asc Wed Feb 15 
15:00:40 2017
@@ -0,0 +1,17 @@
+-BEGIN PGP SIGNATURE-
+Version: GnuPG v1
+
+iQIcBAABAgAGBQJYpG0MAAoJECnZTiKMqtYCn8MQAI7vTaCglTcYOsm8ElSgMdR4
+fCmQa66Ny5ldwTJGiWc0KfS9amk2aX3Y+N26+nra5wMNaL01wRdfVxkoxxr4EOEX
+GNnZTI+P9BGviJVkmsfvuqcb1/tnnhsyOfFfI+WdG+txAE8mYOaa3Bz9cXCzetrA
+jJB/zqw99eBjyXkdATiwzfLlfqAurrv9EPcyfBMPNSjolu65K2A19XXRlegoox76
+HCDIgsiCh5xOq7yd85Llw10+JPpWWijixMl2MTLJx70eL1XzWvp504vanI9f0UEf
+rI/d12My62tsatNPDH0Am1oYDzXKN0ltiw5KnwWaDTqNNVVpmEXIM/hBNOBHD524
+vnxoYH2omz45p143M5ZRpiAHwYjD27MQ+JkznWlP9GmYDgv2ptiMAZ7Jy5JLHGTy
+ej5U/sIStMM00f7JshcTBuOf1hq1iFVWSyVFnwBAIT3rbhrOmRyiOqrJsCxTkm7A
+X8um0HRdr4Hz4IGaVgRLQygT9tiwfec5pUp4h4cdMaAMip9xJM2v5gz7QZ0hXg5o
+ZpaPRC1zAj0q+mfwfuDh48eHRbFqTdAphbAR7vKSyd0aMh4KeFjF0M8dZLtzBd3W
+CrE9iD+BAp4N3goW0zv/N0Fk1H2jSK/kXOYNy4HjGPCEZk+b2UoWCFHvU9I4sSBe
+uA1Zek2OP4ZqM2Um3lf8
+=itIv
+-END PGP SIGNATURE-

Added: dev/arrow/apache-arrow-0.2.0-rc2/apache-arrow-0.2.0.tar.gz.md5
==
--- dev/arrow/apache-arrow-0.2.0-rc2/apache-arrow-0.2.0.tar.gz.md5 (added)
+++ dev/arrow/apache-arrow-0.2.0-rc2/apache-arrow-0.2.0.tar.gz.md5 Wed Feb 15 
15:00:40 2017
@@ -0,0 +1 @@
+apache-arrow-0.2.0.tar.gz: 43 3F A2 EE B5 AF 82 77  A7 3B 9A 59 36 DE 71 5E

Added: dev/arrow/apache-arrow-0.2.0-rc2/apache-arrow-0.2.0.tar.gz.sha
==
--- dev/arrow/apache-arrow-0.2.0-rc2/apache-arrow-0.2.0.tar.gz.sha (added)
+++ dev/arrow/apache-arrow-0.2.0-rc2/apache-arrow-0.2.0.tar.gz.sha Wed Feb 15 
15:00:40 2017
@@ -0,0 +1 @@
+b919e2f65d18b97d0cef0fe69b0ce477bfd1df10  apache-arrow-0.2.0.tar.gz




[2/2] arrow git commit: [maven-release-plugin] prepare for next development iteration

2017-02-19 Thread uwe
[maven-release-plugin] prepare for next development iteration

Change-Id: I1a9e3a6d0dc29c1a7933d373a7224a7bbd60e7e9


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/ab15e01c
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/ab15e01c
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/ab15e01c

Branch: refs/heads/master
Commit: ab15e01c70d12ea163dd9b0109fa9332884e3e7c
Parents: f6924ad
Author: Uwe L. Korn 
Authored: Wed Feb 15 15:59:46 2017 +0100
Committer: Uwe L. Korn 
Committed: Wed Feb 15 15:59:46 2017 +0100

--
 java/format/pom.xml | 2 +-
 java/memory/pom.xml | 2 +-
 java/pom.xml| 4 ++--
 java/tools/pom.xml  | 2 +-
 java/vector/pom.xml | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/ab15e01c/java/format/pom.xml
--
diff --git a/java/format/pom.xml b/java/format/pom.xml
index 055df5b..c65a7bc 100644
--- a/java/format/pom.xml
+++ b/java/format/pom.xml
@@ -15,7 +15,7 @@
 
   arrow-java-root
   org.apache.arrow
-  0.2.0
+  0.2.1-SNAPSHOT
 
 
 arrow-format

http://git-wip-us.apache.org/repos/asf/arrow/blob/ab15e01c/java/memory/pom.xml
--
diff --git a/java/memory/pom.xml b/java/memory/pom.xml
index a3085aa..f20228b 100644
--- a/java/memory/pom.xml
+++ b/java/memory/pom.xml
@@ -14,7 +14,7 @@
   
 org.apache.arrow
 arrow-java-root
-0.2.0
+0.2.1-SNAPSHOT
   
   arrow-memory
   Arrow Memory

http://git-wip-us.apache.org/repos/asf/arrow/blob/ab15e01c/java/pom.xml
--
diff --git a/java/pom.xml b/java/pom.xml
index ea0d029..fa03783 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -20,7 +20,7 @@
 
   org.apache.arrow
   arrow-java-root
-  0.2.0
+  0.2.1-SNAPSHOT
   pom
 
   Apache Arrow Java Root POM
@@ -41,7 +41,7 @@
 
scm:git:https://git-wip-us.apache.org/repos/asf/arrow.git
 
scm:git:https://git-wip-us.apache.org/repos/asf/arrow.git
 https://github.com/apache/arrow
-apache-arrow-0.2.0
+HEAD
   
 
   

http://git-wip-us.apache.org/repos/asf/arrow/blob/ab15e01c/java/tools/pom.xml
--
diff --git a/java/tools/pom.xml b/java/tools/pom.xml
index 7271778..35e5599 100644
--- a/java/tools/pom.xml
+++ b/java/tools/pom.xml
@@ -14,7 +14,7 @@
 
 org.apache.arrow
 arrow-java-root
-0.2.0
+0.2.1-SNAPSHOT
 
 arrow-tools
 Arrow Tools

http://git-wip-us.apache.org/repos/asf/arrow/blob/ab15e01c/java/vector/pom.xml
--
diff --git a/java/vector/pom.xml b/java/vector/pom.xml
index 8ac4253..fc3ce66 100644
--- a/java/vector/pom.xml
+++ b/java/vector/pom.xml
@@ -14,7 +14,7 @@
   
 org.apache.arrow
 arrow-java-root
-0.2.0
+0.2.1-SNAPSHOT
   
   arrow-vector
   Arrow Vectors



[1/2] arrow git commit: [maven-release-plugin] prepare release apache-arrow-0.2.0

2017-02-19 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master fa8d27f31 -> ab15e01c7


[maven-release-plugin] prepare release apache-arrow-0.2.0

Change-Id: I71a840dd1891d1b738d6a43748642390d7541f42


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/f6924ad8
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/f6924ad8
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/f6924ad8

Branch: refs/heads/master
Commit: f6924ad83bc95741f003830892ad4815ca3b70fd
Parents: fa8d27f
Author: Uwe L. Korn 
Authored: Wed Feb 15 15:59:36 2017 +0100
Committer: Uwe L. Korn 
Committed: Wed Feb 15 15:59:36 2017 +0100

--
 java/format/pom.xml | 2 +-
 java/memory/pom.xml | 2 +-
 java/pom.xml| 4 ++--
 java/tools/pom.xml  | 2 +-
 java/vector/pom.xml | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/f6924ad8/java/format/pom.xml
--
diff --git a/java/format/pom.xml b/java/format/pom.xml
index eb045d6..055df5b 100644
--- a/java/format/pom.xml
+++ b/java/format/pom.xml
@@ -15,7 +15,7 @@
 
   arrow-java-root
   org.apache.arrow
-  0.1.1-SNAPSHOT
+  0.2.0
 
 
 arrow-format

http://git-wip-us.apache.org/repos/asf/arrow/blob/f6924ad8/java/memory/pom.xml
--
diff --git a/java/memory/pom.xml b/java/memory/pom.xml
index a4eb652..a3085aa 100644
--- a/java/memory/pom.xml
+++ b/java/memory/pom.xml
@@ -14,7 +14,7 @@
   
 org.apache.arrow
 arrow-java-root
-0.1.1-SNAPSHOT
+0.2.0
   
   arrow-memory
   Arrow Memory

http://git-wip-us.apache.org/repos/asf/arrow/blob/f6924ad8/java/pom.xml
--
diff --git a/java/pom.xml b/java/pom.xml
index e467cc1..ea0d029 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -20,7 +20,7 @@
 
   org.apache.arrow
   arrow-java-root
-  0.1.1-SNAPSHOT
+  0.2.0
   pom
 
   Apache Arrow Java Root POM
@@ -41,7 +41,7 @@
 
scm:git:https://git-wip-us.apache.org/repos/asf/arrow.git
 
scm:git:https://git-wip-us.apache.org/repos/asf/arrow.git
 https://github.com/apache/arrow
-HEAD
+apache-arrow-0.2.0
   
 
   

http://git-wip-us.apache.org/repos/asf/arrow/blob/f6924ad8/java/tools/pom.xml
--
diff --git a/java/tools/pom.xml b/java/tools/pom.xml
index ef96328..7271778 100644
--- a/java/tools/pom.xml
+++ b/java/tools/pom.xml
@@ -14,7 +14,7 @@
 
 org.apache.arrow
 arrow-java-root
-0.1.1-SNAPSHOT
+0.2.0
 
 arrow-tools
 Arrow Tools

http://git-wip-us.apache.org/repos/asf/arrow/blob/f6924ad8/java/vector/pom.xml
--
diff --git a/java/vector/pom.xml b/java/vector/pom.xml
index 8517d4c..8ac4253 100644
--- a/java/vector/pom.xml
+++ b/java/vector/pom.xml
@@ -14,7 +14,7 @@
   
 org.apache.arrow
 arrow-java-root
-0.1.1-SNAPSHOT
+0.2.0
   
   arrow-vector
   Arrow Vectors



[arrow] Git Push Summary

2017-02-19 Thread uwe
Repository: arrow
Updated Tags:  refs/tags/apache-arrow-0.2.0 [created] d996e60f1


arrow git commit: ARROW-570: Determine Java tools JAR location from project metadata

2017-02-21 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master ef6b46557 -> 4598c1a36


ARROW-570: Determine Java tools JAR location from project metadata

Author: Uwe L. Korn 

Closes #346 from xhochy/ARROW-570 and squashes the following commits:

32ece28 [Uwe L. Korn] Add missing )
f1071db [Uwe L. Korn] ARROW-570: Determine Java tools JAR location from project 
metadata


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/4598c1a3
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/4598c1a3
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/4598c1a3

Branch: refs/heads/master
Commit: 4598c1a36c20de1f4d12dee62c79a67197e8a603
Parents: ef6b465
Author: Uwe L. Korn 
Authored: Tue Feb 21 14:41:54 2017 +0100
Committer: Uwe L. Korn 
Committed: Tue Feb 21 14:41:54 2017 +0100

--
 ci/travis_script_integration.sh |  3 ---
 integration/integration_test.py | 11 +--
 2 files changed, 9 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/4598c1a3/ci/travis_script_integration.sh
--
diff --git a/ci/travis_script_integration.sh b/ci/travis_script_integration.sh
index 7bb1dc0..8ddd89b 100755
--- a/ci/travis_script_integration.sh
+++ b/ci/travis_script_integration.sh
@@ -26,9 +26,6 @@ popd
 
 pushd $TRAVIS_BUILD_DIR/integration
 
-VERSION=0.1.1-SNAPSHOT
-export 
ARROW_JAVA_INTEGRATION_JAR=$JAVA_DIR/tools/target/arrow-tools-$VERSION-jar-with-dependencies.jar
-
 export ARROW_CPP_EXE_PATH=$CPP_BUILD_DIR/debug
 
 source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh

http://git-wip-us.apache.org/repos/asf/arrow/blob/4598c1a3/integration/integration_test.py
--
diff --git a/integration/integration_test.py b/integration/integration_test.py
index d5a066b..049436a 100644
--- a/integration/integration_test.py
+++ b/integration/integration_test.py
@@ -34,6 +34,12 @@ ARROW_HOME = os.path.abspath(__file__).rsplit("/", 2)[0]
 # Control for flakiness
 np.random.seed(12345)
 
+def load_version_from_pom():
+import xml.etree.ElementTree as ET
+tree = ET.parse(os.path.join(ARROW_HOME, 'java', 'pom.xml'))
+version_tag = 
list(tree.getroot().findall('{http://maven.apache.org/POM/4.0.0}version'))[0]
+return version_tag.text
+
 
 def guid():
 return uuid.uuid4().hex
@@ -638,11 +644,12 @@ class Tester(object):
 
 class JavaTester(Tester):
 
+_arrow_version = load_version_from_pom()
 ARROW_TOOLS_JAR = os.environ.get(
 'ARROW_JAVA_INTEGRATION_JAR',
 os.path.join(ARROW_HOME,
- 'java/tools/target/arrow-tools-0.1.1-'
- 'SNAPSHOT-jar-with-dependencies.jar'))
+ 'java/tools/target/arrow-tools-{}-'
+ 'jar-with-dependencies.jar'.format(_arrow_version)))
 
 name = 'Java'
 



arrow git commit: ARROW-569: [C++] Set version for *.pc

2017-02-21 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 4598c1a36 -> 5e279f0a7


ARROW-569: [C++] Set version for *.pc

*.pc.in such as cpp/build/arrow.pc.in refers ARROW_VERSION but it isn't
defined.

Author: Kouhei Sutou 

Closes #344 from kou/arrow-569-c++-set-version-for-pc and squashes the 
following commits:

48b366b [Kouhei Sutou] ARROW-569: [C++] Set version for *.pc


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/5e279f0a
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/5e279f0a
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/5e279f0a

Branch: refs/heads/master
Commit: 5e279f0a73842518caf34c2cda7c941548d55dbf
Parents: 4598c1a
Author: Kouhei Sutou 
Authored: Tue Feb 21 14:44:01 2017 +0100
Committer: Uwe L. Korn 
Committed: Tue Feb 21 14:44:01 2017 +0100

--
 cpp/CMakeLists.txt | 6 ++
 1 file changed, 6 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/5e279f0a/cpp/CMakeLists.txt
--
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 035cd8f..0888a8b 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -18,6 +18,12 @@
 cmake_minimum_required(VERSION 2.7)
 project(arrow)
 
+file(READ "${CMAKE_CURRENT_SOURCE_DIR}/../java/pom.xml" POM_XML)
+string(REGEX MATCHALL
+  "\n  [^<]+" ARROW_VERSION_TAG "${POM_XML}")
+string(REGEX REPLACE
+  "(\n  |)" "" ARROW_VERSION "${ARROW_VERSION_TAG}")
+
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake_modules")
 
 include(CMakeParseArguments)



arrow git commit: ARROW-557: [Python] Add option to explicitly opt in to HDFS tests, do not implicitly skip

2017-02-26 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 16c97592b -> dc103feaf


ARROW-557: [Python] Add option to explicitly opt in to HDFS tests, do not 
implicitly skip

I have

```
$ py.test pyarrow/tests/test_hdfs.py
== test session starts 
==
platform linux2 -- Python 2.7.11, pytest-2.9.0, py-1.4.31, pluggy-0.3.1
rootdir: /home/wesm/code/arrow/python, inifile:
collected 15 items

pyarrow/tests/test_hdfs.py sss
```

But

```
$ py.test pyarrow/tests/test_hdfs.py --hdfs -v
== test session starts 
==
platform linux2 -- Python 2.7.11, pytest-2.9.0, py-1.4.31, pluggy-0.3.1 -- 
/home/wesm/anaconda3/envs/py27/bin/python
cachedir: .cache
rootdir: /home/wesm/code/arrow/python, inifile:
collected 15 items

pyarrow/tests/test_hdfs.py::TestLibHdfs::test_hdfs_close PASSED
pyarrow/tests/test_hdfs.py::TestLibHdfs::test_hdfs_download_upload PASSED
pyarrow/tests/test_hdfs.py::TestLibHdfs::test_hdfs_file_context_manager PASSED
pyarrow/tests/test_hdfs.py::TestLibHdfs::test_hdfs_ls PASSED
pyarrow/tests/test_hdfs.py::TestLibHdfs::test_hdfs_mkdir PASSED
pyarrow/tests/test_hdfs.py::TestLibHdfs::test_hdfs_orphaned_file PASSED
pyarrow/tests/test_hdfs.py::TestLibHdfs::test_hdfs_read_multiple_parquet_files 
SKIPPED
pyarrow/tests/test_hdfs.py::TestLibHdfs::test_hdfs_read_whole_file PASSED
pyarrow/tests/test_hdfs.py::TestLibHdfs3::test_hdfs_close PASSED
pyarrow/tests/test_hdfs.py::TestLibHdfs3::test_hdfs_download_upload PASSED
pyarrow/tests/test_hdfs.py::TestLibHdfs3::test_hdfs_file_context_manager PASSED
pyarrow/tests/test_hdfs.py::TestLibHdfs3::test_hdfs_ls PASSED
pyarrow/tests/test_hdfs.py::TestLibHdfs3::test_hdfs_mkdir PASSED
pyarrow/tests/test_hdfs.py::TestLibHdfs3::test_hdfs_read_multiple_parquet_files 
SKIPPED
pyarrow/tests/test_hdfs.py::TestLibHdfs3::test_hdfs_read_whole_file PASSED
```

The `py.test pyarrow --only-hdfs` option will run only the HDFS tests.

Author: Wes McKinney 

Closes #353 from wesm/ARROW-557 and squashes the following commits:

52e03db [Wes McKinney] Add conftest.py file, hdfs group to opt in to HDFS tests 
with --hdfs


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/dc103fea
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/dc103fea
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/dc103fea

Branch: refs/heads/master
Commit: dc103feaf0bb07b95f0c81afe0e342f239319dec
Parents: 16c9759
Author: Wes McKinney 
Authored: Mon Feb 27 08:13:29 2017 +0100
Committer: Uwe L. Korn 
Committed: Mon Feb 27 08:13:29 2017 +0100

--
 LICENSE.txt   | 12 ---
 NOTICE.txt|  4 +++
 python/pyarrow/tests/conftest.py  | 62 ++
 python/pyarrow/tests/test_hdfs.py |  5 +--
 4 files changed, 69 insertions(+), 14 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/dc103fea/LICENSE.txt
--
diff --git a/LICENSE.txt b/LICENSE.txt
index c3bec43..d645695 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -200,15 +200,3 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-
-
-
-This product includes code from Apache Kudu.
-
- * cpp/cmake_modules/CompilerInfo.cmake is based on Kudu's 
cmake_modules/CompilerInfo.cmake
-
-Copyright: 2016 The Apache Software Foundation.
-Home page: https://kudu.apache.org/
-License: http://www.apache.org/licenses/LICENSE-2.0
-
-

http://git-wip-us.apache.org/repos/asf/arrow/blob/dc103fea/NOTICE.txt
--
diff --git a/NOTICE.txt b/NOTICE.txt
index 02cb4dd..e71835c 100644
--- a/NOTICE.txt
+++ b/NOTICE.txt
@@ -42,6 +42,10 @@ This product includes software from the CMake project
 This product includes software from 
https://github.com/matthew-brett/multibuild (BSD 2-clause)
  * Copyright (c) 2013-2016, Matt Terry and Matthew Brett; all rights reserved.
 
+This product includes software from the Ibis project (Apache 2.0)
+ * Copyright (c) 2015 Cloudera, Inc.
+ * https://github.com/cloudera/ibis
+
 

 
 This product includes code from Apache Kudu, which includes the following in

http://git-wip-us.apache.org/repos/asf/arrow/blob/dc103fea/python/pyarrow/tests/conftest.py
--
diff --git a/python/pyarr

[1/2] arrow git commit: ARROW-493: [C++] Permit large (length > INT32_MAX) arrays in memory

2017-02-26 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master dc103feaf -> 01a67f3ff


http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/ipc/adapter.cc
--
diff --git a/cpp/src/arrow/ipc/adapter.cc b/cpp/src/arrow/ipc/adapter.cc
index 2be87a3..f11c88a 100644
--- a/cpp/src/arrow/ipc/adapter.cc
+++ b/cpp/src/arrow/ipc/adapter.cc
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -65,8 +66,14 @@ class RecordBatchWriter : public ArrayVisitor {
 if (max_recursion_depth_ <= 0) {
   return Status::Invalid("Max recursion depth reached");
 }
+
+if (arr.length() > std::numeric_limits::max()) {
+  return Status::Invalid("Cannot write arrays larger than 2^31 - 1 in 
length");
+}
+
 // push back all common elements
-field_nodes_.push_back(flatbuf::FieldNode(arr.length(), arr.null_count()));
+field_nodes_.push_back(flatbuf::FieldNode(
+static_cast(arr.length()), 
static_cast(arr.null_count(;
 if (arr.null_count() > 0) {
   std::shared_ptr bitmap = arr.null_bitmap();
 
@@ -152,13 +159,14 @@ class RecordBatchWriter : public ArrayVisitor {
 int64_t start_offset;
 RETURN_NOT_OK(dst->Tell(&start_offset));
 
-int64_t padded_metadata_length = metadata_fb->size() + 4;
-const int remainder = (padded_metadata_length + start_offset) % 8;
+int32_t padded_metadata_length = static_cast(metadata_fb->size()) 
+ 4;
+const int32_t remainder =
+(padded_metadata_length + static_cast(start_offset)) % 8;
 if (remainder != 0) { padded_metadata_length += 8 - remainder; }
 
 // The returned metadata size includes the length prefix, the flatbuffer,
 // plus padding
-*metadata_length = static_cast(padded_metadata_length);
+*metadata_length = padded_metadata_length;
 
 // Write the flatbuffer size prefix including padding
 int32_t flatbuffer_size = padded_metadata_length - 4;
@@ -169,7 +177,8 @@ class RecordBatchWriter : public ArrayVisitor {
 RETURN_NOT_OK(dst->Write(metadata_fb->data(), metadata_fb->size()));
 
 // Write any padding
-int64_t padding = padded_metadata_length - metadata_fb->size() - 4;
+int32_t padding =
+padded_metadata_length - static_cast(metadata_fb->size()) - 4;
 if (padding > 0) { RETURN_NOT_OK(dst->Write(kPaddingBytes, padding)); }
 
 return Status::OK();
@@ -184,7 +193,8 @@ class RecordBatchWriter : public ArrayVisitor {
 RETURN_NOT_OK(dst->Tell(&start_position));
 #endif
 
-RETURN_NOT_OK(WriteMetadata(batch.num_rows(), *body_length, dst, 
metadata_length));
+RETURN_NOT_OK(WriteMetadata(
+static_cast(batch.num_rows()), *body_length, dst, 
metadata_length));
 
 #ifndef NDEBUG
RETURN_NOT_OK(dst->Tell(&current_position));
@@ -430,7 +440,7 @@ class RecordBatchWriter : public ArrayVisitor {
 int32_t* shifted_offsets =
 reinterpret_cast(shifted_offsets_buffer->mutable_data());
 
-for (int32_t i = 0; i < array.length(); ++i) {
+for (int64_t i = 0; i < array.length(); ++i) {
   const uint8_t code = type_ids[i];
   int32_t shift = child_offsets[code];
   if (shift == -1) { child_offsets[code] = shift = 
unshifted_offsets[i]; }

http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/ipc/ipc-json-test.cc
--
diff --git a/cpp/src/arrow/ipc/ipc-json-test.cc 
b/cpp/src/arrow/ipc/ipc-json-test.cc
index 3e759cc..4c18a49 100644
--- a/cpp/src/arrow/ipc/ipc-json-test.cc
+++ b/cpp/src/arrow/ipc/ipc-json-test.cc
@@ -240,7 +240,7 @@ TEST(TestJsonFileReadWrite, BasicRoundTrip) {
   const int nbatches = 3;
   std::vector> batches;
   for (int i = 0; i < nbatches; ++i) {
-int32_t num_rows = 5 + i * 5;
+int num_rows = 5 + i * 5;
 std::vector> arrays;
 
 MakeBatchArrays(schema, num_rows, &arrays);

http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/ipc/json-internal.cc
--
diff --git a/cpp/src/arrow/ipc/json-internal.cc 
b/cpp/src/arrow/ipc/json-internal.cc
index 6253cd6..0458b85 100644
--- a/cpp/src/arrow/ipc/json-internal.cc
+++ b/cpp/src/arrow/ipc/json-internal.cc
@@ -355,7 +355,7 @@ class JsonArrayWriter : public ArrayVisitor {
 writer_->String(name);
 
 writer_->Key("count");
-writer_->Int(arr.length());
+writer_->Int(static_cast(arr.length()));
 
 RETURN_NOT_OK(arr.Accept(this));
 
@@ -394,7 +394,7 @@ class JsonArrayWriter : public ArrayVisitor {
   template 
   typename std::enable_if::value, void>::type
   WriteDataValues(const T& arr) {
-for (int i = 0; i < arr.length(); ++i) {
+for (int64_t i = 0; i < arr.length(); ++i) {
   int32_t length;
   const char* buf = reinterpret_cast(arr.GetValue(i, 
&length));
 
@@ -430,7 +430,7 @@ class JsonArrayWriter : public ArrayVisitor {
   

[2/2] arrow git commit: ARROW-493: [C++] Permit large (length > INT32_MAX) arrays in memory

2017-02-26 Thread uwe
ARROW-493: [C++] Permit large (length > INT32_MAX) arrays in memory

This commit relaxes the INT32_MAX length requirement for in-memory data. It 
does not change the Arrow memory format, nor does it permit arrays over 
INT32_MAX elements to be included in a RecordBatch message sent in the 
streaming or file formats.

The purpose of this change is to enable Arrow containers to do zero-copy 
addressing of large datasets (generally of fixed-size elements) produced by 
other systems. Should those systems wish to send messages to Java, they will 
need to break those large arrays up into smaller pieces. We can create 
utilities to assist in copy-free segmentation of large in-memory datasets into 
compatible chunksizes.

If the large data is only being used in C++-land, then there are no problems.

This is a helpful change en route to adding an `arrow::Tensor` type per 
ARROW-550, and probably some other things.

This also includes ARROW-584, as I wanted to be sure that I caught all the 
places in the codebase where there were imprecise integer conversions.

cc @pcmoritz @robertnishihara

Author: Wes McKinney 

Closes #352 from wesm/ARROW-493 and squashes the following commits:

013d8cc [Wes McKinney] Fix some more compiler warnings
13c4067 [Wes McKinney] Do not pass CMAKE_CXX_FLAGS to googletest ep
dc50d80 [Wes McKinney] Fix last imprecise conversions
c8e90bc [Wes McKinney] Fix many imprecise integer conversions
6bacdf3 [Wes McKinney] Permit in-memory arrays with more than INT32_MAX 
elements in Array and Builder classes. Raise if large arrays used in IPC context


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/01a67f3f
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/01a67f3f
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/01a67f3f

Branch: refs/heads/master
Commit: 01a67f3ff3f43f504dc92b66e04473a8b29053f1
Parents: dc103fe
Author: Wes McKinney 
Authored: Mon Feb 27 08:14:10 2017 +0100
Committer: Uwe L. Korn 
Committed: Mon Feb 27 08:14:10 2017 +0100

--
 ci/travis_before_script_cpp.sh |   2 +-
 cpp/CMakeLists.txt |   6 +-
 cpp/src/arrow/array-dictionary-test.cc |   2 +-
 cpp/src/arrow/array-primitive-test.cc  |  69 ---
 cpp/src/arrow/array-string-test.cc |  24 ++---
 cpp/src/arrow/array-test.cc|  17 +++-
 cpp/src/arrow/array-union-test.cc  |   2 +-
 cpp/src/arrow/array.cc |  84 +-
 cpp/src/arrow/array.h  | 132 ++--
 cpp/src/arrow/buffer.h |  14 +--
 cpp/src/arrow/builder.cc   |  79 +
 cpp/src/arrow/builder.h|  63 ++---
 cpp/src/arrow/column-benchmark.cc  |   2 +-
 cpp/src/arrow/column.cc|   6 +-
 cpp/src/arrow/column.h |   2 +-
 cpp/src/arrow/compare.cc   |  48 +-
 cpp/src/arrow/compare.h|   2 +-
 cpp/src/arrow/io/file.cc   |   8 +-
 cpp/src/arrow/io/hdfs.cc   |  15 ++--
 cpp/src/arrow/io/io-hdfs-test.cc   |   2 +-
 cpp/src/arrow/ipc/adapter.cc   |  24 +++--
 cpp/src/arrow/ipc/ipc-json-test.cc |   2 +-
 cpp/src/arrow/ipc/json-internal.cc |  61 -
 cpp/src/arrow/ipc/json.cc  |   4 +-
 cpp/src/arrow/ipc/metadata-internal.cc |   7 +-
 cpp/src/arrow/ipc/reader.cc|   2 +-
 cpp/src/arrow/ipc/test-common.h|  24 ++---
 cpp/src/arrow/ipc/writer.cc|   4 +-
 cpp/src/arrow/pretty_print.cc  |   2 +-
 cpp/src/arrow/schema.cc|   2 +-
 cpp/src/arrow/schema.h |   2 +-
 cpp/src/arrow/status.cc|   2 +-
 cpp/src/arrow/table-test.cc|   4 +-
 cpp/src/arrow/table.cc |  10 +--
 cpp/src/arrow/table.h  |  14 +--
 cpp/src/arrow/test-util.h  |  47 +-
 cpp/src/arrow/type.h   |  12 +--
 cpp/src/arrow/type_traits.h|  54 
 cpp/src/arrow/util/bit-util.cc |   4 +-
 cpp/src/arrow/util/bit-util.h  |  25 +++---
 python/pyarrow/array.pxd   |   4 +-
 python/pyarrow/array.pyx   |   2 +-
 python/pyarrow/includes/libarrow.pxd   |  16 ++--
 python/pyarrow/scalar.pxd  |   8 +-
 python/pyarrow/scalar.pyx  |  10 +--
 python/pyarrow/table.pyx   |   2 +-
 python/src/pyarrow/adapters/builtin.cc |   4 +-
 python/src/pyarrow/adapters/pandas.cc  |  13 ++-
 48 files changed, 508 insertions(+), 436 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/ci/travis_before_script_cpp.sh
--
diff --git a/ci/travis_before_script_cpp.sh b/ci/travis_before_script_cpp

arrow git commit: ARROW-604: Python: boxed Field instances are missing the reference to their DataType

2017-03-07 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 9deb3251e -> fb9fbe498


ARROW-604: Python: boxed Field instances are missing the reference to their 
DataType

Author: Uwe L. Korn 

Closes #362 from xhochy/ARROW-604 and squashes the following commits:

2e837c8 [Uwe L. Korn] ARROW-604: Python: boxed Field instances are missing the 
reference to DataType


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/fb9fbe49
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/fb9fbe49
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/fb9fbe49

Branch: refs/heads/master
Commit: fb9fbe4981420aaa0a56bfe87254d8b10bd5ba18
Parents: 9deb325
Author: Uwe L. Korn 
Authored: Tue Mar 7 17:13:57 2017 +0100
Committer: Uwe L. Korn 
Committed: Tue Mar 7 17:13:57 2017 +0100

--
 cpp/src/arrow/type.cc   | 3 +++
 python/pyarrow/schema.pyx   | 5 +
 python/pyarrow/tests/test_schema.py | 2 ++
 3 files changed, 10 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/fb9fbe49/cpp/src/arrow/type.cc
--
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 23fa681..7e5f13a 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -54,6 +54,9 @@ bool DataType::Equals(const DataType& other) const {
 }
 
 bool DataType::Equals(const std::shared_ptr& other) const {
+  if (!other) {
+return false;
+  }
   return Equals(*other.get());
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/fb9fbe49/python/pyarrow/schema.pyx
--
diff --git a/python/pyarrow/schema.pyx b/python/pyarrow/schema.pyx
index 52eeeaf..19910ab 100644
--- a/python/pyarrow/schema.pyx
+++ b/python/pyarrow/schema.pyx
@@ -88,6 +88,7 @@ cdef class Field:
 cdef init(self, const shared_ptr[CField]& field):
 self.sp_field = field
 self.field = field.get()
+self.type = box_data_type(field.get().type)
 
 @classmethod
 def from_py(cls, object name, DataType type, bint nullable=True):
@@ -326,11 +327,15 @@ def schema(fields):
 return Schema.from_fields(fields)
 
 cdef DataType box_data_type(const shared_ptr[CDataType]& type):
+if type.get() == NULL:
+return None
 cdef DataType out = DataType()
 out.init(type)
 return out
 
 cdef Field box_field(const shared_ptr[CField]& field):
+if field.get() == NULL:
+return None
 cdef Field out = Field()
 out.init(field)
 return out

http://git-wip-us.apache.org/repos/asf/arrow/blob/fb9fbe49/python/pyarrow/tests/test_schema.py
--
diff --git a/python/pyarrow/tests/test_schema.py 
b/python/pyarrow/tests/test_schema.py
index 507ebb8..f6dc33c 100644
--- a/python/pyarrow/tests/test_schema.py
+++ b/python/pyarrow/tests/test_schema.py
@@ -64,6 +64,8 @@ class TestTypes(unittest.TestCase):
 assert len(sch) == 3
 assert sch[0].name == 'foo'
 assert sch[0].type == fields[0].type
+assert sch.field_by_name('foo').name == 'foo'
+assert sch.field_by_name('foo').type == fields[0].type
 
 assert repr(sch) == """\
 foo: int32



arrow git commit: ARROW-606: [C++] upgrade flatbuffers version to 1.6.0

2017-03-12 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master d99958dd3 -> fdc25b418


ARROW-606: [C++] upgrade flatbuffers version to 1.6.0

all unittests pass

benchmark (builder, column, jemalloc-builder) results suffer minor
differences (<5%) wrt to flatbuffer 1.3.0

Author: Julien Lafaye 

Closes #373 from jlafaye/master and squashes the following commits:

3d001e5 [Julien Lafaye] ARROW-606: [C++] upgrade flatbuffers version to 1.6.0


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/fdc25b41
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/fdc25b41
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/fdc25b41

Branch: refs/heads/master
Commit: fdc25b418273a9a0d9d2512f571236e96cb4e2b4
Parents: d99958d
Author: Julien Lafaye 
Authored: Sun Mar 12 13:28:09 2017 +0100
Committer: Uwe L. Korn 
Committed: Sun Mar 12 13:28:09 2017 +0100

--
 cpp/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/fdc25b41/cpp/CMakeLists.txt
--
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 294c439..5ecc34e 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -35,7 +35,7 @@ set(THIRDPARTY_DIR "${CMAKE_SOURCE_DIR}/thirdparty")
 set(GFLAGS_VERSION "2.1.2")
 set(GTEST_VERSION "1.7.0")
 set(GBENCHMARK_VERSION "1.1.0")
-set(FLATBUFFERS_VERSION "1.3.0")
+set(FLATBUFFERS_VERSION "1.6.0")
 set(JEMALLOC_VERSION "4.4.0")
 
 find_package(ClangTools)



arrow git commit: ARROW-624: [C++] Restore MakePrimitiveArray function, use in feather.cc

2017-03-14 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 6aed18f96 -> f442879d3


ARROW-624: [C++] Restore MakePrimitiveArray function, use in feather.cc

I verified locally the parquet-cpp test suite passes again

Author: Wes McKinney 

Closes #378 from wesm/ARROW-624 and squashes the following commits:

023df9b [Wes McKinney] Use passed offset in MakePrimitiveArray
30a553e [Wes McKinney] Restore MakePrimitiveArray function, use in Feather, 
verify fixes parquet test suite


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/f442879d
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/f442879d
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/f442879d

Branch: refs/heads/master
Commit: f442879d3c791d86fb0fdfa098a72329843f5baf
Parents: 6aed18f
Author: Wes McKinney 
Authored: Tue Mar 14 09:17:30 2017 +0100
Committer: Uwe L. Korn 
Committed: Tue Mar 14 09:17:30 2017 +0100

--
 cpp/src/arrow/api.h  |  3 +++
 cpp/src/arrow/ipc/feather.cc |  8 +---
 cpp/src/arrow/loader.cc  | 28 +++-
 cpp/src/arrow/loader.h   | 10 ++
 4 files changed, 37 insertions(+), 12 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/f442879d/cpp/src/arrow/api.h
--
diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h
index 51437d8..3bc8666 100644
--- a/cpp/src/arrow/api.h
+++ b/cpp/src/arrow/api.h
@@ -24,7 +24,10 @@
 #include "arrow/buffer.h"
 #include "arrow/builder.h"
 #include "arrow/column.h"
+#include "arrow/compare.h"
+#include "arrow/loader.h"
 #include "arrow/memory_pool.h"
+#include "arrow/pretty_print.h"
 #include "arrow/schema.h"
 #include "arrow/status.h"
 #include "arrow/table.h"

http://git-wip-us.apache.org/repos/asf/arrow/blob/f442879d/cpp/src/arrow/ipc/feather.cc
--
diff --git a/cpp/src/arrow/ipc/feather.cc b/cpp/src/arrow/ipc/feather.cc
index 13dfa58..1d165ac 100644
--- a/cpp/src/arrow/ipc/feather.cc
+++ b/cpp/src/arrow/ipc/feather.cc
@@ -331,7 +331,6 @@ class TableReader::TableReaderImpl {
 std::shared_ptr type;
 RETURN_NOT_OK(GetDataType(meta, metadata_type, metadata, &type));
 
-std::vector fields(1);
 std::vector> buffers;
 
 // Buffer data from the source (may or may not perform a copy depending on
@@ -357,12 +356,7 @@ class TableReader::TableReaderImpl {
 }
 
 buffers.push_back(SliceBuffer(buffer, offset, buffer->size() - offset));
-
-fields[0].length = meta->length();
-fields[0].null_count = meta->null_count();
-fields[0].offset = 0;
-
-return LoadArray(type, fields, buffers, out);
+return MakePrimitiveArray(type, buffers, meta->length(), 
meta->null_count(), 0, out);
   }
 
   bool HasDescription() const { return metadata_->HasDescription(); }

http://git-wip-us.apache.org/repos/asf/arrow/blob/f442879d/cpp/src/arrow/loader.cc
--
diff --git a/cpp/src/arrow/loader.cc b/cpp/src/arrow/loader.cc
index 3cb51ae..0b3ee1c 100644
--- a/cpp/src/arrow/loader.cc
+++ b/cpp/src/arrow/loader.cc
@@ -235,8 +235,8 @@ class ArrayLoader : public TypeVisitor {
   std::shared_ptr result_;
 };
 
-Status ARROW_EXPORT LoadArray(const std::shared_ptr& type,
-ArrayComponentSource* source, std::shared_ptr* out) {
+Status LoadArray(const std::shared_ptr& type, ArrayComponentSource* 
source,
+std::shared_ptr* out) {
   ArrayLoaderContext context;
   context.source = source;
   context.field_index = context.buffer_index = 0;
@@ -244,8 +244,8 @@ Status ARROW_EXPORT LoadArray(const 
std::shared_ptr& type,
   return LoadArray(type, &context, out);
 }
 
-Status ARROW_EXPORT LoadArray(const std::shared_ptr& type,
-ArrayLoaderContext* context, std::shared_ptr* out) {
+Status LoadArray(const std::shared_ptr& type, ArrayLoaderContext* 
context,
+std::shared_ptr* out) {
   ArrayLoader loader(type, context);
   RETURN_NOT_OK(loader.Load(out));
 
@@ -275,11 +275,29 @@ class InMemorySource : public ArrayComponentSource {
   const std::vector>& buffers_;
 };
 
-Status ARROW_EXPORT LoadArray(const std::shared_ptr& type,
+Status LoadArray(const std::shared_ptr& type,
 const std::vector& fields,
 const std::vector>& buffers, 
std::shared_ptr* out) {
   InMemorySource source(fields, buffers);
   return LoadArray(type, &source, out);
 }
 
+Status MakePrimitiveArray(const std::shared_ptr& type, int64_t 
length,
+const std::shared_ptr& data, const std::shared_ptr& 
null_bitmap,
+int64_t null_count, int64_t offs

arrow git commit: ARROW-642: [Java] Remove temporary file in java/tools

2017-03-16 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 1c101ffe0 -> 0cf2bbb2a


ARROW-642: [Java] Remove temporary file in java/tools

Author: Wes McKinney 

Closes #389 from wesm/ARROW-642 and squashes the following commits:

03771c8 [Wes McKinney] Remove temporary file from ARROW-542


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/0cf2bbb2
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/0cf2bbb2
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/0cf2bbb2

Branch: refs/heads/master
Commit: 0cf2bbb2afe6006219904265b41123c2ce10715a
Parents: 1c101ff
Author: Wes McKinney 
Authored: Thu Mar 16 21:16:44 2017 +0100
Committer: Uwe L. Korn 
Committed: Thu Mar 16 21:16:44 2017 +0100

--
 java/tools/tmptestfilesio | Bin 628 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/0cf2bbb2/java/tools/tmptestfilesio
--
diff --git a/java/tools/tmptestfilesio b/java/tools/tmptestfilesio
deleted file mode 100644
index d1b6b6c..000
Binary files a/java/tools/tmptestfilesio and /dev/null differ



arrow git commit: ARROW-650: [GLib] Follow ReadableFileInterface -> RandomAccessFile change

2017-03-18 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 57b537a3c -> 16dd87164


ARROW-650: [GLib] Follow ReadableFileInterface -> RandomAccessFile change

Author: Kouhei Sutou 

Closes #399 from kou/glib-follow-random-access-change and squashes the 
following commits:

d46a1cb [Kouhei Sutou] [GLib] Follow ReadableFileInterface -> RandomAccessFile 
change


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/16dd8716
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/16dd8716
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/16dd8716

Branch: refs/heads/master
Commit: 16dd87164d7ab756dc6c5eaabd22ef767edca037
Parents: 57b537a
Author: Kouhei Sutou 
Authored: Sat Mar 18 18:14:49 2017 +0100
Committer: Uwe L. Korn 
Committed: Sat Mar 18 18:14:49 2017 +0100

--
 c_glib/arrow-glib/Makefile.am   |   6 +-
 c_glib/arrow-glib/arrow-io-glib.h   |   2 +-
 c_glib/arrow-glib/arrow-io-glib.hpp |   2 +-
 c_glib/arrow-glib/io-memory-mapped-file.cpp |  14 +--
 c_glib/arrow-glib/io-random-access-file.cpp | 128 +++
 c_glib/arrow-glib/io-random-access-file.h   |  55 ++
 c_glib/arrow-glib/io-random-access-file.hpp |  38 +++
 c_glib/arrow-glib/io-readable-file.cpp  | 127 --
 c_glib/arrow-glib/io-readable-file.h|  55 --
 c_glib/arrow-glib/io-readable-file.hpp  |  38 ---
 c_glib/arrow-glib/ipc-file-reader.cpp   |   6 +-
 c_glib/arrow-glib/ipc-file-reader.h |   4 +-
 c_glib/doc/reference/arrow-glib-docs.sgml   |   2 +-
 13 files changed, 239 insertions(+), 238 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/16dd8716/c_glib/arrow-glib/Makefile.am
--
diff --git a/c_glib/arrow-glib/Makefile.am b/c_glib/arrow-glib/Makefile.am
index 61137a0..7699594 100644
--- a/c_glib/arrow-glib/Makefile.am
+++ b/c_glib/arrow-glib/Makefile.am
@@ -242,8 +242,8 @@ libarrow_io_glib_la_headers =   \
io-input-stream.h   \
io-memory-mapped-file.h \
io-output-stream.h  \
+   io-random-access-file.h \
io-readable.h   \
-   io-readable-file.h  \
io-writeable.h  \
io-writeable-file.h
 
@@ -261,8 +261,8 @@ libarrow_io_glib_la_sources =   \
io-input-stream.cpp \
io-memory-mapped-file.cpp   \
io-output-stream.cpp\
+   io-random-access-file.cpp   \
io-readable.cpp \
-   io-readable-file.cpp\
io-writeable.cpp\
io-writeable-file.cpp   \
$(libarrow_io_glib_la_headers)  \
@@ -276,8 +276,8 @@ libarrow_io_glib_la_cpp_headers =   \
io-input-stream.hpp \
io-memory-mapped-file.hpp   \
io-output-stream.hpp\
+   io-random-access-file.hpp   \
io-readable.hpp \
-   io-readable-file.hpp\
io-writeable.hpp\
io-writeable-file.hpp
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/16dd8716/c_glib/arrow-glib/arrow-io-glib.h
--
diff --git a/c_glib/arrow-glib/arrow-io-glib.h 
b/c_glib/arrow-glib/arrow-io-glib.h
index e02aa9b..4d49a98 100644
--- a/c_glib/arrow-glib/arrow-io-glib.h
+++ b/c_glib/arrow-glib/arrow-io-glib.h
@@ -26,7 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
-#include 
 #include 
 #include 

http://git-wip-us.apache.org/repos/asf/arrow/blob/16dd8716/c_glib/arrow-glib/arrow-io-glib.hpp
--
diff --git a/c_glib/arrow-glib/arrow-io-glib.hpp 
b/c_glib/arrow-glib/arrow-io-glib.hpp
index 378f202..3e7636c 100644
--- a/c_glib/arrow-glib/arrow-io-glib.hpp
+++ b/c_glib/arrow-glib/arrow-io-glib.hpp
@@ -25,6 +25,6 @@
 #include 
 #include 
 #include 
+#include 
 #include 
-#include 
 #include 

http://git-wip-us.apache.org/repos/asf/arrow/blob/16dd8716/c_glib/arrow-glib/io-memory-mapped-file.cpp
--
diff --git a/c_glib/arrow-glib/io-memory-mapped-file.cpp 
b/c_glib/arrow-glib/io-memory-mapped-file.cpp
index aa6ae2a..12c9a6c 100644
--- a/c_glib/arrow-glib/io-memory-mapped-file.cpp
+++ b/c_glib/arrow-glib/io-memory-mapped-file.cpp
@@ -29,7 +29,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 

arrow git commit: ARROW-648: [C++] Support multiarch on Debian

2017-03-18 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 16dd87164 -> f5157a0af


ARROW-648: [C++] Support multiarch on Debian

On multiarch enabled Debian, we need to install libraries into
${CMAKE_INSTALL_PREFIX}/lib/${ARCH}/ instead of
${CMAKE_INSTALL_PREFIX}/lib/.

See also: https://wiki.debian.org/Multiarch/HOWTO

Author: Kouhei Sutou 

Closes #398 from kou/debian-support-multiarch and squashes the following 
commits:

f5c8495 [Kouhei Sutou] [C++] Fix missing "${prefix}/" in .pc.in
8da48f6 [Kouhei Sutou] [C++] Support multiarch on Debian


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/f5157a0a
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/f5157a0a
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/f5157a0a

Branch: refs/heads/master
Commit: f5157a0af7046a618f159a5d0693a664f45658d7
Parents: 16dd871
Author: Kouhei Sutou 
Authored: Sat Mar 18 18:29:12 2017 +0100
Committer: Uwe L. Korn 
Committed: Sat Mar 18 18:29:12 2017 +0100

--
 cpp/CMakeLists.txt  | 1 +
 cpp/cmake_modules/BuildUtils.cmake  | 8 
 cpp/src/arrow/CMakeLists.txt| 4 ++--
 cpp/src/arrow/arrow.pc.in   | 2 +-
 cpp/src/arrow/io/CMakeLists.txt | 2 +-
 cpp/src/arrow/io/arrow-io.pc.in | 2 +-
 cpp/src/arrow/ipc/CMakeLists.txt| 2 +-
 cpp/src/arrow/ipc/arrow-ipc.pc.in   | 2 +-
 cpp/src/arrow/jemalloc/CMakeLists.txt   | 2 +-
 cpp/src/arrow/jemalloc/arrow-jemalloc.pc.in | 2 +-
 10 files changed, 14 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/f5157a0a/cpp/CMakeLists.txt
--
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 5ecc34e..b39646e 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -28,6 +28,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} 
"${CMAKE_SOURCE_DIR}/cmake_modules")
 
 include(CMakeParseArguments)
 include(ExternalProject)
+include(GNUInstallDirs)
 
 set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support")
 set(THIRDPARTY_DIR "${CMAKE_SOURCE_DIR}/thirdparty")

http://git-wip-us.apache.org/repos/asf/arrow/blob/f5157a0a/cpp/cmake_modules/BuildUtils.cmake
--
diff --git a/cpp/cmake_modules/BuildUtils.cmake 
b/cpp/cmake_modules/BuildUtils.cmake
index 2da8a05..9e14838 100644
--- a/cpp/cmake_modules/BuildUtils.cmake
+++ b/cpp/cmake_modules/BuildUtils.cmake
@@ -68,8 +68,8 @@ function(ADD_ARROW_LIB LIB_NAME)
 endif()
   
 install(TARGETS ${LIB_NAME}_shared
-  LIBRARY DESTINATION lib
-  ARCHIVE DESTINATION lib)
+  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
   endif()
   
   if (ARROW_BUILD_STATIC)
@@ -84,8 +84,8 @@ function(ADD_ARROW_LIB LIB_NAME)
   LINK_PRIVATE ${ARG_STATIC_PRIVATE_LINK_LIBS})
   
   install(TARGETS ${LIB_NAME}_static
-  LIBRARY DESTINATION lib
-  ARCHIVE DESTINATION lib)
+  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
   endif()
   
   if (APPLE)

http://git-wip-us.apache.org/repos/asf/arrow/blob/f5157a0a/cpp/src/arrow/CMakeLists.txt
--
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 0abd4b9..24a9547 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -34,7 +34,7 @@ install(FILES
   type_fwd.h
   type_traits.h
   test-util.h
-  DESTINATION include/arrow)
+  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow")
 
 # pkg-config support
 configure_file(arrow.pc.in
@@ -42,7 +42,7 @@ configure_file(arrow.pc.in
   @ONLY)
 install(
   FILES "${CMAKE_CURRENT_BINARY_DIR}/arrow.pc"
-  DESTINATION "lib/pkgconfig/")
+  DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/")
 
 ###
 # Unit tests

http://git-wip-us.apache.org/repos/asf/arrow/blob/f5157a0a/cpp/src/arrow/arrow.pc.in
--
diff --git a/cpp/src/arrow/arrow.pc.in b/cpp/src/arrow/arrow.pc.in
index 5ad429b..1c3f65d 100644
--- a/cpp/src/arrow/arrow.pc.in
+++ b/cpp/src/arrow/arrow.pc.in
@@ -16,7 +16,7 @@
 # under the License.
 
 prefix=@CMAKE_INSTALL_PREFIX@
-libdir=${prefix}/lib
+libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@
 includedir=${prefix}/include
 
 Name: Apache Arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/f5157a0a/cpp/src/arrow/io/CMakeLists.txt
--
diff --git a/cpp/src/arrow/io/CMakeLists.txt b/cpp/src/arrow/io/CMakeLists.txt
index c

[2/3] arrow git commit: ARROW-661: [C++] Add LargeRecordBatch metadata type, IPC support, associated refactoring

2017-03-20 Thread uwe
http://git-wip-us.apache.org/repos/asf/arrow/blob/df2220f3/cpp/src/arrow/ipc/metadata-internal.cc
--
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc 
b/cpp/src/arrow/ipc/metadata-internal.cc
deleted file mode 100644
index be0d282..000
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ /dev/null
@@ -1,597 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/ipc/metadata-internal.h"
-
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include "flatbuffers/flatbuffers.h"
-
-#include "arrow/array.h"
-#include "arrow/buffer.h"
-#include "arrow/ipc/Message_generated.h"
-#include "arrow/schema.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-
-namespace arrow {
-
-namespace flatbuf = org::apache::arrow::flatbuf;
-
-namespace ipc {
-
-static Status IntFromFlatbuffer(
-const flatbuf::Int* int_data, std::shared_ptr* out) {
-  if (int_data->bitWidth() > 64) {
-return Status::NotImplemented("Integers with more than 64 bits not 
implemented");
-  }
-  if (int_data->bitWidth() < 8) {
-return Status::NotImplemented("Integers with less than 8 bits not 
implemented");
-  }
-
-  switch (int_data->bitWidth()) {
-case 8:
-  *out = int_data->is_signed() ? int8() : uint8();
-  break;
-case 16:
-  *out = int_data->is_signed() ? int16() : uint16();
-  break;
-case 32:
-  *out = int_data->is_signed() ? int32() : uint32();
-  break;
-case 64:
-  *out = int_data->is_signed() ? int64() : uint64();
-  break;
-default:
-  return Status::NotImplemented("Integers not in cstdint are not 
implemented");
-  }
-  return Status::OK();
-}
-
-static Status FloatFromFlatuffer(
-const flatbuf::FloatingPoint* float_data, std::shared_ptr* out) {
-  if (float_data->precision() == flatbuf::Precision_HALF) {
-*out = float16();
-  } else if (float_data->precision() == flatbuf::Precision_SINGLE) {
-*out = float32();
-  } else {
-*out = float64();
-  }
-  return Status::OK();
-}
-
-// Forward declaration
-static Status FieldToFlatbuffer(FBB& fbb, const std::shared_ptr& field,
-DictionaryMemo* dictionary_memo, FieldOffset* offset);
-
-static Offset IntToFlatbuffer(FBB& fbb, int bitWidth, bool is_signed) {
-  return flatbuf::CreateInt(fbb, bitWidth, is_signed).Union();
-}
-
-static Offset FloatToFlatbuffer(FBB& fbb, flatbuf::Precision precision) {
-  return flatbuf::CreateFloatingPoint(fbb, precision).Union();
-}
-
-static Status AppendChildFields(FBB& fbb, const std::shared_ptr& 
type,
-std::vector* out_children, DictionaryMemo* dictionary_memo) {
-  FieldOffset field;
-  for (int i = 0; i < type->num_children(); ++i) {
-RETURN_NOT_OK(FieldToFlatbuffer(fbb, type->child(i), dictionary_memo, 
&field));
-out_children->push_back(field);
-  }
-  return Status::OK();
-}
-
-static Status ListToFlatbuffer(FBB& fbb, const std::shared_ptr& type,
-std::vector* out_children, DictionaryMemo* dictionary_memo,
-Offset* offset) {
-  RETURN_NOT_OK(AppendChildFields(fbb, type, out_children, dictionary_memo));
-  *offset = flatbuf::CreateList(fbb).Union();
-  return Status::OK();
-}
-
-static Status StructToFlatbuffer(FBB& fbb, const std::shared_ptr& 
type,
-std::vector* out_children, DictionaryMemo* dictionary_memo,
-Offset* offset) {
-  RETURN_NOT_OK(AppendChildFields(fbb, type, out_children, dictionary_memo));
-  *offset = flatbuf::CreateStruct_(fbb).Union();
-  return Status::OK();
-}
-
-// --
-// Union implementation
-
-static Status UnionFromFlatbuffer(const flatbuf::Union* union_data,
-const std::vector>& children, 
std::shared_ptr* out) {
-  UnionMode mode = union_data->mode() == flatbuf::UnionMode_Sparse ? 
UnionMode::SPARSE
-   : 
UnionMode::DENSE;
-
-  std::vector type_codes;
-
-  const flatbuffers::Vector* fb_type_ids = union_data->typeIds();
-  if (fb_type_ids == nullptr) {
-for (uint8_t i = 0; i < children.size(); ++i) {
-  type_codes.push_back(i);
-}
-  } else {
-for (int32_t id : (*fb_type_ids)) {
-  // TODO(wesm): can these val

[1/3] arrow git commit: ARROW-661: [C++] Add LargeRecordBatch metadata type, IPC support, associated refactoring

2017-03-20 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 4c5f79c39 -> df2220f35


http://git-wip-us.apache.org/repos/asf/arrow/blob/df2220f3/cpp/src/arrow/ipc/writer.h
--
diff --git a/cpp/src/arrow/ipc/writer.h b/cpp/src/arrow/ipc/writer.h
index 7aff71e..1271652 100644
--- a/cpp/src/arrow/ipc/writer.h
+++ b/cpp/src/arrow/ipc/writer.h
@@ -45,6 +45,40 @@ class OutputStream;
 
 namespace ipc {
 
+// Write the RecordBatch (collection of equal-length Arrow arrays) to the
+// output stream in a contiguous block. The record batch metadata is written as
+// a flatbuffer (see format/Message.fbs -- the RecordBatch message type)
+// prefixed by its size, followed by each of the memory buffers in the batch
+// written end to end (with appropriate alignment and padding):
+//
+//   
+//
+// Finally, the absolute offsets (relative to the start of the output stream)
+// to the end of the body and end of the metadata / data header (suffixed by
+// the header size) is returned in out-variables
+//
+// @param(in) buffer_start_offset: the start offset to use in the buffer 
metadata,
+// default should be 0
+//
+// @param(out) metadata_length: the size of the length-prefixed flatbuffer
+// including padding to a 64-byte boundary
+//
+// @param(out) body_length: the size of the contiguous buffer block plus
+// padding bytes
+Status WriteRecordBatch(const RecordBatch& batch, int64_t buffer_start_offset,
+io::OutputStream* dst, int32_t* metadata_length, int64_t* body_length,
+MemoryPool* pool, int max_recursion_depth = kMaxNestingDepth);
+
+// Write Array as a DictionaryBatch message
+Status WriteDictionary(int64_t dictionary_id, const std::shared_ptr& 
dictionary,
+int64_t buffer_start_offset, io::OutputStream* dst, int32_t* 
metadata_length,
+int64_t* body_length, MemoryPool* pool);
+
+// Compute the precise number of bytes needed in a contiguous memory segment to
+// write the record batch. This involves generating the complete serialized
+// Flatbuffers metadata.
+Status GetRecordBatchSize(const RecordBatch& batch, int64_t* size);
+
 class ARROW_EXPORT StreamWriter {
  public:
   virtual ~StreamWriter() = default;
@@ -68,10 +102,6 @@ class ARROW_EXPORT StreamWriter {
   std::unique_ptr impl_;
 };
 
-Status WriteFileFooter(const Schema& schema, const std::vector& 
dictionaries,
-const std::vector& record_batches, DictionaryMemo* 
dictionary_memo,
-io::OutputStream* out);
-
 class ARROW_EXPORT FileWriter : public StreamWriter {
  public:
   static Status Open(io::OutputStream* sink, const std::shared_ptr& 
schema,
@@ -86,6 +116,14 @@ class ARROW_EXPORT FileWriter : public StreamWriter {
   std::unique_ptr impl_;
 };
 
+// --
+
+/// EXPERIMENTAL: Write record batch using LargeRecordBatch IPC metadata. This
+/// data may not be readable by all Arrow implementations
+Status WriteLargeRecordBatch(const RecordBatch& batch, int64_t 
buffer_start_offset,
+io::OutputStream* dst, int32_t* metadata_length, int64_t* body_length,
+MemoryPool* pool, int max_recursion_depth = kMaxNestingDepth);
+
 }  // namespace ipc
 }  // namespace arrow
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/df2220f3/cpp/src/arrow/loader.h
--
diff --git a/cpp/src/arrow/loader.h b/cpp/src/arrow/loader.h
index f116d64..9b650e2 100644
--- a/cpp/src/arrow/loader.h
+++ b/cpp/src/arrow/loader.h
@@ -41,11 +41,36 @@ struct DataType;
 constexpr int kMaxNestingDepth = 64;
 
 struct ARROW_EXPORT FieldMetadata {
+  FieldMetadata() {}
+  FieldMetadata(int64_t length, int64_t null_count, int64_t offset)
+  : length(length), null_count(null_count), offset(offset) {}
+
+  FieldMetadata(const FieldMetadata& other) {
+this->length = other.length;
+this->null_count = other.null_count;
+this->offset = other.offset;
+  }
+
   int64_t length;
   int64_t null_count;
   int64_t offset;
 };
 
+struct ARROW_EXPORT BufferMetadata {
+  BufferMetadata() {}
+  BufferMetadata(int32_t page, int64_t offset, int64_t length)
+  : page(page), offset(offset), length(length) {}
+
+  /// The shared memory page id where to find this. Set to -1 if unused
+  int32_t page;
+
+  /// The relative offset into the memory page to the starting byte of the 
buffer
+  int64_t offset;
+
+  /// Absolute length in bytes of the buffer
+  int64_t length;
+};
+
 /// Implement this to create new types of Arrow data loaders
 class ARROW_EXPORT ArrayComponentSource {
  public:

http://git-wip-us.apache.org/repos/asf/arrow/blob/df2220f3/cpp/src/arrow/type.h
--
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index a143d79..adc3161 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -222,6 +222,7 @@ struct ARROW_EXPORT Field {
 
   std::string ToString() const;
 };
+
 typedef std::s

[3/3] arrow git commit: ARROW-661: [C++] Add LargeRecordBatch metadata type, IPC support, associated refactoring

2017-03-20 Thread uwe
ARROW-661: [C++] Add LargeRecordBatch metadata type, IPC support, associated 
refactoring

This patch enables the following code for writing record batches exceeding 2^31 
- 1

```c++
RETURN_NOT_OK(WriteLargeRecordBatch(
batch, buffer_offset, mmap_.get(), &metadata_length, &body_length, pool_));
return ReadLargeRecordBatch(batch.schema(), 0, mmap_.get(), result);
```

This also does a fair amount of refactoring and code consolidation related to 
ongoing code cleaning in arrow_ipc.

These APIs are marked experimental. This does add `LargeRecordBatch` flatbuffer 
type to the Message union, but I've indicated that Arrow implementations (e.g. 
Java) are not required to implement this type. It's strictly to enable C++ 
users to write very large datasets that have been embedded for convenience in 
Arrow's structured data model.

cc @pcmoritz @robertnishihara

Author: Wes McKinney 

Closes #404 from wesm/ARROW-661 and squashes the following commits:

9c18a95 [Wes McKinney] Fix import ordering
d7811f2 [Wes McKinney] cpplint
179a1e3 [Wes McKinney] Add unit test for large record batches. Use bytewise 
comparisons with aligned bitmaps
36c3862 [Wes McKinney] Get LargeRecordBatch round trip working. Add to Message 
union for now
4c1d08c [Wes McKinney] Refactoring, failing test fixture for large record batch
f4c8830 [Wes McKinney] Consolidate ipc-metadata-test and ipc-read-write-test 
and draft large record batch read/write path
85d1a1c [Wes McKinney] Add (untested) metadata writer for LargeRecordBatch
0f2722c [Wes McKinney] Consolidate metadata-internal.h into metadata.h. Use own 
Arrow structs for IPC metadata and convert to flatbuffers later
e8f8973 [Wes McKinney] Split adapter.h/cc into reader.h/writer.h. Draft 
LargeRecordBatch type


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/df2220f3
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/df2220f3
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/df2220f3

Branch: refs/heads/master
Commit: df2220f350282925a454ed911eed6618e4d53969
Parents: 4c5f79c
Author: Wes McKinney 
Authored: Mon Mar 20 10:48:34 2017 +0100
Committer: Uwe L. Korn 
Committed: Mon Mar 20 10:48:34 2017 +0100

--
 cpp/src/arrow/allocator-test.cc  |   1 +
 cpp/src/arrow/allocator.h|   1 +
 cpp/src/arrow/io/test-common.h   |  18 +
 cpp/src/arrow/ipc/CMakeLists.txt |  15 +-
 cpp/src/arrow/ipc/adapter.cc | 630 ---
 cpp/src/arrow/ipc/adapter.h  | 104 
 cpp/src/arrow/ipc/api.h  |   1 -
 cpp/src/arrow/ipc/ipc-adapter-test.cc| 320 
 cpp/src/arrow/ipc/ipc-file-test.cc   | 228 -
 cpp/src/arrow/ipc/ipc-metadata-test.cc   | 100 
 cpp/src/arrow/ipc/ipc-read-write-test.cc | 608 ++
 cpp/src/arrow/ipc/metadata-internal.cc   | 597 --
 cpp/src/arrow/ipc/metadata-internal.h|  83 ---
 cpp/src/arrow/ipc/metadata.cc| 692 +-
 cpp/src/arrow/ipc/metadata.h |  40 +-
 cpp/src/arrow/ipc/reader.cc  | 171 ++-
 cpp/src/arrow/ipc/reader.h   |  22 +
 cpp/src/arrow/ipc/test-common.h  |   2 +-
 cpp/src/arrow/ipc/writer.cc  | 544 ++--
 cpp/src/arrow/ipc/writer.h   |  46 +-
 cpp/src/arrow/loader.h   |  25 +
 cpp/src/arrow/type.h |   1 +
 cpp/src/arrow/util/bit-util.cc   |  16 +-
 format/Message.fbs   |  22 +-
 24 files changed, 2131 insertions(+), 2156 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/df2220f3/cpp/src/arrow/allocator-test.cc
--
diff --git a/cpp/src/arrow/allocator-test.cc b/cpp/src/arrow/allocator-test.cc
index 0b24267..811ef5a 100644
--- a/cpp/src/arrow/allocator-test.cc
+++ b/cpp/src/arrow/allocator-test.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include "gtest/gtest.h"
+
 #include "arrow/allocator.h"
 #include "arrow/test-util.h"
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/df2220f3/cpp/src/arrow/allocator.h
--
diff --git a/cpp/src/arrow/allocator.h b/cpp/src/arrow/allocator.h
index c976ba9..e00023d 100644
--- a/cpp/src/arrow/allocator.h
+++ b/cpp/src/arrow/allocator.h
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+
 #include "arrow/memory_pool.h"
 #include "arrow/status.h"
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/df2220f3/cpp/src/arrow/io/test-common.h
--
diff --git a/cpp/src/arrow/io/test-common.h b/cpp/src/arrow/i

arrow git commit: ARROW-664: [C++] Make C++ Arrow serialization deterministic

2017-03-20 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master df2220f35 -> cd4544df8


ARROW-664: [C++] Make C++ Arrow serialization deterministic

Author: Philipp Moritz 

Closes #405 from pcmoritz/init-buffer-builder and squashes the following 
commits:

10a897f [Philipp Moritz] Initialize memory obtained by BufferBuilder to zero


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/cd4544df
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/cd4544df
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/cd4544df

Branch: refs/heads/master
Commit: cd4544df89b60641f49bbb3104043c0ae07ef8a9
Parents: df2220f
Author: Philipp Moritz 
Authored: Mon Mar 20 10:54:57 2017 +0100
Committer: Uwe L. Korn 
Committed: Mon Mar 20 10:54:57 2017 +0100

--
 cpp/src/arrow/buffer.h | 4 
 1 file changed, 4 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/cd4544df/cpp/src/arrow/buffer.h
--
diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h
index 1647e86..70c16a2 100644
--- a/cpp/src/arrow/buffer.h
+++ b/cpp/src/arrow/buffer.h
@@ -170,9 +170,13 @@ class ARROW_EXPORT BufferBuilder {
 // Resize(0) is a no-op
 if (elements == 0) { return Status::OK(); }
 if (capacity_ == 0) { buffer_ = std::make_shared(pool_); }
+int64_t old_capacity = capacity_;
 RETURN_NOT_OK(buffer_->Resize(elements));
 capacity_ = buffer_->capacity();
 data_ = buffer_->mutable_data();
+if (capacity_ > old_capacity) {
+  memset(data_ + old_capacity, 0, capacity_ - old_capacity);
+}
 return Status::OK();
   }
 



arrow git commit: ARROW-502 [C++/Python]: Logging memory pool

2017-03-20 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master cd4544df8 -> 02bdbf48a


ARROW-502 [C++/Python]: Logging memory pool

This is a simple decorator on MemoryPool that logs its calls to ``std::cout``. I 
can improve it later if you need to log to other outputs. Are you OK with the 
current logging format?

Also, I'm not a cython expert so I hope the implementation of 
``CLoggingMemoryPool`` is correct.

Author: Johan Mabille 

Closes #395 from JohanMabille/memory_pool and squashes the following commits:

aa8ad5f [Johan Mabille] cython fix
f70e78a [Johan Mabille] python logging memory pool
9d1d144 [Johan Mabille] formatting
8f9164c [Johan Mabille] Logging memory pool


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/02bdbf48
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/02bdbf48
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/02bdbf48

Branch: refs/heads/master
Commit: 02bdbf48a483b224ebfd61cf9be69cb0807e6e50
Parents: cd4544d
Author: Johan Mabille 
Authored: Mon Mar 20 10:57:57 2017 +0100
Committer: Uwe L. Korn 
Committed: Mon Mar 20 10:57:57 2017 +0100

--
 cpp/src/arrow/memory_pool-test.cc| 17 
 cpp/src/arrow/memory_pool.cc | 32 +++
 cpp/src/arrow/memory_pool.h  | 18 +
 python/pyarrow/includes/libarrow.pxd |  3 +++
 python/pyarrow/memory.pxd|  5 -
 python/pyarrow/memory.pyx|  5 -
 6 files changed, 78 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/02bdbf48/cpp/src/arrow/memory_pool-test.cc
--
diff --git a/cpp/src/arrow/memory_pool-test.cc 
b/cpp/src/arrow/memory_pool-test.cc
index 6ab73fb..8a185ab 100644
--- a/cpp/src/arrow/memory_pool-test.cc
+++ b/cpp/src/arrow/memory_pool-test.cc
@@ -78,4 +78,21 @@ TEST(DefaultMemoryPoolDeathTest, MaxMemory) {
 
 #endif  // ARROW_VALGRIND
 
+TEST(LoggingMemoryPool, Logging) {
+  DefaultMemoryPool pool;
+  LoggingMemoryPool lp(&pool);
+
+  ASSERT_EQ(0, lp.max_memory());
+
+  uint8_t* data;
+  ASSERT_OK(pool.Allocate(100, &data));
+
+  uint8_t* data2;
+  ASSERT_OK(pool.Allocate(100, &data2));
+
+  pool.Free(data, 100);
+  pool.Free(data2, 100);
+
+  ASSERT_EQ(200, pool.max_memory());
+}
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/02bdbf48/cpp/src/arrow/memory_pool.cc
--
diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index 5a63027..cf01a02 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "arrow/status.h"
 #include "arrow/util/logging.h"
@@ -134,4 +135,35 @@ MemoryPool* default_memory_pool() {
   return &default_memory_pool_;
 }
 
+LoggingMemoryPool::LoggingMemoryPool(MemoryPool* pool) : pool_(pool) {}
+
+Status LoggingMemoryPool::Allocate(int64_t size, uint8_t** out) {
+  Status s = pool_->Allocate(size, out);
+  std::cout << "Allocate: size = " << size << " - out = " << *out << std::endl;
+  return s;
+}
+
+Status LoggingMemoryPool::Reallocate(int64_t old_size, int64_t new_size, 
uint8_t** ptr) {
+  Status s = pool_->Reallocate(old_size, new_size, ptr);
+  std::cout << "Reallocate: old_size = " << old_size << " - new_size = " << 
new_size
+<< " - ptr = " << *ptr << std::endl;
+  return s;
+}
+
+void LoggingMemoryPool::Free(uint8_t* buffer, int64_t size) {
+  pool_->Free(buffer, size);
+  std::cout << "Free: buffer = " << buffer << " - size = " << size << 
std::endl;
+}
+
+int64_t LoggingMemoryPool::bytes_allocated() const {
+  int64_t nb_bytes = pool_->bytes_allocated();
+  std::cout << "bytes_allocated: " << nb_bytes << std::endl;
+  return nb_bytes;
+}
+
+int64_t LoggingMemoryPool::max_memory() const {
+  int64_t mem = pool_->max_memory();
+  std::cout << "max_memory: " << mem << std::endl;
+  return mem;
+}
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/02bdbf48/cpp/src/arrow/memory_pool.h
--
diff --git a/cpp/src/arrow/memory_pool.h b/cpp/src/arrow/memory_pool.h
index 0edfda6..90bc593 100644
--- a/cpp/src/arrow/memory_pool.h
+++ b/cpp/src/arrow/memory_pool.h
@@ -89,6 +89,24 @@ class ARROW_EXPORT DefaultMemoryPool : public MemoryPool {
   std::atomic max_memory_;
 };
 
+class ARROW_EXPORT LoggingMemoryPool : public M

arrow git commit: ARROW-671: [GLib] Install missing license file

2017-03-20 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 02bdbf48a -> 6cd82c2a2


ARROW-671: [GLib] Install missing license file

Author: Kouhei Sutou 

Closes #406 from kou/glib-install-missing-license-file and squashes the 
following commits:

8e452d4 [Kouhei Sutou] [GLib] Install missing license file


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/6cd82c2a
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/6cd82c2a
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/6cd82c2a

Branch: refs/heads/master
Commit: 6cd82c2a294562d1d16a4767b32f072056f396a3
Parents: 02bdbf4
Author: Kouhei Sutou 
Authored: Mon Mar 20 17:55:56 2017 +0100
Committer: Uwe L. Korn 
Committed: Mon Mar 20 17:55:56 2017 +0100

--
 c_glib/.gitignore  | 1 +
 c_glib/Makefile.am | 6 ++
 c_glib/autogen.sh  | 2 ++
 3 files changed, 9 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/6cd82c2a/c_glib/.gitignore
--
diff --git a/c_glib/.gitignore b/c_glib/.gitignore
index 38e33a2..e57a059 100644
--- a/c_glib/.gitignore
+++ b/c_glib/.gitignore
@@ -8,6 +8,7 @@ Makefile.in
 *.lo
 *.la
 *~
+/LICENSE.txt
 /*.tar.gz
 /aclocal.m4
 /autom4te.cache/

http://git-wip-us.apache.org/repos/asf/arrow/blob/6cd82c2a/c_glib/Makefile.am
--
diff --git a/c_glib/Makefile.am b/c_glib/Makefile.am
index 076f9be..c078b08 100644
--- a/c_glib/Makefile.am
+++ b/c_glib/Makefile.am
@@ -23,4 +23,10 @@ SUBDIRS =\
example
 
 EXTRA_DIST =   \
+   README.md   \
+   LICENSE.txt \
version
+
+doc_DATA = \
+   README.md   \
+   LICENSE.txt

http://git-wip-us.apache.org/repos/asf/arrow/blob/6cd82c2a/c_glib/autogen.sh
--
diff --git a/c_glib/autogen.sh b/c_glib/autogen.sh
index 08e33e6..6e2036d 100755
--- a/c_glib/autogen.sh
+++ b/c_glib/autogen.sh
@@ -25,6 +25,8 @@ ruby \
 ../java/pom.xml > \
 version
 
+cp ../LICENSE.txt ./
+
 mkdir -p m4
 
 gtkdocize --copy --docdir doc/reference



arrow git commit: ARROW-689: [GLib] Fix install directories

2017-03-22 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 1b957dcf1 -> 36103143b


ARROW-689: [GLib] Fix install directories

Header files should be installed into
`${PREFIX}/include/arrow-glib/` instead of
`${PREFIX}/include/apache-arrow-glib/`.

Documents should be installed into
`${PREFIX}/share/doc/arrow-glib/` instead of
`${PREFIX}/share/doc/apache-arrow-glib/`.

We needed to change install directories when we changed `AC_INIT()`'s 3rd
argument to apache-arrow-glib...

Author: Kouhei Sutou 

Closes #421 from kou/glib-fix-install-directory and squashes the following 
commits:

65e5cee [Kouhei Sutou] [GLib] Fix install directories


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/36103143
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/36103143
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/36103143

Branch: refs/heads/master
Commit: 36103143b5975138522f4e54f8b21565a34f6504
Parents: 1b957dc
Author: Kouhei Sutou 
Authored: Wed Mar 22 18:34:52 2017 +0100
Committer: Uwe L. Korn 
Committed: Wed Mar 22 18:34:52 2017 +0100

--
 c_glib/Makefile.am| 3 ++-
 c_glib/arrow-glib/Makefile.am | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/36103143/c_glib/Makefile.am
--
diff --git a/c_glib/Makefile.am b/c_glib/Makefile.am
index c078b08..40e8395 100644
--- a/c_glib/Makefile.am
+++ b/c_glib/Makefile.am
@@ -27,6 +27,7 @@ EXTRA_DIST =  \
LICENSE.txt \
version
 
-doc_DATA = \
+arrow_glib_docdir = ${datarootdir}/doc/arrow-glib
+arrow_glib_doc_DATA =  \
README.md   \
LICENSE.txt

http://git-wip-us.apache.org/repos/asf/arrow/blob/36103143/c_glib/arrow-glib/Makefile.am
--
diff --git a/c_glib/arrow-glib/Makefile.am b/c_glib/arrow-glib/Makefile.am
index a948007..a72d1e8 100644
--- a/c_glib/arrow-glib/Makefile.am
+++ b/c_glib/arrow-glib/Makefile.am
@@ -403,7 +403,8 @@ stamp-ipc-enums.c: $(libarrow_ipc_glib_la_headers) 
ipc-enums.c.template
 $(libarrow_ipc_glib_la_headers)) > ipc-enums.c
touch $@
 
-pkginclude_HEADERS =   \
+arrow_glib_includedir = $(includedir)/arrow-glib
+arrow_glib_include_HEADERS =   \
$(libarrow_glib_la_headers) \
$(libarrow_glib_la_cpp_headers) \
$(libarrow_glib_la_generated_headers)   \



arrow git commit: ARROW-704: Fix bad import caused by conflicting changes

2017-03-23 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master e8f6a492d -> 7594492d5


ARROW-704: Fix bad import caused by conflicting changes

Author: Julien Le Dem 

Closes #430 from julienledem/ARROW-704 and squashes the following commits:

2e42330 [Julien Le Dem] ARROW-704: Fix bad import caused by conflicting changes


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/7594492d
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/7594492d
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/7594492d

Branch: refs/heads/master
Commit: 7594492d5105e86d3388c8bac94dab8dbfa5226a
Parents: e8f6a49
Author: Julien Le Dem 
Authored: Thu Mar 23 17:18:35 2017 +0100
Committer: Uwe L. Korn 
Committed: Thu Mar 23 17:18:35 2017 +0100

--
 .../test/java/org/apache/arrow/vector/types/pojo/TestSchema.java  | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/7594492d/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java
--
diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java 
b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java
index 9f1b2e0..57af952 100644
--- 
a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java
+++ 
b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java
@@ -22,7 +22,6 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
-import java.util.List;
 
 import org.apache.arrow.vector.types.FloatingPointPrecision;
 import org.apache.arrow.vector.types.IntervalUnit;
@@ -152,7 +151,7 @@ public class TestSchema {
 assertEquals(schema.hashCode(), actual.hashCode());
   }
 
-  private void validateFieldsHashcode(List schemaFields, List 
actualFields) {
+  private void validateFieldsHashcode(java.util.List schemaFields, 
java.util.List actualFields) {
 assertEquals(schemaFields.size(), actualFields.size());
 if (schemaFields.size() == 0) {
   return;



arrow git commit: ARROW-682: [Integration] Check implementations against themselves

2017-03-24 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master dcaa8e5d7 -> 13c12c6ea


ARROW-682: [Integration] Check implementations against themselves

This adds an additional layer of internal consistency checks

Author: Wes McKinney 

Closes #433 from wesm/ARROW-682 and squashes the following commits:

b33ac7a [Wes McKinney] Run integration tests with same implementation producing 
and consuming to validate internal consistency


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/13c12c6e
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/13c12c6e
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/13c12c6e

Branch: refs/heads/master
Commit: 13c12c6ea5e23928268b5c2c7b962d223cca7bd4
Parents: dcaa8e5
Author: Wes McKinney 
Authored: Fri Mar 24 11:54:18 2017 +0100
Committer: Uwe L. Korn 
Committed: Fri Mar 24 11:54:18 2017 +0100

--
 integration/integration_test.py | 56 +++-
 1 file changed, 29 insertions(+), 27 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/13c12c6e/integration/integration_test.py
--
diff --git a/integration/integration_test.py b/integration/integration_test.py
index 5cd63c5..ec2a38d 100644
--- a/integration/integration_test.py
+++ b/integration/integration_test.py
@@ -34,10 +34,12 @@ ARROW_HOME = os.path.abspath(__file__).rsplit("/", 2)[0]
 # Control for flakiness
 np.random.seed(12345)
 
+
 def load_version_from_pom():
 import xml.etree.ElementTree as ET
 tree = ET.parse(os.path.join(ARROW_HOME, 'java', 'pom.xml'))
-version_tag = 
list(tree.getroot().findall('{http://maven.apache.org/POM/4.0.0}version'))[0]
+tag_pattern = '{http://maven.apache.org/POM/4.0.0}version'
+version_tag = list(tree.getroot().findall(tag_pattern))[0]
 return version_tag.text
 
 
@@ -596,32 +598,32 @@ class IntegrationRunner(object):
 def run(self):
 for producer, consumer in itertools.product(self.testers,
 self.testers):
-if producer is consumer:
-continue
-
-print('-- {0} producing, {1} consuming'.format(producer.name,
-   consumer.name))
-
-for json_path in self.json_files:
-print('Testing file {0}'.format(json_path))
-
-# Make the random access file
-print('-- Creating binary inputs')
-producer_file_path = os.path.join(self.temp_dir, guid())
-producer.json_to_file(json_path, producer_file_path)
-
-# Validate the file
-print('-- Validating file')
-consumer.validate(json_path, producer_file_path)
-
-print('-- Validating stream')
-producer_stream_path = os.path.join(self.temp_dir, guid())
-consumer_file_path = os.path.join(self.temp_dir, guid())
-producer.file_to_stream(producer_file_path,
-producer_stream_path)
-consumer.stream_to_file(producer_stream_path,
-consumer_file_path)
-consumer.validate(json_path, consumer_file_path)
+self._compare_implementations(producer, consumer)
+
+def _compare_implementations(self, producer, consumer):
+print('-- {0} producing, {1} consuming'.format(producer.name,
+   consumer.name))
+
+for json_path in self.json_files:
+print('Testing file {0}'.format(json_path))
+
+# Make the random access file
+print('-- Creating binary inputs')
+producer_file_path = os.path.join(self.temp_dir, guid())
+producer.json_to_file(json_path, producer_file_path)
+
+# Validate the file
+print('-- Validating file')
+consumer.validate(json_path, producer_file_path)
+
+print('-- Validating stream')
+producer_stream_path = os.path.join(self.temp_dir, guid())
+consumer_file_path = os.path.join(self.temp_dir, guid())
+producer.file_to_stream(producer_file_path,
+producer_stream_path)
+consumer.stream_to_file(producer_stream_path,
+consumer_file_path)
+consumer.validate(json_path, consumer_file_path)
 
 
 class Tester(object):



arrow git commit: ARROW-595: [Python] Set schema attribute on StreamReader

2017-03-24 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 13c12c6ea -> bc185a41a


ARROW-595: [Python] Set schema attribute on StreamReader

Author: Wes McKinney 

Closes #434 from wesm/ARROW-595 and squashes the following commits:

484cc7b [Wes McKinney] Set schema attribute on StreamReader


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/bc185a41
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/bc185a41
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/bc185a41

Branch: refs/heads/master
Commit: bc185a41a239181d255e72bf255a354da4f5dae6
Parents: 13c12c6
Author: Wes McKinney 
Authored: Fri Mar 24 11:58:30 2017 +0100
Committer: Uwe L. Korn 
Committed: Fri Mar 24 11:58:30 2017 +0100

--
 python/pyarrow/io.pyx| 4 ++--
 python/pyarrow/tests/test_ipc.py | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/bc185a41/python/pyarrow/io.pyx
--
diff --git a/python/pyarrow/io.pyx b/python/pyarrow/io.pyx
index 17b43de..72e0e0f 100644
--- a/python/pyarrow/io.pyx
+++ b/python/pyarrow/io.pyx
@@ -933,8 +933,8 @@ cdef class _StreamReader:
 with nogil:
 check_status(CStreamReader.Open(in_stream, &self.reader))
 
-schema = Schema()
-schema.init_schema(self.reader.get().schema())
+self.schema = Schema()
+self.schema.init_schema(self.reader.get().schema())
 
 def get_next_batch(self):
 """

http://git-wip-us.apache.org/repos/asf/arrow/blob/bc185a41/python/pyarrow/tests/test_ipc.py
--
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index 665a63b..4c9dad1 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -104,6 +104,8 @@ class TestStream(MessagingTest, unittest.TestCase):
 file_contents = self._get_source()
 reader = pa.StreamReader(file_contents)
 
+assert reader.schema.equals(batches[0].schema)
+
 total = 0
 for i, next_batch in enumerate(reader):
 assert next_batch.equals(batches[i])



arrow git commit: ARROW-713: [C++] Fix cmake linking issue in new IPC benchmark

2017-03-26 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 685ebf490 -> ab848f0ea


ARROW-713: [C++] Fix cmake linking issue in new IPC benchmark

Author: Jeff Knupp 

Closes #444 from jeffknupp/master and squashes the following commits:

37aa10f [Jeff Knupp] [C++] ARROW-713: Fix cmake linking issue in new IPC 
benchmark


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/ab848f0e
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/ab848f0e
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/ab848f0e

Branch: refs/heads/master
Commit: ab848f0eab053eeea62d1cf0c0f285db6460da54
Parents: 685ebf4
Author: Jeff Knupp 
Authored: Sun Mar 26 09:19:44 2017 +0200
Committer: Uwe L. Korn 
Committed: Sun Mar 26 09:19:44 2017 +0200

--
 cpp/src/arrow/ipc/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/ab848f0e/cpp/src/arrow/ipc/CMakeLists.txt
--
diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt
index d6ee930..030cba9 100644
--- a/cpp/src/arrow/ipc/CMakeLists.txt
+++ b/cpp/src/arrow/ipc/CMakeLists.txt
@@ -173,5 +173,5 @@ if (ARROW_BUILD_UTILITIES)
 endif()
 
 ADD_ARROW_BENCHMARK(ipc-read-write-benchmark)
-ARROW_TEST_LINK_LIBRARIES(ipc-read-write-benchmark
+ARROW_BENCHMARK_LINK_LIBRARIES(ipc-read-write-benchmark
   ${ARROW_IPC_TEST_LINK_LIBS})



arrow git commit: ARROW-684: [Python] More helpful error message if libparquet_arrow not built

2017-03-26 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master ab848f0ea -> fd876697f


ARROW-684: [Python] More helpful error message if libparquet_arrow not built

Author: Wes McKinney 

Closes #443 from wesm/ARROW-684 and squashes the following commits:

c18ca81 [Wes McKinney] More helpful error message if libparquet_arrow not built


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/fd876697
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/fd876697
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/fd876697

Branch: refs/heads/master
Commit: fd876697fc37a270a978117f020bf9e07a6c1bad
Parents: ab848f0
Author: Wes McKinney 
Authored: Sun Mar 26 09:21:15 2017 +0200
Committer: Uwe L. Korn 
Committed: Sun Mar 26 09:21:15 2017 +0200

--
 python/cmake_modules/FindParquet.cmake | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/fd876697/python/cmake_modules/FindParquet.cmake
--
diff --git a/python/cmake_modules/FindParquet.cmake 
b/python/cmake_modules/FindParquet.cmake
index 7445e09..a20b651 100644
--- a/python/cmake_modules/FindParquet.cmake
+++ b/python/cmake_modules/FindParquet.cmake
@@ -68,13 +68,21 @@ else ()
   set(PARQUET_ARROW_FOUND FALSE)
 endif ()
 
-if (PARQUET_FOUND)
+if (PARQUET_FOUND AND PARQUET_ARROW_FOUND)
   if (NOT Parquet_FIND_QUIETLY)
 message(STATUS "Found the Parquet library: ${PARQUET_LIBRARIES}")
+message(STATUS "Found the Parquet Arrow library: ${PARQUET_ARROW_LIBS}")
   endif ()
 else ()
   if (NOT Parquet_FIND_QUIETLY)
-set(PARQUET_ERR_MSG "Could not find the Parquet library. Looked in ")
+if (NOT PARQUET_FOUND)
+  set(PARQUET_ERR_MSG "${PARQUET_ERR_MSG} Could not find the parquet 
library.")
+endif()
+
+if (NOT PARQUET_ARROW_FOUND)
+  set(PARQUET_ERR_MSG "${PARQUET_ERR_MSG} Could not find the parquet_arrow 
library. Did you build with -DPARQUET_ARROW=on?")
+endif()
+set(PARQUET_ERR_MSG "${PARQUET_ERR_MSG} Looked in ")
 if ( _parquet_roots )
   set(PARQUET_ERR_MSG "${PARQUET_ERR_MSG} in ${_parquet_roots}.")
 else ()
@@ -88,12 +96,6 @@ else ()
   endif ()
 endif ()
 
-if (PARQUET_ARROW_FOUND)
-  if (NOT Parquet_FIND_QUIETLY)
-message(STATUS "Found the Parquet Arrow library: ${PARQUET_ARROW_LIBS}")
-  endif()
-endif()
-
 mark_as_advanced(
   PARQUET_FOUND
   PARQUET_INCLUDE_DIR



arrow git commit: ARROW-747: [C++] Calling add_dependencies with dl causes spurious CMake warning

2017-04-02 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 651ea9247 -> baf38e47a


ARROW-747: [C++] Calling add_dependencies with dl causes spurious CMake warning

I added an option to make the dependency targets (e.g. external projects) in 
libraries more explicit.

Author: Wes McKinney 

Closes #472 from wesm/ARROW-747 and squashes the following commits:

c60832f [Wes McKinney] Add DEPENDENCIES argument to ADD_ARROW_LIB to fix 
spurious dl dependency issue


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/baf38e47
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/baf38e47
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/baf38e47

Branch: refs/heads/master
Commit: baf38e47a7d73d87017699304dcbe15f297c9284
Parents: 651ea92
Author: Wes McKinney 
Authored: Sun Apr 2 10:04:11 2017 +0200
Committer: Uwe L. Korn 
Committed: Sun Apr 2 10:04:11 2017 +0200

--
 cpp/CMakeLists.txt|  9 +---
 cpp/cmake_modules/BuildUtils.cmake| 18 ++--
 cpp/src/arrow/io/CMakeLists.txt   |  4 +-
 cpp/src/arrow/ipc/CMakeLists.txt  | 74 +-
 cpp/src/arrow/jemalloc/CMakeLists.txt |  5 ++
 5 files changed, 46 insertions(+), 64 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/baf38e47/cpp/CMakeLists.txt
--
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 7a5a0e6..aacc7a1 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -654,13 +654,8 @@ if (ARROW_JEMALLOC)
 
   include_directories(SYSTEM ${JEMALLOC_INCLUDE_DIR})
   ADD_THIRDPARTY_LIB(jemalloc
-  STATIC_LIB ${JEMALLOC_STATIC_LIB}
-  SHARED_LIB ${JEMALLOC_SHARED_LIB})
-
-  if (JEMALLOC_VENDORED)
-add_dependencies(jemalloc_shared jemalloc_ep)
-add_dependencies(jemalloc_static jemalloc_ep)
-  endif()
+STATIC_LIB ${JEMALLOC_STATIC_LIB}
+SHARED_LIB ${JEMALLOC_SHARED_LIB})
 endif()
 
 ## Google PerfTools

http://git-wip-us.apache.org/repos/asf/arrow/blob/baf38e47/cpp/cmake_modules/BuildUtils.cmake
--
diff --git a/cpp/cmake_modules/BuildUtils.cmake 
b/cpp/cmake_modules/BuildUtils.cmake
index 43d9840..3a3b536 100644
--- a/cpp/cmake_modules/BuildUtils.cmake
+++ b/cpp/cmake_modules/BuildUtils.cmake
@@ -85,26 +85,18 @@ endfunction()
 function(ADD_ARROW_LIB LIB_NAME)
   set(options)
   set(one_value_args SHARED_LINK_FLAGS)
-  set(multi_value_args SOURCES STATIC_LINK_LIBS STATIC_PRIVATE_LINK_LIBS 
SHARED_LINK_LIBS SHARED_PRIVATE_LINK_LIBS)
+  set(multi_value_args SOURCES STATIC_LINK_LIBS STATIC_PRIVATE_LINK_LIBS 
SHARED_LINK_LIBS SHARED_PRIVATE_LINK_LIBS DEPENDENCIES)
   cmake_parse_arguments(ARG "${options}" "${one_value_args}" 
"${multi_value_args}" ${ARGN})
   if(ARG_UNPARSED_ARGUMENTS)
 message(SEND_ERROR "Error: unrecognized arguments: 
${ARG_UNPARSED_ARGUMENTS}")
   endif()
 
   add_library(${LIB_NAME}_objlib OBJECT
-  ${ARG_SOURCES}
+${ARG_SOURCES}
   )
-  if (ARG_STATIC_LINK_LIBS)
-add_dependencies(${LIB_NAME}_objlib ${ARG_STATIC_LINK_LIBS})
-  endif()
-  if (ARG_STATIC_PRIVATE_LINK_LIBS)
-add_dependencies(${LIB_NAME}_objlib ${ARG_STATIC_PRIVATE_LINK_LIBS})
-  endif()
-  if (ARG_SHARED_LINK_LIBS)
-add_dependencies(${LIB_NAME}_objlib ${ARG_SHARED_LINK_LIBS})
-  endif()
-  if(ARG_SHARED_PRIVATE_LINK_LIBS)
-add_dependencies(${LIB_NAME}_objlib ${ARG_SHARED_PRIVATE_LINK_LIBS})
+
+  if (ARG_DEPENDENCIES)
+add_dependencies(${LIB_NAME}_objlib ${ARG_DEPENDENCIES})
   endif()
 
   # Necessary to make static linking into other shared libraries work properly

http://git-wip-us.apache.org/repos/asf/arrow/blob/baf38e47/cpp/src/arrow/io/CMakeLists.txt
--
diff --git a/cpp/src/arrow/io/CMakeLists.txt b/cpp/src/arrow/io/CMakeLists.txt
index 8aabf64..3951eac 100644
--- a/cpp/src/arrow/io/CMakeLists.txt
+++ b/cpp/src/arrow/io/CMakeLists.txt
@@ -48,11 +48,11 @@ if (MSVC)
 else()
   set(ARROW_IO_STATIC_LINK_LIBS
 arrow_static
-dl
+${CMAKE_DL_LIBS}
   )
   set(ARROW_IO_SHARED_LINK_LIBS
 arrow_shared
-dl
+${CMAKE_DL_LIBS}
   )
 endif()
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/baf38e47/cpp/src/arrow/ipc/CMakeLists.txt
--
diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt
index 31a04df..5fa7d61 100644
--- a/cpp/src/arrow/ipc/CMakeLists.txt
+++ b/cpp/src/arrow/ipc/CMakeLists.txt
@@ -26,7 +26,8 @@ set(ARROW_IPC_SHARED_LINK_LIBS
 
 set(ARROW_IPC_TEST_LINK_LIBS
   arrow_ipc_static
-  arrow_io_static)
+  arrow_io_static
+  arrow_static)
 
 set(ARROW_IPC_SRCS
  feather.cc

arrow git commit: ARROW-749: [Python] Delete partially-written Feather file when column write fails

2017-04-02 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 8f113b4d0 -> 96f3d6176


ARROW-749: [Python] Delete partially-written Feather file when column write 
fails

This is currently the only place where we are doing an atomic 
create-file/write-file. We should be mindful of other serialization functions 
which may yield unreadable files in the future.

Author: Wes McKinney 

Closes #484 from wesm/ARROW-749 and squashes the following commits:

137e235 [Wes McKinney] Delete partially-written Feather file when column write 
fails


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/96f3d617
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/96f3d617
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/96f3d617

Branch: refs/heads/master
Commit: 96f3d6176d8c95717f4ff45e4226161de3168b05
Parents: 8f113b4
Author: Wes McKinney 
Authored: Mon Apr 3 08:43:47 2017 +0200
Committer: Uwe L. Korn 
Committed: Mon Apr 3 08:43:47 2017 +0200

--
 python/pyarrow/feather.py| 79 ---
 python/pyarrow/tests/test_feather.py | 16 +++
 2 files changed, 67 insertions(+), 28 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/96f3d617/python/pyarrow/feather.py
--
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index f87c7f3..3b5716e 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -15,8 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
-import six
 from distutils.version import LooseVersion
+import os
+
+import six
 import pandas as pd
 
 from pyarrow.compat import pdapi
@@ -54,45 +56,66 @@ class FeatherReader(ext.FeatherReader):
 return table.to_pandas()
 
 
-def write_feather(df, dest):
-'''
-Write a pandas.DataFrame to Feather format
-'''
-writer = ext.FeatherWriter()
-writer.open(dest)
+class FeatherWriter(object):
 
-if isinstance(df, pd.SparseDataFrame):
-df = df.to_dense()
+def __init__(self, dest):
+self.dest = dest
+self.writer = ext.FeatherWriter()
+self.writer.open(dest)
 
-if not df.columns.is_unique:
-raise ValueError("cannot serialize duplicate column names")
+def write(self, df):
+if isinstance(df, pd.SparseDataFrame):
+df = df.to_dense()
 
-# TODO(wesm): pipeline conversion to Arrow memory layout
-for i, name in enumerate(df.columns):
-col = df.iloc[:, i]
+if not df.columns.is_unique:
+raise ValueError("cannot serialize duplicate column names")
 
-if pdapi.is_object_dtype(col):
-inferred_type = pd.lib.infer_dtype(col)
-msg = ("cannot serialize column {n} "
-   "named {name} with dtype {dtype}".format(
-   n=i, name=name, dtype=inferred_type))
+# TODO(wesm): pipeline conversion to Arrow memory layout
+for i, name in enumerate(df.columns):
+col = df.iloc[:, i]
 
-if inferred_type in ['mixed']:
+if pdapi.is_object_dtype(col):
+inferred_type = pd.lib.infer_dtype(col)
+msg = ("cannot serialize column {n} "
+   "named {name} with dtype {dtype}".format(
+   n=i, name=name, dtype=inferred_type))
 
-# allow columns with nulls + an inferable type
-inferred_type = pd.lib.infer_dtype(col[col.notnull()])
 if inferred_type in ['mixed']:
+
+# allow columns with nulls + an inferable type
+inferred_type = pd.lib.infer_dtype(col[col.notnull()])
+if inferred_type in ['mixed']:
+raise ValueError(msg)
+
+elif inferred_type not in ['unicode', 'string']:
 raise ValueError(msg)
 
-elif inferred_type not in ['unicode', 'string']:
-raise ValueError(msg)
+if not isinstance(name, six.string_types):
+name = str(name)
 
-if not isinstance(name, six.string_types):
-name = str(name)
+self.writer.write_array(name, col)
 
-writer.write_array(name, col)
+self.writer.close()
 
-writer.close()
+
+def write_feather(df, dest):
+'''
+Write a pandas.DataFrame to Feather format
+'''
+writer = FeatherWriter(dest)
+try:
+writer.write(df)
+except:
+# Try to make sure the resource is closed
+import gc
+ 

arrow git commit: ARROW-656: [C++] Add random access writer for a mutable buffer. Rename WriteableFileInterface to WriteableFile for better consistency

2017-04-03 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master d0cd03d78 -> d560e3077


ARROW-656: [C++] Add random access writer for a mutable buffer. Rename 
WriteableFileInterface to WriteableFile for better consistency

Author: Wes McKinney 

Closes #486 from wesm/ARROW-656 and squashes the following commits:

be0d4bc [Wes McKinney] Fix glib after renaming class
042f533 [Wes McKinney] Add random access writer for a mutable buffer. Rename 
WriteableFileInterface to WriteableFile for better consistency


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/d560e307
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/d560e307
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/d560e307

Branch: refs/heads/master
Commit: d560e307749a2397810962db1a5af4fb65675f17
Parents: d0cd03d
Author: Wes McKinney 
Authored: Tue Apr 4 08:40:40 2017 +0200
Committer: Uwe L. Korn 
Committed: Tue Apr 4 08:40:40 2017 +0200

--
 c_glib/arrow-glib/io-memory-mapped-file.cpp |  2 +-
 c_glib/arrow-glib/io-writeable-file.cpp |  2 +-
 c_glib/arrow-glib/io-writeable-file.h   |  2 +-
 c_glib/arrow-glib/io-writeable-file.hpp |  8 ++---
 cpp/src/arrow/io/interfaces.h   |  6 ++--
 cpp/src/arrow/io/io-memory-test.cc  | 27 ++
 cpp/src/arrow/io/memory.cc  | 45 
 cpp/src/arrow/io/memory.h   | 23 
 python/pyarrow/includes/libarrow.pxd|  4 +--
 9 files changed, 107 insertions(+), 12 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/d560e307/c_glib/arrow-glib/io-memory-mapped-file.cpp
--
diff --git a/c_glib/arrow-glib/io-memory-mapped-file.cpp 
b/c_glib/arrow-glib/io-memory-mapped-file.cpp
index 12c9a6c..e2e255c 100644
--- a/c_glib/arrow-glib/io-memory-mapped-file.cpp
+++ b/c_glib/arrow-glib/io-memory-mapped-file.cpp
@@ -127,7 +127,7 @@ 
garrow_io_writeable_interface_init(GArrowIOWriteableInterface *iface)
   iface->get_raw = garrow_io_memory_mapped_file_get_raw_writeable_interface;
 }
 
-static std::shared_ptr
+static std::shared_ptr
 
garrow_io_memory_mapped_file_get_raw_writeable_file_interface(GArrowIOWriteableFile
 *file)
 {
   auto memory_mapped_file = GARROW_IO_MEMORY_MAPPED_FILE(file);

http://git-wip-us.apache.org/repos/asf/arrow/blob/d560e307/c_glib/arrow-glib/io-writeable-file.cpp
--
diff --git a/c_glib/arrow-glib/io-writeable-file.cpp 
b/c_glib/arrow-glib/io-writeable-file.cpp
index 3de42dd..41b682a 100644
--- a/c_glib/arrow-glib/io-writeable-file.cpp
+++ b/c_glib/arrow-glib/io-writeable-file.cpp
@@ -76,7 +76,7 @@ garrow_io_writeable_file_write_at(GArrowIOWriteableFile 
*writeable_file,
 
 G_END_DECLS
 
-std::shared_ptr
+std::shared_ptr
 garrow_io_writeable_file_get_raw(GArrowIOWriteableFile *writeable_file)
 {
   auto *iface = GARROW_IO_WRITEABLE_FILE_GET_IFACE(writeable_file);

http://git-wip-us.apache.org/repos/asf/arrow/blob/d560e307/c_glib/arrow-glib/io-writeable-file.h
--
diff --git a/c_glib/arrow-glib/io-writeable-file.h 
b/c_glib/arrow-glib/io-writeable-file.h
index 4a4dee5..d1ebdbe 100644
--- a/c_glib/arrow-glib/io-writeable-file.h
+++ b/c_glib/arrow-glib/io-writeable-file.h
@@ -28,7 +28,7 @@ G_BEGIN_DECLS
 #define GARROW_IO_WRITEABLE_FILE(obj)   \
   (G_TYPE_CHECK_INSTANCE_CAST((obj),\
   GARROW_IO_TYPE_WRITEABLE_FILE,\
-  GArrowIOWriteableFileInterface))
+  GArrowIOWriteableFile))
 #define GARROW_IO_IS_WRITEABLE_FILE(obj)\
   (G_TYPE_CHECK_INSTANCE_TYPE((obj),\
   GARROW_IO_TYPE_WRITEABLE_FILE))

http://git-wip-us.apache.org/repos/asf/arrow/blob/d560e307/c_glib/arrow-glib/io-writeable-file.hpp
--
diff --git a/c_glib/arrow-glib/io-writeable-file.hpp 
b/c_glib/arrow-glib/io-writeable-file.hpp
index 2043007..aba95b2 100644
--- a/c_glib/arrow-glib/io-writeable-file.hpp
+++ b/c_glib/arrow-glib/io-writeable-file.hpp
@@ -24,15 +24,15 @@
 #include 
 
 /**
- * GArrowIOWriteableFileInterface:
+ * GArrowIOWriteableFile:
  *
- * It wraps `arrow::io::WriteableFileInterface`.
+ * It wraps `arrow::io::WriteableFile`.
  */
 struct _GArrowIOWriteableFileInterface
 {
   GTypeInterface parent_iface;
 
-  std::shared_ptr 
(*get_raw)(GArrowIOWriteableFile *file);
+  std::shared_ptr (*get_raw)(GArrowIOWriteableFile 
*file);
 };
 
-std::shared_ptr 
garrow_io_writeable_file_get_raw(GArrowIOWriteableFile *writeable_file)

arrow git commit: ARROW-769: [GLib] Support building without installed Arrow C++

2017-04-04 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master d560e3077 -> ec6188efc


ARROW-769: [GLib] Support building without installed Arrow C++

Building Arrow GLib no longer requires an installed ("make install"-ed)
Arrow C++; a built ("make"-ed) Arrow C++ tree is sufficient.

This is useful for building packages.

Author: Kouhei Sutou 

Closes #490 from kou/glib-support-build-without-installed-arrow-cpp and 
squashes the following commits:

352999b [Kouhei Sutou] [GLib] Support building without installed Arrow C++


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/ec6188ef
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/ec6188ef
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/ec6188ef

Branch: refs/heads/master
Commit: ec6188efcc884e46481fe986605e3cbfc33c7e07
Parents: d560e30
Author: Kouhei Sutou 
Authored: Tue Apr 4 18:24:07 2017 +0200
Committer: Uwe L. Korn 
Committed: Tue Apr 4 18:24:07 2017 +0200

--
 c_glib/configure.ac | 39 ---
 1 file changed, 36 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/ec6188ef/c_glib/configure.ac
--
diff --git a/c_glib/configure.ac b/c_glib/configure.ac
index c691343..fc24c1b 100644
--- a/c_glib/configure.ac
+++ b/c_glib/configure.ac
@@ -61,9 +61,42 @@ AM_PATH_GLIB_2_0([2.32.4], [], [], [gobject])
 GOBJECT_INTROSPECTION_REQUIRE([1.32.1])
 GTK_DOC_CHECK([1.18-2])
 
-PKG_CHECK_MODULES([ARROW], [arrow])
-PKG_CHECK_MODULES([ARROW_IO], [arrow-io])
-PKG_CHECK_MODULES([ARROW_IPC], [arrow-ipc])
+AC_ARG_WITH(arrow-cpp-build-type,
+  [AS_HELP_STRING([--with-arrow-cpp-build-type=TYPE],
+  [-DCMAKE_BUILD_TYPE option value for Arrow C++ 
(default=Release)])],
+  [GARROW_ARROW_CPP_BUILD_TYPE="$withval"],
+  [GARROW_ARROW_CPP_BUILD_TYPE="Release"])
+
+AC_ARG_WITH(arrow-cpp-build-dir,
+  [AS_HELP_STRING([--with-arrow-cpp-build-dir=PATH],
+  [Use this option to build with not installed Arrow C++])],
+  [GARROW_ARROW_CPP_BUILD_DIR="$withval"],
+  [GARROW_ARROW_CPP_BUILD_DIR=""])
+if test "x$GARROW_ARROW_CPP_BUILD_DIR" = "x"; then
+  PKG_CHECK_MODULES([ARROW], [arrow])
+  PKG_CHECK_MODULES([ARROW_IO], [arrow-io])
+  PKG_CHECK_MODULES([ARROW_IPC], [arrow-ipc])
+else
+  ARROW_INCLUDE_DIR="\$(abs_top_srcdir)/../cpp/src"
+  ARROW_LIB_DIR="${GARROW_ARROW_CPP_BUILD_DIR}/${GARROW_ARROW_CPP_BUILD_TYPE}"
+
+  ARROW_CFLAGS="-I${ARROW_INCLUDE_DIR}"
+  ARROW_IO_CFLAGS="-I${ARROW_INCLUDE_DIR}"
+  ARROW_IPC_CFLAGS="-I${ARROW_INCLUDE_DIR}"
+  ARROW_LIBS="-L${ARROW_LIB_DIR} -larrow"
+  ARROW_IO_LIBS="-L${ARROW_LIB_DIR} -larrow_io"
+  ARROW_IPC_LIBS="-L${ARROW_LIB_DIR} -larrow_ipc"
+
+  AC_SUBST(ARROW_LIB_DIR)
+
+  AC_SUBST(ARROW_CFLAGS)
+  AC_SUBST(ARROW_IO_CFLAGS)
+  AC_SUBST(ARROW_IPC_CFLAGS)
+  AC_SUBST(ARROW_LIBS)
+  AC_SUBST(ARROW_IO_LIBS)
+  AC_SUBST(ARROW_IPC_LIBS)
+fi
+
 
 AC_CONFIG_FILES([
   Makefile



arrow git commit: ARROW-770: [C++] Move .clang* files back into cpp source tree

2017-04-04 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 2aed7845f -> 5d6c6ad6a


ARROW-770: [C++] Move .clang* files back into cpp source tree

After ARROW-341, we don't need these files at the top level anymore to get 
clang-format to work on all of our C++ code

Author: Wes McKinney 

Closes #491 from wesm/ARROW-770 and squashes the following commits:

1588a4f [Wes McKinney] Move .clang* files back into cpp source tree


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/5d6c6ad6
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/5d6c6ad6
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/5d6c6ad6

Branch: refs/heads/master
Commit: 5d6c6ad6a81be6194a4f8349a369a94ef927e18b
Parents: 2aed784
Author: Wes McKinney 
Authored: Tue Apr 4 21:57:13 2017 +0200
Committer: Uwe L. Korn 
Committed: Tue Apr 4 21:57:13 2017 +0200

--
 .clang-format  | 65 -
 .clang-tidy| 14 --
 .clang-tidy-ignore |  2 --
 cpp/.clang-format  | 65 +
 cpp/.clang-tidy| 14 ++
 cpp/.clang-tidy-ignore |  2 ++
 6 files changed, 81 insertions(+), 81 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/5d6c6ad6/.clang-format
--
diff --git a/.clang-format b/.clang-format
deleted file mode 100644
index 7d5b3cf..000
--- a/.clang-format
+++ /dev/null
@@ -1,65 +0,0 @@

-Language:Cpp
-# BasedOnStyle:  Google
-AccessModifierOffset: -1
-AlignAfterOpenBracket: false 
-AlignConsecutiveAssignments: false
-AlignEscapedNewlinesLeft: true
-AlignOperands:   true
-AlignTrailingComments: true
-AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: true 
-AllowShortCaseLabelsOnASingleLine: false
-AllowShortFunctionsOnASingleLine: Inline
-AllowShortIfStatementsOnASingleLine: true
-AllowShortLoopsOnASingleLine: false 
-AlwaysBreakAfterDefinitionReturnType: None
-AlwaysBreakBeforeMultilineStrings: true
-AlwaysBreakTemplateDeclarations: true
-BinPackArguments: true
-BinPackParameters: true 
-BreakBeforeBinaryOperators: None
-BreakBeforeBraces: Attach
-BreakBeforeTernaryOperators: true
-BreakConstructorInitializersBeforeComma: false
-ColumnLimit: 90 
-CommentPragmas:  '^ IWYU pragma:'
-ConstructorInitializerAllOnOneLineOrOnePerLine: true
-ConstructorInitializerIndentWidth: 4
-ContinuationIndentWidth: 4
-Cpp11BracedListStyle: true
-DerivePointerAlignment: false 
-DisableFormat:   false
-ExperimentalAutoDetectBinPacking: false
-ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]
-IndentCaseLabels: true
-IndentWidth: 2
-IndentWrappedFunctionNames: false
-KeepEmptyLinesAtTheStartOfBlocks: false
-MacroBlockBegin: ''
-MacroBlockEnd:   ''
-MaxEmptyLinesToKeep: 1
-NamespaceIndentation: None
-ObjCBlockIndentWidth: 2
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: false
-PenaltyBreakBeforeFirstCallParameter: 1000
-PenaltyBreakComment: 300
-PenaltyBreakFirstLessLess: 120
-PenaltyBreakString: 1000
-PenaltyExcessCharacter: 100
-PenaltyReturnTypeOnItsOwnLine: 200
-PointerAlignment: Left
-SpaceAfterCStyleCast: false
-SpaceBeforeAssignmentOperators: true
-SpaceBeforeParens: ControlStatements
-SpaceInEmptyParentheses: false
-SpacesBeforeTrailingComments: 2
-SpacesInAngles:  false
-SpacesInContainerLiterals: true
-SpacesInCStyleCastParentheses: false
-SpacesInParentheses: false
-SpacesInSquareBrackets: false
-Standard:Cpp11
-TabWidth:8
-UseTab:  Never

http://git-wip-us.apache.org/repos/asf/arrow/blob/5d6c6ad6/.clang-tidy
--
diff --git a/.clang-tidy b/.clang-tidy
deleted file mode 100644
index deaa9bd..000
--- a/.clang-tidy
+++ /dev/null
@@ -1,14 +0,0 @@

-Checks:  
'clang-diagnostic-*,clang-analyzer-*,-clang-analyzer-alpha*,google-.*,modernize-.*,readablity-.*'
-HeaderFilterRegex: 'arrow/.*'
-AnalyzeTemporaryDtors: true
-CheckOptions:
-  - key: 
google-readability-braces-around-statements.ShortStatementLines
-value:   '1'
-  - key: google-readability-function-size.StatementThreshold
-value:   '800'
-  - key: google-readability-namespace-comments.ShortNamespaceLines
-value:   '10'
-  - key: google-readability-namespace-comments.SpacesBeforeComments
-value:   '2'
-

http://git-wip-us.apache.org/repos/asf/arrow/blob/5d6c6ad6/.clang-tidy-ignore
--
diff --git a/.clang-tidy-ignore b/.clang-tidy-ignore
deleted file mode 100644

arrow git commit: ARROW-776: [GLib] Fix wrong type name

2017-04-06 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 56f1e91d2 -> 58fa4c2fc


ARROW-776: [GLib] Fix wrong type name

Author: Kouhei Sutou 

Closes #499 from kou/glib-fix-wrong-type-name and squashes the following 
commits:

105f2f2 [Kouhei Sutou] [GLib] Fix wrong type name


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/58fa4c2f
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/58fa4c2f
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/58fa4c2f

Branch: refs/heads/master
Commit: 58fa4c2fcc75f763a89b44eeedafade771d342e8
Parents: 56f1e91
Author: Kouhei Sutou 
Authored: Thu Apr 6 20:36:29 2017 +0200
Committer: Uwe L. Korn 
Committed: Thu Apr 6 20:36:29 2017 +0200

--
 c_glib/arrow-glib/io-file.h   | 2 +-
 c_glib/arrow-glib/io-input-stream.h   | 2 +-
 c_glib/arrow-glib/io-output-stream.h  | 2 +-
 c_glib/arrow-glib/io-random-access-file.h | 2 +-
 c_glib/arrow-glib/io-readable.h   | 2 +-
 c_glib/arrow-glib/io-writeable.h  | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/58fa4c2f/c_glib/arrow-glib/io-file.h
--
diff --git a/c_glib/arrow-glib/io-file.h b/c_glib/arrow-glib/io-file.h
index 9fa0ec1..7181f6d 100644
--- a/c_glib/arrow-glib/io-file.h
+++ b/c_glib/arrow-glib/io-file.h
@@ -28,7 +28,7 @@ G_BEGIN_DECLS
 #define GARROW_IO_FILE(obj) \
   (G_TYPE_CHECK_INSTANCE_CAST((obj),\
   GARROW_IO_TYPE_FILE,  \
-  GArrowIOFileInterface))
+  GArrowIOFile))
 #define GARROW_IO_IS_FILE(obj)  \
   (G_TYPE_CHECK_INSTANCE_TYPE((obj),\
   GARROW_IO_TYPE_FILE))

http://git-wip-us.apache.org/repos/asf/arrow/blob/58fa4c2f/c_glib/arrow-glib/io-input-stream.h
--
diff --git a/c_glib/arrow-glib/io-input-stream.h 
b/c_glib/arrow-glib/io-input-stream.h
index a7f0681..5790209 100644
--- a/c_glib/arrow-glib/io-input-stream.h
+++ b/c_glib/arrow-glib/io-input-stream.h
@@ -28,7 +28,7 @@ G_BEGIN_DECLS
 #define GARROW_IO_INPUT_STREAM(obj) \
   (G_TYPE_CHECK_INSTANCE_CAST((obj),\
   GARROW_IO_TYPE_INPUT_STREAM,  \
-  GArrowIOInputStreamInterface))
+  GArrowIOInputStream))
 #define GARROW_IO_IS_INPUT_STREAM(obj)  \
   (G_TYPE_CHECK_INSTANCE_TYPE((obj),\
   GARROW_IO_TYPE_INPUT_STREAM))

http://git-wip-us.apache.org/repos/asf/arrow/blob/58fa4c2f/c_glib/arrow-glib/io-output-stream.h
--
diff --git a/c_glib/arrow-glib/io-output-stream.h 
b/c_glib/arrow-glib/io-output-stream.h
index c4079d5..02478ce 100644
--- a/c_glib/arrow-glib/io-output-stream.h
+++ b/c_glib/arrow-glib/io-output-stream.h
@@ -28,7 +28,7 @@ G_BEGIN_DECLS
 #define GARROW_IO_OUTPUT_STREAM(obj)\
   (G_TYPE_CHECK_INSTANCE_CAST((obj),\
   GARROW_IO_TYPE_OUTPUT_STREAM, \
-  GArrowIOOutputStreamInterface))
+  GArrowIOOutputStream))
 #define GARROW_IO_IS_OUTPUT_STREAM(obj) \
   (G_TYPE_CHECK_INSTANCE_TYPE((obj),\
   GARROW_IO_TYPE_OUTPUT_STREAM))

http://git-wip-us.apache.org/repos/asf/arrow/blob/58fa4c2f/c_glib/arrow-glib/io-random-access-file.h
--
diff --git a/c_glib/arrow-glib/io-random-access-file.h 
b/c_glib/arrow-glib/io-random-access-file.h
index e980ab2..8ac63e4 100644
--- a/c_glib/arrow-glib/io-random-access-file.h
+++ b/c_glib/arrow-glib/io-random-access-file.h
@@ -28,7 +28,7 @@ G_BEGIN_DECLS
 #define GARROW_IO_RANDOM_ACCESS_FILE(obj)\
   (G_TYPE_CHECK_INSTANCE_CAST((obj), \
   GARROW_IO_TYPE_RANDOM_ACCESS_FILE, \
-  GArrowIORandomAccessFileInterface))
+  GArrowIORandomAccessFile))
 #define GARROW_IO_IS_RANDOM_ACCESS_FILE(obj)\
   (G_TYPE_CHECK_INSTANCE_TYPE((obj),\
   GARROW_IO_TYPE_RANDOM_ACCESS_FILE))

http://git-wip-us.apache.org/repos/asf/arrow/blob/58fa4c2f/c_glib/arrow-glib/io-readabl

arrow git commit: ARROW-797: [Python] Make more explicitly curated public API page, sphinx cleanup

2017-04-13 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 3d9bfc2ae -> e93436503


ARROW-797: [Python] Make more explicitly curated public API page, sphinx cleanup

Author: Wes McKinney 

Closes #535 from wesm/ARROW-797 and squashes the following commits:

bc344a8 [Wes McKinney] rat warning
fb1d916 [Wes McKinney] build_sphinx target needs extra options
00c6a03 [Wes McKinney] Remove sphinxext until it's actually needed. Add some 
ASF license headers
60d6ab6 [Wes McKinney] Update gitignore
2b9f3f9 [Wes McKinney] Add _static stub
80e4a4b [Wes McKinney] Remove unused options
b662b85 [Wes McKinney] Remove unused options
30ebd05 [Wes McKinney] Cleaning, explicit API index
83e31d5 [Wes McKinney] Initial API doc
d7f4ed7 [Wes McKinney] Add NumPy extensions from pandas


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/e9343650
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/e9343650
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/e9343650

Branch: refs/heads/master
Commit: e9343650355b1820562bfa85d370cac2070b7c92
Parents: 3d9bfc2
Author: Wes McKinney 
Authored: Thu Apr 13 12:46:58 2017 +0200
Committer: Uwe L. Korn 
Committed: Thu Apr 13 12:46:58 2017 +0200

--
 ci/travis_script_python.sh |   2 +-
 python/cmake_modules/UseCython.cmake   |   5 +-
 python/doc/.gitignore  |  22 +-
 python/doc/Makefile|   4 +-
 python/doc/conf.py | 377 
 python/doc/filesystems.rst |  58 -
 python/doc/getting_involved.rst|  37 ---
 python/doc/index.rst   |  48 
 python/doc/install.rst | 152 ---
 python/doc/jemalloc.rst|  52 
 python/doc/pandas.rst  | 119 -
 python/doc/parquet.rst |  91 ---
 python/doc/source/_static/stub |  18 ++
 python/doc/source/api.rst  | 153 +++
 python/doc/source/conf.py  | 375 +++
 python/doc/source/filesystems.rst  |  58 +
 python/doc/source/getting_involved.rst |  37 +++
 python/doc/source/index.rst|  48 
 python/doc/source/install.rst  | 152 +++
 python/doc/source/jemalloc.rst |  52 
 python/doc/source/pandas.rst   | 119 +
 python/doc/source/parquet.rst  |  91 +++
 22 files changed, 1128 insertions(+), 942 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/ci/travis_script_python.sh
--
diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh
index 604cd13..680eb01 100755
--- a/ci/travis_script_python.sh
+++ b/ci/travis_script_python.sh
@@ -123,7 +123,7 @@ python_version_tests() {
   if [[ "$PYTHON_VERSION" == "3.6" ]]
   then
   pip install -r doc/requirements.txt
-  python setup.py build_sphinx
+  python setup.py build_sphinx -s doc/source
   fi
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/cmake_modules/UseCython.cmake
--
diff --git a/python/cmake_modules/UseCython.cmake 
b/python/cmake_modules/UseCython.cmake
index cee6066..7c06b02 100644
--- a/python/cmake_modules/UseCython.cmake
+++ b/python/cmake_modules/UseCython.cmake
@@ -64,7 +64,7 @@ set( CYTHON_NO_DOCSTRINGS OFF
   CACHE BOOL "Strip docstrings from the compiled module." )
 set( CYTHON_FLAGS "" CACHE STRING
   "Extra flags to the cython compiler." )
-mark_as_advanced( CYTHON_ANNOTATE CYTHON_NO_DOCSTRINGS CYTHON_FLAGS )
+mark_as_advanced( CYTHON_ANNOTATE CYTHON_NO_DOCSTRINGS CYTHON_FLAGS)
 
 find_package( Cython REQUIRED )
 find_package( PythonLibsNew REQUIRED )
@@ -131,7 +131,8 @@ function( compile_pyx _name pyx_target_name generated_files 
pyx_file)
   # Add the command to run the compiler.
   add_custom_target(${pyx_target_name}
 COMMAND ${CYTHON_EXECUTABLE} ${cxx_arg} ${include_directory_arg}
-${annotate_arg} ${no_docstrings_arg} ${cython_debug_arg} ${CYTHON_FLAGS}
+${annotate_arg} ${no_docstrings_arg} ${cython_debug_arg}
+${CYTHON_FLAGS}
 --output-file "${_name}.${extension}" ${pyx_location}
 DEPENDS ${pyx_location}
 # do not specify byproducts for now since they don't work with the older

http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/.gitignore
--
diff --git a/python/doc/.gitignore b/python/doc/.gitignore
index 87d0413..3bee39f 100644
--- a/python/doc/.gitignore
+++ b/python/doc/.gitignore
@@ -1,3 +1,19 @@
-# auto-generated module documentation
-pyarrow*.rst
-module

[4/4] arrow git commit: ARROW-751: [Python] Make all Cython modules private. Some code tidying

2017-04-13 Thread uwe
ARROW-751: [Python] Make all Cython modules private. Some code tidying

I also combined schema/array/scalar, as they are all interrelated.

Author: Wes McKinney 

Closes #533 from wesm/ARROW-751 and squashes the following commits:

63b479b [Wes McKinney] jemalloc is now private
0f46116 [Wes McKinney] Fix APIs in Parquet
1074e7c [Wes McKinney] Make all Cython modules private. Code cleaning


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/8b64a4fb
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/8b64a4fb
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/8b64a4fb

Branch: refs/heads/master
Commit: 8b64a4fb2d3973813e2094e108021606034d27f4
Parents: e934365
Author: Wes McKinney 
Authored: Thu Apr 13 12:51:47 2017 +0200
Committer: Uwe L. Korn 
Committed: Thu Apr 13 12:51:47 2017 +0200

--
 ci/travis_script_python.sh   |2 +-
 python/CMakeLists.txt|   16 +-
 python/pyarrow/__init__.py   |   84 +-
 python/pyarrow/_array.pxd|  233 +
 python/pyarrow/_array.pyx| 1368 +
 python/pyarrow/_config.pyx   |   54 ++
 python/pyarrow/_error.pxd|   20 +
 python/pyarrow/_error.pyx|   70 ++
 python/pyarrow/_io.pxd   |   50 ++
 python/pyarrow/_io.pyx   | 1273 +++
 python/pyarrow/_jemalloc.pyx |   28 +
 python/pyarrow/_memory.pxd   |   30 +
 python/pyarrow/_memory.pyx   |   52 ++
 python/pyarrow/_parquet.pyx  |   16 +-
 python/pyarrow/_table.pxd|   62 ++
 python/pyarrow/_table.pyx|  913 +++
 python/pyarrow/array.pxd |  141 ---
 python/pyarrow/array.pyx |  646 --
 python/pyarrow/config.pyx|   54 --
 python/pyarrow/error.pxd |   20 -
 python/pyarrow/error.pyx |   70 --
 python/pyarrow/feather.py|6 +-
 python/pyarrow/filesystem.py |2 +-
 python/pyarrow/formatting.py |4 +-
 python/pyarrow/includes/libarrow.pxd |5 +-
 python/pyarrow/io.pxd|   50 --
 python/pyarrow/io.pyx| 1276 ---
 python/pyarrow/ipc.py|   10 +-
 python/pyarrow/jemalloc.pyx  |   28 -
 python/pyarrow/memory.pxd|   30 -
 python/pyarrow/memory.pyx|   52 --
 python/pyarrow/parquet.py|4 +-
 python/pyarrow/scalar.pxd|   72 --
 python/pyarrow/scalar.pyx|  315 ---
 python/pyarrow/schema.pxd|   76 --
 python/pyarrow/schema.pyx|  477 --
 python/pyarrow/table.pxd |   63 --
 python/pyarrow/table.pyx |  915 ---
 python/pyarrow/tests/test_feather.py |2 +-
 python/pyarrow/tests/test_hdfs.py|8 +-
 python/pyarrow/tests/test_io.py  |   31 +-
 python/pyarrow/tests/test_parquet.py |5 +-
 python/pyarrow/tests/test_schema.py  |8 +-
 python/setup.py  |   18 +-
 44 files changed, 4255 insertions(+), 4404 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/8b64a4fb/ci/travis_script_python.sh
--
diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh
index 680eb01..549fe11 100755
--- a/ci/travis_script_python.sh
+++ b/ci/travis_script_python.sh
@@ -115,7 +115,7 @@ python_version_tests() {
   python setup.py build_ext --inplace --with-parquet --with-jemalloc
 
   python -c "import pyarrow.parquet"
-  python -c "import pyarrow.jemalloc"
+  python -c "import pyarrow._jemalloc"
 
   python -m pytest -vv -r sxX pyarrow
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/8b64a4fb/python/CMakeLists.txt
--
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 3e86521..36052bc 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -261,14 +261,12 @@ if (UNIX)
 endif()
 
 set(CYTHON_EXTENSIONS
-  array
-  config
-  error
-  io
-  memory
-  scalar
-  schema
-  table
+  _array
+  _config
+  _error
+  _io
+  _memory
+  _table
 )
 
 set(LINK_LIBS
@@ -313,7 +311,7 @@ if (PYARROW_BUILD_JEMALLOC)
 arrow_jemalloc_shared)
   set(CYTHON_EXTENSIONS
 ${CYTHON_EXTENSIONS}
-jemalloc)
+_jemalloc)
 endif()
 
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/8b64a4fb/python/pyarrow/__init__.py
--
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index df615b4..66bde49 100644
--- a/python/pyarrow/__init__.py
++

[2/4] arrow git commit: ARROW-751: [Python] Make all Cython modules private. Some code tidying

2017-04-13 Thread uwe
http://git-wip-us.apache.org/repos/asf/arrow/blob/8b64a4fb/python/pyarrow/array.pyx
--
diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx
deleted file mode 100644
index 1c4253e..000
--- a/python/pyarrow/array.pyx
+++ /dev/null
@@ -1,646 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-
-from cython.operator cimport dereference as deref
-
-import numpy as np
-
-from pyarrow.includes.libarrow cimport *
-from pyarrow.includes.common cimport PyObject_to_object
-cimport pyarrow.includes.pyarrow as pyarrow
-
-import pyarrow.config
-
-from pyarrow.compat import frombytes, tobytes, PandasSeries, Categorical
-from pyarrow.error cimport check_status
-from pyarrow.memory cimport MemoryPool, maybe_unbox_memory_pool
-
-cimport pyarrow.scalar as scalar
-from pyarrow.scalar import NA
-
-from pyarrow.schema cimport (DataType, Field, Schema, DictionaryType,
- FixedSizeBinaryType,
- box_data_type)
-import pyarrow.schema as schema
-
-cimport cpython
-
-
-cdef maybe_coerce_datetime64(values, dtype, DataType type,
- timestamps_to_ms=False):
-
-from pyarrow.compat import DatetimeTZDtype
-
-if values.dtype.type != np.datetime64:
-return values, type
-
-coerce_ms = timestamps_to_ms and values.dtype != 'datetime64[ms]'
-
-if coerce_ms:
-values = values.astype('datetime64[ms]')
-
-if isinstance(dtype, DatetimeTZDtype):
-tz = dtype.tz
-unit = 'ms' if coerce_ms else dtype.unit
-type = schema.timestamp(unit, tz)
-elif type is None:
-# Trust the NumPy dtype
-type = schema.type_from_numpy_dtype(values.dtype)
-
-return values, type
-
-
-cdef class Array:
-
-cdef init(self, const shared_ptr[CArray]& sp_array):
-self.sp_array = sp_array
-self.ap = sp_array.get()
-self.type = box_data_type(self.sp_array.get().type())
-
-@staticmethod
-def from_numpy(obj, mask=None, DataType type=None,
-   timestamps_to_ms=False,
-   MemoryPool memory_pool=None):
-"""
-Convert pandas.Series to an Arrow Array.
-
-Parameters
---
-series : pandas.Series or numpy.ndarray
-
-mask : pandas.Series or numpy.ndarray, optional
-boolean mask if the object is valid or null
-
-type : pyarrow.DataType
-Explicit type to attempt to coerce to
-
-timestamps_to_ms : bool, optional
-Convert datetime columns to ms resolution. This is needed for
-compatibility with other functionality like Parquet I/O which
-only supports milliseconds.
-
-memory_pool: MemoryPool, optional
-Specific memory pool to use to allocate the resulting Arrow array.
-
-Notes
--
-Localized timestamps will currently be returned as UTC (pandas's native
-representation).  Timezone-naive data will be implicitly interpreted as
-UTC.
-
-Examples
-
-
->>> import pandas as pd
->>> import pyarrow as pa
->>> pa.Array.from_numpy(pd.Series([1, 2]))
-
-[
-  1,
-  2
-]
-
->>> import numpy as np
->>> pa.Array.from_numpy(pd.Series([1, 2]), np.array([0, 1],
-... dtype=bool))
-
-[
-  1,
-  NA
-]
-
-Returns
----
-pyarrow.array.Array
-"""
-cdef:
-shared_ptr[CArray] out
-shared_ptr[CDataType] c_type
-CMemoryPool* pool
-
-if mask is not None:
-mask = get_series_values(mask)
-
-values = get_series_values(obj)
-pool = maybe_unbox_memory_pool(memory_pool)
-
-if isinstance(values, Categorical):
-return DictionaryArray.from_arrays(
-values.codes, values.categories.values,
-mask=mask, memory_pool=memory_pool)
-elif values.dtype == object:
-# Object dtype underg

[3/4] arrow git commit: ARROW-751: [Python] Make all Cython modules private. Some code tidying

2017-04-13 Thread uwe
http://git-wip-us.apache.org/repos/asf/arrow/blob/8b64a4fb/python/pyarrow/_io.pyx
--
diff --git a/python/pyarrow/_io.pyx b/python/pyarrow/_io.pyx
new file mode 100644
index 000..9f067fb
--- /dev/null
+++ b/python/pyarrow/_io.pyx
@@ -0,0 +1,1273 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Cython wrappers for IO interfaces defined in arrow::io and messaging in
+# arrow::ipc
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+from cython.operator cimport dereference as deref
+from libc.stdlib cimport malloc, free
+from pyarrow.includes.libarrow cimport *
+cimport pyarrow.includes.pyarrow as pyarrow
+from pyarrow._array cimport Array, Tensor, box_tensor, Schema
+from pyarrow._error cimport check_status
+from pyarrow._memory cimport MemoryPool, maybe_unbox_memory_pool
+from pyarrow._table cimport (Column, RecordBatch, batch_from_cbatch,
+ table_from_ctable)
+cimport cpython as cp
+
+import pyarrow._config
+from pyarrow.compat import frombytes, tobytes, encode_file_path
+
+import re
+import six
+import sys
+import threading
+import time
+
+
+# 64K
+DEFAULT_BUFFER_SIZE = 2 ** 16
+
+
+# To let us get a PyObject* and avoid Cython auto-ref-counting
+cdef extern from "Python.h":
+PyObject* PyBytes_FromStringAndSizeNative" PyBytes_FromStringAndSize"(
+char *v, Py_ssize_t len) except NULL
+
+cdef class NativeFile:
+
+def __cinit__(self):
+self.is_open = False
+self.own_file = False
+
+def __dealloc__(self):
+if self.is_open and self.own_file:
+self.close()
+
+def __enter__(self):
+return self
+
+def __exit__(self, exc_type, exc_value, tb):
+self.close()
+
+def close(self):
+if self.is_open:
+with nogil:
+if self.is_readable:
+check_status(self.rd_file.get().Close())
+else:
+check_status(self.wr_file.get().Close())
+self.is_open = False
+
+cdef read_handle(self, shared_ptr[RandomAccessFile]* file):
+self._assert_readable()
+file[0] =  self.rd_file
+
+cdef write_handle(self, shared_ptr[OutputStream]* file):
+self._assert_writeable()
+file[0] =  self.wr_file
+
+def _assert_readable(self):
+if not self.is_readable:
+raise IOError("only valid on readonly files")
+
+if not self.is_open:
+raise IOError("file not open")
+
+def _assert_writeable(self):
+if not self.is_writeable:
+raise IOError("only valid on writeable files")
+
+if not self.is_open:
+raise IOError("file not open")
+
+def size(self):
+cdef int64_t size
+self._assert_readable()
+with nogil:
+check_status(self.rd_file.get().GetSize(&size))
+return size
+
+def tell(self):
+cdef int64_t position
+with nogil:
+if self.is_readable:
+check_status(self.rd_file.get().Tell(&position))
+else:
+check_status(self.wr_file.get().Tell(&position))
+return position
+
+def seek(self, int64_t position):
+self._assert_readable()
+with nogil:
+check_status(self.rd_file.get().Seek(position))
+
+def write(self, data):
+"""
+Write byte from any object implementing buffer protocol (bytes,
+bytearray, ndarray, pyarrow.Buffer)
+"""
+self._assert_writeable()
+
+if isinstance(data, six.string_types):
+data = tobytes(data)
+
+cdef Buffer arrow_buffer = frombuffer(data)
+
+cdef const uint8_t* buf = arrow_buffer.buffer.get().data()
+cdef int64_t bufsize = len(arrow_buffer)
+with nogil:
+check_status(self.wr_file.get().Write(buf, bufsize))
+
+def read(self, nbytes=None):
+cdef:
+int64_t c_nbytes
+int64_t bytes_read = 0
+PyObject* obj
+
+if nbytes is None:
+c_nbytes = self.size() - self.tell()
+else:
+c_nbytes = nbytes
+
+self._assert_readabl

[1/4] arrow git commit: ARROW-751: [Python] Make all Cython modules private. Some code tidying

2017-04-13 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master e93436503 -> 8b64a4fb2


http://git-wip-us.apache.org/repos/asf/arrow/blob/8b64a4fb/python/pyarrow/schema.pyx
--
diff --git a/python/pyarrow/schema.pyx b/python/pyarrow/schema.pyx
deleted file mode 100644
index 4749809..000
--- a/python/pyarrow/schema.pyx
+++ /dev/null
@@ -1,477 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-
-# Data types, fields, schemas, and so forth
-
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-
-from cython.operator cimport dereference as deref
-
-from pyarrow.compat import frombytes, tobytes
-from pyarrow.array cimport Array
-from pyarrow.error cimport check_status
-from pyarrow.includes.libarrow cimport (CDataType, CStructType, CListType,
-CFixedSizeBinaryType,
-CDecimalType,
-TimeUnit_SECOND, TimeUnit_MILLI,
-TimeUnit_MICRO, TimeUnit_NANO,
-Type, TimeUnit)
-cimport pyarrow.includes.pyarrow as pyarrow
-cimport pyarrow.includes.libarrow as la
-
-cimport cpython
-
-import six
-
-
-cdef class DataType:
-
-def __cinit__(self):
-pass
-
-cdef void init(self, const shared_ptr[CDataType]& type):
-self.sp_type = type
-self.type = type.get()
-
-def __str__(self):
-return frombytes(self.type.ToString())
-
-def __repr__(self):
-return '{0.__class__.__name__}({0})'.format(self)
-
-def __richcmp__(DataType self, DataType other, int op):
-if op == cpython.Py_EQ:
-return self.type.Equals(deref(other.type))
-elif op == cpython.Py_NE:
-return not self.type.Equals(deref(other.type))
-else:
-raise TypeError('Invalid comparison')
-
-
-cdef class DictionaryType(DataType):
-
-cdef void init(self, const shared_ptr[CDataType]& type):
-DataType.init(self, type)
-self.dict_type =  type.get()
-
-
-cdef class TimestampType(DataType):
-
-cdef void init(self, const shared_ptr[CDataType]& type):
-DataType.init(self, type)
-self.ts_type =  type.get()
-
-property unit:
-
-def __get__(self):
-return timeunit_to_string(self.ts_type.unit())
-
-property tz:
-
-def __get__(self):
-if self.ts_type.timezone().size() > 0:
-return frombytes(self.ts_type.timezone())
-else:
-return None
-
-
-cdef class FixedSizeBinaryType(DataType):
-
-cdef void init(self, const shared_ptr[CDataType]& type):
-DataType.init(self, type)
-self.fixed_size_binary_type =  type.get()
-
-property byte_width:
-
-def __get__(self):
-return self.fixed_size_binary_type.byte_width()
-
-
-cdef class DecimalType(FixedSizeBinaryType):
-
-cdef void init(self, const shared_ptr[CDataType]& type):
-DataType.init(self, type)
-self.decimal_type =  type.get()
-
-
-cdef class Field:
-
-def __cinit__(self):
-pass
-
-cdef init(self, const shared_ptr[CField]& field):
-self.sp_field = field
-self.field = field.get()
-self.type = box_data_type(field.get().type())
-
-@classmethod
-def from_py(cls, object name, DataType type, bint nullable=True):
-cdef Field result = Field()
-result.type = type
-result.sp_field.reset(new CField(tobytes(name), type.sp_type,
- nullable))
-result.field = result.sp_field.get()
-
-return result
-
-def __repr__(self):
-return 'Field({0!r}, type={1})'.format(self.name, str(self.type))
-
-property nullable:
-
-def __get__(self):
-return self.field.nullable()
-
-property name:
-
-def __get__(self):
-if box_field(self.sp_field) is None:
-raise ReferenceError(
-'Field not initialized (references NULL pointer)')
-return frombytes(self.field.name())
-
-
-cde

arrow git commit: ARROW-828: [C++] Add new dependency to README

2017-04-15 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 88c351abc -> 4b030dd0e


ARROW-828: [C++] Add new dependency to README

`libboost-regex-dev` is required to build on Ubuntu; added to `apt` install 
command.

Author: Jeff Knupp 

Closes #545 from jeffknupp/master and squashes the following commits:

b527ebb [Jeff Knupp] Add new dependency to README


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/4b030dd0
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/4b030dd0
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/4b030dd0

Branch: refs/heads/master
Commit: 4b030dd0ea193eeb60644518f897ec966eb6b720
Parents: 88c351a
Author: Jeff Knupp 
Authored: Sat Apr 15 11:09:51 2017 +0200
Committer: Uwe L. Korn 
Committed: Sat Apr 15 11:09:51 2017 +0200

--
 cpp/README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/4b030dd0/cpp/README.md
--
diff --git a/cpp/README.md b/cpp/README.md
index b19fa00..339b6b4 100644
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -31,6 +31,7 @@ On Ubuntu/Debian you can install the requirements with:
 sudo apt-get install cmake \
  libboost-dev \
  libboost-filesystem-dev \
+ libboost-regex-dev \
  libboost-system-dev
 ```
 
@@ -126,4 +127,4 @@ both of these options would be used rarely.  Current known 
uses-cases whent hey
 *  Parameterized tests in google test.
 
 [1]: https://brew.sh/
-[2]: https://github.com/apache/arrow/blob/master/cpp/doc/Windows.md
\ No newline at end of file
+[2]: https://github.com/apache/arrow/blob/master/cpp/doc/Windows.md



arrow git commit: ARROW-820: [C++] Build dependencies for Parquet library without arrow…

2017-04-15 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 4b030dd0e -> ce5b98e1d


ARROW-820: [C++] Build dependencies for Parquet library without arrow…

… support

Author: Deepak Majeti 

Closes #538 from majetideepak/ARROW-820 and squashes the following commits:

10ca617 [Deepak Majeti] Revert HDFS change
f399ab5 [Deepak Majeti] Add flags for ARROW_IPC and ARROW_HDFS
add683a [Deepak Majeti] ARROW-820: [C++] Build dependencies for Parquet library 
without arrow support


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/ce5b98e1
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/ce5b98e1
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/ce5b98e1

Branch: refs/heads/master
Commit: ce5b98e1d8254219419220c42e45959ca1aeac21
Parents: 4b030dd
Author: Deepak Majeti 
Authored: Sat Apr 15 11:27:46 2017 +0200
Committer: Uwe L. Korn 
Committed: Sat Apr 15 11:27:46 2017 +0200

--
 cpp/CMakeLists.txt | 107 ++--
 1 file changed, 57 insertions(+), 50 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/ce5b98e1/cpp/CMakeLists.txt
--
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 0e4a4bb..83610d3 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -621,27 +621,49 @@ if(ARROW_BUILD_BENCHMARKS)
   endif()
 endif()
 
-# RapidJSON, header only dependency
-if("$ENV{RAPIDJSON_HOME}" STREQUAL "")
-  ExternalProject_Add(rapidjson_ep
-PREFIX "${CMAKE_BINARY_DIR}"
-URL "https://github.com/miloyip/rapidjson/archive/v1.1.0.tar.gz";
-URL_MD5 "badd12c511e081fec6c89c43a7027bce"
-CONFIGURE_COMMAND ""
-BUILD_COMMAND ""
-BUILD_IN_SOURCE 1
-INSTALL_COMMAND "")
-
-  ExternalProject_Get_Property(rapidjson_ep SOURCE_DIR)
-  set(RAPIDJSON_INCLUDE_DIR "${SOURCE_DIR}/include")
-  set(RAPIDJSON_VENDORED 1)
-else()
-  set(RAPIDJSON_INCLUDE_DIR "$ENV{RAPIDJSON_HOME}/include")
-  set(RAPIDJSON_VENDORED 0)
-endif()
-message(STATUS "RapidJSON include dir: ${RAPIDJSON_INCLUDE_DIR}")
-include_directories(SYSTEM ${RAPIDJSON_INCLUDE_DIR})
+if (ARROW_IPC)
+  # RapidJSON, header only dependency
+  if("$ENV{RAPIDJSON_HOME}" STREQUAL "")
+ExternalProject_Add(rapidjson_ep
+  PREFIX "${CMAKE_BINARY_DIR}"
+  URL "https://github.com/miloyip/rapidjson/archive/v1.1.0.tar.gz";
+  URL_MD5 "badd12c511e081fec6c89c43a7027bce"
+  CONFIGURE_COMMAND ""
+  BUILD_COMMAND ""
+  BUILD_IN_SOURCE 1
+  INSTALL_COMMAND "")
+
+ExternalProject_Get_Property(rapidjson_ep SOURCE_DIR)
+set(RAPIDJSON_INCLUDE_DIR "${SOURCE_DIR}/include")
+set(RAPIDJSON_VENDORED 1)
+  else()
+set(RAPIDJSON_INCLUDE_DIR "$ENV{RAPIDJSON_HOME}/include")
+set(RAPIDJSON_VENDORED 0)
+  endif()
+  message(STATUS "RapidJSON include dir: ${RAPIDJSON_INCLUDE_DIR}")
+  include_directories(SYSTEM ${RAPIDJSON_INCLUDE_DIR})
+
+  ## Flatbuffers
+  if("$ENV{FLATBUFFERS_HOME}" STREQUAL "")
+set(FLATBUFFERS_PREFIX 
"${CMAKE_CURRENT_BINARY_DIR}/flatbuffers_ep-prefix/src/flatbuffers_ep-install")
+ExternalProject_Add(flatbuffers_ep
+  URL 
"https://github.com/google/flatbuffers/archive/v${FLATBUFFERS_VERSION}.tar.gz";
+  CMAKE_ARGS
+  "-DCMAKE_INSTALL_PREFIX:PATH=${FLATBUFFERS_PREFIX}"
+  "-DFLATBUFFERS_BUILD_TESTS=OFF")
+
+set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_PREFIX}/include")
+set(FLATBUFFERS_COMPILER "${FLATBUFFERS_PREFIX}/bin/flatc")
+set(FLATBUFFERS_VENDORED 1)
+  else()
+find_package(Flatbuffers REQUIRED)
+set(FLATBUFFERS_VENDORED 0)
+  endif()
 
+  message(STATUS "Flatbuffers include dir: ${FLATBUFFERS_INCLUDE_DIR}")
+  message(STATUS "Flatbuffers compiler: ${FLATBUFFERS_COMPILER}")
+  include_directories(SYSTEM ${FLATBUFFERS_INCLUDE_DIR})
+endif()
 #--
 
 if (MSVC)
@@ -705,28 +727,6 @@ endif()
 #   set(ARROW_TCMALLOC_AVAILABLE 1)
 # endif()
 
-## Flatbuffers
-
-if("$ENV{FLATBUFFERS_HOME}" STREQUAL "")
-  set(FLATBUFFERS_PREFIX 
"${CMAKE_CURRENT_BINARY_DIR}/flatbuffers_ep-prefix/src/flatbuffers_ep-install")
-  ExternalProject_Add(flatbuffers_ep
-URL 
"https://github.com/google/flatbuffers/archive/v${FLATBUFFERS_VERSION}.tar.gz";
-CMAKE_ARGS
-"-DCMAKE_INSTALL_PREFIX:PATH=${FLATBUFFERS_PREFIX}"
-"-DFLATBUFFERS_BUILD_TESTS=OFF")
-
-  set(FLATBUFFERS_INCLUDE

arrow git commit: ARROW-839: [Python] Use mktime variant that is reliable on MSVC

2017-04-18 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master bb8514cc9 -> 0bcb7852f


ARROW-839: [Python] Use mktime variant that is reliable on MSVC

This also reverts an unintentional regression from 
https://github.com/apache/arrow/pull/544 when code from config.h was moved to 
platform.h

Author: Wes McKinney 

Closes #559 from wesm/ARROW-839 and squashes the following commits:

2e9b300 [Wes McKinney] Use _mkgmtime64 on MSVC
f182bab [Wes McKinney] Restore include order in platform.h
38c29bf [Wes McKinney] Add Windows build instructions for Python


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/0bcb7852
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/0bcb7852
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/0bcb7852

Branch: refs/heads/master
Commit: 0bcb7852feb464790791cf5f9c4da1aaaf429970
Parents: bb8514c
Author: Wes McKinney 
Authored: Tue Apr 18 16:25:02 2017 +0200
Committer: Uwe L. Korn 
Committed: Tue Apr 18 16:25:02 2017 +0200

--
 cpp/CMakeLists.txt   |  4 ++-
 cpp/src/arrow/python/platform.h  |  2 +-
 cpp/src/arrow/python/util/datetime.h |  6 
 python/DEVELOPMENT.md| 48 +++
 4 files changed, 58 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/0bcb7852/cpp/CMakeLists.txt
--
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 08120e9..65fb2c9 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -837,7 +837,9 @@ if (${CLANG_FORMAT_FOUND})
   add_custom_target(format ${BUILD_SUPPORT_DIR}/run-clang-format.sh 
${CMAKE_CURRENT_SOURCE_DIR} ${CLANG_FORMAT_BIN} 1
 `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h |
 sed -e '/_generated/g' |
-sed -e '/windows_compatibility.h/g'`)
+sed -e '/windows_compatibility.h/g' |
+sed -e '/config.h/g' |
+sed -e '/platform.h/g'`)
 
   # runs clang format and exits with a non-zero exit code if any files need to 
be reformatted
   add_custom_target(check-format ${BUILD_SUPPORT_DIR}/run-clang-format.sh 
${CMAKE_CURRENT_SOURCE_DIR} ${CLANG_FORMAT_BIN} 0

http://git-wip-us.apache.org/repos/asf/arrow/blob/0bcb7852/cpp/src/arrow/python/platform.h
--
diff --git a/cpp/src/arrow/python/platform.h b/cpp/src/arrow/python/platform.h
index 38f8e0f..a354b38 100644
--- a/cpp/src/arrow/python/platform.h
+++ b/cpp/src/arrow/python/platform.h
@@ -21,8 +21,8 @@
 #ifndef ARROW_PYTHON_PLATFORM_H
 #define ARROW_PYTHON_PLATFORM_H
 
-#include 
 #include 
+#include 
 
 // Work around C2528 error
 #if _MSC_VER >= 1900

http://git-wip-us.apache.org/repos/asf/arrow/blob/0bcb7852/cpp/src/arrow/python/util/datetime.h
--
diff --git a/cpp/src/arrow/python/util/datetime.h 
b/cpp/src/arrow/python/util/datetime.h
index 852f426..bd80d9f 100644
--- a/cpp/src/arrow/python/util/datetime.h
+++ b/cpp/src/arrow/python/util/datetime.h
@@ -33,7 +33,13 @@ static inline int64_t PyDate_to_ms(PyDateTime_Date* pydate) {
   epoch.tm_year = 70;
   epoch.tm_mday = 1;
   // Milliseconds since the epoch
+#ifdef _MSC_VER
+  const int64_t current_timestamp = static_cast(_mkgmtime64(&date));
+  const int64_t epoch_timestamp = static_cast(_mkgmtime64(&epoch));
+  return (current_timestamp - epoch_timestamp) * 1000LL;
+#else
   return lrint(difftime(mktime(&date), mktime(&epoch)) * 1000);
+#endif
 }
 
 static inline int32_t PyDate_to_days(PyDateTime_Date* pydate) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/0bcb7852/python/DEVELOPMENT.md
--
diff --git a/python/DEVELOPMENT.md b/python/DEVELOPMENT.md
index 280314f..ca74462 100644
--- a/python/DEVELOPMENT.md
+++ b/python/DEVELOPMENT.md
@@ -14,6 +14,8 @@
 
 ## Developer guide for conda users
 
+### Linux and macOS
+
 First, set up your thirdparty C++ toolchain using libraries from conda-forge:
 
 ```shell
@@ -134,3 +136,49 @@ pyarrow/tests/test_tensor.py 
 
 == 181 passed, 17 skipped in 0.98 seconds 
===
 ```
+
+### Windows
+
+First, make sure you can [build the C++ library][1].
+
+Now, we need to build and install the C++ libraries someplace.
+
+```shell
+mkdir cpp\build
+cd cpp\build
+set ARROW_HOME=C:\thirdparty
+cmake -G "Visual Studio 14 2015 Win64" ^
+  -DCMAKE_INSTALL_PREFIX=%ARROW_HOME% ^
+  -DCMAKE_BUILD_TYPE=Release ^
+  -DARROW_BUILD_TESTS=off ^
+  -DARROW_PYTHON=on ..
+cmake --build . --target INSTALL --config Release
+cd ..\..
+```
+
+After that, we must put the install 

arrow git commit: ARROW-845: [Python] Sync changes from PARQUET-955; explicit ARROW_HOME will override pkgconfig

2017-04-18 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 0bcb7852f -> bb287e203


ARROW-845: [Python] Sync changes from PARQUET-955; explicit ARROW_HOME will 
override pkgconfig

This will avoid build failures due to a stale system-level Arrow install

Author: Wes McKinney 

Closes #558 from wesm/ARROW-845 and squashes the following commits:

4f89207 [Wes McKinney] Sync changes from PARQUET-955; explicit ARROW_HOME will 
override pkgconfig


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/bb287e20
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/bb287e20
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/bb287e20

Branch: refs/heads/master
Commit: bb287e2030c2b209edc4040099b138866e6e4692
Parents: 0bcb785
Author: Wes McKinney 
Authored: Tue Apr 18 16:34:08 2017 +0200
Committer: Uwe L. Korn 
Committed: Tue Apr 18 16:34:08 2017 +0200

--
 python/cmake_modules/FindArrow.cmake | 92 +--
 1 file changed, 50 insertions(+), 42 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/bb287e20/python/cmake_modules/FindArrow.cmake
--
diff --git a/python/cmake_modules/FindArrow.cmake 
b/python/cmake_modules/FindArrow.cmake
index 8e13dd6..fbe4545 100644
--- a/python/cmake_modules/FindArrow.cmake
+++ b/python/cmake_modules/FindArrow.cmake
@@ -25,68 +25,75 @@
 
 include(FindPkgConfig)
 
-set(ARROW_SEARCH_HEADER_PATHS
-  $ENV{ARROW_HOME}/include
-)
+if ("$ENV{ARROW_HOME}" STREQUAL "")
+  pkg_check_modules(ARROW arrow)
+  if (ARROW_FOUND)
+pkg_get_variable(ARROW_ABI_VERSION arrow abi_version)
+message(STATUS "Arrow ABI version: ${ARROW_ABI_VERSION}")
+pkg_get_variable(ARROW_SO_VERSION arrow so_version)
+message(STATUS "Arrow SO version: ${ARROW_SO_VERSION}")
+set(ARROW_INCLUDE_DIR ${ARROW_INCLUDE_DIRS})
+set(ARROW_LIBS ${ARROW_LIBRARY_DIRS})
+  endif()
+else()
+  set(ARROW_HOME "$ENV{ARROW_HOME}")
 
-set(ARROW_SEARCH_LIB_PATH
-  $ENV{ARROW_HOME}/lib
-)
+  set(ARROW_SEARCH_HEADER_PATHS
+${ARROW_HOME}/include
+)
+
+  set(ARROW_SEARCH_LIB_PATH
+${ARROW_HOME}/lib
+)
 
-pkg_check_modules(ARROW arrow)
-if (ARROW_FOUND)
-  pkg_get_variable(ARROW_ABI_VERSION arrow abi_version)
-  message(STATUS "Arrow ABI version: ${ARROW_ABI_VERSION}")
-  pkg_get_variable(ARROW_SO_VERSION arrow so_version)
-  message(STATUS "Arrow SO version: ${ARROW_SO_VERSION}")
-  set(ARROW_INCLUDE_DIR ${ARROW_INCLUDE_DIRS})
-  set(ARROW_LIBS ${ARROW_LIBRARY_DIRS})
-else()
   find_path(ARROW_INCLUDE_DIR arrow/array.h PATHS
 ${ARROW_SEARCH_HEADER_PATHS}
 # make sure we don't accidentally pick up a different version
 NO_DEFAULT_PATH
-  )
+)
 
   find_library(ARROW_LIB_PATH NAMES arrow
 PATHS
 ${ARROW_SEARCH_LIB_PATH}
 NO_DEFAULT_PATH)
   get_filename_component(ARROW_LIBS ${ARROW_LIB_PATH} DIRECTORY)
-endif()
 
-find_library(ARROW_JEMALLOC_LIB_PATH NAMES arrow_jemalloc
-  PATHS
-  ${ARROW_SEARCH_LIB_PATH}
-  NO_DEFAULT_PATH)
+  find_library(ARROW_JEMALLOC_LIB_PATH NAMES arrow_jemalloc
+PATHS
+${ARROW_SEARCH_LIB_PATH}
+NO_DEFAULT_PATH)
 
-find_library(ARROW_PYTHON_LIB_PATH NAMES arrow_python
-  PATHS
-  ${ARROW_SEARCH_LIB_PATH}
-  NO_DEFAULT_PATH)
+  find_library(ARROW_PYTHON_LIB_PATH NAMES arrow_python
+PATHS
+${ARROW_SEARCH_LIB_PATH}
+NO_DEFAULT_PATH)
 
-if (ARROW_INCLUDE_DIR AND ARROW_LIBS)
-  set(ARROW_FOUND TRUE)
+  if (ARROW_INCLUDE_DIR AND ARROW_LIBS)
+set(ARROW_FOUND TRUE)
 
-  if (MSVC)
-set(ARROW_STATIC_LIB ${ARROW_LIB_PATH})
-set(ARROW_PYTHON_STATIC_LIB ${ARROW_PYTHON_LIB_PATH})
-set(ARROW_JEMALLOC_STATIC_LIB ${ARROW_JEMALLOC_LIB_PATH})
-set(ARROW_SHARED_LIB ${ARROW_STATIC_LIB})
-set(ARROW_PYTHON_SHARED_LIB ${ARROW_PYTHON_STATIC_LIB})
-set(ARROW_JEMALLOC_SHARED_LIB ${ARROW_JEMALLOC_STATIC_LIB})
-  else()
-set(ARROW_STATIC_LIB ${ARROW_PYTHON_LIB_PATH}/libarrow.a)
-set(ARROW_PYTHON_STATIC_LIB ${ARROW_PYTHON_LIB_PATH}/libarrow_python.a)
-set(ARROW_JEMALLOC_STATIC_LIB ${ARROW_PYTHON_LIB_PATH}/libarrow_jemalloc.a)
+if (MSVC)
+  set(ARROW_STATIC_LIB ${ARROW_LIB_PATH})
+  set(ARROW_PYTHON_STATIC_LIB ${ARROW_PYTHON_LIB_PATH})
+  set(ARROW_JEMALLOC_STATIC_LIB ${ARROW_JEMALLOC_LIB_PATH})
+  set(ARROW_SHARED_LIB ${ARROW_STATIC_LIB})
+  set(ARROW_PYTHON_SHARED_LIB ${ARROW_PYTHON_STATIC_LIB})
+  set(ARROW_JEMALLOC_SHARED_LIB ${ARROW_JEMALLOC_STATIC_LIB})
+else()
+  set(ARROW_STATIC_LIB ${ARROW_PYTHON_LIB_PATH}/libarrow.a)
+  set(ARROW_PYTHON_STATIC_LIB ${ARROW_PYTHON_LIB_PATH}/libarrow_python.a)
+  set(ARROW_JEMALLOC_STATIC_LIB 
${ARROW_PYTHON_LIB_PATH}/libarrow_jemalloc.a)
 
-set(A

arrow git commit: ARROW-818: [Python] Expand Sphinx API docs, pyarrow.* namespace. Add factory functions for time32, time64

2017-04-18 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master bb287e203 -> 7f20f6e73


ARROW-818: [Python] Expand Sphinx API docs, pyarrow.* namespace. Add factory 
functions for time32, time64

Author: Wes McKinney 

Closes #557 from wesm/ARROW-818 and squashes the following commits:

96ce436 [Wes McKinney] Expand Sphinx API docs, pyarrow.* namespace. Add factory 
functions for time32, time64


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/7f20f6e7
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/7f20f6e7
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/7f20f6e7

Branch: refs/heads/master
Commit: 7f20f6e738a2e163b0b753416ee4c4ed00998f4b
Parents: bb287e2
Author: Wes McKinney 
Authored: Tue Apr 18 16:37:03 2017 +0200
Committer: Uwe L. Korn 
Committed: Tue Apr 18 16:37:03 2017 +0200

--
 python/doc/source/api.rst| 69 +++-
 python/pyarrow/__init__.py   | 33 ++
 python/pyarrow/_array.pxd| 10 +
 python/pyarrow/_array.pyx| 74 ++-
 python/pyarrow/_io.pyx   |  6 +--
 python/pyarrow/includes/libarrow.pxd |  3 ++
 python/pyarrow/tests/test_io.py  |  4 +-
 python/pyarrow/tests/test_schema.py  | 21 +
 8 files changed, 195 insertions(+), 25 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/7f20f6e7/python/doc/source/api.rst
--
diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst
index 92e248b..08a0694 100644
--- a/python/doc/source/api.rst
+++ b/python/doc/source/api.rst
@@ -24,8 +24,8 @@ API Reference
 
 .. _api.functions:
 
-Type Metadata and Schemas
--
+Type and Schema Factory Functions
+-
 
 .. autosummary::
:toctree: generated/
@@ -43,6 +43,8 @@ Type Metadata and Schemas
float16
float32
float64
+   time32
+   time64
timestamp
date32
date64
@@ -53,10 +55,8 @@ Type Metadata and Schemas
struct
dictionary
field
-   DataType
-   Field
-   Schema
schema
+   from_numpy_dtype
 
 Scalar Value Types
 --
@@ -68,6 +68,7 @@ Scalar Value Types
NAType
Scalar
ArrayValue
+   BooleanValue
Int8Value
Int16Value
Int32Value
@@ -82,6 +83,11 @@ Scalar Value Types
BinaryValue
StringValue
FixedSizeBinaryValue
+   Date32Value
+   Date64Value
+   TimestampValue
+   DecimalValue
+
 
 Array Types and Constructors
 
@@ -91,21 +97,30 @@ Array Types and Constructors
 
array
Array
-   NullArray
-   NumericArray
-   IntegerArray
-   FloatingPointArray
BooleanArray
+   DictionaryArray
+   FloatingPointArray
+   IntegerArray
Int8Array
Int16Array
Int32Array
Int64Array
+   NullArray
+   NumericArray
UInt8Array
UInt16Array
UInt32Array
UInt64Array
-   DictionaryArray
+   BinaryArray
+   FixedSizeBinaryArray
StringArray
+   Time32Array
+   Time64Array
+   Date32Array
+   Date64Array
+   TimestampArray
+   DecimalArray
+   ListArray
 
 Tables and Record Batches
 -
@@ -113,9 +128,11 @@ Tables and Record Batches
 .. autosummary::
:toctree: generated/
 
+   ChunkedArray
Column
RecordBatch
Table
+   get_record_batch_size
 
 Tensor type and Functions
 -
@@ -141,7 +158,7 @@ Input / Output and Shared Memory
MemoryMappedFile
memory_map
create_memory_map
-   PythonFileInterface
+   PythonFile
 
 Interprocess Communication and Messaging
 
@@ -165,3 +182,33 @@ Memory Pools
jemalloc_memory_pool
total_allocated_bytes
set_memory_pool
+
+Type Classes
+
+
+.. autosummary::
+   :toctree: generated/
+
+   DataType
+   DecimalType
+   DictionaryType
+   FixedSizeBinaryType
+   Time32Type
+   Time64Type
+   TimestampType
+   Field
+   Schema
+
+.. currentmodule:: pyarrow.parquet
+
+Apache Parquet
+--
+
+.. autosummary::
+   :toctree: generated/
+
+   ParquetDataset
+   ParquetFile
+   read_table
+   write_metadata
+   write_table

http://git-wip-us.apache.org/repos/asf/arrow/blob/7f20f6e7/python/pyarrow/__init__.py
--
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 87f2352..4d8da9f 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -31,12 +31,20 @@ from pyarrow._config import cpu_count, set_cpu_count
 from pyarrow._array import (null, bool_,
 int8, int16, int32, int64,
 uint8, uint16, uint32, uint64,
-timestamp, date32, dat

arrow git commit: ARROW-853: [Python] Only set RPATH when bundling the shared libraries

2017-04-19 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 4555ab92b -> 41a8ff9ad


ARROW-853: [Python] Only set RPATH when bundling the shared libraries

See discussion in https://github.com/apache/arrow/pull/562. Modifying RPATH is 
no longer needed when libarrow/libarrow_python are installed someplace else in 
the loader path.

Author: Wes McKinney 

Closes #564 from wesm/ARROW-853 and squashes the following commits:

262f43a [Wes McKinney] Only set RPATH when bundling the shared libraries


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/41a8ff9a
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/41a8ff9a
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/41a8ff9a

Branch: refs/heads/master
Commit: 41a8ff9ad18a4970c16b674b56ade25b8e8986ec
Parents: 4555ab9
Author: Wes McKinney 
Authored: Wed Apr 19 19:42:49 2017 +0200
Committer: Uwe L. Korn 
Committed: Wed Apr 19 19:42:49 2017 +0200

--
 python/CMakeLists.txt | 24 ++--
 1 file changed, 14 insertions(+), 10 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/41a8ff9a/python/CMakeLists.txt
--
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 3db7b7b..0d34bcd 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -346,21 +346,25 @@ foreach(module ${CYTHON_EXTENSIONS})
   LIBRARY_OUTPUT_DIRECTORY ${module_output_directory})
 endif()
 
-if(APPLE)
+if (PYARROW_BUNDLE_ARROW_CPP)
+  # In the event that we are bundling the shared libraries (e.g. in a
+  # manylinux1 wheel), we need to set the RPATH of the extensions to the
+  # root of the pyarrow/ package so that libarrow/libarrow_python are able
+  # to be loaded properly
+  if(APPLE)
 set(module_install_rpath "@loader_path")
-else()
+  else()
 set(module_install_rpath "\$ORIGIN")
-endif()
-list(LENGTH directories i)
-while(${i} GREATER 0)
+  endif()
+  list(LENGTH directories i)
+  while(${i} GREATER 0)
 set(module_install_rpath "${module_install_rpath}/..")
 math(EXPR i "${i} - 1" )
-endwhile(${i} GREATER 0)
+  endwhile(${i} GREATER 0)
 
-# for inplace development for now
-#set(module_install_rpath "${CMAKE_SOURCE_DIR}/pyarrow/")
+  set_target_properties(${module_name} PROPERTIES
+INSTALL_RPATH ${module_install_rpath})
+endif()
 
-set_target_properties(${module_name} PROPERTIES
-  INSTALL_RPATH ${module_install_rpath})
 target_link_libraries(${module_name} ${LINK_LIBS})
 endforeach(module)



arrow git commit: ARROW-860: [C++] Remove typed Tensor containers

2017-04-20 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 0dc6fe8f3 -> a68f31b0f


ARROW-860: [C++] Remove typed Tensor containers

cc @kou for opinions -- this patch breaks glib for the moment. Since tensors 
are all fixed width types, there's less reason to have strongly-typed 
containers for them (unlike the `arrow::Array` subclasses, where ListArray is 
quite different from Int8Array).

My view is that if the visitor pattern needs to be employed, we can do it using 
the `type()` member on the tensor (which also provides compile-time access to 
`TypeClass::c_type` if needed)

Author: Wes McKinney 
Author: Kouhei Sutou 

Closes #571 from wesm/ARROW-860 and squashes the following commits:

fe0b4d8 [Kouhei Sutou] Remove typed Tensors from glib
357f441 [Wes McKinney] Remove typed Tensor containers


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/a68f31b0
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/a68f31b0
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/a68f31b0

Branch: refs/heads/master
Commit: a68f31b0f3f2c094c5d6660a2d936baa05da3103
Parents: 0dc6fe8
Author: Wes McKinney 
Authored: Thu Apr 20 09:36:21 2017 +0200
Committer: Uwe L. Korn 
Committed: Thu Apr 20 09:36:21 2017 +0200

--
 c_glib/arrow-glib/Makefile.am|   5 --
 c_glib/arrow-glib/arrow-glib.h   |   2 -
 c_glib/arrow-glib/int8-tensor.cpp| 105 --
 c_glib/arrow-glib/int8-tensor.h  |  79 ---
 c_glib/arrow-glib/numeric-tensor.hpp |  64 
 c_glib/arrow-glib/tensor.cpp | 103 +
 c_glib/arrow-glib/tensor.h   |   8 ++
 c_glib/arrow-glib/uint8-tensor.cpp   | 105 --
 c_glib/arrow-glib/uint8-tensor.h |  79 ---
 c_glib/test/test-int8-tensor.rb  |  43 ---
 c_glib/test/test-tensor.rb   |   6 +-
 c_glib/test/test-uint8-tensor.rb |  43 ---
 cpp/src/arrow/compare.cc |  37 +++--
 cpp/src/arrow/ipc/ipc-read-write-test.cc |   6 +-
 cpp/src/arrow/ipc/reader.cc  |   3 +-
 cpp/src/arrow/python/numpy_convert.cc|   3 +-
 cpp/src/arrow/tensor-test.cc |  14 ++--
 cpp/src/arrow/tensor.cc  |  65 
 cpp/src/arrow/tensor.h   |  47 +---
 cpp/src/arrow/visitor_inline.h   |  25 --
 20 files changed, 94 insertions(+), 748 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/a68f31b0/c_glib/arrow-glib/Makefile.am
--
diff --git a/c_glib/arrow-glib/Makefile.am b/c_glib/arrow-glib/Makefile.am
index fbfe3a4..11b6508 100644
--- a/c_glib/arrow-glib/Makefile.am
+++ b/c_glib/arrow-glib/Makefile.am
@@ -65,7 +65,6 @@ libarrow_glib_la_headers =\
int8-array.h\
int8-array-builder.h\
int8-data-type.h\
-   int8-tensor.h   \
int16-array.h   \
int16-array-builder.h   \
int16-data-type.h   \
@@ -94,7 +93,6 @@ libarrow_glib_la_headers =\
uint8-array.h   \
uint8-array-builder.h   \
uint8-data-type.h   \
-   uint8-tensor.h  \
uint16-array.h  \
uint16-array-builder.h  \
uint16-data-type.h  \
@@ -155,7 +153,6 @@ libarrow_glib_la_sources =  \
int8-array.cpp  \
int8-array-builder.cpp  \
int8-data-type.cpp  \
-   int8-tensor.cpp \
int16-array.cpp \
int16-array-builder.cpp \
int16-data-type.cpp \
@@ -184,7 +181,6 @@ libarrow_glib_la_sources =  \
uint8-array.cpp \
uint8-array-builder.cpp \
uint8-data-type.cpp \
-   uint8-tensor.cpp\
uint16-array.cpp\
uint16-array-builder.cpp\
uint16-data-type.cpp\
@@ -226,7 +222,6 @@ libarrow_glib_la_cpp_headers =  \
data-type.hpp   \
error.hpp   \
field.hpp   \
-   numeric-tensor.hpp  \
record-b

arrow git commit: ARROW-890: [GLib] Add GArrowMutableBuffer

2017-04-24 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master eaf2118ef -> f00e2ab59


ARROW-890: [GLib] Add GArrowMutableBuffer

Author: Kouhei Sutou 

Closes #596 from kou/glib-mutable-buffer and squashes the following commits:

73c2663 [Kouhei Sutou] [GLib] Support running tests on Ubuntu 14.04
d211a22 [Kouhei Sutou] [GLib] Add GArrowMutableBuffer


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/f00e2ab5
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/f00e2ab5
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/f00e2ab5

Branch: refs/heads/master
Commit: f00e2ab590ad8f04409e7bc09f70622e73ebd741
Parents: eaf2118
Author: Kouhei Sutou 
Authored: Tue Apr 25 08:45:01 2017 +0200
Committer: Uwe L. Korn 
Committed: Tue Apr 25 08:45:01 2017 +0200

--
 c_glib/arrow-glib/buffer.cpp   | 97 -
 c_glib/arrow-glib/buffer.h | 52 ++
 c_glib/arrow-glib/buffer.hpp   |  2 +
 c_glib/test/test-buffer.rb |  7 +++
 c_glib/test/test-mutable-buffer.rb | 38 +
 5 files changed, 195 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/f00e2ab5/c_glib/arrow-glib/buffer.cpp
--
diff --git a/c_glib/arrow-glib/buffer.cpp b/c_glib/arrow-glib/buffer.cpp
index 9853e89..5fc3b07 100644
--- a/c_glib/arrow-glib/buffer.cpp
+++ b/c_glib/arrow-glib/buffer.cpp
@@ -28,10 +28,16 @@ G_BEGIN_DECLS
 
 /**
  * SECTION: buffer
- * @short_description: Buffer class
+ * @section_id: buffer-classes
+ * @title: Buffer classes
+ * @include: arrow-glib/arrow-glib.h
  *
  * #GArrowBuffer is a class for keeping data. Other classes such as
  * #GArrowArray and #GArrowTensor can use data in buffer.
+ *
+ * #GArrowBuffer is immutable.
+ *
+ * #GArrowMutableBuffer is mutable.
  */
 
 typedef struct GArrowBufferPrivate_ {
@@ -183,6 +189,27 @@ garrow_buffer_get_data(GArrowBuffer *buffer)
 }
 
 /**
+ * garrow_buffer_get_mutable_data:
+ * @buffer: A #GArrowBuffer.
+ *
+ * Returns: (transfer full) (nullable): The data of the buffer. If the
+ *   buffer is immutable, it returns %NULL. The data is owned by the
+ *   buffer. You should not free the data.
+ *
+ * Since: 0.3.0
+ */
+GBytes *
+garrow_buffer_get_mutable_data(GArrowBuffer *buffer)
+{
+  auto arrow_buffer = garrow_buffer_get_raw(buffer);
+  if (!arrow_buffer->is_mutable()) {
+return NULL;
+  }
+  return g_bytes_new_static(arrow_buffer->mutable_data(),
+arrow_buffer->size());
+}
+
+/**
  * garrow_buffer_get_size:
  * @buffer: A #GArrowBuffer.
  *
@@ -271,6 +298,65 @@ garrow_buffer_slice(GArrowBuffer *buffer, gint64 offset, 
gint64 size)
   return garrow_buffer_new_raw(&arrow_buffer);
 }
 
+
+G_DEFINE_TYPE(GArrowMutableBuffer,  \
+  garrow_mutable_buffer,\
+  GARROW_TYPE_BUFFER)
+
+static void
+garrow_mutable_buffer_init(GArrowMutableBuffer *object)
+{
+}
+
+static void
+garrow_mutable_buffer_class_init(GArrowMutableBufferClass *klass)
+{
+}
+
+/**
+ * garrow_mutable_buffer_new:
+ * @data: (array length=size): Data for the buffer.
+ *   They aren't owned by the new buffer.
+ *   You must not free the data while the new buffer is alive.
+ * @size: The number of bytes of the data.
+ *
+ * Returns: A newly created #GArrowMutableBuffer.
+ *
+ * Since: 0.3.0
+ */
+GArrowMutableBuffer *
+garrow_mutable_buffer_new(guint8 *data, gint64 size)
+{
+  auto arrow_buffer = std::make_shared(data, size);
+  return garrow_mutable_buffer_new_raw(&arrow_buffer);
+}
+
+/**
+ * garrow_mutable_buffer_slice:
+ * @buffer: A #GArrowMutableBuffer.
+ * @offset: An offset in the buffer data in byte.
+ * @size: The number of bytes of the sliced data.
+ *
+ * Returns: (transfer full): A newly created #GArrowMutableBuffer that
+ *   shares data of the base #GArrowMutableBuffer. The created
+ *   #GArrowMutableBuffer has data start with offset from the base
+ *   buffer data and are the specified bytes size.
+ *
+ * Since: 0.3.0
+ */
+GArrowMutableBuffer *
+garrow_mutable_buffer_slice(GArrowMutableBuffer *buffer,
+gint64 offset,
+gint64 size)
+{
+  auto arrow_parent_buffer = garrow_buffer_get_raw(GARROW_BUFFER(buffer));
+  auto arrow_buffer =
+std::make_shared(arrow_parent_buffer,
+   offset,
+   size);
+  return garrow_mutable_buffer_new_raw(&arrow_buffer);
+}
+
 G_END_DECLS
 
 GArrowBuffer *
@@ -288,3 +374,12 @@ garrow_buffer_get_raw(GArrowBuffer *buffer)
   auto priv = GARROW_BUFFER_GET_PRIVATE(buffer);
   return priv->buffer;
 }
+
+GArrowMutableBuffer *
+garrow_mutable_buffer_new_raw(std::shared_ptr 

arrow git commit: ARROW-866: [Python] Be robust to PyErr_Fetch returning a null exc value

2017-04-27 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 909f826b5 -> 81be9c667


ARROW-866: [Python] Be robust to PyErr_Fetch returning a null exc value

cc @BryanCutler. This was a tricky one. I am not sure how to reproduce with our 
current code -- I reverted the patch from ARROW-822 to get a reproduction so I 
could fix this. Now, the error raised is:

```
/home/wesm/code/arrow/python/pyarrow/_error.pyx in pyarrow._error.check_status 
(/home/wesm/code/arrow/python/build/temp.linux-x86_64-2.7/_error.cxx:1324)()
 58 raise ArrowInvalid(message)
 59 elif status.IsIOError():
---> 60 raise ArrowIOError(message)
 61 elif status.IsOutOfMemory():
 62 raise ArrowMemoryError(message)

ArrowIOError: IOError: Error message was null
```

I'm not sure why calling `tell` on the socket object results in a bad exception 
state, but in any case it seems that the result of `PyErr_Fetch` cannot be 
relied upon to be non-null even when `PyErr_Occurred()` returns non-null

Author: Wes McKinney 

Closes #606 from wesm/ARROW-866 and squashes the following commits:

fa395cd [Wes McKinney] Enable other kinds of Status errors to be returned
0bd11c2 [Wes McKinney] Consolidate error handling code a bit
9d59dd2 [Wes McKinney] Be robust to PyErr_Fetch returning a null exc value


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/81be9c66
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/81be9c66
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/81be9c66

Branch: refs/heads/master
Commit: 81be9c6679466177d4b8e5dbca55f81185bb3ec6
Parents: 909f826
Author: Wes McKinney 
Authored: Thu Apr 27 18:10:24 2017 +0200
Committer: Uwe L. Korn 
Committed: Thu Apr 27 18:10:24 2017 +0200

--
 cpp/src/arrow/python/common.cc | 22 ++
 cpp/src/arrow/python/common.h  | 29 ++---
 cpp/src/arrow/python/io.cc | 29 +++--
 cpp/src/arrow/status.h |  3 +++
 4 files changed, 46 insertions(+), 37 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/81be9c66/cpp/src/arrow/python/common.cc
--
diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc
index 717cb5c..bedd458 100644
--- a/cpp/src/arrow/python/common.cc
+++ b/cpp/src/arrow/python/common.cc
@@ -64,5 +64,27 @@ PyBuffer::~PyBuffer() {
   Py_XDECREF(obj_);
 }
 
+Status CheckPyError(StatusCode code) {
+  if (PyErr_Occurred()) {
+PyObject *exc_type, *exc_value, *traceback;
+PyErr_Fetch(&exc_type, &exc_value, &traceback);
+PyObjectStringify stringified(exc_value);
+Py_XDECREF(exc_type);
+Py_XDECREF(exc_value);
+Py_XDECREF(traceback);
+PyErr_Clear();
+
+// ARROW-866: in some esoteric cases, formatting exc_value can fail. This
+// was encountered when calling tell() on a socket file
+if (stringified.bytes != nullptr) {
+  std::string message(stringified.bytes);
+  return Status(code, message);
+} else {
+  return Status(code, "Error message was null");
+}
+  }
+  return Status::OK();
+}
+
 }  // namespace py
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/81be9c66/cpp/src/arrow/python/common.h
--
diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h
index 0211823..c5745a5 100644
--- a/cpp/src/arrow/python/common.h
+++ b/cpp/src/arrow/python/common.h
@@ -98,27 +98,26 @@ struct ARROW_EXPORT PyObjectStringify {
 if (PyUnicode_Check(obj)) {
   bytes_obj = PyUnicode_AsUTF8String(obj);
   tmp_obj.reset(bytes_obj);
+  bytes = PyBytes_AsString(bytes_obj);
+  size = PyBytes_GET_SIZE(bytes_obj);
+} else if (PyBytes_Check(obj)) {
+  bytes = PyBytes_AsString(obj);
+  size = PyBytes_GET_SIZE(obj);
 } else {
-  bytes_obj = obj;
+  bytes = nullptr;
+  size = -1;
 }
-bytes = PyBytes_AsString(bytes_obj);
-size = PyBytes_GET_SIZE(bytes_obj);
   }
 };
 
+Status CheckPyError(StatusCode code = StatusCode::UnknownError);
+
 // TODO(wesm): We can just let errors pass through. To be explored later
-#define RETURN_IF_PYERROR() \
-  if (PyErr_Occurred()) {   \
-PyObject *exc_type, *exc_value, *traceback; \
-PyErr_Fetch(&exc_type, &exc_value, &traceback); \
-PyObjectStringify stringified(exc_value);   \
-std::string message(stringified.bytes); \
-Py_DECREF(exc_type);\
-Py_XDECREF(exc_value);  \
-Py_XDECREF(traceback);  \
-PyErr_Clear();  

arrow git commit: ARROW-900: [Python] Fix UnboundLocalError in ParquetDatasetPiece.read

2017-04-27 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 81be9c667 -> 03dce9dca


ARROW-900: [Python] Fix UnboundLocalError in ParquetDatasetPiece.read

Author: Wes McKinney 

Closes #607 from wesm/ARROW-900 and squashes the following commits:

81f8394 [Wes McKinney] Fix UnboundLocalError in ParquetDatasetPiece.read


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/03dce9dc
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/03dce9dc
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/03dce9dc

Branch: refs/heads/master
Commit: 03dce9dcab1df587f2293decf49708f872aaad3d
Parents: 81be9c6
Author: Wes McKinney 
Authored: Thu Apr 27 18:11:44 2017 +0200
Committer: Uwe L. Korn 
Committed: Thu Apr 27 18:11:44 2017 +0200

--
 python/pyarrow/parquet.py|  3 +++
 python/pyarrow/tests/test_parquet.py | 14 ++
 2 files changed, 17 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/03dce9dc/python/pyarrow/parquet.py
--
diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index 94ad227..21359f1 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -208,6 +208,9 @@ class ParquetDatasetPiece(object):
 reader = self._open(open_file_func)
 elif file is not None:
 reader = ParquetFile(file)
+else:
+# try to read the local path
+reader = ParquetFile(self.path)
 
 if self.row_group is not None:
 table = reader.read_row_group(self.row_group, columns=columns,

http://git-wip-us.apache.org/repos/asf/arrow/blob/03dce9dc/python/pyarrow/tests/test_parquet.py
--
diff --git a/python/pyarrow/tests/test_parquet.py 
b/python/pyarrow/tests/test_parquet.py
index 8c446af..bb3a9ed 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -493,6 +493,20 @@ def test_read_single_row_group():
 
 
 @parquet
+def test_parquet_piece_read(tmpdir):
+df = _test_dataframe(1000)
+table = pa.Table.from_pandas(df)
+
+path = tmpdir.join('parquet_piece_read.parquet').strpath
+pq.write_table(table, path, version='2.0')
+
+piece1 = pq.ParquetDatasetPiece(path)
+
+result = piece1.read()
+assert result.equals(table)
+
+
+@parquet
 def test_parquet_piece_basics():
 path = '/baz.parq'
 



arrow git commit: ARROW-908: [GLib] Unify OutputStream files

2017-04-27 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 03dce9dca -> 14bec24c5


ARROW-908: [GLib] Unify OutputStream files

Author: Kouhei Sutou 

Closes #609 from kou/glib-unify-output-stream and squashes the following 
commits:

f62f869 [Kouhei Sutou] [GLib] Unify OutputStream files


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/14bec24c
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/14bec24c
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/14bec24c

Branch: refs/heads/master
Commit: 14bec24c584dc6fa05b84b6ed00d7474d62fd1d7
Parents: 03dce9d
Author: Kouhei Sutou 
Authored: Thu Apr 27 18:13:47 2017 +0200
Committer: Uwe L. Korn 
Committed: Thu Apr 27 18:13:47 2017 +0200

--
 c_glib/arrow-glib/Makefile.am |   3 -
 c_glib/arrow-glib/arrow-glib.h|   1 -
 c_glib/arrow-glib/arrow-glib.hpp  |   1 -
 c_glib/arrow-glib/file-output-stream.cpp  | 231 -
 c_glib/arrow-glib/file-output-stream.h|  72 
 c_glib/arrow-glib/file-output-stream.hpp  |  28 ---
 c_glib/arrow-glib/output-stream.cpp   | 201 -
 c_glib/arrow-glib/output-stream.h |  52 ++
 c_glib/arrow-glib/output-stream.hpp   |   5 +-
 c_glib/doc/reference/arrow-glib-docs.sgml |   3 +-
 10 files changed, 256 insertions(+), 341 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/14bec24c/c_glib/arrow-glib/Makefile.am
--
diff --git a/c_glib/arrow-glib/Makefile.am b/c_glib/arrow-glib/Makefile.am
index bbc1101..54fb7f8 100644
--- a/c_glib/arrow-glib/Makefile.am
+++ b/c_glib/arrow-glib/Makefile.am
@@ -59,7 +59,6 @@ libarrow_glib_la_headers =\
 libarrow_glib_la_headers +=\
file.h  \
file-mode.h \
-   file-output-stream.h\
input-stream.h  \
memory-mapped-file.h\
output-stream.h \
@@ -102,7 +101,6 @@ libarrow_glib_la_sources =  \
 libarrow_glib_la_sources +=\
file.cpp\
file-mode.cpp   \
-   file-output-stream.cpp  \
input-stream.cpp\
memory-mapped-file.cpp  \
output-stream.cpp   \
@@ -137,7 +135,6 @@ libarrow_glib_la_cpp_headers =  \
 libarrow_glib_la_cpp_headers +=\
file.hpp\
file-mode.hpp   \
-   file-output-stream.hpp  \
input-stream.hpp\
memory-mapped-file.hpp  \
output-stream.hpp   \

http://git-wip-us.apache.org/repos/asf/arrow/blob/14bec24c/c_glib/arrow-glib/arrow-glib.h
--
diff --git a/c_glib/arrow-glib/arrow-glib.h b/c_glib/arrow-glib/arrow-glib.h
index efff571..e88b66b 100644
--- a/c_glib/arrow-glib/arrow-glib.h
+++ b/c_glib/arrow-glib/arrow-glib.h
@@ -35,7 +35,6 @@
 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 

http://git-wip-us.apache.org/repos/asf/arrow/blob/14bec24c/c_glib/arrow-glib/arrow-glib.hpp
--
diff --git a/c_glib/arrow-glib/arrow-glib.hpp b/c_glib/arrow-glib/arrow-glib.hpp
index d6ef370..339773f 100644
--- a/c_glib/arrow-glib/arrow-glib.hpp
+++ b/c_glib/arrow-glib/arrow-glib.hpp
@@ -40,7 +40,6 @@
 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 

http://git-wip-us.apache.org/repos/asf/arrow/blob/14bec24c/c_glib/arrow-glib/file-output-stream.cpp
--
diff --git a/c_glib/arrow-glib/file-output-stream.cpp 
b/c_glib/arrow-glib/file-output-stream.cpp
deleted file mode 100644
index e1e1e27..000
--- a/c_glib/arrow-glib/file-output-stream.cpp
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distribu

arrow git commit: ARROW-923: Changelog generation Python script, add 0.1.0 and 0.2.0 changelog

2017-05-04 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 928b63f40 -> 2c3e111d4


ARROW-923: Changelog generation Python script, add 0.1.0 and 0.2.0 changelog

Author: Wes McKinney 

Closes #640 from wesm/ARROW-923 and squashes the following commits:

289d3cd [Wes McKinney] Add license header
96f55f8 [Wes McKinney] Add option to write Markdown JIRA links (for website)
6c808da [Wes McKinney] Changelog Python script, add 0.1.0 and 0.2.0 changelog


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/2c3e111d
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/2c3e111d
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/2c3e111d

Branch: refs/heads/master
Commit: 2c3e111d45c056d429cef312533c9f3f96b08ae8
Parents: 928b63f
Author: Wes McKinney 
Authored: Fri May 5 08:18:53 2017 +0200
Committer: Uwe L. Korn 
Committed: Fri May 5 08:18:53 2017 +0200

--
 CHANGELOG.md  | 403 +
 dev/make_changelog.py |  85 ++
 2 files changed, 488 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/2c3e111d/CHANGELOG.md
--
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 000..3d54838
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,403 @@
+
+
+# Apache Arrow 0.2.0 (15 February 2017)
+
+## Bug
+
+* ARROW-112 - [C++]  Style fix for constants/enums
+* ARROW-202 - [C++] Integrate with appveyor ci for windows support and get 
arrow building on windows
+* ARROW-220 - [C++] Build conda artifacts in a build environment with better 
cross-linux ABI compatibility
+* ARROW-224 - [C++] Address static linking of boost dependencies
+* ARROW-230 - Python: Do not name modules like native ones (i.e. rename 
pyarrow.io)
+* ARROW-239 - [Python] HdfsFile.read called with no arguments should read 
remainder of file
+* ARROW-261 - [C++] Refactor BinaryArray/StringArray classes to not inherit 
from ListArray
+* ARROW-275 - Add tests for UnionVector in Arrow File
+* ARROW-294 - [C++] Do not use fopen / fclose / etc. methods for memory mapped 
file implementation
+* ARROW-322 - [C++] Do not build HDFS IO interface optionally
+* ARROW-323 - [Python] Opt-in to PyArrow parquet build rather than skipping 
silently on failure
+* ARROW-334 - [Python] OS X rpath issues on some configurations
+* ARROW-337 - UnionListWriter.list() is doing more than it should, this can 
cause data corruption
+* ARROW-339 - Make merge_arrow_pr script work with Python 3
+* ARROW-340 - [C++] Opening a writeable file on disk that already exists does 
not truncate to zero
+* ARROW-342 - Set Python version on release
+* ARROW-345 - libhdfs integration doesn't work for Mac
+* ARROW-346 - Python API Documentation
+* ARROW-348 - [Python] CMake build type should be configurable on the command 
line
+* ARROW-349 - Six is missing as a requirement in the python setup.py
+* ARROW-351 - Time type has no unit
+* ARROW-354 - Connot compare an array of empty strings to another
+* ARROW-357 - Default Parquet chunk_size of 64k is too small
+* ARROW-358 - [C++] libhdfs can be in non-standard locations in some Hadoop 
distributions
+* ARROW-362 - Python: Calling to_pandas on a table read from Parquet leaks 
memory
+* ARROW-371 - Python: Table with null timestamp becomes float in pandas
+* ARROW-375 - columns parameter in parquet.read_table() raises KeyError for 
valid column
+* ARROW-384 - Align Java and C++ RecordBatch data and metadata layout
+* ARROW-386 - [Java] Respect case of struct / map field names
+* ARROW-387 - [C++] arrow::io::BufferReader does not permit shared memory 
ownership in zero-copy reads
+* ARROW-390 - C++: CMake fails on json-integration-test with 
ARROW_BUILD_TESTS=OFF
+* ARROW-392 - Fix string/binary integration tests
+* ARROW-393 - [JAVA] JSON file reader fails to set the buffer size on String 
data vector
+* ARROW-395 - Arrow file format writes record batches in reverse order.
+* ARROW-398 - [Java] Java file format requires bitmaps of all 1's to be 
written when there are no nulls
+* ARROW-399 - [Java] ListVector.loadFieldBuffers ignores the ArrowFieldNode 
length metadata
+* ARROW-400 - [Java] ArrowWriter writes length 0 for Struct types
+* ARROW-401 - [Java] Floating point vectors should do an approximate 
comparison in integration tests
+* ARROW-402 - [Java] "refCnt gone negative" error in integration tests
+* ARROW-403 - [JAVA] UnionVector: Creating a transfer pair doesn't transfer 
the schema to destination vector
+* ARROW-404 - [Python] Closing an HdfsClient while there are still open file 
handles results in a crash
+* ARROW-405 - [C++] Be less stringent about finding include/hdfs.h in 
HADOOP_HOME
+* ARROW-406 - [C++] Large HDFS reads must utilize the set file buffer size 
w

arrow git commit: ARROW-929: Remove KEYS file from git

2017-05-06 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 5af8069d2 -> 995317ae9


ARROW-929: Remove KEYS file from git

I have updated the SVN KEYS file at 
https://dist.apache.org/repos/dist/release/arrow/KEYS

Author: Wes McKinney 

Closes #646 from wesm/ARROW-929 and squashes the following commits:

8ad3c0a [Wes McKinney] Remove KEYS file from git


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/995317ae
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/995317ae
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/995317ae

Branch: refs/heads/master
Commit: 995317ae9ecb54bc1aec02f7c7e133ab61ac387f
Parents: 5af8069
Author: Wes McKinney 
Authored: Sat May 6 12:56:46 2017 +0200
Committer: Uwe L. Korn 
Committed: Sat May 6 12:56:46 2017 +0200

--
 KEYS | 239 --
 1 file changed, 239 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/995317ae/KEYS
--
diff --git a/KEYS b/KEYS
deleted file mode 100644
index 05862c8..000
--- a/KEYS
+++ /dev/null
@@ -1,239 +0,0 @@
-This file contains the PGP keys of various developers.
-
-Users: pgp < KEYS
-  gpg --import KEYS
-Developers:
-  pgp -kxa  and append it to this file.
-  (pgpk -ll  && pgpk -xa ) >> this file.
-  (gpg --list-sigs 
-&& gpg --armor --export ) >> this file.
-
-pub   2048R/7AE7E47B 2013-04-10 [expires: 2017-04-10]
-uid  Julien Le Dem 
-sig 37AE7E47B 2013-04-10  Julien Le Dem 
-sig  D3924CCD 2014-09-08  Ryan Blue (CODE SIGNING KEY) 

-sig  71F0F13B 2014-09-08  Tianshuo Deng 
-sub   2048R/03C4E111 2013-04-10 [expires: 2017-04-10]
-sig  7AE7E47B 2013-04-10  Julien Le Dem 
-
-pub   4096R/1679D194 2016-09-19 [expires: 2020-09-19]
-uid  Julien Le Dem 
-sig 31679D194 2016-09-19  Julien Le Dem 
-sub   4096R/61C65CFD 2016-09-19 [expires: 2020-09-19]
-sig  1679D194 2016-09-19  Julien Le Dem 
-
--BEGIN PGP PUBLIC KEY BLOCK-
-
-mQENBFFll5kBCACk/tTfHSxUT2W9phkLQzJs6AV4GElqcFo7ZNE1DwAB/gk8uJwR
-Po7WYaO2/91hNu4y1SooDRGnqz0FvZzOA8sW/KujK13MMqmGYb1jJdwPjNq6KOK/
-3EygCxq9DxSS+TILvq3NsFgYGdopdJxRl9zh15Po/3c/jNMPtnGZzP39EsfMhgIS
-YwwiEHPVPB00Q0IGRQMhtJqh1AQ5KrxqK4+uEwwu3Sb52DpBjfgffl8GMGKfH/tk
-VvJ6L+7rPXtNqho5b7i8379//Bn9xwgO2YCtjPoZMVg37M6f6hVWMr3fFmX/OXgU
-UWwLGOTAeuLKWkikFJr5y0rzDaF2qcD9t7wfABEBAAG0IEp1bGllbiBMZSBEZW0g
-PGp1bGllbkBsZWRlbS5uZXQ+iQE9BBMBCgAnBQJRZZeZAhsvBQkHhh+ABQsJCAcD
-BRUKCQgLBRYCAwEAAh4BAheAAAoJEJfX6GR65+R7au4IAIfZVA9eWBZn9NuaWX7L
-Xi+xDtzrfUrsWZxMIP6zkQsIspiX9AThGv3zDn+Tpfw7svV1QfUQX0LHbwMMYqq+
-mRJB/kqYutpLxw7h63zrWR2k2Sdzvole2c3Rfk1vblIdWZk7ArLSivqTk/oGwr7d
-MejvOMmKSzqW0vQF6dNbYerLOiqPr4mKqONWm4nOLZEBzjE3IfbK3gNBSFq+92jV
-iWY6ozqAxydYafNUSZRrcniYskxd9JCSSLZiIZW3X9lToA/74LjpPbmzvQtkH68D
-0EnC1mkPTKCA4r+CLb3a9GJ9Surg2T0OptyPHsXipgViVryXgopD2odA3fh9SY5l
-Ee+JAhwEEAECAAYFAlQN+kQACgkQ/LPL2dOSTM3+OA//dYj9kiZhZNVb6hMfrubn
-OjTmY8Hcax8G+aJWxRrGE8HrCUjEJ4NThK523+fmol1PxNWsguljlsZvJ189YPOh
-weDJzNmKwhLntq/uBgtJyWBN1v9bUzkR9Ud+UdD1tPbNj7sNiIQE1ZqWMxra3sq/
-gcodVgqSADGgjKO9tenQhWvQXxBR55MOqZbxnyazRPEYS0mkN0A0DwtG82tHNRL7
-Z3vs/kG5hoW3kYifCZn5pW3wKtfIY5JH7usYOzA86p7GH4hOfO+dzhDANH+C+u9O
-ZRbCdUE8oEp3fAWY9+3VzlO5ixpFOeHGfbSJp44Jv6wUOxNwRmD/gk+DxVrsS/Yn
-rLFCZgDHgkFHGJ1D7PnxTy4qtwGasYxWYJOUiaAJbOvRa8nbhan2/wsrgnJTbXAH
-+7v5tFfCV77Po//V0fojYZNvbkEO8/yRpQL+uKiVRaRD5dMfHRb31OR0A59ssYX9
-63QpBEof/OeELC0VowG+KCc+4CfSMmAGnQMdEhMAUPz+79nJw7ijeF5C82Z5mQof
-v+nf+kdqr80UbG+RoODKtlHFETxJ5STQe6uiPOfvb+EADPA0cZ34u5tD3Z+SMV1k
-Gf7Jxi45jmkn9Z9AkVj6KgdDeSjV7EkRiY0pm43Vvd6WvV5t54cgJcwXrjG+h03f
-65w7F+KBrh7YAcUvrf4JeXKJARwEEAECAAYFAlQN/XwACgkQfNgniXHw8TtU9Af/
-b9CYFtsG9q1ZbnV9SChxjLLUipGsmKTUjCnz7oiZvJJ04e+0np1NQJKJbthGfEDM
-eLt1WiYpTDu66zAuLDA7ACcbv3UUXXsUTEfN76J+9DJHrtK1soHGLkKLW2hZeWKp
-PKya/HRF4Rv3/aAwWtRjEuQr9pLt/wAOedV6mrpyTngOKQn97tzo/yUeDNG7be8A
-xtUStQY/2zJmHkaLeULKOspgUchBQ1S+M4q46dE+tyel47BLyHIECqk/geLOlZmh
-lo6TtVgnBSXC5SqMwh5pz/P5ntQ8FVLedGQI9dwVhxbjoo5DNB/6ntfbwkheiak1
-CFBm0ZVPJjX7F2XFcq7VCrkBDQRRZZeZAQgA4eixR7xHvnTyF12CYLsnFE8x1tI+
-78FCjKm0n1YPCzEYa70bnnZmpW4KCwO0flN4RhhP+g2KRCCov2ZH7bxvhTxe4n/j
-T6I/+61Fpba4I7qExYqX+tylyjUKhynLcWCbvRQnyjOMTaLbMVrftV+ATVmj7fi0
-PdzRW/7QvCSrDsMFtTSaNBdeMbzptpoXAxTgVZOIoHbWOIfovN1uPnFItrmNnKXX
-KGyDPX2s2KCz10G1lrw0l9tqDg+BtqE9/xCtqWoZJMnT8jAJZeJ0V37R1jDBDEHK
-AfPOUKNYf5GWxJeCWYzL77ve8VdItKwPhtjW7zFKuyrqiBHE40fgTLKvNQARAQAB
-iQJEBBgBCgAPBQJRZZeZAhsuBQkHhh+AASkJEJfX6GR65+R7wF0gBBkBCgAGBQJR
-ZZeZAAoJECrRWHEDxOERzmEIAOCrfYGPdLyzBn/xAdymx2FaNTS48ybMIGjcu6Od
-nKzvgBJObLPQf0+WKhkbQf2HEHYinBVpX8K4dNY9RhzIRbQNhCWY5E5/leI/nQ9O
-ZBUMpT8Gw5saj0YtF3By4E9ywxNWiAyX2SAHjPv/lub0PEaUiWWe6s9MaX5fp71C
-TupkdElpxucEpVefUaUOSMQ2ecOniCh/9ltPLYcjwnC1ti+Et8/cAK2N

arrow git commit: ARROW-953: Use conda-forge cmake, curl in CI toolchain

2017-05-06 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 995317ae9 -> 8febd03f8


ARROW-953: Use conda-forge cmake, curl in CI toolchain

Author: Wes McKinney 

Closes #645 from wesm/ARROW-953 and squashes the following commits:

4f719c1 [Wes McKinney] Use conda-forge cmake, curl in CI toolchain


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/8febd03f
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/8febd03f
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/8febd03f

Branch: refs/heads/master
Commit: 8febd03f862eab0ca83871e9ff8c5062550b646d
Parents: 995317a
Author: Wes McKinney 
Authored: Sat May 6 12:57:14 2017 +0200
Committer: Uwe L. Korn 
Committed: Sat May 6 12:57:14 2017 +0200

--
 .travis.yml| 2 --
 ci/travis_install_conda.sh | 4 ++--
 ci/travis_script_python.sh | 4 ++--
 3 files changed, 4 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/8febd03f/.travis.yml
--
diff --git a/.travis.yml b/.travis.yml
index 19e71ae..d821b5a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,13 +4,11 @@ addons:
   apt:
 sources:
 - ubuntu-toolchain-r-test
-- kalakris-cmake
 packages:
 - gcc-4.9   # Needed for C++11
 - g++-4.9   # Needed for C++11
 - gdb
 - ccache
-- cmake
 - valgrind
 - libboost-dev
 - libboost-filesystem-dev

http://git-wip-us.apache.org/repos/asf/arrow/blob/8febd03f/ci/travis_install_conda.sh
--
diff --git a/ci/travis_install_conda.sh b/ci/travis_install_conda.sh
index 7d185ee..369820b 100644
--- a/ci/travis_install_conda.sh
+++ b/ci/travis_install_conda.sh
@@ -40,7 +40,7 @@ conda config --add channels 
https://repo.continuum.io/pkgs/free
 conda config --add channels conda-forge
 conda info -a
 
-conda install --yes conda-build jinja2 anaconda-client
-
 # faster builds, please
 conda install -y nomkl
+
+conda install --y conda-build jinja2 anaconda-client cmake curl

http://git-wip-us.apache.org/repos/asf/arrow/blob/8febd03f/ci/travis_script_python.sh
--
diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh
index c1426da..20b0f2a 100755
--- a/ci/travis_script_python.sh
+++ b/ci/travis_script_python.sh
@@ -23,7 +23,7 @@ export PARQUET_HOME=$TRAVIS_BUILD_DIR/parquet-env
 
 build_parquet_cpp() {
   export PARQUET_ARROW_VERSION=$(git rev-parse HEAD)
-  conda create -y -q -p $PARQUET_HOME python=3.6
+  conda create -y -q -p $PARQUET_HOME python=3.6 cmake curl
   source activate $PARQUET_HOME
 
   # In case some package wants to download the MKL
@@ -89,7 +89,7 @@ python_version_tests() {
   export ARROW_HOME=$TRAVIS_BUILD_DIR/arrow-install-$PYTHON_VERSION
   export LD_LIBRARY_PATH=$ARROW_HOME/lib:$PARQUET_HOME/lib
 
-  conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION
+  conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION cmake curl
   source activate $CONDA_ENV_DIR
 
   python --version



arrow git commit: ARROW-958: [Python] Fix conda source build instructions

2017-05-07 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master c48f6493f -> d7a2a1e18


ARROW-958: [Python] Fix conda source build instructions

Author: Wes McKinney 

Closes #653 from wesm/ARROW-958 and squashes the following commits:

88c3c1d [Wes McKinney] Fix conda build instructions


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/d7a2a1e1
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/d7a2a1e1
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/d7a2a1e1

Branch: refs/heads/master
Commit: d7a2a1e18457acb8a18cfcb7fbb3c3ba41543d4a
Parents: c48f649
Author: Wes McKinney 
Authored: Sun May 7 17:48:18 2017 +0200
Committer: Uwe L. Korn 
Committed: Sun May 7 17:48:18 2017 +0200

--
 python/doc/source/development.rst | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/d7a2a1e1/python/doc/source/development.rst
--
diff --git a/python/doc/source/development.rst 
b/python/doc/source/development.rst
index 01add11..440c1c4 100644
--- a/python/doc/source/development.rst
+++ b/python/doc/source/development.rst
@@ -93,8 +93,11 @@ about our build toolchain:
 .. code-block:: shell
 
export ARROW_BUILD_TYPE=release
+
export ARROW_BUILD_TOOLCHAIN=$CONDA_PREFIX
export PARQUET_BUILD_TOOLCHAIN=$CONDA_PREFIX
+   export ARROW_HOME=$CONDA_PREFIX
+   export PARQUET_HOME=$CONDA_PREFIX
 
 Now build and install the Arrow C++ libraries:
 
@@ -104,7 +107,7 @@ Now build and install the Arrow C++ libraries:
pushd arrow/cpp/build
 
cmake -DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE \
- -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX \
+ -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
  -DARROW_PYTHON=on \
  -DARROW_BUILD_TESTS=OFF \
  ..
@@ -121,7 +124,7 @@ toolchain:
pushd parquet-cpp/build
 
cmake -DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE \
- -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX \
+ -DCMAKE_INSTALL_PREFIX=$PARQUET_HOME \
  -DPARQUET_BUILD_BENCHMARKS=off \
  -DPARQUET_BUILD_EXECUTABLES=off \
  -DPARQUET_ZLIB_VENDORED=off \



arrow git commit: ARROW-967: [GLib] Support initializing array with buffer

2017-05-08 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master cb5e7b6fa -> 3d1983171


ARROW-967: [GLib] Support initializing array with buffer

It's for zero-copy data conversion.

Author: Kouhei Sutou 

Closes #657 from kou/glib-array-new-with-buffer and squashes the following 
commits:

57f4266 [Kouhei Sutou] [GLib] Support initializing array with buffer


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/3d198317
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/3d198317
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/3d198317

Branch: refs/heads/master
Commit: 3d19831717297e91a74e008d44c71695088b39fd
Parents: cb5e7b6
Author: Kouhei Sutou 
Authored: Mon May 8 20:10:12 2017 +0200
Committer: Uwe L. Korn 
Committed: Mon May 8 20:10:12 2017 +0200

--
 c_glib/arrow-glib/array.cpp   | 600 +++--
 c_glib/arrow-glib/array.h |  80 +
 c_glib/arrow-glib/buffer.cpp  |   3 +
 c_glib/test/helper/buildable.rb   |  55 +++
 c_glib/test/test-binary-array.rb  |  13 +
 c_glib/test/test-boolean-array.rb |  10 +
 c_glib/test/test-double-array.rb  |  10 +
 c_glib/test/test-float-array.rb   |  10 +
 c_glib/test/test-int16-array.rb   |  10 +
 c_glib/test/test-int32-array.rb   |  10 +
 c_glib/test/test-int64-array.rb   |  10 +
 c_glib/test/test-int8-array.rb|  10 +
 c_glib/test/test-list-array.rb|  15 +
 c_glib/test/test-string-array.rb  |  13 +
 c_glib/test/test-struct-array.rb  |  33 ++
 c_glib/test/test-uint16-array.rb  |  10 +
 c_glib/test/test-uint32-array.rb  |  10 +
 c_glib/test/test-uint64-array.rb  |  10 +
 c_glib/test/test-uint8-array.rb   |  10 +
 19 files changed, 889 insertions(+), 33 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/3d198317/c_glib/arrow-glib/array.cpp
--
diff --git a/c_glib/arrow-glib/array.cpp b/c_glib/arrow-glib/array.cpp
index 8a78984..8bc6ea9 100644
--- a/c_glib/arrow-glib/array.cpp
+++ b/c_glib/arrow-glib/array.cpp
@@ -39,73 +39,89 @@ G_BEGIN_DECLS
  * #GArrowArray is a base class for all array classes such as
  * #GArrowBooleanArray.
  *
- * All array classes are immutable. You need to use array builder
- * class such as #GArrowBooleanArrayBuilder to create a new array
- * except #GArrowNullArray.
+ * All array classes are immutable. You need to use binary data or
+ * array builder to create a new array except #GArrowNullArray. If you
+ * have binary data that uses Arrow format data, you can create a new
+ * array with the binary data as #GArrowBuffer object. If you don't
+ * have binary data, you can use array builder class such as
+ * #GArrowBooleanArrayBuilder that creates Arrow format data
+ * internally and a new array from the data.
  *
  * #GArrowNullArray is a class for null array. It can store zero or
  * more null values. You need to specify an array length to create a
  * new array.
  *
  * #GArrowBooleanArray is a class for binary array. It can store zero
- * or more boolean data. You need to use #GArrowBooleanArrayBuilder to
- * create a new array.
+ * or more boolean data. If you don't have Arrow format data, you need
+ * to use #GArrowBooleanArrayBuilder to create a new array.
  *
  * #GArrowInt8Array is a class for 8-bit integer array. It can store
- * zero or more 8-bit integer data. You need to use
- * #GArrowInt8ArrayBuilder to create a new array.
+ * zero or more 8-bit integer data. If you don't have Arrow format
+ * data, you need to use #GArrowInt8ArrayBuilder to create a new
+ * array.
  *
  * #GArrowUInt8Array is a class for 8-bit unsigned integer array. It
- * can store zero or more 8-bit unsigned integer data. You need to use
- * #GArrowUInt8ArrayBuilder to create a new array.
+ * can store zero or more 8-bit unsigned integer data. If you don't
+ * have Arrow format data, you need to use #GArrowUInt8ArrayBuilder to
+ * create a new array.
  *
  * #GArrowInt16Array is a class for 16-bit integer array. It can store
- * zero or more 16-bit integer data. You need to use
- * #GArrowInt16ArrayBuilder to create a new array.
+ * zero or more 16-bit integer data. If you don't have Arrow format
+ * data, you need to use #GArrowInt16ArrayBuilder to create a new
+ * array.
  *
  * #GArrowUInt16Array is a class for 16-bit unsigned integer array. It
- * can store zero or more 16-bit unsigned integer data. You need to use
- * #GArrowUInt16ArrayBuilder to create a new array.
+ * can store zero or more 16-bit unsigned integer data. If you don't
+ * have Arrow format data, you need to use #GArrowUInt16ArrayBuilder
+ * to create a new array.
  *
  * #GArrowInt32Array is a class for 32-bit integer array. It can store
- * zero or more 32-bit integer data. You need to use
- * #GArrowInt32ArrayBuil

arrow git commit: ARROW-954: Flag for compiling Arrow with header-only boost

2017-05-09 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master ccf83f485 -> 670612e6f


ARROW-954: Flag for compiling Arrow with header-only boost

Author: Philipp Moritz 

Closes #647 from pcmoritz/boost-header-only and squashes the following commits:

3605341 [Philipp Moritz] run find_package(Boost) in header-only mode
445de50 [Philipp Moritz] bring back tests
3f1ef1f [Philipp Moritz] reintroduce ARROW_HDFS
a047ad4 [Philipp Moritz] deactivate json-integration-test for header-only boost
b2b2015 [Philipp Moritz] make it possible to compile arrow with header-only 
boost


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/670612e6
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/670612e6
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/670612e6

Branch: refs/heads/master
Commit: 670612e6fdf699486641ed0d39d22257eb8acdb2
Parents: ccf83f4
Author: Philipp Moritz 
Authored: Tue May 9 16:01:05 2017 +0200
Committer: Uwe L. Korn 
Committed: Tue May 9 16:01:05 2017 +0200

--
 cpp/CMakeLists.txt   | 75 +++
 cpp/src/arrow/io/CMakeLists.txt  |  4 +-
 cpp/src/arrow/ipc/CMakeLists.txt | 29 --
 3 files changed, 69 insertions(+), 39 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/670612e6/cpp/CMakeLists.txt
--
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 72e5ea9..2146379 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -101,6 +101,10 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL 
"${CMAKE_CURRENT_SOURCE_DIR}")
 "Rely on jemalloc shared libraries where relevant"
 ON)
 
+  option(ARROW_HDFS
+"Build the Arrow HDFS bridge"
+ON)
+
   option(ARROW_BOOST_USE_SHARED
 "Rely on boost shared libraries where relevant"
 ON)
@@ -136,6 +140,12 @@ if(NOT ARROW_BUILD_BENCHMARKS)
   set(NO_BENCHMARKS 1)
 endif()
 
+if(ARROW_HDFS)
+  set(ARROW_BOOST_HEADER_ONLY 0)
+else()
+  set(ARROW_BOOST_HEADER_ONLY 1)
+endif()
+
 include(BuildUtils)
 
 
@@ -437,44 +447,54 @@ if (ARROW_BOOST_USE_SHARED)
 add_definitions(-DBOOST_ALL_DYN_LINK)
   endif()
 
-  find_package(Boost COMPONENTS system filesystem REQUIRED)
-  if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
-set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
-set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
+  if (ARROW_BOOST_HEADER_ONLY)
+find_package(Boost)
   else()
-set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
-set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
+find_package(Boost COMPONENTS system filesystem REQUIRED)
+if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
+  set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
+  set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
+else()
+  set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
+  set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
+endif()
+set(BOOST_SYSTEM_LIBRARY boost_system_shared)
+set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_shared)
   endif()
-  set(BOOST_SYSTEM_LIBRARY boost_system_shared)
-  set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_shared)
 else()
   # Find static boost headers and libs
   # TODO Differentiate here between release and debug builds
   set(Boost_USE_STATIC_LIBS ON)
-  find_package(Boost COMPONENTS system filesystem regex REQUIRED)
-  if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
-set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
-set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
+  if (ARROW_BOOST_HEADER_ONLY)
+find_package(Boost)
   else()
-set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
-set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
+find_package(Boost COMPONENTS system filesystem regex REQUIRED)
+if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
+  set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
+  set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
+else()
+  set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
+  set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
+endif()
+set(BOOST_SYSTEM_LIBRARY boost_system_static)
+set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static)
   endif()
-  set(BOOST_SYSTEM_LIBRARY boost_system_static)
-  set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static)
 endif()
 
 message(STATUS "Boost includ

arrow git commit: ARROW-1022: [Python] Add multithreaded read option to read_feather

2017-05-14 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 5739e04b3 -> d8d3d8435


ARROW-1022: [Python] Add multithreaded read option to read_feather

Author: Wes McKinney 

Closes #682 from wesm/ARROW-1022 and squashes the following commits:

8fd241e [Wes McKinney] Add multithreaded read option to read_feather


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/d8d3d843
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/d8d3d843
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/d8d3d843

Branch: refs/heads/master
Commit: d8d3d84354d827e45c8267cd05aecd2aa36cf60b
Parents: 5739e04
Author: Wes McKinney 
Authored: Sun May 14 17:23:26 2017 +0200
Committer: Uwe L. Korn 
Committed: Sun May 14 17:23:26 2017 +0200

--
 python/pyarrow/feather.py| 10 ++
 python/pyarrow/tests/test_feather.py | 11 +--
 2 files changed, 15 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/d8d3d843/python/pyarrow/feather.py
--
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index 3754aec..34783a7 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -37,7 +37,7 @@ class FeatherReader(ext.FeatherReader):
 self.source = source
 self.open(source)
 
-def read(self, columns=None):
+def read(self, columns=None, nthreads=1):
 if columns is not None:
 column_set = set(columns)
 else:
@@ -53,7 +53,7 @@ class FeatherReader(ext.FeatherReader):
 names.append(name)
 
 table = Table.from_arrays(columns, names=names)
-return table.to_pandas()
+return table.to_pandas(nthreads=nthreads)
 
 
 class FeatherWriter(object):
@@ -118,7 +118,7 @@ def write_feather(df, dest):
 raise
 
 
-def read_feather(source, columns=None):
+def read_feather(source, columns=None, nthreads=1):
 """
 Read a pandas.DataFrame from Feather format
 
@@ -128,10 +128,12 @@ def read_feather(source, columns=None):
 columns : sequence, optional
 Only read a specific set of columns. If not provided, all columns are
 read
+nthreads : int, default 1
+Number of CPU threads to use when reading to pandas.DataFrame
 
 Returns
 ---
 df : pandas.DataFrame
 """
 reader = FeatherReader(source)
-return reader.read(columns=columns)
+return reader.read(columns=columns, nthreads=nthreads)

http://git-wip-us.apache.org/repos/asf/arrow/blob/d8d3d843/python/pyarrow/tests/test_feather.py
--
diff --git a/python/pyarrow/tests/test_feather.py 
b/python/pyarrow/tests/test_feather.py
index 69c32be..287e0da 100644
--- a/python/pyarrow/tests/test_feather.py
+++ b/python/pyarrow/tests/test_feather.py
@@ -61,7 +61,8 @@ class TestFeatherReader(unittest.TestCase):
 return counts
 
 def _check_pandas_roundtrip(self, df, expected=None, path=None,
-columns=None, null_counts=None):
+columns=None, null_counts=None,
+nthreads=1):
 if path is None:
 path = random_path()
 
@@ -70,7 +71,7 @@ class TestFeatherReader(unittest.TestCase):
 if not os.path.exists(path):
 raise Exception('file not written')
 
-result = read_feather(path, columns)
+result = read_feather(path, columns, nthreads=nthreads)
 if expected is None:
 expected = df
 
@@ -293,6 +294,12 @@ class TestFeatherReader(unittest.TestCase):
 df = pd.DataFrame({'strings': [''] * 10})
 self._check_pandas_roundtrip(df)
 
+def test_multithreaded_read(self):
+data = {'c{0}'.format(i): [''] * 10
+for i in range(100)}
+df = pd.DataFrame(data)
+self._check_pandas_roundtrip(df, nthreads=4)
+
 def test_nan_as_null(self):
 # Create a nan that is not numpy.nan
 values = np.array(['foo', np.nan, np.nan * 2, 'bar'] * 10)



svn commit: r19815 - in /release/arrow: arrow-0.2.0/ arrow-0.3.0/

2017-05-27 Thread uwe
Author: uwe
Date: Sat May 27 15:00:04 2017
New Revision: 19815

Log:
Remove old Arrow releases

Removed:
release/arrow/arrow-0.2.0/
release/arrow/arrow-0.3.0/



arrow git commit: ARROW-1070: [C++] Use physical types for Feather date/time types

2017-06-03 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 0576ff55c -> 931a87757


ARROW-1070: [C++] Use physical types for Feather date/time types

This fixes a regression from Feather 0.3.1. Closes 
https://github.com/wesm/feather/issues/304

Author: Wes McKinney 

Closes #725 from wesm/ARROW-1070 and squashes the following commits:

fe1a5ba [Wes McKinney] Use physical types for date/time types


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/931a8775
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/931a8775
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/931a8775

Branch: refs/heads/master
Commit: 931a877570928486f5471f9850c38d68c1e18617
Parents: 0576ff5
Author: Wes McKinney 
Authored: Sat Jun 3 17:14:36 2017 +0200
Committer: Uwe L. Korn 
Committed: Sat Jun 3 17:14:36 2017 +0200

--
 cpp/src/arrow/ipc/feather.cc | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/931a8775/cpp/src/arrow/ipc/feather.cc
--
diff --git a/cpp/src/arrow/ipc/feather.cc b/cpp/src/arrow/ipc/feather.cc
index 7d0abdd..023e7ec 100644
--- a/cpp/src/arrow/ipc/feather.cc
+++ b/cpp/src/arrow/ipc/feather.cc
@@ -37,6 +37,7 @@
 #include "arrow/status.h"
 #include "arrow/table.h"
 #include "arrow/util/bit-util.h"
+#include "arrow/util/logging.h"
 
 namespace arrow {
 namespace ipc {
@@ -467,16 +468,15 @@ fbs::Type ToFlatbufferType(Type::type type) {
 case Type::BINARY:
   return fbs::Type_BINARY;
 case Type::DATE32:
-  return fbs::Type_DATE;
+  return fbs::Type_INT32;
 case Type::TIMESTAMP:
-  return fbs::Type_TIMESTAMP;
+  return fbs::Type_INT64;
 case Type::TIME32:
-  return fbs::Type_TIME;
+  return fbs::Type_INT32;
 case Type::TIME64:
-  return fbs::Type_TIME;
-case Type::DICTIONARY:
-  return fbs::Type_CATEGORY;
+  return fbs::Type_INT64;
 default:
+  DCHECK(false) << "Cannot reach this code";
   break;
   }
   // prevent compiler warning



arrow git commit: ARROW-1082: [GLib] Add CI on macOS

2017-06-03 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 931a87757 -> a81aefbd8


ARROW-1082: [GLib] Add CI on macOS

Author: Kouhei Sutou 

Closes #722 from kou/glib-travis-macos and squashes the following commits:

1a08aa0 [Kouhei Sutou] [GLib] Don't run Torch example on macOS
651f9ff [Kouhei Sutou] [GLib] Add CI on macOS


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/a81aefbd
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/a81aefbd
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/a81aefbd

Branch: refs/heads/master
Commit: a81aefbd8cdcc081dbe8669cdaaf459a1c8127b4
Parents: 931a877
Author: Kouhei Sutou 
Authored: Sat Jun 3 17:17:41 2017 +0200
Committer: Uwe L. Korn 
Committed: Sat Jun 3 17:17:41 2017 +0200

--
 .travis.yml   | 12 +-
 ci/travis_before_script_c_glib.sh | 42 ++
 ci/travis_env_common.sh   |  4 
 ci/travis_script_c_glib.sh| 19 ++-
 4 files changed, 56 insertions(+), 21 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/a81aefbd/.travis.yml
--
diff --git a/.travis.yml b/.travis.yml
index 36e3c18..7a8b16c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -17,7 +17,6 @@ addons:
 - gtk-doc-tools
 - autoconf-archive
 - libgirepository1.0-dev
-- gir1.2-gudev-1.0
 services:
   - docker
 
@@ -91,6 +90,17 @@ matrix:
 - $TRAVIS_BUILD_DIR/ci/travis_before_script_c_glib.sh
 script:
 - $TRAVIS_BUILD_DIR/ci/travis_script_c_glib.sh
+  - compiler: clang
+osx_image: xcode6.4
+os: osx
+cache:
+addons:
+rvm: 2.2
+before_script:
+- $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh --only-library
+- $TRAVIS_BUILD_DIR/ci/travis_before_script_c_glib.sh
+script:
+- $TRAVIS_BUILD_DIR/ci/travis_script_c_glib.sh
 
 before_install:
 - ulimit -c unlimited -S

http://git-wip-us.apache.org/repos/asf/arrow/blob/a81aefbd/ci/travis_before_script_c_glib.sh
--
diff --git a/ci/travis_before_script_c_glib.sh 
b/ci/travis_before_script_c_glib.sh
index e397b0e..6387f4d 100755
--- a/ci/travis_before_script_c_glib.sh
+++ b/ci/travis_before_script_c_glib.sh
@@ -19,28 +19,38 @@ source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh
 
 if [ $TRAVIS_OS_NAME == "osx" ]; then
   brew install gtk-doc autoconf-archive gobject-introspection
+  brew upgrade git cmake wget libtool
+
+  export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/local/opt/libffi/lib/pkgconfig
 fi
 
-gem install gobject-introspection
-
-git clone \
-  --quiet \
-  --depth 1 \
-  --recursive \
-  https://github.com/torch/distro.git ~/torch
-pushd ~/torch
-./install-deps > /dev/null
-echo "yes" | ./install.sh > /dev/null
-. ~/torch/install/bin/torch-activate
-popd
+gem install test-unit gobject-introspection
+
+if [ $TRAVIS_OS_NAME == "osx" ]; then
+  brew install lua
+else
+  git clone \
+--quiet \
+--depth 1 \
+--recursive \
+https://github.com/torch/distro.git ~/torch
+  pushd ~/torch
+  ./install-deps > /dev/null
+  echo "yes" | ./install.sh > /dev/null
+  . ~/torch/install/bin/torch-activate
+  popd
+fi
 luarocks install lgi
 
 go get github.com/linuxdeepin/go-gir-generator || :
 pushd $GOPATH/src/github.com/linuxdeepin/go-gir-generator
+mv Makefile{,.orig}
+sed -e 's/ gudev-1.0//' Makefile.orig > Makefile
+mkdir -p out/src/gir/gudev-1.0
 make build copyfile
 mkdir -p $GOPATH/bin/
 cp -a out/gir-generator $GOPATH/bin/
-cp -a out/src/gir/ $GOPATH/src/
+cp -a out/src/gir/ $GOPATH/src/gir/
 popd
 
 pushd $ARROW_C_GLIB_DIR
@@ -50,7 +60,11 @@ pushd $ARROW_C_GLIB_DIR
 export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$ARROW_CPP_INSTALL/lib/pkgconfig
 export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ARROW_CPP_INSTALL/lib
 
-./configure --prefix=${ARROW_C_GLIB_INSTALL} --enable-gtk-doc
+CONFIGURE_OPTIONS="--prefix=$ARROW_C_GLIB_INSTALL"
+if [ $TRAVIS_OS_NAME != "osx" ]; then
+  CONFIGURE_OPTIONS="$CONFIGURE_OPTIONS --enable-gtk-doc"
+fi
+./configure $CONFIGURE_OPTIONS
 
 make -j4
 make install

http://git-wip-us.apache.org/repos/asf/arrow/blob/a81aefbd/ci/travis_env_common.sh
--
diff --git a/ci/travis_env_common.sh b/ci/travis_env_common.sh
index cef2d65..9ded3e0 100755
--- a/ci/travis_env_common.sh
+++ b/ci/travis_env_common.sh
@@ -26,3 +26,7 @@ export ARROW_INTEGRATION_DIR=$TRAVIS_BUILD_DIR/integration
 export ARROW_CPP_INSTALL=$TRAVIS_BUILD_DIR/cpp-install
 export ARROW_CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build
 export ARROW_C_GLIB_INSTALL=$TRAVIS_BUILD_DIR/c-glib-install
+
+if [ $TRAVIS_OS_NAME

arrow git commit: ARROW-1051: [Python] Opt in to Parquet unit tests to avoid accidental suppression of dynamic linking errors

2017-06-05 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master a81aefbd8 -> 8f2b44b89


ARROW-1051: [Python] Opt in to Parquet unit tests to avoid accidental 
suppression of dynamic linking errors

Author: Wes McKinney 

Closes #729 from wesm/ARROW-1051 and squashes the following commits:

019b9ec [Wes McKinney] Statically link boost in parquet-cpp
5103077 [Wes McKinney] See if updating conda helps
7eac948 [Wes McKinney] See if setting PATH solves problem
e246e19 [Wes McKinney] Red herring, issue was runtime library loading
6bc0492 [Wes McKinney] Set PARQUET_ARROW_VERSION in Windows build
a1f2d2b [Wes McKinney] Opt in to Parquet unit tests so that import errors from 
pyarrow.parquet bubble up


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/8f2b44b8
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/8f2b44b8
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/8f2b44b8

Branch: refs/heads/master
Commit: 8f2b44b897b7083ee2a296c70397dc2d7d21d95e
Parents: a81aefb
Author: Wes McKinney 
Authored: Mon Jun 5 12:18:32 2017 +0200
Committer: Uwe L. Korn 
Committed: Mon Jun 5 12:18:32 2017 +0200

--
 ci/msvc-build.bat|  10 ++-
 ci/travis_script_python.sh   |   2 +-
 python/pyarrow/tests/conftest.py |   2 +-
 python/pyarrow/tests/test_parquet.py | 132 +++---
 4 files changed, 92 insertions(+), 54 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/8f2b44b8/ci/msvc-build.bat
--
diff --git a/ci/msvc-build.bat b/ci/msvc-build.bat
index d13c11f..263d4bc 100644
--- a/ci/msvc-build.bat
+++ b/ci/msvc-build.bat
@@ -17,6 +17,8 @@
 
 @echo on
 
+conda update --yes --quiet conda
+
 conda create -n arrow -q -y python=%PYTHON% ^
   six pytest setuptools numpy pandas cython
 conda install -n arrow -q -y -c conda-forge ^
@@ -43,7 +45,7 @@ cmake -G "%GENERATOR%" ^
 cmake --build . --target INSTALL --config Release  || exit /B
 
 @rem Needed so python-test.exe works
-set 
PYTHONPATH=%CONDA_PREFIX%\Lib;%CONDA_PREFIX%\Lib\site-packages;%CONDA_PREFIX%\python35.zip;%CONDA_PREFIX%\DLLs;%CONDA_PREFIX%
+set 
PYTHONPATH=%CONDA_PREFIX%\Lib;%CONDA_PREFIX%\Lib\site-packages;%CONDA_PREFIX%\python35.zip;%CONDA_PREFIX%\DLLs;%CONDA_PREFIX%;%PYTHONPATH%
 
 ctest -VV  || exit /B
 popd
@@ -59,15 +61,17 @@ set PARQUET_HOME=%CONDA_PREFIX%\Library
 cmake -G "%GENERATOR%" ^
  -DCMAKE_INSTALL_PREFIX=%PARQUET_HOME% ^
  -DCMAKE_BUILD_TYPE=Release ^
+ -DPARQUET_BOOST_USE_SHARED=OFF ^
  -DPARQUET_ZLIB_VENDORED=off ^
  -DPARQUET_BUILD_TESTS=off .. || exit /B
 cmake --build . --target INSTALL --config Release || exit /B
 popd
 
 @rem Build and import pyarrow
-set PYTHONPATH=
+@rem parquet-cpp has some additional runtime dependencies that we need to 
figure out
+@rem see PARQUET-1018
 
 pushd python
 python setup.py build_ext --inplace --with-parquet --bundle-arrow-cpp 
bdist_wheel  || exit /B
-py.test pyarrow -v -s || exit /B
+py.test pyarrow -v -s --parquet || exit /B
 popd

http://git-wip-us.apache.org/repos/asf/arrow/blob/8f2b44b8/ci/travis_script_python.sh
--
diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh
index c3735cc..904db52 100755
--- a/ci/travis_script_python.sh
+++ b/ci/travis_script_python.sh
@@ -111,7 +111,7 @@ python_version_tests() {
   python -c "import pyarrow.parquet"
   python -c "import pyarrow._jemalloc"
 
-  python -m pytest -vv -r sxX pyarrow
+  python -m pytest -vv -r sxX pyarrow --parquet
 
   # Build documentation once
   if [[ "$PYTHON_VERSION" == "3.6" ]]

http://git-wip-us.apache.org/repos/asf/arrow/blob/8f2b44b8/python/pyarrow/tests/conftest.py
--
diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py
index d5b4b69..9b767fc 100644
--- a/python/pyarrow/tests/conftest.py
+++ b/python/pyarrow/tests/conftest.py
@@ -18,7 +18,7 @@
 from pytest import skip
 
 
-groups = ['hdfs']
+groups = ['hdfs', 'parquet']
 
 
 def pytest_configure(config):

http://git-wip-us.apache.org/repos/asf/arrow/blob/8f2b44b8/python/pyarrow/tests/test_parquet.py
--
diff --git a/python/pyarrow/tests/test_parquet.py 
b/python/pyarrow/tests/test_parquet.py
index 5f65f28..052d395 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -32,13 +32,20 @@ import pandas as pd
 
 import pandas.util.testing as tm
 
-# Skip all parquet tests if we can't import pyarrow.parquet
pq = pytest.importorskip('pyarrow.parquet')

arrow git commit: ARROW-986: [Format] Add brief explanation of dictionary batches in IPC.md

2017-06-05 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 8f2b44b89 -> a44155d6e


ARROW-986: [Format] Add brief explanation of dictionary batches in IPC.md

Author: Wes McKinney 

Closes #732 from wesm/ARROW-986 and squashes the following commits:

4321106 [Wes McKinney] Add brief explanation of dictionary batches in IPC.md


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/a44155d6
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/a44155d6
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/a44155d6

Branch: refs/heads/master
Commit: a44155d6ec5d0c6c255d3305a494f51a6b1d2316
Parents: 8f2b44b
Author: Wes McKinney 
Authored: Mon Jun 5 12:20:35 2017 +0200
Committer: Uwe L. Korn 
Committed: Mon Jun 5 12:20:35 2017 +0200

--
 format/IPC.md | 22 +++---
 1 file changed, 19 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/a44155d6/format/IPC.md
--
diff --git a/format/IPC.md b/format/IPC.md
index bf2aaa7..7d68921 100644
--- a/format/IPC.md
+++ b/format/IPC.md
@@ -157,9 +157,24 @@ Some notes about this
 
 ### Dictionary Batches
 
-Dictionary batches have not yet been implemented, while they are provided for
-in the metadata. For the time being, the `DICTIONARY` segments shown above in
-the file do not appear in any of the file implementations.
+Dictionaries are written in the stream and file formats as a sequence of record
+batches, each having a single field. The complete semantic schema for a
+sequence of record batches, therefore, consists of the schema along with all of
+the dictionaries. The dictionary types are found in the schema, so it is
+necessary to read the schema to first determine the dictionary types so that
+the dictionaries can be properly interpreted.
+
+```
+table DictionaryBatch {
+  id: long;
+  data: RecordBatch;
+}
+```
+
+The dictionary `id` in the message metadata can be referenced one or more times
+in the schema, so that dictionaries can even be used for multiple fields. See
+the [Physical Layout][4] document for more about the semantics of
+dictionary-encoded data.
 
 ### Tensor (Multi-dimensional Array) Message Format
 
@@ -182,3 +197,4 @@ shared memory region) to be a multiple of 8:
 [1]: https://github.com/apache/arrow/blob/master/format/File.fbs
 [2]: https://github.com/apache/arrow/blob/master/format/Message.fbs
[3]: https://github.com/google/flatbuffers
+[4]: https://github.com/apache/arrow/blob/master/format/Layout.md



arrow git commit: ARROW-1050: [C++] Export arrow::ValidateArray

2017-06-05 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master a44155d6e -> cfaddabec


ARROW-1050: [C++] Export arrow::ValidateArray

Author: Wes McKinney 

Closes #730 from wesm/ARROW-1050 and squashes the following commits:

c50dd5a [Wes McKinney] Export arrow::ValidateArray


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/cfaddabe
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/cfaddabe
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/cfaddabe

Branch: refs/heads/master
Commit: cfaddabecd104595acf043e92ba8f3cb0841b278
Parents: a44155d
Author: Wes McKinney 
Authored: Mon Jun 5 12:22:38 2017 +0200
Committer: Uwe L. Korn 
Committed: Mon Jun 5 12:22:38 2017 +0200

--
 cpp/src/arrow/array.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/cfaddabe/cpp/src/arrow/array.h
--
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index 2c96ce0..1c9769f 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -515,7 +515,7 @@ ARROW_EXTERN_TEMPLATE NumericArray;
 ///
 /// \param array an Array instance
 /// \return Status
-Status ValidateArray(const Array& array);
+Status ARROW_EXPORT ValidateArray(const Array& array);
 
 }  // namespace arrow
 



arrow git commit: ARROW-1056: [Python] Ignore pandas index in parquet+hdfs test

2017-06-05 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master cfaddabec -> 316930cf6


ARROW-1056: [Python] Ignore pandas index in parquet+hdfs test

I have all the tests passing again:

```shell
$ py.test pyarrow --hdfs
= test session starts 
=
platform linux -- Python 3.5.1, pytest-3.0.6, py-1.4.31, pluggy-0.4.0
rootdir: /home/wesm/code/arrow/python, inifile: setup.cfg
collected 227 items

pyarrow/tests/test_array.py ...
pyarrow/tests/test_convert_builtin.py ..
pyarrow/tests/test_convert_pandas.py x
pyarrow/tests/test_deprecations.py ..
pyarrow/tests/test_feather.py ...x
pyarrow/tests/test_hdfs.py ...
pyarrow/tests/test_io.py ..
pyarrow/tests/test_ipc.py .x
pyarrow/tests/test_jemalloc.py ..
pyarrow/tests/test_parquet.py ...
pyarrow/tests/test_scalars.py ..
pyarrow/tests/test_schema.py ..
pyarrow/tests/test_table.py ...
pyarrow/tests/test_tensor.py 
```

Author: Wes McKinney 

Closes #728 from wesm/ARROW-1056 and squashes the following commits:

aa3d468 [Wes McKinney] Ignore pandas index in parquet+hdfs test


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/316930cf
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/316930cf
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/316930cf

Branch: refs/heads/master
Commit: 316930cf6ce654aae042ee5ef269e6fef050a791
Parents: cfaddab
Author: Wes McKinney 
Authored: Mon Jun 5 12:26:13 2017 +0200
Committer: Uwe L. Korn 
Committed: Mon Jun 5 12:26:13 2017 +0200

--
 python/pyarrow/tests/test_hdfs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/316930cf/python/pyarrow/tests/test_hdfs.py
--
diff --git a/python/pyarrow/tests/test_hdfs.py 
b/python/pyarrow/tests/test_hdfs.py
index d2a5479..cea02fb 100644
--- a/python/pyarrow/tests/test_hdfs.py
+++ b/python/pyarrow/tests/test_hdfs.py
@@ -167,7 +167,7 @@ class HdfsTestCases(object):
 
 path = pjoin(tmpdir, '{0}.parquet'.format(i))
 
-table = pa.Table.from_pandas(df)
+table = pa.Table.from_pandas(df, preserve_index=False)
 with self.hdfs.open(path, 'wb') as f:
 pq.write_table(table, f)
 



arrow git commit: ARROW-1118: [Site] Website updates for 0.4.1

2017-06-15 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 697df1b50 -> d1de66bda


ARROW-1118: [Site] Website updates for 0.4.1

This has been deployed preliminarily for the release. I have not yet updated 
the subproject documentation; help would be appreciated

Author: Wes McKinney 

Closes #755 from wesm/ARROW-1118 and squashes the following commits:

0bdac0e [Wes McKinney] Website updates for 0.4.1


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/d1de66bd
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/d1de66bd
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/d1de66bd

Branch: refs/heads/master
Commit: d1de66bdaffedd07616d37fde6fc0265bf51868f
Parents: 697df1b
Author: Wes McKinney 
Authored: Thu Jun 15 09:02:11 2017 +0200
Committer: Uwe L. Korn 
Committed: Thu Jun 15 09:02:11 2017 +0200

--
 CHANGELOG.md| 145 +++
 site/_posts/2017-06-14-0.4.1-release.md |  67 +
 site/_release/0.4.1.md  |  98 ++
 site/_release/index.md  |   2 +
 site/index.html |   4 +-
 site/install.md |  26 ++---
 6 files changed, 327 insertions(+), 15 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/d1de66bd/CHANGELOG.md
--
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 85a43ef..55b02e0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,151 @@
   limitations under the License. See accompanying LICENSE file.
 -->
 
+# Apache Arrow 0.4.1 (9 June 2017)
+
+## Bug
+
+* ARROW-1039 - Python: pyarrow.Filesystem.read_parquet causing error if 
nthreads>1
+* ARROW-1050 - [C++] Export arrow::ValidateArray
+* ARROW-1051 - [Python] If pyarrow.parquet fails to import due to a shared 
library ABI conflict, the test_parquet.py tests silently do not run
+* ARROW-1056 - [Python] Parquet+HDFS test failure due to writing pandas index
+* ARROW-1057 - Fix cmake warning and msvc debug asserts
+* ARROW-1062 - [GLib] Examples use old API
+* ARROW-1066 - remove warning on feather for pandas >= 0.20.1
+* ARROW-1070 - [C++] Feather files for date/time types should be written with 
the physical types
+* ARROW-1075 - [GLib] Build error on macOS
+* ARROW-1085 - [java] Follow up on template cleanup. Missing method for 
IntervalYear
+* ARROW-1086 - [Python] pyarrow 0.4.0 on pypi is missing pxd files
+* ARROW-1088 - [Python] test_unicode_filename test fails when unicode 
filenames aren't supported by system
+* ARROW-1090 - [Python] build_ext usability
+* ARROW-1091 - Decimal scale and precision are flipped
+* ARROW-1092 - More Decimal and scale flipped follow-up
+* ARROW-1094 - [C++] Incomplete buffer reads in arrow::io::ReadableFile should 
exactly truncate returned buffer
+* ARROW-424 - [C++] Threadsafety in arrow/io/hdfs.h
+
+## Improvement
+
+* ARROW-1020 - [Format] Add additional language to Schema.fbs to clarify naive 
vs. localized Timestamp values
+* ARROW-1034 - [Python] Enable creation of binary wheels on Windows / MSVC
+* ARROW-1049 - [java] vector template cleanup
+* ARROW-1063 - [Website] Blog post and website updates for 0.4.0 release
+* ARROW-1078 - [Python] Account for PARQUET-967
+* ARROW-1080 - C++: Add tutorial about converting to/from row-wise 
representation
+* ARROW-897 - [GLib] Build arrow-glib as a separate build in the Travis CI 
build matrix
+* ARROW-986 - [Format] Update IPC.md to account for dictionary batches
+* ARROW-990 - [JS] Add tslint support for linting TypeScript
+
+## Task
+
+* ARROW-1068 - [Python] Create external repo with appveyor.yml configured for 
building Python wheel installers
+* ARROW-1069 - Add instructions for publishing maven artifacts
+* ARROW-1084 - Implementations of BufferAllocator should handle Netty's 
OutOfDirectMemoryError
+
+## Test
+
+* ARROW-1060 - [Python] Add unit test for ARROW-1053
+* ARROW-1082 - [GLib] Add CI on macOS
+
+# Apache Arrow 0.4.0 (22 May 2017)
+
+## Bug
+
+* ARROW-1003 - [C++] Hdfs and java dlls fail to load when built for Windows 
with MSVC
+* ARROW-1004 - ArrowInvalid: Invalid: Python object of type float is not None 
and is not a string, bool, or date object
+* ARROW-1017 - Python: Table.to_pandas leaks memory
+* ARROW-1023 - Python: Fix bundling of arrow-cpp for macOS
+* ARROW-1033 - [Python] pytest discovers scripts/test_leak.py
+* ARROW-1046 - [Python] Conform DataFrame metadata to pandas spec
+* ARROW-1053 - [Python] Memory leak with RecordBatchFileReader
+* ARROW-1054 - [Python] Test suite fails on pandas 0.19.2
+* ARROW-1061 - [C++] Harden decimal parsing against invalid strings
+* ARROW-1064 - ModuleNotFoundError: No module named 'pyarrow._parquet'
+* ARROW-813 - [Python]

arrow git commit: ARROW-1124: Increase numpy dependency to >=1.10.x

2017-06-17 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master d54bf4829 -> 1a23419fb


ARROW-1124: Increase numpy dependency to >=1.10.x

While we could still build with NumPy>=1.9 for Python 2, Python 3 builds
require >= 1.10 due to a bug in the C-headers.

Change-Id: I0f9e0ad72e4ce4b1c6b44883d5781347d33f7e5b

Author: Uwe L. Korn 

Closes #758 from xhochy/ARROW-1124 and squashes the following commits:

5fff1ea [Uwe L. Korn] ARROW-1124: Increase numpy dependency to >=1.10.x


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/1a23419f
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/1a23419f
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/1a23419f

Branch: refs/heads/master
Commit: 1a23419fb1ec1ead90eb45e4b3c2cacd8a85bba9
Parents: d54bf48
Author: Uwe L. Korn 
Authored: Sat Jun 17 19:27:08 2017 +0200
Committer: Uwe L. Korn 
Committed: Sat Jun 17 19:27:08 2017 +0200

--
 python/requirements.txt | 2 +-
 python/setup.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/1a23419f/python/requirements.txt
--
diff --git a/python/requirements.txt b/python/requirements.txt
index f42c90c..103f490 100644
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -1,3 +1,3 @@
 pytest
-numpy>=1.7.0
+numpy>=1.10.0
 six

http://git-wip-us.apache.org/repos/asf/arrow/blob/1a23419f/python/setup.py
--
diff --git a/python/setup.py b/python/setup.py
index e10b4b8..eddf88a 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -366,7 +366,7 @@ setup(
 },
 use_scm_version={"root": "..", "relative_to": __file__},
 setup_requires=['setuptools_scm', 'cython >= 0.23'],
-install_requires=['numpy >= 1.9', 'six >= 1.0.0'],
+install_requires=['numpy >= 1.10', 'six >= 1.0.0'],
 tests_require=['pytest'],
 description="Python library for Apache Arrow",
 long_description=long_description,



arrow git commit: ARROW-742: [C++] Use gflags from toolchain; Resolve cmake FindGFlags …

2017-06-18 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 1a23419fb -> 5be05ac15


ARROW-742: [C++] Use gflags from toolchain; Resolve cmake FindGFlags …

…and FindGTest Windows issues.

Author: Max Risuhin 

Closes #759 from MaxRis/ARROW-784 and squashes the following commits:

358a9cc [Max Risuhin] ARROW-742: [C++] Use gflags from toolchain; Resolve cmake 
FindGFlags and FindGTest Windows issues.


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/5be05ac1
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/5be05ac1
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/5be05ac1

Branch: refs/heads/master
Commit: 5be05ac154843908f87fb40f3e0bde5e256e8626
Parents: 1a23419
Author: Max Risuhin 
Authored: Sun Jun 18 18:43:30 2017 +0200
Committer: Uwe L. Korn 
Committed: Sun Jun 18 18:43:30 2017 +0200

--
 ci/msvc-build.bat  |  2 +-
 cpp/CMakeLists.txt |  7 ++-
 cpp/cmake_modules/FindGFlags.cmake | 12 
 cpp/cmake_modules/FindGTest.cmake  |  9 +
 4 files changed, 20 insertions(+), 10 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/5be05ac1/ci/msvc-build.bat
--
diff --git a/ci/msvc-build.bat b/ci/msvc-build.bat
index 263d4bc..366fc28 100644
--- a/ci/msvc-build.bat
+++ b/ci/msvc-build.bat
@@ -23,7 +23,7 @@ conda create -n arrow -q -y python=%PYTHON% ^
   six pytest setuptools numpy pandas cython
 conda install -n arrow -q -y -c conda-forge ^
   flatbuffers rapidjson ^
-  cmake git boost-cpp thrift-cpp snappy zlib brotli
+  cmake git boost-cpp thrift-cpp snappy zlib brotli gflags
 
 call activate arrow
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/5be05ac1/cpp/CMakeLists.txt
--
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index c295fa0..42e1f69 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -412,6 +412,7 @@ if (NOT "$ENV{ARROW_BUILD_TOOLCHAIN}" STREQUAL "")
   set(FLATBUFFERS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
   set(RAPIDJSON_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
   set(JEMALLOC_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
+  set(GFLAGS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
 
   if (NOT DEFINED ENV{BOOST_ROOT})
 # Since we have to set this in the environment, we check whether
@@ -432,6 +433,10 @@ if (DEFINED ENV{JEMALLOC_HOME})
   set(JEMALLOC_HOME "$ENV{JEMALLOC_HOME}")
 endif()
 
+if (DEFINED ENV{GFLAGS_HOME})
+  set(GFLAGS_HOME "$ENV{GFLAGS_HOME}")
+endif()
+
 # --
 # Add Boost dependencies (code adapted from Apache Kudu (incubating))
 
@@ -560,7 +565,7 @@ if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS)
   endif()
 
   # gflags (formerly Googleflags) command line parsing
-  if("$ENV{GFLAGS_HOME}" STREQUAL "")
+  if("${GFLAGS_HOME}" STREQUAL "")
 set(GFLAGS_CMAKE_CXX_FLAGS ${EP_CXX_FLAGS})
 
 set(GFLAGS_PREFIX 
"${CMAKE_CURRENT_BINARY_DIR}/gflags_ep-prefix/src/gflags_ep")

http://git-wip-us.apache.org/repos/asf/arrow/blob/5be05ac1/cpp/cmake_modules/FindGFlags.cmake
--
diff --git a/cpp/cmake_modules/FindGFlags.cmake 
b/cpp/cmake_modules/FindGFlags.cmake
index eaea835..9eaf824 100644
--- a/cpp/cmake_modules/FindGFlags.cmake
+++ b/cpp/cmake_modules/FindGFlags.cmake
@@ -22,13 +22,17 @@
 #  GFLAGS_STATIC_LIB, path to libgflags static library
 #  GFLAGS_FOUND, whether gflags has been found
 
-if( NOT "$ENV{GFLAGS_HOME}" STREQUAL "")
-file( TO_CMAKE_PATH "$ENV{GFLAGS_HOME}" _native_path )
+if( NOT "${GFLAGS_HOME}" STREQUAL "")
+file( TO_CMAKE_PATH "${GFLAGS_HOME}" _native_path )
 list( APPEND _gflags_roots ${_native_path} )
 elseif ( GFlags_HOME )
 list( APPEND _gflags_roots ${GFlags_HOME} )
 endif()
 
+if (MSVC AND NOT GFLAGS_MSVC_STATIC_LIB_SUFFIX)
+  set(GFLAGS_MSVC_STATIC_LIB_SUFFIX "_static")
+endif()
+
 if ( _gflags_roots )
   find_path(GFLAGS_INCLUDE_DIR NAMES gflags/gflags.h
 PATHS ${_gflags_roots}
@@ -38,7 +42,7 @@ if ( _gflags_roots )
 PATHS ${_gflags_roots}
 NO_DEFAULT_PATH
 PATH_SUFFIXES "lib" )
-  find_library(GFLAGS_SHARED_LIB NAMES libgflags.a
+  find_library(GFLAGS_STATIC_LIB NAMES 
${CMAKE_STATIC_LIBRARY_PREFIX}gflags${GFLAGS_MSVC_STATIC_LIB_SUFFIX}
 PATHS ${_gflags_roots}
 NO_DEFAULT_PATH
 PATH_SUFFIXES "lib" )
@@ -50,7 +54,7 @@ else()
   find_library(GFLAGS_SHARED_LIB gflags
 NO_CMAKE_SYSTEM_PATH
 NO_SYSTEM_ENVIRONMENT_PATH)
-  find_li

arrow git commit: ARROW-1129: [C++] Fix gflags issue in Linux/macOS toolchain builds

2017-06-20 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master b5e8a4831 -> 86c67d061


ARROW-1129: [C++] Fix gflags issue in Linux/macOS toolchain builds

Minor regression introduced in #759

Author: Wes McKinney 

Closes #763 from wesm/ARROW-1129 and squashes the following commits:

afc4bb6 [Wes McKinney] Fix gflags library name being searched for in toolchain 
builds on Linux/macOS


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/86c67d06
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/86c67d06
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/86c67d06

Branch: refs/heads/master
Commit: 86c67d06146dee957c10e200a758be07c85da2d6
Parents: b5e8a48
Author: Wes McKinney 
Authored: Tue Jun 20 14:05:12 2017 +0200
Committer: Uwe L. Korn 
Committed: Tue Jun 20 14:05:12 2017 +0200

--
 cpp/cmake_modules/FindGFlags.cmake | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/86c67d06/cpp/cmake_modules/FindGFlags.cmake
--
diff --git a/cpp/cmake_modules/FindGFlags.cmake 
b/cpp/cmake_modules/FindGFlags.cmake
index 9eaf824..63a55d7 100644
--- a/cpp/cmake_modules/FindGFlags.cmake
+++ b/cpp/cmake_modules/FindGFlags.cmake
@@ -33,6 +33,12 @@ if (MSVC AND NOT GFLAGS_MSVC_STATIC_LIB_SUFFIX)
   set(GFLAGS_MSVC_STATIC_LIB_SUFFIX "_static")
 endif()
 
+set(GFLAGS_STATIC_LIB_SUFFIX
+  "${GFLAGS_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}")
+
+set(GFLAGS_STATIC_LIB_NAME
+  ${CMAKE_STATIC_LIBRARY_PREFIX}gflags${GFLAGS_STATIC_LIB_SUFFIX})
+
 if ( _gflags_roots )
   find_path(GFLAGS_INCLUDE_DIR NAMES gflags/gflags.h
 PATHS ${_gflags_roots}
@@ -42,7 +48,7 @@ if ( _gflags_roots )
 PATHS ${_gflags_roots}
 NO_DEFAULT_PATH
 PATH_SUFFIXES "lib" )
-  find_library(GFLAGS_STATIC_LIB NAMES 
${CMAKE_STATIC_LIBRARY_PREFIX}gflags${GFLAGS_MSVC_STATIC_LIB_SUFFIX}
+  find_library(GFLAGS_STATIC_LIB NAMES ${GFLAGS_STATIC_LIB_NAME}
 PATHS ${_gflags_roots}
 NO_DEFAULT_PATH
 PATH_SUFFIXES "lib" )
@@ -54,7 +60,7 @@ else()
   find_library(GFLAGS_SHARED_LIB gflags
 NO_CMAKE_SYSTEM_PATH
 NO_SYSTEM_ENVIRONMENT_PATH)
-  find_library(GFLAGS_STATIC_LIB 
${CMAKE_STATIC_LIBRARY_PREFIX}gflags${GFLAGS_MSVC_STATIC_LIB_SUFFIX}
+  find_library(GFLAGS_STATIC_LIB ${GFLAGS_STATIC_LIB_NAME}
 NO_CMAKE_SYSTEM_PATH
 NO_SYSTEM_ENVIRONMENT_PATH)
 endif()



arrow git commit: ARROW-1138: Travis: Use OpenJDK7 instead of OracleJDK7

2017-06-22 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master 86c67d061 -> f0f1ca60d


ARROW-1138: Travis: Use OpenJDK7 instead of OracleJDK7

Will merge on green build to unbreak Travis but it would be very helpful if a 
Java expert can review this change.

Author: Uwe L. Korn 

Closes #765 from xhochy/fix-jdk and squashes the following commits:

262f6f9 [Uwe L. Korn] Switch to openjdk7
751444a [Uwe L. Korn] Add JDK 7 to package list


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/f0f1ca60
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/f0f1ca60
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/f0f1ca60

Branch: refs/heads/master
Commit: f0f1ca60d8581dc252568c27fa3481aa5e1cf9ac
Parents: 86c67d0
Author: Uwe L. Korn 
Authored: Thu Jun 22 09:12:43 2017 +0200
Committer: Uwe L. Korn 
Committed: Thu Jun 22 09:12:43 2017 +0200

--
 .travis.yml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/f0f1ca60/.travis.yml
--
diff --git a/.travis.yml b/.travis.yml
index 7a8b16c..a32562f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -17,6 +17,7 @@ addons:
 - gtk-doc-tools
 - autoconf-archive
 - libgirepository1.0-dev
+
 services:
   - docker
 
@@ -59,13 +60,13 @@ matrix:
 - $TRAVIS_BUILD_DIR/ci/travis_script_manylinux.sh
   - language: java
 os: linux
-jdk: oraclejdk7
+jdk: openjdk7
 script:
 - $TRAVIS_BUILD_DIR/ci/travis_script_java.sh
   - language: java
 os: linux
 env: ARROW_TEST_GROUP=integration
-jdk: oraclejdk7
+jdk: openjdk7
 before_script:
 - export CC="gcc-4.9"
 - export CXX="g++-4.9"



arrow git commit: ARROW-1039: Python: Remove duplicate column

2017-06-23 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master a16c1246e -> c1ec0c723


ARROW-1039: Python: Remove duplicate column

Note that part of this problem was related to the fix I made in 
https://github.com/apache/parquet-cpp/pull/358/files#diff-2f5ceebd1726b16db561185cc620d18e

Author: Uwe L. Korn 

Closes #773 from xhochy/ARROW-1039 and squashes the following commits:

44a002a [Uwe L. Korn] ARROW-1039: Python: Remove duplicate column


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/c1ec0c72
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/c1ec0c72
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/c1ec0c72

Branch: refs/heads/master
Commit: c1ec0c723e43b9450ef2655e6415eb40301c4ce8
Parents: a16c124
Author: Uwe L. Korn 
Authored: Fri Jun 23 15:49:55 2017 +0200
Committer: Uwe L. Korn 
Committed: Fri Jun 23 15:49:55 2017 +0200

--
 python/pyarrow/array.pxi | 29 +
 python/pyarrow/includes/libarrow.pxd |  6 ++
 python/pyarrow/tests/test_parquet.py |  4 ++--
 3 files changed, 37 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/c1ec0c72/python/pyarrow/array.pxi
--
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 5930de3..c7563c8 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -432,6 +432,7 @@ cdef set PRIMITIVE_TYPES = set([
 _Type_UINT32, _Type_INT32,
 _Type_UINT64, _Type_INT64,
 _Type_TIMESTAMP, _Type_DATE32,
+_Type_TIME32, _Type_TIME64,
 _Type_DATE64,
 _Type_HALF_FLOAT,
 _Type_FLOAT,
@@ -816,6 +817,32 @@ cdef class Date64Value(ArrayValue):
 ap.Value(self.index) / 1000).date()
 
 
+cdef class Time32Value(ArrayValue):
+
+def as_py(self):
+cdef:
+CTime32Array* ap =  self.sp_array.get()
+CTime32Type* dtype =  ap.type().get()
+
+if dtype.unit() == TimeUnit_SECOND:
+return (datetime.datetime(1970, 1, 1) + 
datetime.timedelta(seconds=ap.Value(self.index))).time()
+else:
+return (datetime.datetime(1970, 1, 1) + 
datetime.timedelta(milliseconds=ap.Value(self.index))).time()
+
+
+cdef class Time64Value(ArrayValue):
+
+def as_py(self):
+cdef:
+CTime64Array* ap =  self.sp_array.get()
+CTime64Type* dtype =  ap.type().get()
+
+if dtype.unit() == TimeUnit_MICRO:
+return (datetime.datetime(1970, 1, 1) + 
datetime.timedelta(microseconds=ap.Value(self.index))).time()
+else:
+return (datetime.datetime(1970, 1, 1) + 
datetime.timedelta(microseconds=ap.Value(self.index) / 1000)).time()
+
+
 cdef dict DATETIME_CONVERSION_FUNCTIONS
 
 try:
@@ -975,6 +1002,8 @@ cdef dict _scalar_classes = {
 _Type_INT64: Int64Value,
 _Type_DATE32: Date32Value,
 _Type_DATE64: Date64Value,
+_Type_TIME32: Time32Value,
+_Type_TIME64: Time64Value,
 _Type_TIMESTAMP: TimestampValue,
 _Type_FLOAT: FloatValue,
 _Type_DOUBLE: DoubleValue,

http://git-wip-us.apache.org/repos/asf/arrow/blob/c1ec0c72/python/pyarrow/includes/libarrow.pxd
--
diff --git a/python/pyarrow/includes/libarrow.pxd 
b/python/pyarrow/includes/libarrow.pxd
index 9df31c8..f712274 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -249,6 +249,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
 cdef cppclass CDate64Array" arrow::Date64Array"(CArray):
 int64_t Value(int i)
 
+cdef cppclass CTime32Array" arrow::Time32Array"(CArray):
+int32_t Value(int i)
+
+cdef cppclass CTime64Array" arrow::Time64Array"(CArray):
+int64_t Value(int i)
+
 cdef cppclass CTimestampArray" arrow::TimestampArray"(CArray):
 int64_t Value(int i)
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/c1ec0c72/python/pyarrow/tests/test_parquet.py
--
diff --git a/python/pyarrow/tests/test_parquet.py 
b/python/pyarrow/tests/test_parquet.py
index 052d395..3c2b73e 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -449,13 +449,13 @@ def test_date_time_types():
 
 table = pa.Table.from_arrays([a1, a2, a3, a4, a5, a6],
  ['date32', 'date64', 'timestamp[us]',
-  'time32[s]', 'time64[us]', 'time32[s]'])
+  'time32[s]', 'time64[us]', 
'time32_from64[s]'])
 
 # date64

arrow git commit: ARROW-1131: [Python] Enable the Parquet unit tests by default if the extension imports

2017-06-26 Thread uwe
Repository: arrow
Updated Branches:
  refs/heads/master bea30d6c2 -> fc3f8c2a0


ARROW-1131: [Python] Enable the Parquet unit tests by default if the extension 
imports

@xhochy what do you think about this?

Author: Wes McKinney 

Closes #781 from wesm/ARROW-1131 and squashes the following commits:

8803e52 [Wes McKinney] Enable the Parquet unit tests by default if the 
extension imports


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/fc3f8c2a
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/fc3f8c2a
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/fc3f8c2a

Branch: refs/heads/master
Commit: fc3f8c2a05a2e3b2aec9b319f2686fe678e307f4
Parents: bea30d6
Author: Wes McKinney 
Authored: Mon Jun 26 08:59:35 2017 +0200
Committer: Uwe L. Korn 
Committed: Mon Jun 26 08:59:35 2017 +0200

--
 python/pyarrow/tests/conftest.py | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/arrow/blob/fc3f8c2a/python/pyarrow/tests/conftest.py
--
diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py
index 9b767fc..fa9608f 100644
--- a/python/pyarrow/tests/conftest.py
+++ b/python/pyarrow/tests/conftest.py
@@ -20,6 +20,17 @@ from pytest import skip
 
 groups = ['hdfs', 'parquet']
 
+defaults = {
+'hdfs': False,
+'parquet': False
+}
+
+try:
+import pyarrow.parquet
+defaults['parquet'] = True
+except ImportError:
+pass
+
 
 def pytest_configure(config):
 pass
@@ -28,7 +39,7 @@ def pytest_configure(config):
 def pytest_addoption(parser):
 for group in groups:
 parser.addoption('--{0}'.format(group), action='store_true',
- default=False,
+ default=defaults[group],
  help=('Enable the {0} test group'.format(group)))
 
 for group in groups:



  1   2   3   4   5   6   7   8   9   >