This is an automated email from the ASF dual-hosted git repository.
kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new eae80e5 ARROW-4820: [Python] hadoop class path derived not correct
eae80e5 is described below
commit eae80e54c143ebd3ce4993122be891cee48a7c95
Author: tigerchen <[email protected]>
AuthorDate: Wed Mar 13 13:49:42 2019 +0100
ARROW-4820: [Python] hadoop class path derived not correct
> The hdfs.py method _derive_hadoop_classpath now adds the Hadoop config dir to the Hadoop
classpath.
Fix for https://issues.apache.org/jira/browse/ARROW-4820
Author: tigerchen <[email protected]>
Author: Krisztián Szűcs <[email protected]>
Closes #3872 from chenfj068/master and squashes the following commits:
9ff5ca61 <Krisztián Szűcs> don't install libhdfs3
4a65baf6 <tigerchen> ARROW-4820: hadoop class path derived not correct
---
integration/hdfs/Dockerfile | 10 ++++++----
python/pyarrow/hdfs.py | 6 +++++-
2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/integration/hdfs/Dockerfile b/integration/hdfs/Dockerfile
index 4f72e25..7ad22f3 100644
--- a/integration/hdfs/Dockerfile
+++ b/integration/hdfs/Dockerfile
@@ -17,10 +17,6 @@
FROM arrow:python-3.6
-# installing libhdfs3, it needs to be pinned, see ARROW-1465 and ARROW-1445
-RUN conda install -y -c conda-forge hdfs3 libhdfs3=2.2.31 && \
- conda clean --all
-
# installing libhdfs (JNI)
ARG HADOOP_VERSION=2.6.5
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \
@@ -35,6 +31,12 @@ RUN apt-get update -y && \
mv /hadoop-$HADOOP_VERSION /usr/local/hadoop
ADD integration/hdfs/hdfs-site.xml $HADOOP_HOME/etc/hadoop/
+# installing libhdfs3, it needs to be pinned, see ARROW-1465 and ARROW-1445
+# after the conda-forge migration it's failing with abi incompatibilities, so
+# turning it off for now
+# RUN conda install -y -c conda-forge libhdfs3=2.2.31 && \
+# conda clean --all
+
# build cpp with tests
ENV CC=gcc \
CXX=g++ \
diff --git a/python/pyarrow/hdfs.py b/python/pyarrow/hdfs.py
index 0191481..bf489ff 100644
--- a/python/pyarrow/hdfs.py
+++ b/python/pyarrow/hdfs.py
@@ -146,8 +146,12 @@ def _derive_hadoop_classpath():
xargs_echo = subprocess.Popen(('xargs', 'echo'),
stdin=find.stdout,
stdout=subprocess.PIPE)
- return subprocess.check_output(('tr', "' '", "':'"),
+ jars = subprocess.check_output(('tr', "' '", "':'"),
stdin=xargs_echo.stdout)
+ hadoop_conf = os.environ["HADOOP_CONF_DIR"] \
+ if "HADOOP_CONF_DIR" in os.environ \
+ else os.environ["HADOOP_HOME"] + "/etc/hadoop"
+ return (hadoop_conf + ":").encode("utf-8") + jars
def _hadoop_classpath_glob(hadoop_bin):