This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new eae80e5  ARROW-4820: [Python] hadoop class path derived not correct
eae80e5 is described below

commit eae80e54c143ebd3ce4993122be891cee48a7c95
Author: tigerchen <[email protected]>
AuthorDate: Wed Mar 13 13:49:42 2019 +0100

    ARROW-4820: [Python] hadoop class path derived not correct
    
    > hdfs.py method _derive_hadoop_classpath adds the hadoop config dir to the hadoop classpath
    
    Fix for https://issues.apache.org/jira/browse/ARROW-4820
    
    Author: tigerchen <[email protected]>
    Author: Krisztián Szűcs <[email protected]>
    
    Closes #3872 from chenfj068/master and squashes the following commits:
    
    9ff5ca61 <Krisztián Szűcs> don't install libhdfs3
    4a65baf6 <tigerchen> ARROW-4820: hadoop class path derived not correct
---
 integration/hdfs/Dockerfile | 10 ++++++----
 python/pyarrow/hdfs.py      |  6 +++++-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/integration/hdfs/Dockerfile b/integration/hdfs/Dockerfile
index 4f72e25..7ad22f3 100644
--- a/integration/hdfs/Dockerfile
+++ b/integration/hdfs/Dockerfile
@@ -17,10 +17,6 @@
 
 FROM arrow:python-3.6
 
-# installing libhdfs3, it needs to be pinned, see ARROW-1465 and ARROW-1445
-RUN conda install -y -c conda-forge hdfs3 libhdfs3=2.2.31 && \
-    conda clean --all
-
 # installing libhdfs (JNI)
 ARG HADOOP_VERSION=2.6.5
 ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \
@@ -35,6 +31,12 @@ RUN apt-get update -y && \
     mv /hadoop-$HADOOP_VERSION /usr/local/hadoop
 ADD integration/hdfs/hdfs-site.xml $HADOOP_HOME/etc/hadoop/
 
+# installing libhdfs3, it needs to be pinned, see ARROW-1465 and ARROW-1445
+# after the conda-forge migration it's failing with abi incompatibilities, so
+# turning it off for now
+# RUN conda install -y -c conda-forge libhdfs3=2.2.31 && \
+#     conda clean --all
+
 # build cpp with tests
 ENV CC=gcc \
     CXX=g++ \
diff --git a/python/pyarrow/hdfs.py b/python/pyarrow/hdfs.py
index 0191481..bf489ff 100644
--- a/python/pyarrow/hdfs.py
+++ b/python/pyarrow/hdfs.py
@@ -146,8 +146,12 @@ def _derive_hadoop_classpath():
     xargs_echo = subprocess.Popen(('xargs', 'echo'),
                                   stdin=find.stdout,
                                   stdout=subprocess.PIPE)
-    return subprocess.check_output(('tr', "' '", "':'"),
+    jars = subprocess.check_output(('tr', "' '", "':'"),
                                    stdin=xargs_echo.stdout)
+    hadoop_conf = os.environ["HADOOP_CONF_DIR"] \
+        if "HADOOP_CONF_DIR" in os.environ \
+        else os.environ["HADOOP_HOME"] + "/etc/hadoop"
+    return (hadoop_conf + ":").encode("utf-8") + jars
 
 
 def _hadoop_classpath_glob(hadoop_bin):

Reply via email to