This is an automated email from the ASF dual-hosted git repository.

chamikara pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 24c0034  [BEAM-3060] HDFS cluster configuration, kubernetes scripts, 
filebased io support … (#4261)
24c0034 is described below

commit 24c003496dc1930129169b29e5c677c6fb839dd5
Author: Kamil Szewczyk <[email protected]>
AuthorDate: Fri Jan 12 02:27:26 2018 +0100

    [BEAM-3060] HDFS cluster configuration, kubernetes scripts, filebased io 
support … (#4261)
    
    HDFS cluster configuration, kubernetes scripts, filebased io support for 
hdfs tests.
---
 .../hdfs-single-datanode-cluster-for-local-dev.yml | 46 ++++++++++++
 .../hdfs-single-datanode-cluster.yml               | 83 ++++++++++++++++++++++
 .../kubernetes/hadoop/SmallITCluster/setup-all.sh  | 42 +++++++++++
 .../kubernetes/hadoop/SmallITCluster/setup.sh      | 21 ++++++
 .../hadoop/SmallITCluster/teardown-all.sh          | 33 +++++++++
 .../kubernetes/hadoop/SmallITCluster/teardown.sh   | 22 ++++++
 .../kubernetes/hadoop/config-files/core-site.xml   | 22 ++++++
 .../kubernetes/hadoop/config-files/hdfs-envs       | 23 ++++++
 .../kubernetes/hadoop/config-files/hdfs-site.xml   | 27 +++++++
 sdks/java/io/file-based-io-tests/pom.xml           | 34 +++++++++
 10 files changed, 353 insertions(+)

diff --git 
a/.test-infra/kubernetes/hadoop/SmallITCluster/hdfs-single-datanode-cluster-for-local-dev.yml
 
b/.test-infra/kubernetes/hadoop/SmallITCluster/hdfs-single-datanode-cluster-for-local-dev.yml
new file mode 100644
index 0000000..b761137
--- /dev/null
+++ 
b/.test-infra/kubernetes/hadoop/SmallITCluster/hdfs-single-datanode-cluster-for-local-dev.yml
@@ -0,0 +1,46 @@
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+#
+# This script creates hadoop-external service that allows to connect to hdfs 
cluster from
+# outside world. Running:
+#
+#   kubectl get svc hadoop-external
+#
+# allows to read LoadBalancer EXTERNAL-IP which should be used to interact 
with the hdfs cluster.
+#
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: hadoop-external
+  labels:
+    name: hadoop-external
+spec:
+  ports:
+    - name: sshd
+      port: 2122
+    - name: hdfs
+      port: 9000
+    - name: web
+      port: 50070
+    - name: datanode
+      port: 50010
+    - name: datanode-icp
+      port: 50020
+    - name: datanode-http
+      port: 50075
+  selector:
+    name: hadoop
+  type: LoadBalancer
diff --git 
a/.test-infra/kubernetes/hadoop/SmallITCluster/hdfs-single-datanode-cluster.yml 
b/.test-infra/kubernetes/hadoop/SmallITCluster/hdfs-single-datanode-cluster.yml
new file mode 100644
index 0000000..483c296
--- /dev/null
+++ 
b/.test-infra/kubernetes/hadoop/SmallITCluster/hdfs-single-datanode-cluster.yml
@@ -0,0 +1,83 @@
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+#
+# This script contains definition of hdfs single node cluster. In this 
configuration hdfs datanode
+# and namenode are running on the same pod. Service hadoop allows to connect 
to pods labeled as
+# hadoop, this service also provides connectivity from outside of the cluster.
+# Replication controller creates pods using docker image 
sequenceiq/hadoop-docker:2.7.1.
+# Each pod created will expose hdfs standard ports.
+#
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: hadoop
+  labels:
+    name: hadoop
+spec:
+  ports:
+    - name: sshd
+      port: 2122
+    - name: hdfs
+      port: 9000
+    - name: web
+      port: 50070
+    - name: datanode
+      port: 50010
+    - name: datanode-icp
+      port: 50020
+    - name: datanode-http
+      port: 50075
+  selector:
+    name: hadoop
+  type: NodePort
+
+---
+
+apiVersion: v1
+kind: ReplicationController
+metadata:
+  name: hadoop
+  labels:
+    name: hadoop
+spec:
+  replicas: 1
+  selector:
+    name: hadoop
+  template:
+    metadata:
+      labels:
+        name: hadoop
+    spec:
+      containers:
+        - name: hadoop
+          image: sequenceiq/hadoop-docker:2.7.1
+          ports:
+            - name: sshd
+              containerPort: 2122
+            - name: namenode-hdfs
+              containerPort: 9000
+            - name: datanode
+              containerPort: 50010
+            - name: datanode-icp
+              containerPort: 50020
+            - name: namenode-http
+              containerPort: 50070
+            - name: datanode-http
+              containerPort: 50075
+          lifecycle:
+            postStart:
+              exec:
+                command: ["/bin/sh", "-c", "hostname > 
/usr/local/hadoop/etc/hadoop/slaves"]
diff --git a/.test-infra/kubernetes/hadoop/SmallITCluster/setup-all.sh 
b/.test-infra/kubernetes/hadoop/SmallITCluster/setup-all.sh
new file mode 100755
index 0000000..020d3ad
--- /dev/null
+++ b/.test-infra/kubernetes/hadoop/SmallITCluster/setup-all.sh
@@ -0,0 +1,42 @@
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+#
+# This script starts hdfs cluster and hadoop-external service that allows to 
reach cluster
+# from developer's machine. Once the cluster is working, scripts waits till
+# external cluster endpoint will be available. It prints out configuration 
line that
+# should be added to /etc/hosts file in order to work with hdfs cluster.
+#
+
+#!/bin/sh
+set -e
+
+kubectl create -f hdfs-single-datanode-cluster.yml
+
+kubectl create -f hdfs-single-datanode-cluster-for-local-dev.yml
+
+external_ip="$(kubectl get svc hadoop-external -o 
jsonpath='{.status.loadBalancer.ingress[0].ip}')"
+
+echo "Waiting for the Hadoop service to come up ........"
+while [ -z "$external_ip" ]
+do
+   sleep 10s
+   external_ip="$(kubectl get svc hadoop-external -o 
jsonpath='{.status.loadBalancer.ingress[0].ip}')"
+   echo "."
+done
+
+hadoop_master_pod_name="$(kubectl get pods --selector=name=hadoop -o 
jsonpath='{.items[*].metadata.name}')"
+
+echo "For local tests please add the following entry to /etc/hosts file"
+echo $external_ip$'\t'$hadoop_master_pod_name
diff --git a/.test-infra/kubernetes/hadoop/SmallITCluster/setup.sh 
b/.test-infra/kubernetes/hadoop/SmallITCluster/setup.sh
new file mode 100755
index 0000000..725f938
--- /dev/null
+++ b/.test-infra/kubernetes/hadoop/SmallITCluster/setup.sh
@@ -0,0 +1,21 @@
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+#
+# Simply starts hdfs cluster.
+#
+#!/bin/sh
+set -e
+
+kubectl create -f hdfs-single-datanode-cluster.yml
diff --git a/.test-infra/kubernetes/hadoop/SmallITCluster/teardown-all.sh 
b/.test-infra/kubernetes/hadoop/SmallITCluster/teardown-all.sh
new file mode 100755
index 0000000..0e111c8
--- /dev/null
+++ b/.test-infra/kubernetes/hadoop/SmallITCluster/teardown-all.sh
@@ -0,0 +1,33 @@
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+#
+# This script terminates hdfs cluster and hadoop-external service. It checks 
/etc/hosts file
+# for any unneeded entries and notifies user about them.
+#
+
+#!/bin/sh
+set -e
+
+external_ip="$(kubectl get svc hadoop-external -o 
jsonpath='{.status.loadBalancer.ingress[0].ip}')"
+
+hadoop_master_pod_name="$(kubectl get pods --selector=name=hadoop -o 
jsonpath='{.items[*].metadata.name}')"
+
+kubectl delete -f hdfs-single-datanode-cluster.yml
+
+kubectl delete -f hdfs-single-datanode-cluster-for-local-dev.yml
+
+if grep "$external_ip\|$hadoop_master_pod_name" /etc/hosts ; then
+    echo "Remove entry from /etc/hosts."
+fi
diff --git a/.test-infra/kubernetes/hadoop/SmallITCluster/teardown.sh 
b/.test-infra/kubernetes/hadoop/SmallITCluster/teardown.sh
new file mode 100755
index 0000000..30475ac
--- /dev/null
+++ b/.test-infra/kubernetes/hadoop/SmallITCluster/teardown.sh
@@ -0,0 +1,22 @@
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+#
+# Hdfs cluster termination script.
+#
+
+#!/bin/sh
+set -e
+
+kubectl delete -f hdfs-single-datanode-cluster.yml
diff --git a/.test-infra/kubernetes/hadoop/config-files/core-site.xml 
b/.test-infra/kubernetes/hadoop/config-files/core-site.xml
new file mode 100644
index 0000000..c46211b
--- /dev/null
+++ b/.test-infra/kubernetes/hadoop/config-files/core-site.xml
@@ -0,0 +1,22 @@
+<!--
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+# -->
+<configuration>
+    <property>
+        <name>fs.defaultFS</name>
+        <value>hdfs://hadoop-master:9000</value>
+    </property>
+</configuration>
diff --git a/.test-infra/kubernetes/hadoop/config-files/hdfs-envs 
b/.test-infra/kubernetes/hadoop/config-files/hdfs-envs
new file mode 100644
index 0000000..4561c66
--- /dev/null
+++ b/.test-infra/kubernetes/hadoop/config-files/hdfs-envs
@@ -0,0 +1,23 @@
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+#
+
+# The cluster default user is root
+
+export HADOOP_USER_NAME=root
+
+# We are using hadoop configuration provided here for small cluster
+
+export HADOOP_CONF_DIR=`pwd`/.test-infra/kubernetes/hadoop/config-files
diff --git a/.test-infra/kubernetes/hadoop/config-files/hdfs-site.xml 
b/.test-infra/kubernetes/hadoop/config-files/hdfs-site.xml
new file mode 100644
index 0000000..234b30d
--- /dev/null
+++ b/.test-infra/kubernetes/hadoop/config-files/hdfs-site.xml
@@ -0,0 +1,27 @@
+<!--
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+# -->
+
+<configuration>
+    <property>
+        <name>dfs.replication</name>
+        <value>1</value>
+    </property>
+    <property>
+        <name>dfs.client.use.datanode.hostname</name>
+        <value>true</value>
+    </property>
+</configuration>
diff --git a/sdks/java/io/file-based-io-tests/pom.xml 
b/sdks/java/io/file-based-io-tests/pom.xml
index 4de2e70..bd04104 100644
--- a/sdks/java/io/file-based-io-tests/pom.xml
+++ b/sdks/java/io/file-based-io-tests/pom.xml
@@ -163,8 +163,42 @@
                 </dependency>
             </dependencies>
         </profile>
+        <profile>
+            <!-- Include the hadoop connectivity dependencies activated by 
-Dfilesystem=hdfs
+            Support for protocol scheme hdfs:// - allow to read/write to HDFS 
-->
+            <id>hadoop-distributed-file-system</id>
+            <activation>
+                <property>
+                    <name>filesystem</name>
+                    <value>hdfs</value>
+                </property>
+            </activation>
+            <dependencies>
+                <dependency>
+                    <groupId>org.apache.beam</groupId>
+                    
<artifactId>beam-sdks-java-io-hadoop-file-system</artifactId>
+                    <scope>runtime</scope>
+                </dependency>
+                <dependency>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-hdfs</artifactId>
+                    <version>${apache.hadoop.version}</version>
+                    <scope>runtime</scope>
+                </dependency>
+                <dependency>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-client</artifactId>
+                    <version>${apache.hadoop.version}</version>
+                    <scope>runtime</scope>
+                </dependency>
+            </dependencies>
+        </profile>
     </profiles>
 
+    <properties>
+        <apache.hadoop.version>2.7.1</apache.hadoop.version>
+    </properties>
+
     <dependencies>
         <dependency>
             <groupId>org.apache.beam</groupId>

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to