This is an automated email from the ASF dual-hosted git repository.
chamikara pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 24c0034 [BEAM-3060] HDFS cluster configuration, kubernetes scripts,
filebased io support … (#4261)
24c0034 is described below
commit 24c003496dc1930129169b29e5c677c6fb839dd5
Author: Kamil Szewczyk <[email protected]>
AuthorDate: Fri Jan 12 02:27:26 2018 +0100
[BEAM-3060] HDFS cluster configuration, kubernetes scripts, filebased io
support … (#4261)
HDFS cluster configuration, kubernetes scripts, filebased io support for
hdfs tests.
---
.../hdfs-single-datanode-cluster-for-local-dev.yml | 46 ++++++++++++
.../hdfs-single-datanode-cluster.yml | 83 ++++++++++++++++++++++
.../kubernetes/hadoop/SmallITCluster/setup-all.sh | 42 +++++++++++
.../kubernetes/hadoop/SmallITCluster/setup.sh | 21 ++++++
.../hadoop/SmallITCluster/teardown-all.sh | 33 +++++++++
.../kubernetes/hadoop/SmallITCluster/teardown.sh | 22 ++++++
.../kubernetes/hadoop/config-files/core-site.xml | 22 ++++++
.../kubernetes/hadoop/config-files/hdfs-envs | 23 ++++++
.../kubernetes/hadoop/config-files/hdfs-site.xml | 27 +++++++
sdks/java/io/file-based-io-tests/pom.xml | 34 +++++++++
10 files changed, 353 insertions(+)
diff --git
a/.test-infra/kubernetes/hadoop/SmallITCluster/hdfs-single-datanode-cluster-for-local-dev.yml
b/.test-infra/kubernetes/hadoop/SmallITCluster/hdfs-single-datanode-cluster-for-local-dev.yml
new file mode 100644
index 0000000..b761137
--- /dev/null
+++
b/.test-infra/kubernetes/hadoop/SmallITCluster/hdfs-single-datanode-cluster-for-local-dev.yml
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This script creates the hadoop-external service that allows connecting to
+# the hdfs cluster from the outside world. Running:
+#
+# kubectl get svc hadoop-external
+#
+# allows reading the LoadBalancer EXTERNAL-IP, which should be used to
+# interact with the hdfs cluster.
+#
+
+apiVersion: v1
+kind: Service
+metadata:
+ name: hadoop-external
+ labels:
+ name: hadoop-external
+spec:
+ ports:
+ - name: sshd
+ port: 2122
+ - name: hdfs
+ port: 9000
+ - name: web
+ port: 50070
+ - name: datanode
+ port: 50010
+ - name: datanode-icp
+ port: 50020
+ - name: datanode-http
+ port: 50075
+ selector:
+ name: hadoop
+ type: LoadBalancer
diff --git
a/.test-infra/kubernetes/hadoop/SmallITCluster/hdfs-single-datanode-cluster.yml
b/.test-infra/kubernetes/hadoop/SmallITCluster/hdfs-single-datanode-cluster.yml
new file mode 100644
index 0000000..483c296
--- /dev/null
+++
b/.test-infra/kubernetes/hadoop/SmallITCluster/hdfs-single-datanode-cluster.yml
@@ -0,0 +1,83 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This script contains the definition of an hdfs single-node cluster. In this
+# configuration the hdfs datanode and namenode run on the same pod. The hadoop
+# service allows connecting to pods labeled as hadoop; this service also
+# provides connectivity from outside of the cluster.
+# Replication controller creates pods using docker image
sequenceiq/hadoop-docker:2.7.1.
+# Each pod created will expose hdfs standard ports.
+#
+
+apiVersion: v1
+kind: Service
+metadata:
+ name: hadoop
+ labels:
+ name: hadoop
+spec:
+ ports:
+ - name: sshd
+ port: 2122
+ - name: hdfs
+ port: 9000
+ - name: web
+ port: 50070
+ - name: datanode
+ port: 50010
+ - name: datanode-icp
+ port: 50020
+ - name: datanode-http
+ port: 50075
+ selector:
+ name: hadoop
+ type: NodePort
+
+---
+
+apiVersion: v1
+kind: ReplicationController
+metadata:
+ name: hadoop
+ labels:
+ name: hadoop
+spec:
+ replicas: 1
+ selector:
+ name: hadoop
+ template:
+ metadata:
+ labels:
+ name: hadoop
+ spec:
+ containers:
+ - name: hadoop
+ image: sequenceiq/hadoop-docker:2.7.1
+ ports:
+ - name: sshd
+ containerPort: 2122
+ - name: namenode-hdfs
+ containerPort: 9000
+ - name: datanode
+ containerPort: 50010
+ - name: datanode-icp
+ containerPort: 50020
+ - name: namenode-http
+ containerPort: 50070
+ - name: datanode-http
+ containerPort: 50075
+ lifecycle:
+ postStart:
+ exec:
+ command: ["/bin/sh", "-c", "hostname >
/usr/local/hadoop/etc/hadoop/slaves"]
diff --git a/.test-infra/kubernetes/hadoop/SmallITCluster/setup-all.sh
b/.test-infra/kubernetes/hadoop/SmallITCluster/setup-all.sh
new file mode 100755
index 0000000..020d3ad
--- /dev/null
+++ b/.test-infra/kubernetes/hadoop/SmallITCluster/setup-all.sh
@@ -0,0 +1,42 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This script starts the hdfs cluster and the hadoop-external service that
+# allows reaching the cluster from the developer's machine. Once the cluster
+# is working, the script waits until the external cluster endpoint is
+# available. It prints out the configuration line that should be added to the
+# /etc/hosts file in order to work with the hdfs cluster.
+#
+
+#!/bin/sh
+set -e
+
+kubectl create -f hdfs-single-datanode-cluster.yml
+
+kubectl create -f hdfs-single-datanode-cluster-for-local-dev.yml
+
+external_ip="$(kubectl get svc hadoop-external -o
jsonpath='{.status.loadBalancer.ingress[0].ip}')"
+
+echo "Waiting for the Hadoop service to come up ........"
+while [ -z "$external_ip" ]
+do
+ sleep 10s
+ external_ip="$(kubectl get svc hadoop-external -o
jsonpath='{.status.loadBalancer.ingress[0].ip}')"
+ echo "."
+done
+
+hadoop_master_pod_name="$(kubectl get pods --selector=name=hadoop -o
jsonpath='{.items[*].metadata.name}')"
+
+echo "For local tests please add the following entry to /etc/hosts file"
+echo $external_ip$'\t'$hadoop_master_pod_name
diff --git a/.test-infra/kubernetes/hadoop/SmallITCluster/setup.sh
b/.test-infra/kubernetes/hadoop/SmallITCluster/setup.sh
new file mode 100755
index 0000000..725f938
--- /dev/null
+++ b/.test-infra/kubernetes/hadoop/SmallITCluster/setup.sh
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Simply starts hdfs cluster.
+#
+#!/bin/sh
+set -e
+
+kubectl create -f hdfs-single-datanode-cluster.yml
diff --git a/.test-infra/kubernetes/hadoop/SmallITCluster/teardown-all.sh
b/.test-infra/kubernetes/hadoop/SmallITCluster/teardown-all.sh
new file mode 100755
index 0000000..0e111c8
--- /dev/null
+++ b/.test-infra/kubernetes/hadoop/SmallITCluster/teardown-all.sh
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This script terminates hdfs cluster and hadoop-external service. It checks
/etc/hosts file
+# for any unneeded entries and notifies user about them.
+#
+
+#!/bin/sh
+set -e
+
+external_ip="$(kubectl get svc hadoop-external -o
jsonpath='{.status.loadBalancer.ingress[0].ip}')"
+
+hadoop_master_pod_name="$(kubectl get pods --selector=name=hadoop -o
jsonpath='{.items[*].metadata.name}')"
+
+kubectl delete -f hdfs-single-datanode-cluster.yml
+
+kubectl delete -f hdfs-single-datanode-cluster-for-local-dev.yml
+
+if grep "$external_ip\|$hadoop_master_pod_name" /etc/hosts ; then
+ echo "Remove entry from /etc/hosts."
+fi
diff --git a/.test-infra/kubernetes/hadoop/SmallITCluster/teardown.sh
b/.test-infra/kubernetes/hadoop/SmallITCluster/teardown.sh
new file mode 100755
index 0000000..30475ac
--- /dev/null
+++ b/.test-infra/kubernetes/hadoop/SmallITCluster/teardown.sh
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Hdfs cluster termination script.
+#
+
+#!/bin/sh
+set -e
+
+kubectl delete -f hdfs-single-datanode-cluster.yml
diff --git a/.test-infra/kubernetes/hadoop/config-files/core-site.xml
b/.test-infra/kubernetes/hadoop/config-files/core-site.xml
new file mode 100644
index 0000000..c46211b
--- /dev/null
+++ b/.test-infra/kubernetes/hadoop/config-files/core-site.xml
@@ -0,0 +1,22 @@
+<!--
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -->
+<configuration>
+ <property>
+ <name>fs.defaultFS</name>
+ <value>hdfs://hadoop-master:9000</value>
+ </property>
+</configuration>
diff --git a/.test-infra/kubernetes/hadoop/config-files/hdfs-envs
b/.test-infra/kubernetes/hadoop/config-files/hdfs-envs
new file mode 100644
index 0000000..4561c66
--- /dev/null
+++ b/.test-infra/kubernetes/hadoop/config-files/hdfs-envs
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# The cluster default user is root
+
+export HADOOP_USER_NAME=root
+
+# We are using hadoop configuration provided here for small cluster
+
+export HADOOP_CONF_DIR=`pwd`/.test-infra/kubernetes/hadoop/config-files
diff --git a/.test-infra/kubernetes/hadoop/config-files/hdfs-site.xml
b/.test-infra/kubernetes/hadoop/config-files/hdfs-site.xml
new file mode 100644
index 0000000..234b30d
--- /dev/null
+++ b/.test-infra/kubernetes/hadoop/config-files/hdfs-site.xml
@@ -0,0 +1,27 @@
+<!--
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -->
+
+<configuration>
+ <property>
+ <name>dfs.replication</name>
+ <value>1</value>
+ </property>
+ <property>
+ <name>dfs.client.use.datanode.hostname</name>
+ <value>true</value>
+ </property>
+</configuration>
diff --git a/sdks/java/io/file-based-io-tests/pom.xml
b/sdks/java/io/file-based-io-tests/pom.xml
index 4de2e70..bd04104 100644
--- a/sdks/java/io/file-based-io-tests/pom.xml
+++ b/sdks/java/io/file-based-io-tests/pom.xml
@@ -163,8 +163,42 @@
</dependency>
</dependencies>
</profile>
+ <profile>
+ <!-- Include the hadoop connectivity dependencies activated by
-Dfilesystem=hdfs
+ Support for protocol scheme hdfs:// - allow to read/write to HDFS
-->
+ <id>hadoop-distributed-file-system</id>
+ <activation>
+ <property>
+ <name>filesystem</name>
+ <value>hdfs</value>
+ </property>
+ </activation>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.beam</groupId>
+
<artifactId>beam-sdks-java-io-hadoop-file-system</artifactId>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <version>${apache.hadoop.version}</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>${apache.hadoop.version}</version>
+ <scope>runtime</scope>
+ </dependency>
+ </dependencies>
+ </profile>
</profiles>
+ <properties>
+ <apache.hadoop.version>2.7.1</apache.hadoop.version>
+ </properties>
+
<dependencies>
<dependency>
<groupId>org.apache.beam</groupId>
--
To stop receiving notification emails like this one, please contact
"[email protected]" <[email protected]>.