Repository: beam Updated Branches: refs/heads/master b0030aeb3 -> bbc231c8e
[BEAM-2600] Add minimal python SDK harness container Project: http://git-wip-us.apache.org/repos/asf/beam/repo Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/79c55db9 Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/79c55db9 Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/79c55db9 Branch: refs/heads/master Commit: 79c55db9dc1f3f961f5826f8e585758b26dc162b Parents: b0030ae Author: Henning Rohde <[email protected]> Authored: Mon Oct 9 15:28:25 2017 -0700 Committer: Ahmet Altay <[email protected]> Committed: Tue Oct 10 14:28:27 2017 -0700 ---------------------------------------------------------------------- ...ob_beam_PreCommit_Python_MavenInstall.groovy | 2 +- sdks/go/cmd/beamctl/cmd/root.go | 1 + sdks/python/container/Dockerfile | 27 ++++ sdks/python/container/boot.go | 122 +++++++++++++++ sdks/python/container/pom.xml | 154 +++++++++++++++++++ sdks/python/pom.xml | 7 + 6 files changed, 312 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/beam/blob/79c55db9/.test-infra/jenkins/job_beam_PreCommit_Python_MavenInstall.groovy ---------------------------------------------------------------------- diff --git a/.test-infra/jenkins/job_beam_PreCommit_Python_MavenInstall.groovy b/.test-infra/jenkins/job_beam_PreCommit_Python_MavenInstall.groovy index eae129b..feadb89 100644 --- a/.test-infra/jenkins/job_beam_PreCommit_Python_MavenInstall.groovy +++ b/.test-infra/jenkins/job_beam_PreCommit_Python_MavenInstall.groovy @@ -45,7 +45,7 @@ mavenJob('beam_PreCommit_Python_MavenInstall') { --batch-mode \ --errors \ --activate-profiles release,jenkins-precommit,direct-runner,dataflow-runner,spark-runner,flink-runner,apex-runner \ - --projects sdks/python \ + --projects sdks/python,!sdks/python/container \ --also-make \ --also-make-dependents \ -D pullRequest=$ghprbPullId \ http://git-wip-us.apache.org/repos/asf/beam/blob/79c55db9/sdks/go/cmd/beamctl/cmd/root.go 
---------------------------------------------------------------------- diff --git a/sdks/go/cmd/beamctl/cmd/root.go b/sdks/go/cmd/beamctl/cmd/root.go index 53ee83c..a4e7945 100644 --- a/sdks/go/cmd/beamctl/cmd/root.go +++ b/sdks/go/cmd/beamctl/cmd/root.go @@ -27,6 +27,7 @@ import ( ) var ( + // RootCmd is the root for beamctl commands. RootCmd = &cobra.Command{ Use: "beamctl", Short: "Apache Beam command line client", http://git-wip-us.apache.org/repos/asf/beam/blob/79c55db9/sdks/python/container/Dockerfile ---------------------------------------------------------------------- diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile new file mode 100644 index 0000000..826e36c --- /dev/null +++ b/sdks/python/container/Dockerfile @@ -0,0 +1,27 @@ +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################### + +FROM python:2 +MAINTAINER "Apache Beam <[email protected]>" + +# TODO(herohde): preinstall various packages for better startup +# performance and reliability. 
+ +ADD target/linux_amd64/boot /opt/apache/beam/ + +ENTRYPOINT ["/opt/apache/beam/boot"] http://git-wip-us.apache.org/repos/asf/beam/blob/79c55db9/sdks/python/container/boot.go ---------------------------------------------------------------------- diff --git a/sdks/python/container/boot.go b/sdks/python/container/boot.go new file mode 100644 index 0000000..18b9900 --- /dev/null +++ b/sdks/python/container/boot.go @@ -0,0 +1,122 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// boot is the boot code for the Python SDK harness container. It is responsible +// for retrieving and installing staged files and invoking python correctly. +package main + +import ( +"context" +"flag" +"fmt" +"log" +"os" +"path/filepath" +"strings" + +"github.com/apache/beam/sdks/go/pkg/beam/artifact" +"github.com/apache/beam/sdks/go/pkg/beam/provision" +"github.com/apache/beam/sdks/go/pkg/beam/util/execx" +"github.com/apache/beam/sdks/go/pkg/beam/util/grpcx" +) + +var ( + // Contract: https://s.apache.org/beam-fn-api-container-contract. 
+ + id = flag.String("id", "", "Local identifier (required).") + loggingEndpoint = flag.String("logging_endpoint", "", "Logging endpoint (required).") + artifactEndpoint = flag.String("artifact_endpoint", "", "Artifact endpoint (required).") + provisionEndpoint = flag.String("provision_endpoint", "", "Provision endpoint (required).") + controlEndpoint = flag.String("control_endpoint", "", "Control endpoint (required).") + semiPersistDir = flag.String("semi_persist_dir", "/tmp", "Local semi-persistent directory (optional).") +) + +func main() { + flag.Parse() + if *id == "" { + log.Fatal("No id provided.") + } + if *loggingEndpoint == "" { + log.Fatal("No logging endpoint provided.") + } + if *artifactEndpoint == "" { + log.Fatal("No artifact endpoint provided.") + } + if *provisionEndpoint == "" { + log.Fatal("No provision endpoint provided.") + } + if *controlEndpoint == "" { + log.Fatal("No control endpoint provided.") + } + + log.Printf("Initializing python harness: %v", strings.Join(os.Args, " ")) + + ctx := grpcx.WriteWorkerID(context.Background(), *id) + + // (1) Obtain the pipeline options + + info, err := provision.Info(ctx, *provisionEndpoint) + if err != nil { + log.Fatalf("Failed to obtain provisioning information: %v", err) + } + options, err := provision.ProtoToJSON(info.GetPipelineOptions()) + if err != nil { + log.Fatalf("Failed to convert pipeline options: %v", err) + } + + // (2) Retrieve and install the staged packages. + + dir := filepath.Join(*semiPersistDir, "staged") + + _, err = artifact.Materialize(ctx, *artifactEndpoint, dir) + if err != nil { + log.Fatalf("Failed to retrieve staged files: %v", err) + } + + // TODO(herohde): the packages to install should be specified explicitly. It + // would also be possible to install the SDK in the Dockerfile. 
+ if err := pipInstall(joinPaths(dir, "dataflow_python_sdk.tar[gcp]")); err != nil { + log.Fatalf("Failed to install SDK: %v", err) + } + + // (3) Invoke python + + os.Setenv("PIPELINE_OPTIONS", options) + os.Setenv("LOGGING_API_SERVICE_DESCRIPTOR", fmt.Sprintf("url: \"%v\"\n", *loggingEndpoint)) + os.Setenv("CONTROL_API_SERVICE_DESCRIPTOR", fmt.Sprintf("url: \"%v\"\n", *controlEndpoint)) + + args := []string{ + "-m", + "apache_beam.runners.worker.sdk_worker_main", + } + log.Printf("Executing: python %v", strings.Join(args, " ")) + + log.Fatalf("Python exited: %v", execx.Execute("python", args...)) +} + +// pipInstall runs pip install with the given args. +func pipInstall(args []string) error { + return execx.Execute("pip", append([]string{"install"}, args...)...) +} + +// joinPaths joins the dir to every artifact path. Each / in the path is +// interpreted as a directory separator. +func joinPaths(dir string, paths ...string) []string { + var ret []string + for _, p := range paths { + ret = append(ret, filepath.Join(dir, filepath.FromSlash(p))) + } + return ret +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/beam/blob/79c55db9/sdks/python/container/pom.xml ---------------------------------------------------------------------- diff --git a/sdks/python/container/pom.xml b/sdks/python/container/pom.xml new file mode 100644 index 0000000..45b8cbf --- /dev/null +++ b/sdks/python/container/pom.xml @@ -0,0 +1,154 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.beam</groupId> + <artifactId>beam-sdks-python</artifactId> + <version>2.3.0-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <artifactId>beam-sdks-python-container</artifactId> + + <packaging>pom</packaging> + + <name>Apache Beam :: SDKs :: Python :: Container</name> + + <properties> + <!-- Add full path directory structure for 'go get' compatibility --> + <go.source.base>${project.basedir}/target/src</go.source.base> + <go.source.dir>${go.source.base}/github.com/apache/beam/sdks/go</go.source.dir> + </properties> + + <build> + <sourceDirectory>${go.source.base}</sourceDirectory> + <plugins> + <plugin> + <artifactId>maven-resources-plugin</artifactId> + <executions> + <execution> + <id>copy-go-cmd-source</id> + <phase>generate-sources</phase> + <goals> + <goal>copy-resources</goal> + </goals> + <configuration> + <outputDirectory>${go.source.base}/github.com/apache/beam/cmd/boot</outputDirectory> + <resources> + <resource> + <directory>.</directory> + <includes> + <include>*.go</include> + </includes> + <filtering>false</filtering> + </resource> + </resources> + </configuration> + </execution> + </executions> + </plugin> + + <!-- CAVEAT: for latest shared files, run mvn install in sdks/go --> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + 
<artifactId>maven-dependency-plugin</artifactId> + <executions> + <execution> + <id>copy-dependency</id> + <phase>generate-sources</phase> + <goals> + <goal>unpack</goal> + </goals> + <configuration> + <artifactItems> + <artifactItem> + <groupId>org.apache.beam</groupId> + <artifactId>beam-sdks-go</artifactId> + <version>${project.version}</version> + <type>zip</type> + <classifier>pkg-sources</classifier> + <overWrite>true</overWrite> + <outputDirectory>${go.source.dir}</outputDirectory> + </artifactItem> + </artifactItems> + </configuration> + </execution> + </executions> + </plugin> + + <plugin> + <groupId>com.igormaznitsa</groupId> + <artifactId>mvn-golang-wrapper</artifactId> + <executions> + <execution> + <id>go-get-imports</id> + <goals> + <goal>get</goal> + </goals> + <phase>compile</phase> + <configuration> + <packages> + <package>google.golang.org/grpc</package> + <package>golang.org/x/oauth2/google</package> + <package>google.golang.org/api/storage/v1</package> + </packages> + </configuration> + </execution> + <execution> + <id>go-build</id> + <goals> + <goal>build</goal> + </goals> + <phase>compile</phase> + <configuration> + <packages> + <package>github.com/apache/beam/cmd/boot</package> + </packages> + <resultName>boot</resultName> + </configuration> + </execution> + <execution> + <id>go-build-linux-amd64</id> + <goals> + <goal>build</goal> + </goals> + <phase>compile</phase> + <configuration> + <packages> + <package>github.com/apache/beam/cmd/boot</package> + </packages> + <resultName>linux_amd64/boot</resultName> + <targetArch>amd64</targetArch> + <targetOs>linux</targetOs> + </configuration> + </execution> + </executions> + </plugin> + + <plugin> + <groupId>com.spotify</groupId> + <artifactId>dockerfile-maven-plugin</artifactId> + <configuration> + <repository>${docker-repository-root}/python</repository> + </configuration> + </plugin> + </plugins> + </build> +</project> http://git-wip-us.apache.org/repos/asf/beam/blob/79c55db9/sdks/python/pom.xml 
---------------------------------------------------------------------- diff --git a/sdks/python/pom.xml b/sdks/python/pom.xml index 624048f..62135e8 100644 --- a/sdks/python/pom.xml +++ b/sdks/python/pom.xml @@ -32,6 +32,10 @@ <name>Apache Beam :: SDKs :: Python</name> + <modules> + <module>container</module> + </modules> + <properties> <!-- python.interpreter.bin & python.pip.bin is set dynamically by findSupportedPython.groovy --> @@ -59,6 +63,7 @@ <groupId>org.codehaus.gmaven</groupId> <artifactId>groovy-maven-plugin</artifactId> <version>${groovy-maven-plugin.version}</version> + <inherited>false</inherited> <executions> <execution> <id>find-supported-python-for-clean</id> @@ -85,6 +90,7 @@ <plugin> <groupId>org.codehaus.mojo</groupId> <artifactId>exec-maven-plugin</artifactId> + <inherited>false</inherited> <executions> <execution> <id>setuptools-clean</id> @@ -189,6 +195,7 @@ <plugin> <groupId>org.codehaus.mojo</groupId> <artifactId>exec-maven-plugin</artifactId> + <inherited>false</inherited> <executions> <execution> <id>setuptools-test</id>
