[ https://issues.apache.org/jira/browse/BEAM-4176?focusedWorklogId=152187&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-152187 ]
ASF GitHub Bot logged work on BEAM-4176: ---------------------------------------- Author: ASF GitHub Bot Created on: 08/Oct/18 10:25 Start Date: 08/Oct/18 10:25 Worklog Time Spent: 10m Work Description: mxm closed pull request #6563: [BEAM-4176] Cleanup SDK Harness docker container URL: https://github.com/apache/beam/pull/6563 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PipelineOptionsTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PipelineOptionsTranslation.java index 6a8988e76fa..4c3d5bf690b 100644 --- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PipelineOptionsTranslation.java +++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PipelineOptionsTranslation.java @@ -28,6 +28,7 @@ import java.util.Map; import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.util.common.ReflectHelpers; +import org.apache.beam.vendor.protobuf.v3.com.google.protobuf.InvalidProtocolBufferException; import org.apache.beam.vendor.protobuf.v3.com.google.protobuf.Struct; import org.apache.beam.vendor.protobuf.v3.com.google.protobuf.util.JsonFormat; @@ -66,38 +67,50 @@ public static Struct toProto(PipelineOptions options) { JsonFormat.parser().merge(MAPPER.writeValueAsString(optionsUsingUrns), builder); return builder.build(); } catch (IOException e) { - throw new RuntimeException(e); + throw new RuntimeException("Failed to convert PipelineOptions to Protocol", e); } } /** Converts the provided {@link Struct} into {@link PipelineOptions}. 
*/ - public static PipelineOptions fromProto(Struct protoOptions) throws IOException { - Map<String, TreeNode> mapWithoutUrns = new HashMap<>(); - TreeNode rootOptions = MAPPER.readTree(JsonFormat.printer().print(protoOptions)); - Iterator<String> optionsKeys = rootOptions.fieldNames(); - while (optionsKeys.hasNext()) { - String optionKey = optionsKeys.next(); - TreeNode optionValue = rootOptions.get(optionKey); - mapWithoutUrns.put( - CaseFormat.LOWER_UNDERSCORE.to( - CaseFormat.LOWER_CAMEL, - optionKey.substring("beam:option:".length(), optionKey.length() - ":v1".length())), - optionValue); + public static PipelineOptions fromProto(Struct protoOptions) { + try { + Map<String, TreeNode> mapWithoutUrns = new HashMap<>(); + TreeNode rootOptions = MAPPER.readTree(JsonFormat.printer().print(protoOptions)); + Iterator<String> optionsKeys = rootOptions.fieldNames(); + while (optionsKeys.hasNext()) { + String optionKey = optionsKeys.next(); + TreeNode optionValue = rootOptions.get(optionKey); + mapWithoutUrns.put( + CaseFormat.LOWER_UNDERSCORE.to( + CaseFormat.LOWER_CAMEL, + optionKey.substring("beam:option:".length(), optionKey.length() - ":v1".length())), + optionValue); + } + return MAPPER.readValue( + MAPPER.writeValueAsString(ImmutableMap.of("options", mapWithoutUrns)), + PipelineOptions.class); + } catch (IOException e) { + throw new RuntimeException("Failed to read PipelineOptions from Protocol", e); } - return MAPPER.readValue( - MAPPER.writeValueAsString(ImmutableMap.of("options", mapWithoutUrns)), - PipelineOptions.class); } /** Converts the provided Json{@link String} into {@link PipelineOptions}. 
*/ - public static PipelineOptions fromJson(String optionsJson) throws IOException { - Struct.Builder builder = Struct.newBuilder(); - JsonFormat.parser().merge(optionsJson, builder); - return fromProto(builder.build()); + public static PipelineOptions fromJson(String optionsJson) { + try { + Struct.Builder builder = Struct.newBuilder(); + JsonFormat.parser().merge(optionsJson, builder); + return fromProto(builder.build()); + } catch (IOException e) { + throw new RuntimeException("Failed to read PipelineOptions from JSON", e); + } } /** Converts the provided {@link PipelineOptions} into Json{@link String}. */ - public static String toJson(PipelineOptions options) throws IOException { - return JsonFormat.printer().print(toProto(options)); + public static String toJson(PipelineOptions options) { + try { + return JsonFormat.printer().print(toProto(options)); + } catch (InvalidProtocolBufferException e) { + throw new RuntimeException("Failed to convert PipelineOptions to JSON", e); + } } } diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/portable/ReferenceRunner.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/portable/ReferenceRunner.java index f4bbf2c8d38..e24858e7076 100644 --- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/portable/ReferenceRunner.java +++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/portable/ReferenceRunner.java @@ -52,6 +52,7 @@ import org.apache.beam.runners.core.construction.ModelCoders; import org.apache.beam.runners.core.construction.ModelCoders.KvCoderComponents; import org.apache.beam.runners.core.construction.PTransformTranslation; +import org.apache.beam.runners.core.construction.PipelineOptionsTranslation; import org.apache.beam.runners.core.construction.graph.ExecutableStage; import org.apache.beam.runners.core.construction.graph.GreedyPipelineFuser; import org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode; @@ -236,7 
+237,7 @@ private EnvironmentFactory createEnvironmentFactory( ControlClientPool controlClient) { switch (environmentType) { case DOCKER: - return new DockerEnvironmentFactory.Provider() + return new DockerEnvironmentFactory.Provider(PipelineOptionsTranslation.fromProto(options)) .createEnvironmentFactory( control, logging, diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDefaultExecutableStageContext.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDefaultExecutableStageContext.java index 775d64c5a84..e50d6f90566 100644 --- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDefaultExecutableStageContext.java +++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDefaultExecutableStageContext.java @@ -21,6 +21,7 @@ import org.apache.beam.model.pipeline.v1.RunnerApi.StandardEnvironments; import org.apache.beam.runners.core.construction.BeamUrns; import org.apache.beam.runners.core.construction.Environments; +import org.apache.beam.runners.core.construction.PipelineOptionsTranslation; import org.apache.beam.runners.core.construction.graph.ExecutableStage; import org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory; import org.apache.beam.runners.fnexecution.control.JobBundleFactory; @@ -40,7 +41,8 @@ private static FlinkDefaultExecutableStageContext create(JobInfo jobInfo) { jobInfo, ImmutableMap.of( BeamUrns.getUrn(StandardEnvironments.Environments.DOCKER), - new DockerEnvironmentFactory.Provider(), + new DockerEnvironmentFactory.Provider( + PipelineOptionsTranslation.fromProto(jobInfo.pipelineOptions())), BeamUrns.getUrn(StandardEnvironments.Environments.PROCESS), new ProcessEnvironmentFactory.Provider(), Environments.ENVIRONMENT_EMBEDDED, // Non Public urn for testing. 
diff --git a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/DockerEnvironmentFactory.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/DockerEnvironmentFactory.java index c403ff94b65..7d880c957cc 100644 --- a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/DockerEnvironmentFactory.java +++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/environment/DockerEnvironmentFactory.java @@ -40,6 +40,8 @@ import org.apache.beam.runners.fnexecution.logging.GrpcLoggingService; import org.apache.beam.runners.fnexecution.provisioning.StaticGrpcProvisionService; import org.apache.beam.sdk.fn.IdGenerator; +import org.apache.beam.sdk.options.ManualDockerEnvironmentOptions; +import org.apache.beam.sdk.options.PipelineOptions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -59,7 +61,8 @@ static DockerEnvironmentFactory forServicesWithDocker( GrpcFnServer<ArtifactRetrievalService> retrievalServiceServer, GrpcFnServer<StaticGrpcProvisionService> provisioningServiceServer, ControlClientPool.Source clientSource, - IdGenerator idGenerator) { + IdGenerator idGenerator, + boolean retainDockerContainer) { return new DockerEnvironmentFactory( docker, controlServiceServer, @@ -67,7 +70,8 @@ static DockerEnvironmentFactory forServicesWithDocker( retrievalServiceServer, provisioningServiceServer, idGenerator, - clientSource); + clientSource, + retainDockerContainer); } private final DockerCommand docker; @@ -77,6 +81,7 @@ static DockerEnvironmentFactory forServicesWithDocker( private final GrpcFnServer<StaticGrpcProvisionService> provisioningServiceServer; private final IdGenerator idGenerator; private final ControlClientPool.Source clientSource; + private final boolean retainDockerContainer; private DockerEnvironmentFactory( DockerCommand docker, @@ -85,7 +90,8 @@ private DockerEnvironmentFactory( 
GrpcFnServer<ArtifactRetrievalService> retrievalServiceServer, GrpcFnServer<StaticGrpcProvisionService> provisioningServiceServer, IdGenerator idGenerator, - ControlClientPool.Source clientSource) { + ControlClientPool.Source clientSource, + boolean retainDockerContainer) { this.docker = docker; this.controlServiceServer = controlServiceServer; this.loggingServiceServer = loggingServiceServer; @@ -93,6 +99,7 @@ private DockerEnvironmentFactory( this.provisioningServiceServer = provisioningServiceServer; this.idGenerator = idGenerator; this.clientSource = clientSource; + this.retainDockerContainer = retainDockerContainer; } /** Creates a new, active {@link RemoteEnvironment} backed by a local Docker container. */ @@ -116,14 +123,17 @@ public RemoteEnvironment createEnvironment(Environment environment) throws Excep String provisionEndpoint = provisioningServiceServer.getApiServiceDescriptor().getUrl(); String controlEndpoint = controlServiceServer.getApiServiceDescriptor().getUrl(); - List<String> volArg = + ImmutableList.Builder<String> dockerArgsBuilder = ImmutableList.<String>builder() .addAll(gcsCredentialArgs()) // NOTE: Host networking does not work on Mac, but the command line flag is accepted. 
.add("--network=host") // We need to pass on the information about Docker-on-Mac environment (due to missing host networking on Mac) - .add("--env=DOCKER_MAC_CONTAINER=" + System.getenv("DOCKER_MAC_CONTAINER")) - .build(); + .add("--env=DOCKER_MAC_CONTAINER=" + System.getenv("DOCKER_MAC_CONTAINER")); + + if (!retainDockerContainer) { + dockerArgsBuilder.add("--rm"); + } List<String> args = ImmutableList.of( @@ -138,7 +148,7 @@ public RemoteEnvironment createEnvironment(Environment environment) throws Excep String containerId = null; InstructionRequestHandler instructionHandler = null; try { - containerId = docker.runImage(containerImage, volArg, args); + containerId = docker.runImage(containerImage, dockerArgsBuilder.build(), args); LOG.debug("Created Docker Container with Container ID {}", containerId); // Wait on a client from the gRPC server. while (instructionHandler == null) { @@ -225,6 +235,12 @@ private static ServerFactory getServerFactory() { /** Provider for DockerEnvironmentFactory. 
*/ public static class Provider implements EnvironmentFactory.Provider { + private final boolean retainDockerContainer; + + public Provider(PipelineOptions options) { + this.retainDockerContainer = + options.as(ManualDockerEnvironmentOptions.class).getRetainDockerContainers(); + } @Override public EnvironmentFactory createEnvironmentFactory( @@ -241,7 +257,8 @@ public EnvironmentFactory createEnvironmentFactory( retrievalServiceServer, provisioningServiceServer, clientPool.getSource(), - idGenerator); + idGenerator, + retainDockerContainer); } @Override diff --git a/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/environment/DockerEnvironmentFactoryTest.java b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/environment/DockerEnvironmentFactoryTest.java index ba959d64374..b9a173c2b54 100644 --- a/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/environment/DockerEnvironmentFactoryTest.java +++ b/runners/java-fn-execution/src/test/java/org/apache/beam/runners/fnexecution/environment/DockerEnvironmentFactoryTest.java @@ -81,7 +81,8 @@ public void initMocks() { retrievalServiceServer, provisioningServiceServer, (workerId, timeout) -> client, - ID_GENERATOR); + ID_GENERATOR, + false); } @Test diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/ManualDockerEnvironmentOptions.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/ManualDockerEnvironmentOptions.java new file mode 100644 index 00000000000..e68de87d152 --- /dev/null +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/ManualDockerEnvironmentOptions.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.options; + +import com.google.auto.service.AutoService; +import com.google.common.collect.ImmutableList; +import org.apache.beam.sdk.annotations.Experimental; + +/** Pipeline options to tune DockerEnvironment. */ +@Experimental +@Hidden +public interface ManualDockerEnvironmentOptions extends PipelineOptions { + + @Description("Retain dynamically created Docker container Environments.") + @Default.Boolean(false) + boolean getRetainDockerContainers(); + + void setRetainDockerContainers(boolean retainDockerContainers); + + /** Register the {@link ManualDockerEnvironmentOptions}. */ + @AutoService(PipelineOptionsRegistrar.class) + class Options implements PipelineOptionsRegistrar { + @Override + public Iterable<Class<? extends PipelineOptions>> getPipelineOptions() { + return ImmutableList.of(ManualDockerEnvironmentOptions.class); + } + } +} ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org

Issue Time Tracking
-------------------

Worklog Id: (was: 152187)
Time Spent: 30h 40m (was: 30.5h)

> Java: Portable batch runner passes all ValidatesRunner tests that non-portable runner passes
> --------------------------------------------------------------------------------------------
>
> Key: BEAM-4176
> URL: https://issues.apache.org/jira/browse/BEAM-4176
> Project: Beam
> Issue Type: Bug
> Components: runner-flink
> Reporter: Ben Sidhom
> Assignee: Ankur Goenka
> Priority: Major
> Attachments: 81VxNWtFtke.png, Screen Shot 2018-08-14 at 4.18.31 PM.png, Screen Shot 2018-09-03 at 11.07.38 AM.png
>
> Time Spent: 30h 40m
> Remaining Estimate: 0h
>
> We need this as a sanity check that runner execution is correct.

--
This message was sent by Atlassian JIRA (v7.6.3#76005)