[ https://issues.apache.org/jira/browse/FLINK-8910?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16473806#comment-16473806 ]
ASF GitHub Bot commented on FLINK-8910: --------------------------------------- Github user tzulitai commented on a diff in the pull request: https://github.com/apache/flink/pull/5676#discussion_r187845478 --- Diff: flink-end-to-end-tests/flink-local-recovery-and-allocation-test/src/main/java/org/apache/flink/streaming/tests/StickyAllocationAndLocalRecoveryTestJob.java --- @@ -0,0 +1,480 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.streaming.tests; + +import org.apache.flink.api.common.functions.MapFunction; +import org.apache.flink.api.common.functions.RichFlatMapFunction; +import org.apache.flink.api.common.functions.RuntimeContext; +import org.apache.flink.api.common.restartstrategy.RestartStrategies; +import org.apache.flink.api.common.state.ListState; +import org.apache.flink.api.common.state.ListStateDescriptor; +import org.apache.flink.api.common.state.ValueState; +import org.apache.flink.api.common.state.ValueStateDescriptor; +import org.apache.flink.api.java.functions.KeySelector; +import org.apache.flink.api.java.utils.ParameterTool; +import org.apache.flink.contrib.streaming.state.RocksDBStateBackend; +import org.apache.flink.runtime.state.CheckpointListener; +import org.apache.flink.runtime.state.FunctionInitializationContext; +import org.apache.flink.runtime.state.FunctionSnapshotContext; +import org.apache.flink.runtime.state.filesystem.FsStateBackend; +import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; +import org.apache.flink.streaming.api.environment.CheckpointConfig; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.streaming.api.functions.sink.PrintSinkFunction; +import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; +import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; +import org.apache.flink.util.Collector; +import org.apache.flink.util.Preconditions; + +import org.apache.commons.lang3.RandomStringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +/** + * Automatic end-to-end test for local recovery (including sticky allocation). 
+ * + * <p>List of possible input parameters for this job: + * <ul> + * <li>checkpointDir: the checkpoint directory, required.</li> + * <li>parallelism: the parallelism of the job, default 1.</li> + * <li>maxParallelism: the maximum parallelism of the job, default 1.</li> + * <li>checkpointInterval: the checkpointing interval in milliseconds, default 1000.</li> + * <li>restartDelay: the delay of the fixed delay restart strategy, default 0.</li> + * <li>externalizedCheckpoints: flag to activate externalized checkpoints, default <code>false</code>.</li> + * <li>stateBackend: choice for state backend between <code>file</code> and <code>rocks</code>, default <code>file</code>.</li> + * <li>killJvmOnFail: flag that determines whether or not an artificial failure induced by the test kills the JVM or not.</li> + * <li>asyncCheckpoints: flag for async checkpoints with file state backend, default <code>true</code>.</li> + * <li>incrementalCheckpoints: flag for incremental checkpoint with rocks state backend, default <code>false</code>.</li> + * <li>delay: sleep delay to throttle down the production of the source, default 0.</li> + * <li>maxAttempts: the maximum number of run attempts, before the job finishes with success, default 3.</li> + * <li>valueSize: size of the artificial value for each key in bytes, default 10.</li> + * </ul> + */ +public class StickyAllocationAndLocalRecoveryTestJob { + + private static final Logger LOG = LoggerFactory.getLogger(StickyAllocationAndLocalRecoveryTestJob.class); + + public static void main(String[] args) throws Exception { + + final ParameterTool pt = ParameterTool.fromArgs(args); + + final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + + env.setParallelism(pt.getInt("parallelism", 1)); + env.setMaxParallelism(pt.getInt("maxParallelism", pt.getInt("parallelism", 1))); + env.enableCheckpointing(pt.getInt("checkpointInterval", 1000)); + 
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, pt.getInt("restartDelay", 0))); + if (pt.getBoolean("externalizedCheckpoints", false)) { + env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); + } + + String stateBackend = pt.get("stateBackend", "file"); + String checkpointDir = pt.getRequired("checkpointDir"); + + boolean killJvmOnFail = pt.getBoolean("killJvmOnFail", false); + + if ("file".equals(stateBackend)) { + boolean asyncCheckpoints = pt.getBoolean("asyncCheckpoints", true); + env.setStateBackend(new FsStateBackend(checkpointDir, asyncCheckpoints)); + } else if ("rocks".equals(stateBackend)) { + boolean incrementalCheckpoints = pt.getBoolean("incrementalCheckpoints", false); + env.setStateBackend(new RocksDBStateBackend(checkpointDir, incrementalCheckpoints)); + } else { + throw new IllegalArgumentException("Unknown backend: " + stateBackend); + } + + // make parameters available in the web interface + env.getConfig().setGlobalJobParameters(pt); + + // delay to throttle down the production of the source + long delay = pt.getLong("delay", 0L); + + // the maximum number of attempts, before the job finishes with success + int maxAttempts = pt.getInt("maxAttempts", 3); + + // size of one artificial value + int valueSize = pt.getInt("valueSize", 10); + + env.addSource(new RandomLongSource(maxAttempts, delay)) + .keyBy((KeySelector<Long, Long>) aLong -> aLong) + .flatMap(new StateCreatingFlatMap(valueSize, killJvmOnFail)) + .map((MapFunction<String, String>) value -> value) --- End diff -- Why is this map necessary for the test? 
> Introduce automated end-to-end test for local recovery (including sticky
> scheduling)
> ------------------------------------------------------------------------------------
>
> Key: FLINK-8910
> URL: https://issues.apache.org/jira/browse/FLINK-8910
> Project: Flink
> Issue Type: Sub-task
> Components: State Backends, Checkpointing
> Affects Versions: 1.5.0
> Reporter: Stefan Richter
> Assignee: Stefan Richter
> Priority: Major
> Fix For: 1.5.0
>
>
> We should have an automated end-to-end test that can run nightly to check
> that sticky allocation and local recovery work as expected.

--
This message was sent by Atlassian JIRA (v7.6.3#76005)