This is an automated email from the ASF dual-hosted git repository. trohrmann pushed a commit to branch release-1.6 in repository https://gitbox.apache.org/repos/asf/flink.git
commit 45cffa17252c5fb9bc38a6b771c2a75aaa8c10ee Author: gyao <g...@data-artisans.com> AuthorDate: Thu Aug 9 13:27:01 2018 +0200 [FLINK-9795][mesos, docs] Update Mesos documentation [FLINK-9795][mesos, docs] Remove unnecessary remark about task reconciliation. The config key high-availability.zookeeper.path.mesos-workers already has a default value. Even without explicitly setting the key, the task reconciliation will work. Moreover, if there were no default value, the code would throw an NPE. So either way, the remark only confuses the reader. [FLINK-9795][mesos, docs] Remove configuration keys from Mesos Setup page. - Remove the Mesos-specific configuration keys from the Mesos Setup page because they duplicate what is already on the configuration page. - Add missing descriptions for some of the keys that are under the Mesos section of the configuration page. - Improve formatting of the descriptions. [FLINK-9795][mesos, docs] Document which config options are only used in legacy mode. [FLINK-9795][mesos, docs] Document that mesos.initial-tasks is only needed in legacy mode. [FLINK-9795][mesos, docs] Clarify necessity of Marathon in documentation. [FLINK-9795][mesos, docs] Rewrite "Flink's JobManager and Web Interface" section. [FLINK-9795][mesos, docs] Add missing period at the end of sentence. This closes #6533. 
--- .../high_availability_zookeeper_configuration.html | 2 +- docs/_includes/generated/mesos_configuration.html | 8 +-- .../mesos_task_manager_configuration.html | 8 +-- docs/ops/deployment/mesos.md | 83 ++++------------------ .../configuration/HighAvailabilityOptions.java | 5 +- .../flink/mesos/configuration/MesosOptions.java | 31 +++++--- .../MesosTaskManagerParameters.java | 22 ++++-- 7 files changed, 68 insertions(+), 91 deletions(-) diff --git a/docs/_includes/generated/high_availability_zookeeper_configuration.html b/docs/_includes/generated/high_availability_zookeeper_configuration.html index a49d160..6577878 100644 --- a/docs/_includes/generated/high_availability_zookeeper_configuration.html +++ b/docs/_includes/generated/high_availability_zookeeper_configuration.html @@ -60,7 +60,7 @@ <tr> <td><h5>high-availability.zookeeper.path.mesos-workers</h5></td> <td style="word-wrap: break-word;">"/mesos-workers"</td> - <td>ZooKeeper root path (ZNode) for Mesos workers.</td> + <td>The ZooKeeper root path for persisting the Mesos worker information.</td> </tr> <tr> <td><h5>high-availability.zookeeper.path.root</h5></td> diff --git a/docs/_includes/generated/mesos_configuration.html b/docs/_includes/generated/mesos_configuration.html index c514c86..54e92e5 100644 --- a/docs/_includes/generated/mesos_configuration.html +++ b/docs/_includes/generated/mesos_configuration.html @@ -15,17 +15,17 @@ <tr> <td><h5>mesos.initial-tasks</h5></td> <td style="word-wrap: break-word;">0</td> - <td>The initial workers to bring up when the master starts</td> + <td>The initial workers to bring up when the master starts. This option is ignored unless Flink is in <a href="#legacy">legacy mode</a>.</td> </tr> <tr> <td><h5>mesos.master</h5></td> <td style="word-wrap: break-word;">(none)</td> - <td>The Mesos master URL. 
The value should be in one of the following forms: "host:port", "zk://host1:port1,host2:port2,.../path", "zk://username:password@host1:port1,host2:port2,.../path" or "file:///path/to/file"</td> + <td>The Mesos master URL. The value should be in one of the following forms: <ul><li>host:port</li><li>zk://host1:port1,host2:port2,.../path</li><li>zk://username:password@host1:port1,host2:port2,.../path</li><li>file:///path/to/file</li></ul></td> </tr> <tr> <td><h5>mesos.maximum-failed-tasks</h5></td> <td style="word-wrap: break-word;">-1</td> - <td>The maximum number of failed workers before the cluster fails. May be set to -1 to disable this feature</td> + <td>The maximum number of failed workers before the cluster fails. May be set to -1 to disable this feature. This option is ignored unless Flink is in <a href="#legacy">legacy mode</a>.</td> </tr> <tr> <td><h5>mesos.resourcemanager.artifactserver.port</h5></td> @@ -65,7 +65,7 @@ <tr> <td><h5>mesos.resourcemanager.tasks.port-assignments</h5></td> <td style="word-wrap: break-word;">(none)</td> - <td>Comma-separated list of configuration keys which represent a configurable port.All port keys will dynamically get a port assigned through Mesos.</td> + <td>Comma-separated list of configuration keys which represent a configurable port. All port keys will dynamically get a port assigned through Mesos.</td> </tr> </tbody> </table> diff --git a/docs/_includes/generated/mesos_task_manager_configuration.html b/docs/_includes/generated/mesos_task_manager_configuration.html index 0af844d..1e67f84 100644 --- a/docs/_includes/generated/mesos_task_manager_configuration.html +++ b/docs/_includes/generated/mesos_task_manager_configuration.html @@ -10,12 +10,12 @@ <tr> <td><h5>mesos.constraints.hard.hostattribute</h5></td> <td style="word-wrap: break-word;">(none)</td> - <td>Constraints for task placement on mesos.</td> + <td>Constraints for task placement on Mesos based on agent attributes. 
Takes a comma-separated list of key:value pairs corresponding to the attributes exposed by the target mesos agents. Example: az:eu-west-1a,series:t2</td> </tr> <tr> <td><h5>mesos.resourcemanager.tasks.bootstrap-cmd</h5></td> <td style="word-wrap: break-word;">(none)</td> - <td></td> + <td>A command which is executed before the TaskManager is started.</td> </tr> <tr> <td><h5>mesos.resourcemanager.tasks.container.docker.force-pull-image</h5></td> @@ -50,12 +50,12 @@ <tr> <td><h5>mesos.resourcemanager.tasks.gpus</h5></td> <td style="word-wrap: break-word;">0</td> - <td></td> + <td>GPUs to assign to the Mesos workers.</td> </tr> <tr> <td><h5>mesos.resourcemanager.tasks.hostname</h5></td> <td style="word-wrap: break-word;">(none)</td> - <td></td> + <td>Optional value to define the TaskManager’s hostname. The pattern _TASK_ is replaced by the actual id of the Mesos task. This can be used to configure the TaskManager to use Mesos DNS (e.g. _TASK_.flink-service.mesos) for name lookups.</td> </tr> <tr> <td><h5>mesos.resourcemanager.tasks.mem</h5></td> diff --git a/docs/ops/deployment/mesos.md b/docs/ops/deployment/mesos.md index aca6f23..1ff8afa 100644 --- a/docs/ops/deployment/mesos.md +++ b/docs/ops/deployment/mesos.md @@ -59,13 +59,11 @@ or configuration files. For instance, in non-containerized environments, the artifact server will provide the Flink binaries. What files will be served depends on the configuration overlay used. -### Flink's JobManager and Web Interface +### Flink's Dispatcher and Web Interface -The Mesos scheduler currently resides with the JobManager but will be started -independently of the JobManager in future versions (see -[FLIP-6](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=65147077)). The -proposed changes will also add a Dispatcher component which will be the central -point for job submission and monitoring. 
+The Dispatcher and the web interface provide a central point for monitoring, +job submission, and other client interaction with the cluster +(see [FLIP-6](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=65147077)). ### Startup script and configuration overlays @@ -139,7 +137,7 @@ More information about the deployment scripts can be found [here](http://mesos.a ### Installing Marathon -Optionally, you may also [install Marathon](https://mesosphere.github.io/marathon/docs/) which will be necessary to run Flink in high availability (HA) mode. +Optionally, you may also [install Marathon](https://mesosphere.github.io/marathon/docs/) which enables you to run Flink in [high availability (HA) mode](#high-availability). ### Pre-installing Flink vs Docker/Mesos containers @@ -171,8 +169,6 @@ which manage the Flink processes in a Mesos cluster: It is automatically launched by the Mesos worker node to bring up a new TaskManager. In order to run the `mesos-appmaster.sh` script you have to define `mesos.master` in the `flink-conf.yaml` or pass it via `-Dmesos.master=...` to the Java process. -Additionally, you should define the number of task managers which are started by Mesos via `mesos.initial-tasks`. -This value can also be defined in the `flink-conf.yaml` or passed as a Java property. When executing `mesos-appmaster.sh`, it will create a job manager on the machine where you executed the script. In contrast to that, the task managers will be run as Mesos tasks in the Mesos cluster. 
@@ -188,19 +184,21 @@ For example: -Djobmanager.heap.mb=1024 \ -Djobmanager.rpc.port=6123 \ -Drest.port=8081 \ - -Dmesos.initial-tasks=10 \ -Dmesos.resourcemanager.tasks.mem=4096 \ -Dtaskmanager.heap.mb=3500 \ -Dtaskmanager.numberOfTaskSlots=2 \ -Dparallelism.default=10 +<div class="alert alert-info"> + <strong>Note:</strong> If Flink is in <a href="{{ site.baseurl }}/ops/config.html#legacy">legacy mode</a>, + you should additionally define the number of task managers that are started by Mesos via + <a href="{{ site.baseurl }}/ops/config.html#mesos-initial-tasks"><code>mesos.initial-tasks</code></a>. +</div> ### High Availability You will need to run a service like Marathon or Apache Aurora which takes care of restarting the Flink master process in case of node or process failures. -In addition, Zookeeper needs to be configured like described in the [High Availability section of the Flink docs]({{ site.baseurl }}/ops/jobmanager_high_availability.html) - -For the reconciliation of tasks to work correctly, please also set `high-availability.zookeeper.path.mesos-workers` to a valid Zookeeper path. +In addition, Zookeeper needs to be configured like described in the [High Availability section of the Flink docs]({{ site.baseurl }}/ops/jobmanager_high_availability.html). 
#### Marathon @@ -211,7 +209,7 @@ Here is an example configuration for Marathon: { "id": "flink", - "cmd": "$FLINK_HOME/bin/mesos-appmaster.sh -Djobmanager.heap.mb=1024 -Djobmanager.rpc.port=6123 -Drest.port=8081 -Dmesos.initial-tasks=1 -Dmesos.resourcemanager.tasks.mem=1024 -Dtaskmanager.heap.mb=1024 -Dtaskmanager.numberOfTaskSlots=2 -Dparallelism.default=2 -Dmesos.resourcemanager.tasks.cpus=1", + "cmd": "$FLINK_HOME/bin/mesos-appmaster.sh -Djobmanager.heap.mb=1024 -Djobmanager.rpc.port=6123 -Drest.port=8081 -Dmesos.resourcemanager.tasks.mem=1024 -Dtaskmanager.heap.mb=1024 -Dtaskmanager.numberOfTaskSlots=2 -Dparallelism.default=2 -Dmesos.resourcemanager.tasks.cpus=1", "cpus": 1.0, "mem": 1024 } @@ -220,60 +218,7 @@ When running Flink with Marathon, the whole Flink cluster including the job mana ### Configuration parameters -`mesos.initial-tasks`: The initial workers to bring up when the master starts (**DEFAULT**: The number of workers specified at cluster startup). - -`mesos.constraints.hard.hostattribute`: Constraints for task placement on Mesos based on agent attributes (**DEFAULT**: None). -Takes a comma-separated list of key:value pairs corresponding to the attributes exposed by the target -mesos agents. Example: `az:eu-west-1a,series:t2` - -`mesos.maximum-failed-tasks`: The maximum number of failed workers before the cluster fails (**DEFAULT**: Number of initial workers). -May be set to -1 to disable this feature. - -`mesos.master`: The Mesos master URL. The value should be in one of the following forms: - -* `host:port` -* `zk://host1:port1,host2:port2,.../path` -* `zk://username:password@host1:port1,host2:port2,.../path` -* `file:///path/to/file` - -`mesos.failover-timeout`: The failover timeout in seconds for the Mesos scheduler, after which running tasks are automatically shut down (**DEFAULT:** 600). - -`mesos.resourcemanager.artifactserver.port`:The config parameter defining the Mesos artifact server port to use. 
Setting the port to 0 will let the OS choose an available port. - -`mesos.resourcemanager.framework.name`: Mesos framework name (**DEFAULT:** Flink) - -`mesos.resourcemanager.framework.role`: Mesos framework role definition (**DEFAULT:** *) - -`high-availability.zookeeper.path.mesos-workers`: The ZooKeeper root path for persisting the Mesos worker information. - -`mesos.resourcemanager.framework.principal`: Mesos framework principal (**NO DEFAULT**) - -`mesos.resourcemanager.framework.secret`: Mesos framework secret (**NO DEFAULT**) - -`mesos.resourcemanager.framework.user`: Mesos framework user (**DEFAULT:**"") - -`mesos.resourcemanager.artifactserver.ssl.enabled`: Enables SSL for the Flink artifact server (**DEFAULT**: true). Note that `security.ssl.enabled` also needs to be set to `true` encryption to enable encryption. - -`mesos.resourcemanager.tasks.mem`: Memory to assign to the Mesos workers in MB (**DEFAULT**: 1024) - -`mesos.resourcemanager.tasks.cpus`: CPUs to assign to the Mesos workers (**DEFAULT**: 0.0) - -`mesos.resourcemanager.tasks.gpus`: GPUs to assign to the Mesos workers (**DEFAULT**: 0.0) - -`mesos.resourcemanager.tasks.container.type`: Type of the containerization used: "mesos" or "docker" (DEFAULT: mesos); - -`mesos.resourcemanager.tasks.container.image.name`: Image name to use for the container (**NO DEFAULT**) - -`mesos.resourcemanager.tasks.container.volumes`: A comma separated list of `[host_path:]`container_path`[:RO|RW]`. This allows for mounting additional volumes into your container. (**NO DEFAULT**) - -`mesos.resourcemanager.tasks.container.docker.parameters`: Custom parameters to be passed into docker run command when using the docker containerizer. Comma separated list of `key=value` pairs. `value` may contain '=' (**NO DEFAULT**) - -`mesos.resourcemanager.tasks.uris`: A comma separated list of URIs of custom artifacts to be downloaded into the sandbox of Mesos workers. 
(**NO DEFAULT**) - -`mesos.resourcemanager.tasks.container.docker.force-pull-image`: Instruct the docker containerizer to forcefully pull the image rather than reuse a cached version. (**DEFAULT**: false) - -`mesos.resourcemanager.tasks.hostname`: Optional value to define the TaskManager's hostname. The pattern `_TASK_` is replaced by the actual id of the Mesos task. This can be used to configure the TaskManager to use Mesos DNS (e.g. `_TASK_.flink-service.mesos`) for name lookups. (**NO DEFAULT**) - -`mesos.resourcemanager.tasks.bootstrap-cmd`: A command which is executed before the TaskManager is started (**NO DEFAULT**). +For a list of Mesos specific configuration, refer to the [Mesos section]({{ site.baseurl }}/ops/config.html#mesos) +of the configuration documentation. {% top %} diff --git a/flink-core/src/main/java/org/apache/flink/configuration/HighAvailabilityOptions.java b/flink-core/src/main/java/org/apache/flink/configuration/HighAvailabilityOptions.java index c8b8ae9..787efff 100644 --- a/flink-core/src/main/java/org/apache/flink/configuration/HighAvailabilityOptions.java +++ b/flink-core/src/main/java/org/apache/flink/configuration/HighAvailabilityOptions.java @@ -22,6 +22,7 @@ import org.apache.flink.annotation.PublicEvolving; import org.apache.flink.annotation.docs.ConfigGroup; import org.apache.flink.annotation.docs.ConfigGroups; import org.apache.flink.annotation.docs.Documentation; +import org.apache.flink.configuration.description.Description; import static org.apache.flink.configuration.ConfigOptions.key; @@ -157,7 +158,9 @@ public class HighAvailabilityOptions { key("high-availability.zookeeper.path.mesos-workers") .defaultValue("/mesos-workers") .withDeprecatedKeys("recovery.zookeeper.path.mesos-workers") - .withDescription("ZooKeeper root path (ZNode) for Mesos workers."); + .withDescription(Description.builder() + .text("The ZooKeeper root path for persisting the Mesos worker information.") + .build()); // 
------------------------------------------------------------------------ // ZooKeeper Client Settings diff --git a/flink-mesos/src/main/java/org/apache/flink/mesos/configuration/MesosOptions.java b/flink-mesos/src/main/java/org/apache/flink/mesos/configuration/MesosOptions.java index 7046605..426a891 100644 --- a/flink-mesos/src/main/java/org/apache/flink/mesos/configuration/MesosOptions.java +++ b/flink-mesos/src/main/java/org/apache/flink/mesos/configuration/MesosOptions.java @@ -19,6 +19,9 @@ package org.apache.flink.mesos.configuration; import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.description.Description; +import org.apache.flink.configuration.description.LinkElement; +import org.apache.flink.configuration.description.TextElement; import static org.apache.flink.configuration.ConfigOptions.key; @@ -33,7 +36,10 @@ public class MesosOptions { public static final ConfigOption<Integer> INITIAL_TASKS = key("mesos.initial-tasks") .defaultValue(0) - .withDescription("The initial workers to bring up when the master starts"); + .withDescription(Description.builder() + .text("The initial workers to bring up when the master starts. ") + .text("This option is ignored unless Flink is in %s.", LinkElement.link("#legacy", "legacy mode")) + .build()); /** * The maximum number of failed Mesos tasks before entirely stopping @@ -44,8 +50,10 @@ public class MesosOptions { public static final ConfigOption<Integer> MAX_FAILED_TASKS = key("mesos.maximum-failed-tasks") .defaultValue(-1) - .withDescription("The maximum number of failed workers before the cluster fails. May be set to -1 to disable" + - " this feature"); + .withDescription(Description.builder() + .text("The maximum number of failed workers before the cluster fails. May be set to -1 to disable this feature. ") + .text("This option is ignored unless Flink is in %s.", LinkElement.link("#legacy", "legacy mode")) + .build()); /** * The Mesos master URL. 
@@ -63,9 +71,14 @@ public class MesosOptions { public static final ConfigOption<String> MASTER_URL = key("mesos.master") .noDefaultValue() - .withDescription("The Mesos master URL. The value should be in one of the following forms:" + - " \"host:port\", \"zk://host1:port1,host2:port2,.../path\"," + - " \"zk://username:password@host1:port1,host2:port2,.../path\" or \"file:///path/to/file\""); + .withDescription(Description.builder() + .text("The Mesos master URL. The value should be in one of the following forms: ") + .list( + TextElement.text("host:port"), + TextElement.text("zk://host1:port1,host2:port2,.../path"), + TextElement.text("zk://username:password@host1:port1,host2:port2,.../path"), + TextElement.text("file:///path/to/file")) + .build()); /** * The failover timeout for the Mesos scheduler, after which running tasks are automatically shut down. @@ -125,7 +138,9 @@ public class MesosOptions { */ public static final ConfigOption<String> PORT_ASSIGNMENTS = key("mesos.resourcemanager.tasks.port-assignments") .defaultValue("") - .withDescription("Comma-separated list of configuration keys which represent a configurable port." + - "All port keys will dynamically get a port assigned through Mesos."); + .withDescription(Description.builder() + .text("Comma-separated list of configuration keys which represent a configurable port. 
" + + "All port keys will dynamically get a port assigned through Mesos.") + .build()); } diff --git a/flink-mesos/src/main/java/org/apache/flink/mesos/runtime/clusterframework/MesosTaskManagerParameters.java b/flink-mesos/src/main/java/org/apache/flink/mesos/runtime/clusterframework/MesosTaskManagerParameters.java index d915b36..0315629 100644 --- a/flink-mesos/src/main/java/org/apache/flink/mesos/runtime/clusterframework/MesosTaskManagerParameters.java +++ b/flink-mesos/src/main/java/org/apache/flink/mesos/runtime/clusterframework/MesosTaskManagerParameters.java @@ -22,6 +22,7 @@ import org.apache.flink.configuration.ConfigOption; import org.apache.flink.configuration.Configuration; import org.apache.flink.configuration.IllegalConfigurationException; import org.apache.flink.configuration.TaskManagerOptions; +import org.apache.flink.configuration.description.Description; import org.apache.flink.runtime.clusterframework.ContaineredTaskManagerParameters; import org.apache.flink.util.Preconditions; @@ -65,7 +66,8 @@ public class MesosTaskManagerParameters { public static final ConfigOption<Integer> MESOS_RM_TASKS_GPUS = key("mesos.resourcemanager.tasks.gpus") - .defaultValue(0); + .defaultValue(0) + .withDescription(Description.builder().text("GPUs to assign to the Mesos workers.").build()); public static final ConfigOption<String> MESOS_RM_CONTAINER_TYPE = key("mesos.resourcemanager.tasks.container.type") @@ -79,7 +81,12 @@ public class MesosTaskManagerParameters { public static final ConfigOption<String> MESOS_TM_HOSTNAME = key("mesos.resourcemanager.tasks.hostname") - .noDefaultValue(); + .noDefaultValue() + .withDescription(Description.builder() + .text("Optional value to define the TaskManager’s hostname. " + + "The pattern _TASK_ is replaced by the actual id of the Mesos task. " + + "This can be used to configure the TaskManager to use Mesos DNS (e.g. 
_TASK_.flink-service.mesos) for name lookups.") + .build()); public static final ConfigOption<String> MESOS_TM_CMD = key("mesos.resourcemanager.tasks.taskmanager-cmd") @@ -87,7 +94,10 @@ public class MesosTaskManagerParameters { public static final ConfigOption<String> MESOS_TM_BOOTSTRAP_CMD = key("mesos.resourcemanager.tasks.bootstrap-cmd") - .noDefaultValue(); + .noDefaultValue() + .withDescription(Description.builder() + .text("A command which is executed before the TaskManager is started.") + .build()); public static final ConfigOption<String> MESOS_TM_URIS = key("mesos.resourcemanager.tasks.uris") @@ -116,7 +126,11 @@ public class MesosTaskManagerParameters { public static final ConfigOption<String> MESOS_CONSTRAINTS_HARD_HOSTATTR = key("mesos.constraints.hard.hostattribute") .noDefaultValue() - .withDescription("Constraints for task placement on mesos."); + .withDescription(Description.builder() + .text("Constraints for task placement on Mesos based on agent attributes. " + + "Takes a comma-separated list of key:value pairs corresponding to the attributes exposed by the target mesos agents. " + + "Example: az:eu-west-1a,series:t2") + .build()); /** * Value for {@code MESOS_RESOURCEMANAGER_TASKS_CONTAINER_TYPE} setting. Tells to use the Mesos containerizer.