Repository: flink Updated Branches: refs/heads/master e94a488dd -> d47778632
[FLINK-8475][config][docs] Integrate akka options This closes #5384. Project: http://git-wip-us.apache.org/repos/asf/flink/repo Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/d4777863 Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/d4777863 Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/d4777863 Branch: refs/heads/master Commit: d477786329bc0c3bc9ed75ca1586ce9be6321f98 Parents: e94a488 Author: zentol <[email protected]> Authored: Mon Jan 22 16:29:55 2018 +0100 Committer: zentol <[email protected]> Committed: Wed Jan 31 09:31:58 2018 +0100 ---------------------------------------------------------------------- .../_includes/generated/akka_configuration.html | 96 ++++++++++++++++++++ docs/ops/config.md | 30 +----- .../apache/flink/configuration/AkkaOptions.java | 77 ++++++++++++---- 3 files changed, 155 insertions(+), 48 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/flink/blob/d4777863/docs/_includes/generated/akka_configuration.html ---------------------------------------------------------------------- diff --git a/docs/_includes/generated/akka_configuration.html b/docs/_includes/generated/akka_configuration.html new file mode 100644 index 0000000..3ccbff6 --- /dev/null +++ b/docs/_includes/generated/akka_configuration.html @@ -0,0 +1,96 @@ +<table class="table table-bordered"> + <thead> + <tr> + <th class="text-left" style="width: 20%">Key</th> + <th class="text-left" style="width: 15%">Default</th> + <th class="text-left" style="width: 65%">Description</th> + </tr> + </thead> + <tbody> + <tr> + <td><h5>akka.ask.timeout</h5></td> + <td>"10 s"</td> + <td>Timeout used for all futures and blocking Akka calls. If Flink fails due to timeouts then you should try to increase this value. Timeouts can be caused by slow machines or a congested network. 
The timeout value requires a time-unit specifier (ms/s/min/h/d).</td> + </tr> + <tr> + <td><h5>akka.client.timeout</h5></td> + <td>"60 s"</td> + <td>Timeout for all blocking calls on the client side.</td> + </tr> + <tr> + <td><h5>akka.framesize</h5></td> + <td>"10485760b"</td> + <td>Maximum size of messages which are sent between the JobManager and the TaskManagers. If Flink fails because messages exceed this limit, then you should increase it. The message size requires a size-unit specifier.</td> + </tr> + <tr> + <td><h5>akka.jvm-exit-on-fatal-error</h5></td> + <td>true</td> + <td>Exit JVM on fatal Akka errors.</td> + </tr> + <tr> + <td><h5>akka.log.lifecycle.events</h5></td> + <td>false</td> + <td>Turns on the Akka’s remote logging of events. Set this value to ‘true’ in case of debugging.</td> + </tr> + <tr> + <td><h5>akka.lookup.timeout</h5></td> + <td>"10 s"</td> + <td>Timeout used for the lookup of the JobManager. The timeout value has to contain a time-unit specifier (ms/s/min/h/d).</td> + </tr> + <tr> + <td><h5>akka.retry-gate-closed-for</h5></td> + <td>50</td> + <td>Milliseconds a gate should be closed for after a remote connection was disconnected.</td> + </tr> + <tr> + <td><h5>akka.ssl.enabled</h5></td> + <td>true</td> + <td>Turns on SSL for Akka’s remote communication. This is applicable only when the global ssl flag security.ssl.enabled is set to true.</td> + </tr> + <tr> + <td><h5>akka.startup-timeout</h5></td> + <td>(none)</td> + <td>Timeout after which the startup of a remote component is considered being failed.</td> + </tr> + <tr> + <td><h5>akka.tcp.timeout</h5></td> + <td>"20 s"</td> + <td>Timeout for all outbound connections. If you should experience problems with connecting to a TaskManager due to a slow network, you should increase this value.</td> + </tr> + <tr> + <td><h5>akka.throughput</h5></td> + <td>15</td> + <td>Number of messages that are processed in a batch before returning the thread to the pool. 
Low values denote a fair scheduling whereas high values can increase the performance at the cost of unfairness.</td> + </tr> + <tr> + <td><h5>akka.transport.heartbeat.interval</h5></td> + <td>"1000 s"</td> + <td>Heartbeat interval for Akka’s transport failure detector. Since Flink uses TCP, the detector is not necessary. Therefore, the detector is disabled by setting the interval to a very high value. In case you should need the transport failure detector, set the interval to some reasonable value. The interval value requires a time-unit specifier (ms/s/min/h/d).</td> + </tr> + <tr> + <td><h5>akka.transport.heartbeat.pause</h5></td> + <td>"6000 s"</td> + <td>Acceptable heartbeat pause for Akka’s transport failure detector. Since Flink uses TCP, the detector is not necessary. Therefore, the detector is disabled by setting the pause to a very high value. In case you should need the transport failure detector, set the pause to some reasonable value. The pause value requires a time-unit specifier (ms/s/min/h/d).</td> + </tr> + <tr> + <td><h5>akka.transport.threshold</h5></td> + <td>300.0</td> + <td>Threshold for the transport failure detector. Since Flink uses TCP, the detector is not necessary and, thus, the threshold is set to a high value.</td> + </tr> + <tr> + <td><h5>akka.watch.heartbeat.interval</h5></td> + <td>"10 s"</td> + <td>Heartbeat interval for Akka’s DeathWatch mechanism to detect dead TaskManagers. If TaskManagers are wrongly marked dead because of lost or delayed heartbeat messages, then you should decrease this value or increase akka.watch.heartbeat.pause. A thorough description of Akka’s DeathWatch can be found <a href="http://doc.akka.io/docs/akka/snapshot/scala/remoting.html#failure-detector">here</a>.</td> + </tr> + <tr> + <td><h5>akka.watch.heartbeat.pause</h5></td> + <td>"60 s"</td> + <td>Acceptable heartbeat pause for Akka’s DeathWatch mechanism. A low value does not allow an irregular heartbeat. 
If TaskManagers are wrongly marked dead because of lost or delayed heartbeat messages, then you should increase this value or decrease akka.watch.heartbeat.interval. Higher value increases the time to detect a dead TaskManager. A thorough description of Akka’s DeathWatch can be found <a href="http://doc.akka.io/docs/akka/snapshot/scala/remoting.html#failure-detector">here</a>.</td> + </tr> + <tr> + <td><h5>akka.watch.threshold</h5></td> + <td>12</td> + <td>Threshold for the DeathWatch failure detector. A low value is prone to false positives whereas a high value increases the time to detect a dead TaskManager. A thorough description of Akka’s DeathWatch can be found <a href="http://doc.akka.io/docs/akka/snapshot/scala/remoting.html#failure-detector">here</a>.</td> + </tr> + </tbody> +</table> http://git-wip-us.apache.org/repos/asf/flink/blob/d4777863/docs/ops/config.md ---------------------------------------------------------------------- diff --git a/docs/ops/config.md b/docs/ops/config.md index 315f349..995306c 100644 --- a/docs/ops/config.md +++ b/docs/ops/config.md @@ -326,35 +326,7 @@ The following parameters configure Flink's JobManager and TaskManagers. ### Distributed Coordination (via Akka) -- `akka.ask.timeout`: Timeout used for all futures and blocking Akka calls. If Flink fails due to timeouts then you should try to increase this value. Timeouts can be caused by slow machines or a congested network. The timeout value requires a time-unit specifier (ms/s/min/h/d) (DEFAULT: **10 s**). - -- `akka.lookup.timeout`: Timeout used for the lookup of the JobManager. The timeout value has to contain a time-unit specifier (ms/s/min/h/d) (DEFAULT: **10 s**). - -- `akka.client.timeout`: Timeout used by Flink clients (e.g. `CliFrontend`, `ClusterClient`) when communicating with the Flink cluster. The timeout value has to contain a time-unit specifier (ms/s/min/h/d) (DEFAULT: **60 s**). 
- -- `akka.framesize`: Maximum size of messages which are sent between the JobManager and the TaskManagers. If Flink fails because messages exceed this limit, then you should increase it. The message size requires a size-unit specifier (DEFAULT: **10485760b**). - -- `akka.watch.heartbeat.interval`: Heartbeat interval for Akka's DeathWatch mechanism to detect dead TaskManagers. If TaskManagers are wrongly marked dead because of lost or delayed heartbeat messages, then you should decrease this value or increase `akka.watch.heartbeat.pause`. A thorough description of Akka's DeathWatch can be found [here](http://doc.akka.io/docs/akka/snapshot/scala/remoting.html#failure-detector) (DEFAULT: **10 s**). - -- `akka.watch.heartbeat.pause`: Acceptable heartbeat pause for Akka's DeathWatch mechanism. A low value does not allow an irregular heartbeat. If TaskManagers are wrongly marked dead because of lost or delayed heartbeat messages, then you should increase this value or decrease `akka.watch.heartbeat.interval`. Higher value increases the time to detect a dead TaskManager. A thorough description of Akka's DeathWatch can be found [here](http://doc.akka.io/docs/akka/snapshot/scala/remoting.html#failure-detector) (DEFAULT: **60 s**). - -- `akka.watch.threshold`: Threshold for the DeathWatch failure detector. A low value is prone to false positives whereas a high value increases the time to detect a dead TaskManager. A thorough description of Akka's DeathWatch can be found [here](http://doc.akka.io/docs/akka/snapshot/scala/remoting.html#failure-detector) (DEFAULT: **12**). - -- `akka.transport.heartbeat.interval`: Heartbeat interval for Akka's transport failure detector. Since Flink uses TCP, the detector is not necessary. Therefore, the detector is disabled by setting the interval to a very high value. In case you should need the transport failure detector, set the interval to some reasonable value. 
The interval value requires a time-unit specifier (ms/s/min/h/d) (DEFAULT: **1000 s**). - -- `akka.transport.heartbeat.pause`: Acceptable heartbeat pause for Akka's transport failure detector. Since Flink uses TCP, the detector is not necessary. Therefore, the detector is disabled by setting the pause to a very high value. In case you should need the transport failure detector, set the pause to some reasonable value. The pause value requires a time-unit specifier (ms/s/min/h/d) (DEFAULT: **6000 s**). - -- `akka.transport.threshold`: Threshold for the transport failure detector. Since Flink uses TCP, the detector is not necessary and, thus, the threshold is set to a high value (DEFAULT: **300**). - -- `akka.tcp.timeout`: Timeout for all outbound connections. If you should experience problems with connecting to a TaskManager due to a slow network, you should increase this value (DEFAULT: **20 s**). - -- `akka.throughput`: Number of messages that are processed in a batch before returning the thread to the pool. Low values denote a fair scheduling whereas high values can increase the performance at the cost of unfairness (DEFAULT: **15**). - -- `akka.log.lifecycle.events`: Turns on the Akka's remote logging of events. Set this value to 'true' in case of debugging (DEFAULT: **false**). - -- `akka.startup-timeout`: Timeout after which the startup of a remote component is considered being failed (DEFAULT: **akka.ask.timeout**). - -- `akka.ssl.enabled`: Turns on SSL for Akka's remote communication. This is applicable only when the global ssl flag security.ssl.enabled is set to true (DEFAULT: **true**). 
+{% include generated/akka_configuration.html %} ### SSL Settings http://git-wip-us.apache.org/repos/asf/flink/blob/d4777863/flink-core/src/main/java/org/apache/flink/configuration/AkkaOptions.java ---------------------------------------------------------------------- diff --git a/flink-core/src/main/java/org/apache/flink/configuration/AkkaOptions.java b/flink-core/src/main/java/org/apache/flink/configuration/AkkaOptions.java index 09ea490..360ba86 100644 --- a/flink-core/src/main/java/org/apache/flink/configuration/AkkaOptions.java +++ b/flink-core/src/main/java/org/apache/flink/configuration/AkkaOptions.java @@ -22,7 +22,6 @@ import org.apache.flink.annotation.PublicEvolving; /** * Akka configuration options. - * TODO: Migrate other akka config options to this file */ @PublicEvolving public class AkkaOptions { @@ -32,117 +31,157 @@ public class AkkaOptions { */ public static final ConfigOption<String> ASK_TIMEOUT = ConfigOptions .key("akka.ask.timeout") - .defaultValue("10 s"); + .defaultValue("10 s") + .withDescription("Timeout used for all futures and blocking Akka calls. If Flink fails due to timeouts then you" + + " should try to increase this value. Timeouts can be caused by slow machines or a congested network. The" + + " timeout value requires a time-unit specifier (ms/s/min/h/d)."); /** * The Akka death watch heartbeat interval. */ public static final ConfigOption<String> WATCH_HEARTBEAT_INTERVAL = ConfigOptions .key("akka.watch.heartbeat.interval") - .defaultValue(ASK_TIMEOUT.defaultValue()); + .defaultValue(ASK_TIMEOUT.defaultValue()) + .withDescription("Heartbeat interval for Akka’s DeathWatch mechanism to detect dead TaskManagers. If" + + " TaskManagers are wrongly marked dead because of lost or delayed heartbeat messages, then you should" + + " decrease this value or increase akka.watch.heartbeat.pause. 
A thorough description of Akka’s DeathWatch" + + " can be found <a href=\"http://doc.akka.io/docs/akka/snapshot/scala/remoting.html#failure-detector\">here</a>."); /** * The maximum acceptable Akka death watch heartbeat pause. */ public static final ConfigOption<String> WATCH_HEARTBEAT_PAUSE = ConfigOptions .key("akka.watch.heartbeat.pause") - .defaultValue("60 s"); - + .defaultValue("60 s") + .withDescription("Acceptable heartbeat pause for Akka’s DeathWatch mechanism. A low value does not allow an" + + " irregular heartbeat. If TaskManagers are wrongly marked dead because of lost or delayed heartbeat messages," + + " then you should increase this value or decrease akka.watch.heartbeat.interval. Higher value increases the" + + " time to detect a dead TaskManager. A thorough description of Akka’s DeathWatch can be found" + + " <a href=\"http://doc.akka.io/docs/akka/snapshot/scala/remoting.html#failure-detector\">here</a>."); /** * The Akka tcp connection timeout. */ public static final ConfigOption<String> TCP_TIMEOUT = ConfigOptions .key("akka.tcp.timeout") - .defaultValue("20 s"); + .defaultValue("20 s") + .withDescription("Timeout for all outbound connections. If you should experience problems with connecting to a" + + " TaskManager due to a slow network, you should increase this value."); /** * Timeout for the startup of the actor system. */ public static final ConfigOption<String> STARTUP_TIMEOUT = ConfigOptions .key("akka.startup-timeout") - .noDefaultValue(); + .noDefaultValue() + .withDescription("Timeout after which the startup of a remote component is considered being failed."); /** * Heartbeat interval of the transport failure detector. */ public static final ConfigOption<String> TRANSPORT_HEARTBEAT_INTERVAL = ConfigOptions .key("akka.transport.heartbeat.interval") - .defaultValue("1000 s"); + .defaultValue("1000 s") + .withDescription("Heartbeat interval for Akka’s transport failure detector. Since Flink uses TCP, the detector" + + " is not necessary. 
Therefore, the detector is disabled by setting the interval to a very high value. In" + + " case you should need the transport failure detector, set the interval to some reasonable value. The" + + " interval value requires a time-unit specifier (ms/s/min/h/d)."); /** * Allowed heartbeat pause for the transport failure detector. */ public static final ConfigOption<String> TRANSPORT_HEARTBEAT_PAUSE = ConfigOptions .key("akka.transport.heartbeat.pause") - .defaultValue("6000 s"); + .defaultValue("6000 s") + .withDescription("Acceptable heartbeat pause for Akka’s transport failure detector. Since Flink uses TCP, the" + + " detector is not necessary. Therefore, the detector is disabled by setting the pause to a very high value." + + " In case you should need the transport failure detector, set the pause to some reasonable value." + + " The pause value requires a time-unit specifier (ms/s/min/h/d)."); /** * Detection threshold of transport failure detector. */ public static final ConfigOption<Double> TRANSPORT_THRESHOLD = ConfigOptions .key("akka.transport.threshold") - .defaultValue(300.0); + .defaultValue(300.0) + .withDescription("Threshold for the transport failure detector. Since Flink uses TCP, the detector is not" + + " necessary and, thus, the threshold is set to a high value."); /** * Detection threshold for the phi accrual watch failure detector. */ public static final ConfigOption<Integer> WATCH_THRESHOLD = ConfigOptions .key("akka.watch.threshold") - .defaultValue(12); + .defaultValue(12) + .withDescription("Threshold for the DeathWatch failure detector. A low value is prone to false positives whereas" + + " a high value increases the time to detect a dead TaskManager. A thorough description of Akka’s DeathWatch" + + " can be found <a href=\"http://doc.akka.io/docs/akka/snapshot/scala/remoting.html#failure-detector\">here</a>."); /** * Override SSL support for the Akka transport. 
*/ public static final ConfigOption<Boolean> SSL_ENABLED = ConfigOptions .key("akka.ssl.enabled") - .defaultValue(true); + .defaultValue(true) + .withDescription("Turns on SSL for Akka’s remote communication. This is applicable only when the global ssl flag" + + " security.ssl.enabled is set to true."); /** * Maximum framesize of akka messages. */ public static final ConfigOption<String> FRAMESIZE = ConfigOptions .key("akka.framesize") - .defaultValue("10485760b"); + .defaultValue("10485760b") + .withDescription("Maximum size of messages which are sent between the JobManager and the TaskManagers. If Flink" + + " fails because messages exceed this limit, then you should increase it. The message size requires a" + + " size-unit specifier."); /** * Maximum number of messages until another actor is executed by the same thread. */ public static final ConfigOption<Integer> DISPATCHER_THROUGHPUT = ConfigOptions .key("akka.throughput") - .defaultValue(15); + .defaultValue(15) + .withDescription("Number of messages that are processed in a batch before returning the thread to the pool. Low" + + " values denote a fair scheduling whereas high values can increase the performance at the cost of unfairness."); /** * Log lifecycle events. */ public static final ConfigOption<Boolean> LOG_LIFECYCLE_EVENTS = ConfigOptions .key("akka.log.lifecycle.events") - .defaultValue(false); + .defaultValue(false) + .withDescription("Turns on the Akka’s remote logging of events. Set this value to ‘true’ in case of debugging."); /** * Timeout for all blocking calls that look up remote actors. */ public static final ConfigOption<String> LOOKUP_TIMEOUT = ConfigOptions .key("akka.lookup.timeout") - .defaultValue("10 s"); + .defaultValue("10 s") + .withDescription("Timeout used for the lookup of the JobManager. The timeout value has to contain a time-unit" + + " specifier (ms/s/min/h/d)."); /** * Timeout for all blocking calls on the client side. 
*/ public static final ConfigOption<String> CLIENT_TIMEOUT = ConfigOptions .key("akka.client.timeout") - .defaultValue("60 s"); + .defaultValue("60 s") + .withDescription("Timeout for all blocking calls on the client side."); /** * Exit JVM on fatal Akka errors. */ public static final ConfigOption<Boolean> JVM_EXIT_ON_FATAL_ERROR = ConfigOptions .key("akka.jvm-exit-on-fatal-error") - .defaultValue(true); + .defaultValue(true) + .withDescription("Exit JVM on fatal Akka errors."); /** * Milliseconds a gate should be closed for after a remote connection was disconnected. */ public static final ConfigOption<Long> RETRY_GATE_CLOSED_FOR = ConfigOptions .key("akka.retry-gate-closed-for") - .defaultValue(50L); + .defaultValue(50L) + .withDescription("Milliseconds a gate should be closed for after a remote connection was disconnected."); }
