This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
commit 304f7f293068bdb3b55208e473ee01c256c8bd2a Author: beliefer <[email protected]> AuthorDate: Tue Mar 31 12:32:04 2020 +0900 [SPARK-31269][DOC] Supplement version for configuration only appear in configuration doc ### What changes were proposed in this pull request? The `configuration.md` contains some configs that are not organized by `ConfigEntry`. This PR supplements the version for configurations that only appear in the configuration doc. I sorted out some information shown below. Item name | Since version | JIRA ID | Commit ID | Note -- | -- | -- | -- | -- spark.app.name | 0.9.0 | None | 994f080f8ae3372366e6004600ba791c8a372ff0#diff-529fc5c06b9731c1fbda6f3db60b16aa | spark.driver.resource.{resourceName}.amount | 3.0.0 | SPARK-27760 | d30284b5a51dd784f663eb4eea37087b35a54d00#diff-76e731333fb756df3bff5ddb3b731c46 | spark.driver.resource.{resourceName}.discoveryScript | 3.0.0 | SPARK-27488 | 74e5e41eebf9ed596b48e6db52a2a9c642e5cbc3#diff-76e731333fb756df3bff5ddb3b731c46 | spark.driver.resource.{resourceName}.vendor | 3.0.0 | SPARK-27362 | 1277f8fa92da85d9e39d9146e3099fcb75c71a8f#diff-76e731333fb756df3bff5ddb3b731c46 | spark.executor.resource.{resourceName}.amount | 3.0.0 | SPARK-27760 | d30284b5a51dd784f663eb4eea37087b35a54d00#diff-76e731333fb756df3bff5ddb3b731c46 | spark.executor.resource.{resourceType}.discoveryScript | 3.0.0 | SPARK-27024 | db2e3c43412e4a7fb4a46c58d73d9ab304a1e949#diff-76e731333fb756df3bff5ddb3b731c46 | spark.executor.resource.{resourceName}.vendor | 3.0.0 | SPARK-27362 | 1277f8fa92da85d9e39d9146e3099fcb75c71a8f#diff-76e731333fb756df3bff5ddb3b731c46 | spark.local.dir | 0.5.0 | None | 0e93891d3d7df849cff6442038c111ffd42a5243#diff-17fd275d280b667722664ed833c6402a | spark.logConf | 0.9.0 | None | d8bcc8e9a095c1b20dd7a17b6535800d39bff80e#diff-364713d7776956cb8b0a771e9b62f82d | spark.master | 0.9.0 | SPARK-544 | 2573add94cf920a88f74d80d8ea94218d812704d#diff-529fc5c06b9731c1fbda6f3db60b16aa | spark.driver.defaultJavaOptions | 3.0.0 | SPARK-23472 | 
f83000597f250868de9722d8285fed013abc5ecf#diff-a78ecfc6a89edfaf0b60a5eaa0381970 | spark.executor.defaultJavaOptions | 3.0.0 | SPARK-23472 | f83000597f250868de9722d8285fed013abc5ecf#diff-a78ecfc6a89edfaf0b60a5eaa0381970 | spark.executorEnv.[EnvironmentVariableName] | 0.9.0 | None | 642029e7f43322f84abe4f7f36bb0b1b95d8101d#diff-529fc5c06b9731c1fbda6f3db60b16aa | spark.python.profile | 1.2.0 | SPARK-3478 | 1aa549ba9839565274a12c52fa1075b424f138a6#diff-d6fe2792e44f6babc94aabfefc8b9bce | spark.python.profile.dump | 1.2.0 | SPARK-3478 | 1aa549ba9839565274a12c52fa1075b424f138a6#diff-d6fe2792e44f6babc94aabfefc8b9bce | spark.python.worker.memory | 1.1.0 | SPARK-2538 | 14174abd421318e71c16edd24224fd5094bdfed4#diff-d6fe2792e44f6babc94aabfefc8b9bce | spark.jars.packages | 1.5.0 | SPARK-9263 | 34335719a372c1951fdb4dd25b75b086faf1076f#diff-63a5d817d2d45ae24de577f6a1bd80f9 | spark.jars.excludes | 1.5.0 | SPARK-9263 | 34335719a372c1951fdb4dd25b75b086faf1076f#diff-63a5d817d2d45ae24de577f6a1bd80f9 | spark.jars.ivy | 1.3.0 | SPARK-5341 | 3b7acd22ab4a134c74746e3b9a803dbd34d43855#diff-63a5d817d2d45ae24de577f6a1bd80f9 | spark.jars.ivySettings | 2.2.0 | SPARK-17568 | 3bc2eff8880a3ba8d4318118715ea1a47048e3de#diff-4d2ab44195558d5a9d5f15b8803ef39d | spark.jars.repositories | 2.3.0 | SPARK-21403 | d8257b99ddae23f702f312640a5335ddb4554403#diff-4d2ab44195558d5a9d5f15b8803ef39d | spark.shuffle.io.maxRetries | 1.2.0 | SPARK-4188 | c1ea5c542f3267c0b23a7775887e3a6ece793fe3#diff-d2ce9b38bdc38ca9d7119f9c2cf79907 | spark.shuffle.io.numConnectionsPerPeer | 1.2.1 | SPARK-4740 | 441ec3451730c7ae3dbef8952e313071d6147ab6#diff-d2ce9b38bdc38ca9d7119f9c2cf79907 | spark.shuffle.io.preferDirectBufs | 1.2.0 | SPARK-4188 | c1ea5c542f3267c0b23a7775887e3a6ece793fe3#diff-d2ce9b38bdc38ca9d7119f9c2cf79907 | spark.shuffle.io.retryWait | 1.2.1 | None | 5e5d8f469a1bea9bbe606f772ccdcab7c184c651#diff-d2ce9b38bdc38ca9d7119f9c2cf79907 | spark.shuffle.io.backLog | 1.1.1 | SPARK-2468 | 
66b4c81db7e826c00f7fb449b8a8af810cf7dd9a#diff-bdee8e601924d41e93baa7287189e878 | spark.shuffle.service.index.cache.size | 2.3.0 | SPARK-21501 | 1662e93119d68498942386906de309d35f4a135f#diff-97d5edc927a83a678e013ae00343df94 | spark.shuffle.maxChunksBeingTransferred | 2.3.0 | SPARK-21175 | 799e13161e89f1ea96cb1bc7b507a05af2e89cd0#diff-0ac65da2bc6b083fb861fe410c7688c2 | spark.sql.ui.retainedExecutions | 1.5.0 | SPARK-8861 and SPARK-8862 | ebc3aad272b91cf58e2e1b4aa92b49b8a947a045#diff-81764e4d52817f83bdd5336ef1226bd9 | spark.streaming.ui.retainedBatches | 1.0.0 | SPARK-1386 | f36dc3fed0a0671b0712d664db859da28c0a98e2#diff-56b8d67d07284cfab165d5363bd3500e | spark.default.parallelism | 0.5.0 | None | e5c4cd8a5e188592f8786a265c0cd073c69ac886#diff-0544ebf7533fa70ff5103e0fe1f0b036 | spark.files.fetchTimeout | 1.0.0 | None | f6f9d02e85d17da2f742ed0062f1648a9293e73c#diff-d239aee594001f8391676e1047a0381e | spark.files.useFetchCache | 1.2.2 | SPARK-6313 | a2a94a154bdd00753b8d5e344d712664c7151050#diff-d239aee594001f8391676e1047a0381e | spark.files.overwrite | 1.0.0 | None | 84670f2715392859624df290c1b52eb4ed4a9cb1#diff-d239aee594001f8391676e1047a0381e | Exists in branch-1.0, but the version of pom is 0.9.0-incubating-SNAPSHOT spark.hadoop.cloneConf | 1.0.3 | SPARK-2546 | 6d8f1dd15afdc7432b5721c89f9b2b402460322b#diff-83eb37f7b0ebed3c14ccb7bff0d577c2 | spark.hadoop.validateOutputSpecs | 1.0.1 | SPARK-1677 | 8100cbdb7546e8438019443cfc00683017c81278#diff-f70e97c099b5eac05c75288cb215e080 | spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version | 2.2.0 | SPARK-20107 | edc87d76efea7b4d19d9d0c4ddba274a3ccb8752#diff-76e731333fb756df3bff5ddb3b731c46 | spark.rpc.io.backLog | 3.0.0 | SPARK-27868 | 09ed64d795d3199a94e175273fff6fcea6b52131#diff-76e731333fb756df3bff5ddb3b731c46 | spark.network.io.preferDirectBufs | 3.0.0 | SPARK-24920 | e103c4a5e72bab8862ff49d6d4c1e62e642fc412#diff-0ac65da2bc6b083fb861fe410c7688c2 | spark.port.maxRetries | 1.1.1 | SPARK-3565 | 
32f2222e915f31422089139944a077e2cbd442f9#diff-d239aee594001f8391676e1047a0381e | spark.core.connection.ack.wait.timeout | 1.1.1 | SPARK-2677 | bd3ce2ffb8964abb4d59918ebb2c230fe4614aa2#diff-f748e95f2aa97ed715afa53ddeeac9de | spark.scheduler.listenerbus.eventqueue.shared.capacity | 3.0.0 | SPARK-28574 | c212c9d9ed7375cd1ea16c118733edd84037ec0d#diff-eb519ad78cc3cf0b95839cc37413b509 | spark.scheduler.listenerbus.eventqueue.appStatus.capacity | 3.0.0 | SPARK-28574 | c212c9d9ed7375cd1ea16c118733edd84037ec0d#diff-eb519ad78cc3cf0b95839cc37413b509 | spark.scheduler.listenerbus.eventqueue.executorManagement.capacity | 3.0.0 | SPARK-28574 | c212c9d9ed7375cd1ea16c118733edd84037ec0d#diff-eb519ad78cc3cf0b95839cc37413b509 | spark.scheduler.listenerbus.eventqueue.eventLog.capacity | 3.0.0 | SPARK-28574 | c212c9d9ed7375cd1ea16c118733edd84037ec0d#diff-eb519ad78cc3cf0b95839cc37413b509 | spark.scheduler.listenerbus.eventqueue.streams.capacity | 3.0.0 | SPARK-28574 | c212c9d9ed7375cd1ea16c118733edd84037ec0d#diff-eb519ad78cc3cf0b95839cc37413b509 | spark.task.resource.{resourceName}.amount | 3.0.0 | SPARK-27760 | d30284b5a51dd784f663eb4eea37087b35a54d00#diff-76e731333fb756df3bff5ddb3b731c46 | spark.stage.maxConsecutiveAttempts | 2.2.0 | SPARK-13369 | 7b5d873aef672aa0aee41e338bab7428101e1ad3#diff-6a9ff7fb74fd490a50462d45db2d5e11 | spark.{driver\|executor}.rpc.io.serverThreads | 1.6.0 | SPARK-10745 | 7c5b641808740ba5eed05ba8204cdbaf3fc579f5#diff-d2ce9b38bdc38ca9d7119f9c2cf79907 | spark.{driver\|executor}.rpc.io.clientThreads | 1.6.0 | SPARK-10745 | 7c5b641808740ba5eed05ba8204cdbaf3fc579f5#diff-d2ce9b38bdc38ca9d7119f9c2cf79907 | spark.{driver\|executor}.rpc.netty.dispatcher.numThreads | 3.0.0 | SPARK-29398 | 2f0a38cb50e3e8b4b72219c7b2b8b15d51f6b931#diff-a68a21481fea5053848ca666dd3201d8 | spark.r.driver.command | 1.5.3 | SPARK-10971 | 9695f452e86a88bef3bcbd1f3c0b00ad9e9ac6e1#diff-025470e1b7094d7cf4a78ea353fb3981 | spark.r.shell.command | 2.1.0 | SPARK-17178 | 
fa6347938fc1c72ddc03a5f3cd2e929b5694f0a6#diff-a78ecfc6a89edfaf0b60a5eaa0381970 | spark.graphx.pregel.checkpointInterval | 2.2.0 | SPARK-5484 | f971ce5dd0788fe7f5d2ca820b9ea3db72033ddc#diff-e399679417ffa6eeedf26a7630baca16 | ### Why are the changes needed? Supplemental configuration version information. ### Does this PR introduce any user-facing change? 'No'. ### How was this patch tested? Jenkins test Closes #28035 from beliefer/supplement-configuration-version. Authored-by: beliefer <[email protected]> Signed-off-by: HyukjinKwon <[email protected]> --- docs/configuration.md | 88 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 69 insertions(+), 19 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 6d01897..fae3bb4 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -143,6 +143,7 @@ of the most common options to set are: <td> The name of your application. This will appear in the UI and in log data. </td> + <td>0.9.0</td> </tr> <tr> <td><code>spark.driver.cores</code></td> @@ -206,6 +207,7 @@ of the most common options to set are: <code>spark.driver.resource.{resourceName}.discoveryScript</code> for the driver to find the resource on startup. </td> + <td>3.0.0</td> </tr> <tr> <td><code>spark.driver.resource.{resourceName}.discoveryScript</code></td> @@ -216,6 +218,7 @@ of the most common options to set are: name and an array of addresses. For a client-submitted driver, discovery script must assign different resource addresses to this driver comparing to other drivers on the same host. </td> + <td>3.0.0</td> </tr> <tr> <td><code>spark.driver.resource.{resourceName}.vendor</code></td> @@ -226,6 +229,7 @@ of the most common options to set are: the Kubernetes device plugin naming convention. (e.g. 
For GPUs on Kubernetes this config would be set to nvidia.com or amd.com) </td> + <td>3.0.0</td> </tr> <tr> <td><code>spark.resources.discoveryPlugin</code></td> @@ -293,6 +297,7 @@ of the most common options to set are: <code>spark.executor.resource.{resourceName}.discoveryScript</code> for the executor to find the resource on startup. </td> + <td>3.0.0</td> </tr> <tr> <td><code>spark.executor.resource.{resourceName}.discoveryScript</code></td> @@ -302,6 +307,7 @@ of the most common options to set are: write to STDOUT a JSON string in the format of the ResourceInformation class. This has a name and an array of addresses. </td> + <td>3.0.0</td> </tr> <tr> <td><code>spark.executor.resource.{resourceName}.vendor</code></td> @@ -312,6 +318,7 @@ of the most common options to set are: the Kubernetes device plugin naming convention. (e.g. For GPUs on Kubernetes this config would be set to nvidia.com or amd.com) </td> + <td>3.0.0</td> </tr> <tr> <td><code>spark.extraListeners</code></td> @@ -337,6 +344,7 @@ of the most common options to set are: <em>Note:</em> This will be overridden by SPARK_LOCAL_DIRS (Standalone), MESOS_SANDBOX (Mesos) or LOCAL_DIRS (YARN) environment variables set by the cluster manager. </td> + <td>0.5.0</td> </tr> <tr> <td><code>spark.logConf</code></td> @@ -344,6 +352,7 @@ of the most common options to set are: <td> Logs the effective SparkConf as INFO when a SparkContext is started. </td> + <td>0.9.0</td> </tr> <tr> <td><code>spark.master</code></td> @@ -352,6 +361,7 @@ of the most common options to set are: The cluster manager to connect to. See the list of <a href="submitting-applications.html#master-urls"> allowed master URL's</a>. </td> + <td>0.9.0</td> </tr> <tr> <td><code>spark.submit.deployMode</code></td> @@ -467,6 +477,7 @@ Apart from these, the following properties are also available, and may be useful Instead, please set this through the <code>--driver-java-options</code> command line option or in your default properties file. 
</td> + <td>3.0.0</td> </tr> <tr> <td><code>spark.driver.extraJavaOptions</code></td> @@ -540,6 +551,7 @@ Apart from these, the following properties are also available, and may be useful verbose gc logging to a file named for the executor ID of the app in /tmp, pass a 'value' of: <code>-verbose:gc -Xloggc:/tmp/{{APP_ID}}-{{EXECUTOR_ID}}.gc</code> </td> + <td>3.0.0</td> </tr> <tr> <td><code>spark.executor.extraJavaOptions</code></td> @@ -636,6 +648,7 @@ Apart from these, the following properties are also available, and may be useful Add the environment variable specified by <code>EnvironmentVariableName</code> to the Executor process. The user can specify multiple of these to set multiple environment variables. </td> + <td>0.9.0</td> </tr> <tr> <td><code>spark.redaction.regex</code></td> @@ -659,7 +672,7 @@ Apart from these, the following properties are also available, and may be useful By default the <code>pyspark.profiler.BasicProfiler</code> will be used, but this can be overridden by passing a profiler class in as a parameter to the <code>SparkContext</code> constructor. </td> - <td></td> + <td>1.2.0</td> </tr> <tr> <td><code>spark.python.profile.dump</code></td> @@ -670,6 +683,7 @@ Apart from these, the following properties are also available, and may be useful by <code>pstats.Stats()</code>. If this is specified, the profile result will not be displayed automatically. </td> + <td>1.2.0</td> </tr> <tr> <td><code>spark.python.worker.memory</code></td> @@ -680,6 +694,7 @@ Apart from these, the following properties are also available, and may be useful (e.g. <code>512m</code>, <code>2g</code>). If the memory used during aggregation goes above this amount, it will spill the data into disks. </td> + <td>1.1.0</td> </tr> <tr> <td><code>spark.python.worker.reuse</code></td> @@ -727,6 +742,7 @@ Apart from these, the following properties are also available, and may be useful repositories given by the command-line option <code>--repositories</code>. 
For more details, see <a href="submitting-applications.html#advanced-dependency-management">Advanced Dependency Management</a>. </td> + <td>1.5.0</td> </tr> <tr> <td><code>spark.jars.excludes</code></td> @@ -735,6 +751,7 @@ Apart from these, the following properties are also available, and may be useful Comma-separated list of groupId:artifactId, to exclude while resolving the dependencies provided in <code>spark.jars.packages</code> to avoid dependency conflicts. </td> + <td>1.5.0</td> </tr> <tr> <td><code>spark.jars.ivy</code></td> @@ -744,6 +761,7 @@ Apart from these, the following properties are also available, and may be useful <code>spark.jars.packages</code>. This will override the Ivy property <code>ivy.default.ivy.user.dir</code> which defaults to ~/.ivy2. </td> + <td>1.3.0</td> </tr> <tr> <td><code>spark.jars.ivySettings</code></td> @@ -756,6 +774,7 @@ Apart from these, the following properties are also available, and may be useful artifact server like Artifactory. Details on the settings file format can be found at <a href="http://ant.apache.org/ivy/history/latest-milestone/settings.html">Settings Files</a> </td> + <td>2.2.0</td> </tr> <tr> <td><code>spark.jars.repositories</code></td> @@ -764,6 +783,7 @@ Apart from these, the following properties are also available, and may be useful Comma-separated list of additional remote repositories to search for the maven coordinates given with <code>--packages</code> or <code>spark.jars.packages</code>. </td> + <td>2.3.0</td> </tr> <tr> <td><code>spark.pyspark.driver.python</code></td> @@ -849,6 +869,7 @@ Apart from these, the following properties are also available, and may be useful set to a non-zero value. This retry logic helps stabilize large shuffles in the face of long GC pauses or transient network connectivity issues. 
</td> + <td>1.2.0</td> </tr> <tr> <td><code>spark.shuffle.io.numConnectionsPerPeer</code></td> @@ -858,6 +879,7 @@ Apart from these, the following properties are also available, and may be useful large clusters. For clusters with many hard disks and few hosts, this may result in insufficient concurrency to saturate all disks, and so users may consider increasing this value. </td> + <td>1.2.1</td> </tr> <tr> <td><code>spark.shuffle.io.preferDirectBufs</code></td> @@ -867,6 +889,7 @@ Apart from these, the following properties are also available, and may be useful block transfer. For environments where off-heap memory is tightly limited, users may wish to turn this off to force all allocations from Netty to be on-heap. </td> + <td>1.2.0</td> </tr> <tr> <td><code>spark.shuffle.io.retryWait</code></td> @@ -875,6 +898,7 @@ Apart from these, the following properties are also available, and may be useful (Netty only) How long to wait between retries of fetches. The maximum delay caused by retrying is 15 seconds by default, calculated as <code>maxRetries * retryWait</code>. </td> + <td>1.2.1</td> </tr> <tr> <td><code>spark.shuffle.io.backLog</code></td> @@ -887,6 +911,7 @@ Apart from these, the following properties are also available, and may be useful application (see <code>spark.shuffle.service.enabled</code> option below). If set below 1, will fallback to OS default defined by Netty's <code>io.netty.util.NetUtil#SOMAXCONN</code>. </td> + <td>1.1.1</td> </tr> <tr> <td><code>spark.shuffle.service.enabled</code></td> @@ -915,6 +940,7 @@ Apart from these, the following properties are also available, and may be useful <td> Cache entries limited to the specified memory footprint, in bytes unless otherwise specified. 
</td> + <td>2.3.0</td> </tr> <tr> <td><code>spark.shuffle.maxChunksBeingTransferred</code></td> @@ -926,6 +952,7 @@ Apart from these, the following properties are also available, and may be useful <code>spark.shuffle.io.retryWait</code>), if those limits are reached the task will fail with fetch failure. </td> + <td>2.3.0</td> </tr> <tr> <td><code>spark.shuffle.sort.bypassMergeThreshold</code></td> @@ -1241,6 +1268,7 @@ Apart from these, the following properties are also available, and may be useful <td> How many finished batches the Spark UI and status APIs remember before garbage collecting. </td> + <td>1.0.0</td> </tr> <tr> <td><code>spark.ui.retainedDeadExecutors</code></td> @@ -1634,6 +1662,7 @@ Apart from these, the following properties are also available, and may be useful Default number of partitions in RDDs returned by transformations like <code>join</code>, <code>reduceByKey</code>, and <code>parallelize</code> when not set by user. </td> + <td>0.5.0</td> </tr> <tr> <td><code>spark.executor.heartbeatInterval</code></td> @@ -1653,6 +1682,7 @@ Apart from these, the following properties are also available, and may be useful Communication timeout to use when fetching files added through SparkContext.addFile() from the driver. </td> + <td>1.0.0</td> </tr> <tr> <td><code>spark.files.useFetchCache</code></td> @@ -1665,6 +1695,7 @@ Apart from these, the following properties are also available, and may be useful disabled in order to use Spark local directories that reside on NFS filesystems (see <a href="https://issues.apache.org/jira/browse/SPARK-6313">SPARK-6313</a> for more details). </td> + <td>1.2.2</td> </tr> <tr> <td><code>spark.files.overwrite</code></td> @@ -1673,6 +1704,7 @@ Apart from these, the following properties are also available, and may be useful Whether to overwrite files added through SparkContext.addFile() when the target file exists and its contents do not match those of the source. 
</td> + <td>1.0.0</td> </tr> <tr> <td><code>spark.files.maxPartitionBytes</code></td> @@ -1693,23 +1725,29 @@ Apart from these, the following properties are also available, and may be useful <td>2.1.0</td> </tr> <tr> - <td><code>spark.hadoop.cloneConf</code></td> - <td>false</td> - <td>If set to true, clones a new Hadoop <code>Configuration</code> object for each task. This + <td><code>spark.hadoop.cloneConf</code></td> + <td>false</td> + <td> + If set to true, clones a new Hadoop <code>Configuration</code> object for each task. This option should be enabled to work around <code>Configuration</code> thread-safety issues (see <a href="https://issues.apache.org/jira/browse/SPARK-2546">SPARK-2546</a> for more details). This is disabled by default in order to avoid unexpected performance regressions for jobs that - are not affected by these issues.</td> + are not affected by these issues. + </td> + <td>1.0.3</td> </tr> <tr> - <td><code>spark.hadoop.validateOutputSpecs</code></td> - <td>true</td> - <td>If set to true, validates the output specification (e.g. checking if the output directory already exists) + <td><code>spark.hadoop.validateOutputSpecs</code></td> + <td>true</td> + <td> + If set to true, validates the output specification (e.g. checking if the output directory already exists) used in saveAsHadoopFile and other variants. This can be disabled to silence exceptions due to pre-existing - output directories. We recommend that users do not disable this except if trying to achieve compatibility with - previous versions of Spark. Simply use Hadoop's FileSystem API to delete output directories by hand. - This setting is ignored for jobs generated through Spark Streaming's StreamingContext, since - data may need to be rewritten to pre-existing output directories during checkpoint recovery.</td> + output directories. We recommend that users do not disable this except if trying to achieve compatibility + with previous versions of Spark. 
Simply use Hadoop's FileSystem API to delete output directories by hand. + This setting is ignored for jobs generated through Spark Streaming's StreamingContext, since data may + need to be rewritten to pre-existing output directories during checkpoint recovery. + </td> + <td>1.0.1</td> </tr> <tr> <td><code>spark.storage.memoryMapThreshold</code></td> @@ -1729,6 +1767,7 @@ Apart from these, the following properties are also available, and may be useful Version 2 may have better performance, but version 1 may handle failures better in certain situations, as per <a href="https://issues.apache.org/jira/browse/MAPREDUCE-4815">MAPREDUCE-4815</a>. </td> + <td>2.2.0</td> </tr> </table> @@ -1843,7 +1882,7 @@ Apart from these, the following properties are also available, and may be useful need to be increased, so that incoming connections are not dropped when a large number of connections arrives in a short period of time. </td> - <td></td> + <td>3.0.0</td> </tr> <tr> <td><code>spark.network.timeout</code></td> @@ -1866,7 +1905,7 @@ Apart from these, the following properties are also available, and may be useful block transfer. For environments where off-heap memory is tightly limited, users may wish to turn this off to force all allocations to be on-heap. </td> - <td></td> + <td>3.0.0</td> </tr> <tr> <td><code>spark.port.maxRetries</code></td> @@ -1878,7 +1917,7 @@ Apart from these, the following properties are also available, and may be useful essentially allows it to try a range of ports from the start port specified to port + maxRetries. </td> - <td></td> + <td>1.1.1</td> </tr> <tr> <td><code>spark.rpc.numRetries</code></td> @@ -1921,7 +1960,7 @@ Apart from these, the following properties are also available, and may be useful out and giving up. To avoid unwilling timeout caused by long pause like GC, you can set larger value. 
</td> - <td></td> + <td>1.1.1</td> </tr> <tr> <td><code>spark.network.maxRemoteBlockSizeFetchToMem</code></td> @@ -2054,6 +2093,7 @@ Apart from these, the following properties are also available, and may be useful that register to the listener bus. Consider increasing value, if the listener events corresponding to shared queue are dropped. Increasing this value may result in the driver using more memory. </td> + <td>3.0.0</td> </tr> <tr> <td><code>spark.scheduler.listenerbus.eventqueue.appStatus.capacity</code></td> @@ -2063,6 +2103,7 @@ Apart from these, the following properties are also available, and may be useful Consider increasing value, if the listener events corresponding to appStatus queue are dropped. Increasing this value may result in the driver using more memory. </td> + <td>3.0.0</td> </tr> <tr> <td><code>spark.scheduler.listenerbus.eventqueue.executorManagement.capacity</code></td> @@ -2072,6 +2113,7 @@ Apart from these, the following properties are also available, and may be useful executor management listeners. Consider increasing value if the listener events corresponding to executorManagement queue are dropped. Increasing this value may result in the driver using more memory. </td> + <td>3.0.0</td> </tr> <tr> <td><code>spark.scheduler.listenerbus.eventqueue.eventLog.capacity</code></td> @@ -2081,6 +2123,7 @@ Apart from these, the following properties are also available, and may be useful that write events to eventLogs. Consider increasing value if the listener events corresponding to eventLog queue are dropped. Increasing this value may result in the driver using more memory. </td> + <td>3.0.0</td> </tr> <tr> <td><code>spark.scheduler.listenerbus.eventqueue.streams.capacity</code></td> @@ -2090,6 +2133,7 @@ Apart from these, the following properties are also available, and may be useful Consider increasing value if the listener events corresponding to streams queue are dropped. Increasing this value may result in the driver using more memory. 
</td> + <td>3.0.0</td> </tr> <tr> <td><code>spark.scheduler.blacklist.unschedulableTaskSetTimeout</code></td> @@ -2272,6 +2316,7 @@ Apart from these, the following properties are also available, and may be useful in order to assign resource slots (e.g. a 0.2222 configuration, or 1/0.2222 slots will become 4 tasks/resource, not 5). </td> + <td>3.0.0</td> </tr> <tr> <td><code>spark.task.maxFailures</code></td> @@ -2336,6 +2381,7 @@ Apart from these, the following properties are also available, and may be useful <td> Number of consecutive stage attempts allowed before a stage is aborted. </td> + <td>2.2.0</td> </tr> </table> @@ -2528,13 +2574,14 @@ like shuffle, just replace "rpc" with "shuffle" in the property names except <code>spark.{driver|executor}.rpc.netty.dispatcher.numThreads</code>, which is only for RPC module. <table class="table"> -<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr> +<tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr> <tr> <td><code>spark.{driver|executor}.rpc.io.serverThreads</code></td> <td> Fall back on <code>spark.rpc.io.serverThreads</code> </td> <td>Number of threads used in the server thread pool</td> + <td>1.6.0</td> </tr> <tr> <td><code>spark.{driver|executor}.rpc.io.clientThreads</code></td> @@ -2542,6 +2589,7 @@ like shuffle, just replace "rpc" with "shuffle" in the property names except Fall back on <code>spark.rpc.io.clientThreads</code> </td> <td>Number of threads used in the client thread pool</td> + <td>1.6.0</td> </tr> <tr> <td><code>spark.{driver|executor}.rpc.netty.dispatcher.numThreads</code></td> @@ -2549,6 +2597,7 @@ like shuffle, just replace "rpc" with "shuffle" in the property names except Fall back on <code>spark.rpc.netty.dispatcher.numThreads</code> </td> <td>Number of threads used in RPC message dispatcher thread pool</td> + <td>3.0.0</td> </tr> </table> @@ -2730,7 +2779,7 @@ Spark subsystems. <td> Executable for executing R scripts in client modes for driver. 
Ignored in cluster modes. </td> - <td></td> + <td>1.5.3</td> </tr> <tr> <td><code>spark.r.shell.command</code></td> @@ -2739,7 +2788,7 @@ Spark subsystems. Executable for executing sparkR shell in client modes for driver. Ignored in cluster modes. It is the same as environment variable <code>SPARKR_DRIVER_R</code>, but take precedence over it. <code>spark.r.shell.command</code> is used for sparkR shell while <code>spark.r.driver.command</code> is used for running R script. </td> - <td></td> + <td>2.1.0</td> </tr> <tr> <td><code>spark.r.backendConnectionTimeout</code></td> @@ -2771,6 +2820,7 @@ Spark subsystems. Checkpoint interval for graph and message in Pregel. It used to avoid stackOverflowError due to long lineage chains after lots of iterations. The checkpoint is disabled by default. </td> + <td>2.2.0</td> </tr> </table> --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
