This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
commit afab532189a4e224f1334172b07e2f927f530a9f Author: beliefer <[email protected]> AuthorDate: Mon Mar 23 11:07:43 2020 +0900 [SPARK-31002][CORE][DOC][FOLLOWUP] Add version information to the configuration of Core ### What changes were proposed in this pull request? This PR follows up #27847, #27852 and https://github.com/apache/spark/pull/27913. I sorted out some information, shown below. Item name | Since version | JIRA ID | Commit ID | Note -- | -- | -- | -- | -- spark.storage.localDiskByExecutors.cacheSize | 3.0.0 | SPARK-27651 | fd2bf55abaab08798a428d4e47d4050ba2b82a95#diff-6bdad48cfc34314e89599655442ff210 | spark.storage.memoryMapLimitForTests | 2.3.0 | SPARK-3151 | b8ffb51055108fd606b86f034747006962cd2df3#diff-abd96f2ae793cd6ea6aab5b96a3c1d7a | spark.barrier.sync.timeout | 2.4.0 | SPARK-24817 | 388f5a0635a2812cd71b08352e3ddc20293ec189#diff-6bdad48cfc34314e89599655442ff210 | spark.scheduler.blacklist.unschedulableTaskSetTimeout | 2.4.1 | SPARK-22148 | 52e9711d01694158ecb3691f2ec25c0ebe4b0207#diff-6bdad48cfc34314e89599655442ff210 | spark.scheduler.barrier.maxConcurrentTasksCheck.interval | 2.4.0 | SPARK-24819 | bfb74394a5513134ea1da9fcf4a1783b77dd64e4#diff-6bdad48cfc34314e89599655442ff210 | spark.scheduler.barrier.maxConcurrentTasksCheck.maxFailures | 2.4.0 | SPARK-24819 | bfb74394a5513134ea1da9fcf4a1783b77dd64e4#diff-6bdad48cfc34314e89599655442ff210 | spark.unsafe.exceptionOnMemoryLeak | 1.4.0 | SPARK-7076 and SPARK-7077 and SPARK-7080 | f49284b5bf3a69ed91a5e3e6e0ed3be93a6ab9e4#diff-5a0de266c82b95adb47d9bca714e1f1b | spark.unsafe.sorter.spill.read.ahead.enabled | 2.3.0 | SPARK-21113 | 1e978b17d63d7ba20368057aa4e65f5ef6e87369#diff-93a086317cea72a113cf81056882c206 | spark.unsafe.sorter.spill.reader.buffer.size | 2.1.0 | SPARK-16862 | c1937dd19a23bd096a4707656c7ba19fb5c16966#diff-93a086317cea72a113cf81056882c206 | spark.plugins | 3.0.0 | SPARK-29397 | d51d228048d519a9a666f48dc532625de13e7587#diff-6bdad48cfc34314e89599655442ff210 | 
spark.cleaner.periodicGC.interval | 1.6.0 | SPARK-8414 | 72da2a21f0940b97757ace5975535e559d627688#diff-75141521b1d55bc32d72b70032ad96c0 | spark.cleaner.referenceTracking | 1.0.0 | SPARK-1103 | 11eabbe125b2ee572fad359c33c93f5e6fdf0b2d#diff-364713d7776956cb8b0a771e9b62f82d | spark.cleaner.referenceTracking.blocking | 1.0.0 | SPARK-1103 | 11eabbe125b2ee572fad359c33c93f5e6fdf0b2d#diff-364713d7776956cb8b0a771e9b62f82d | spark.cleaner.referenceTracking.blocking.shuffle | 1.1.1 | SPARK-3139 | 5cf1e440137006eedd6846ac8fa57ccf9fd1958d#diff-75141521b1d55bc32d72b70032ad96c0 | spark.cleaner.referenceTracking.cleanCheckpoints | 1.4.0 | SPARK-2033 | 25998e4d73bcc95ac85d9af71adfdc726ec89568#diff-440e866c5df0b8386aff57f9f8bd8db1 | spark.executor.logs.rolling.strategy | 1.1.0 | SPARK-1940 | 4823bf470ec1b47a6f404834d4453e61d3dcbec9#diff-2b4575e096e4db7165e087f9429f2a02 | spark.executor.logs.rolling.time.interval | 1.1.0 | SPARK-1940 | 4823bf470ec1b47a6f404834d4453e61d3dcbec9#diff-2b4575e096e4db7165e087f9429f2a02 | spark.executor.logs.rolling.maxSize | 1.4.0 | SPARK-5932 | 2d222fb39dd978e5a33cde6ceb59307cbdf7b171#diff-529fc5c06b9731c1fbda6f3db60b16aa | spark.executor.logs.rolling.maxRetainedFiles | 1.1.0 | SPARK-1940 | 4823bf470ec1b47a6f404834d4453e61d3dcbec9#diff-2b4575e096e4db7165e087f9429f2a02 | spark.executor.logs.rolling.enableCompression | 2.0.2 | SPARK-17711 | 26e978a93f029e1a1b5c7524d0b52c8141b70997#diff-2b4575e096e4db7165e087f9429f2a02 | spark.master.rest.enabled | 1.3.0 | SPARK-5388 | 6ec0cdc14390d4dc45acf31040f21e1efc476fc0#diff-29dffdccd5a7f4c8b496c293e87c8668 | spark.master.rest.port | 1.3.0 | SPARK-5388 | 6ec0cdc14390d4dc45acf31040f21e1efc476fc0#diff-29dffdccd5a7f4c8b496c293e87c8668 | spark.master.ui.port | 1.1.0 | SPARK-2857 | 12f99cf5f88faf94d9dbfe85cb72d0010a3a25ac#diff-366c88f47e9b5cfa4d4305febeb8b026 | spark.io.compression.snappy.blockSize | 1.4.0 | SPARK-5932 | 2d222fb39dd978e5a33cde6ceb59307cbdf7b171#diff-529fc5c06b9731c1fbda6f3db60b16aa | 
spark.io.compression.lz4.blockSize | 1.4.0 | SPARK-5932 | 2d222fb39dd978e5a33cde6ceb59307cbdf7b171#diff-529fc5c06b9731c1fbda6f3db60b16aa | spark.io.compression.codec | 0.8.0 | None | 46eecd110a4017ea0c86cbb1010d0ccd6a5eb2ef#diff-df9e6118c481ceb27faa399114fac0a1 | spark.io.compression.zstd.bufferSize | 2.3.0 | SPARK-19112 | 444bce1c98c45147fe63e2132e9743a0c5e49598#diff-df9e6118c481ceb27faa399114fac0a1 | spark.io.compression.zstd.level | 2.3.0 | SPARK-19112 | 444bce1c98c45147fe63e2132e9743a0c5e49598#diff-df9e6118c481ceb27faa399114fac0a1 | spark.io.warning.largeFileThreshold | 3.0.0 | SPARK-28366 | 26d03b62e20d053943d03b5c5573dd349e49654c#diff-6bdad48cfc34314e89599655442ff210 | spark.eventLog.compression.codec | 3.0.0 | SPARK-28118 | 47f54b1ec717d0d744bf3ad46bb1ed3542b667c8#diff-6bdad48cfc34314e89599655442ff210 | spark.buffer.size | 0.5.0 | None | 4b1646a25f7581cecae108553da13833e842e68a#diff-eaf125f56ce786d64dcef99cf446a751 | spark.locality.wait.process | 0.8.0 | None | 46eecd110a4017ea0c86cbb1010d0ccd6a5eb2ef#diff-264da78fe625d594eae59d1adabc8ae9 | spark.locality.wait.node | 0.8.0 | None | 46eecd110a4017ea0c86cbb1010d0ccd6a5eb2ef#diff-264da78fe625d594eae59d1adabc8ae9 | spark.locality.wait.rack | 0.8.0 | None | 46eecd110a4017ea0c86cbb1010d0ccd6a5eb2ef#diff-264da78fe625d594eae59d1adabc8ae9 | spark.reducer.maxSizeInFlight | 1.4.0 | SPARK-5932 | 2d222fb39dd978e5a33cde6ceb59307cbdf7b171#diff-529fc5c06b9731c1fbda6f3db60b16aa | spark.reducer.maxReqsInFlight | 2.0.0 | SPARK-6166 | 894921d813a259f2f266fde7d86d2ecb5a0af24b#diff-eb30a71e0d04150b8e0b64929852e38b | spark.broadcast.compress | 0.6.0 | None | efc5423210d1aadeaea78273a4a8f10425753079#diff-76170a9c8f67b542bc58240a0a12fe08 | spark.broadcast.blockSize | 0.5.0 | None | b8ab7862b8bd168bca60bd930cd97c1099fbc8a8#diff-271d7958e14cdaa46cf3737cfcf51341 | spark.broadcast.checksum | 2.1.1 | SPARK-18188 | 06a56df226aa0c03c21f23258630d8a96385c696#diff-4f43d14923008c6650a8eb7b40c07f74 | spark.broadcast.UDFCompressionThreshold | 
3.0.0 | SPARK-28355 | 79e204770300dab4a669b9f8e2421ef905236e7b#diff-6bdad48cfc34314e89599655442ff210 | spark.rdd.compress | 0.6.0 | None | efc5423210d1aadeaea78273a4a8f10425753079#diff-76170a9c8f67b542bc58240a0a12fe08 | spark.rdd.parallelListingThreshold | 2.0.0 | SPARK-9926 | 80a4bfa4d1c86398b90b26c34d8dcbc2355f5a6a#diff-eaababfc87ea4949f97860e8b89b7586 | spark.rdd.limit.scaleUpFactor | 2.1.0 | SPARK-16984 | 806d8a8e980d8ba2f4261bceb393c40bafaa2f73#diff-1d55e54678eff2076263f2fe36150c17 | spark.serializer | 0.5.0 | None | fd1d255821bde844af28e897fabd59a715659038#diff-b920b65c23bf3a1b3326325b0d6a81b2 | spark.serializer.objectStreamReset | 1.0.0 | SPARK-942 | 40566e10aae4b21ffc71ea72702b8df118ac5c8e#diff-6a59dfc43d1b31dc1c3072ceafa829f5 | spark.serializer.extraDebugInfo | 1.3.0 | SPARK-5307 | 636408311deeebd77fb83d2249e0afad1a1ba149#diff-6a59dfc43d1b31dc1c3072ceafa829f5 | spark.jars | 0.9.0 | None | f1d206c6b4c0a5b2517b05af05fdda6049e2f7c2#diff-364713d7776956cb8b0a771e9b62f82d | spark.files | 1.0.0 | None | 29ee101c73bf066bf7f4f8141c475b8d1bd3cf1c#diff-364713d7776956cb8b0a771e9b62f82d | spark.submit.deployMode | 1.5.0 | SPARK-6797 | 7f487c8bde14dbdd244a3493ad11a129ef2bb327#diff-4d2ab44195558d5a9d5f15b8803ef39d | spark.submit.pyFiles | 1.0.1 | SPARK-1549 | d7ddb26e1fa02e773999cc4a97c48d2cd1723956#diff-4d2ab44195558d5a9d5f15b8803ef39d | spark.scheduler.allocation.file | 0.8.1 | None | 976fe60f7609d7b905a34f18743efabd966407f0#diff-9bc0105ee454005379abed710cd20ced | spark.scheduler.minRegisteredResourcesRatio | 1.1.1 | SPARK-2635 | 3311da2f9efc5ff2c7d01273ac08f719b067d11d#diff-7d99a7c7a051e5e851aaaefb275a44a1 | spark.scheduler.maxRegisteredResourcesWaitingTime | 1.1.1 | SPARK-2635 | 3311da2f9efc5ff2c7d01273ac08f719b067d11d#diff-7d99a7c7a051e5e851aaaefb275a44a1 | spark.scheduler.mode | 0.8.0 | None | 98fb69822cf780160bca51abeaab7c82e49fab54#diff-cb7a25b3c9a7341c6d99bcb8e9780c92 | spark.scheduler.revive.interval | 0.8.1 | None | 
d0c9d41a061969d409715b86a91937d8de4c29f7#diff-7d99a7c7a051e5e851aaaefb275a44a1 | spark.speculation | 0.6.0 | None | e72afdb817bcc8388aeb8b8d31628fd5fd67acf1#diff-4e188f32951dc989d97fa7577858bc7c | spark.speculation.interval | 0.6.0 | None | e72afdb817bcc8388aeb8b8d31628fd5fd67acf1#diff-4e188f32951dc989d97fa7577858bc7c | spark.speculation.multiplier | 0.6.0 | None | e72afdb817bcc8388aeb8b8d31628fd5fd67acf1#diff-fff59f72dfe6ca4ccb607ad12535da07 | spark.speculation.quantile | 0.6.0 | None | e72afdb817bcc8388aeb8b8d31628fd5fd67acf1#diff-fff59f72dfe6ca4ccb607ad12535da07 | spark.speculation.task.duration.threshold | 3.0.0 | SPARK-29976 | ad238a2238a9d0da89be4424574436cbfaee579d#diff-6bdad48cfc34314e89599655442ff210 | spark.yarn.stagingDir | 2.0.0 | SPARK-13063 | bc36df127d3b9f56b4edaeb5eca7697d4aef761a#diff-14b8ed2ef4e3da985300b8d796a38fa9 | spark.buffer.pageSize | 1.5.0 | SPARK-9411 | 1b0099fc62d02ff6216a76fbfe17a4ec5b2f3536#diff-1b22e54318c04824a6d53ed3f4d1bb35 | ### Why are the changes needed? Supplemental configuration version information. ### Does this PR introduce any user-facing change? 'No'. ### How was this patch tested? Existing UTs. Closes #27931 from beliefer/add-version-to-core-config-part-four. Authored-by: beliefer <[email protected]> Signed-off-by: HyukjinKwon <[email protected]> --- .../org/apache/spark/internal/config/package.scala | 82 ++++++++++++++++++++-- docs/configuration.md | 72 +++++++++++++++---- 2 files changed, 134 insertions(+), 20 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 74a2e0a..f70ee2e 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -1367,6 +1367,7 @@ package object config { "unbounded store. 
This cache will be used to avoid the network in case of fetching disk " + s"persisted RDD blocks or shuffle blocks " + s"(when `${SHUFFLE_HOST_LOCAL_DISK_READING_ENABLED.key}` is set) from the same host.") + .version("3.0.0") .intConf .createWithDefault(1000) @@ -1374,6 +1375,7 @@ package object config { ConfigBuilder("spark.storage.memoryMapLimitForTests") .internal() .doc("For testing only, controls the size of chunks when memory mapping a file") + .version("2.3.0") .bytesConf(ByteUnit.BYTE) .createWithDefault(ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) @@ -1383,6 +1385,7 @@ package object config { "coordinator didn't receive all the sync messages from barrier tasks within the " + "configured time, throw a SparkException to fail all the tasks. The default value is set " + "to 31536000(3600 * 24 * 365) so the barrier() call shall wait for one year.") + .version("2.4.0") .timeConf(TimeUnit.SECONDS) .checkValue(v => v > 0, "The value should be a positive time value.") .createWithDefaultString("365d") @@ -1391,6 +1394,7 @@ package object config { ConfigBuilder("spark.scheduler.blacklist.unschedulableTaskSetTimeout") .doc("The timeout in seconds to wait to acquire a new executor and schedule a task " + "before aborting a TaskSet which is unschedulable because of being completely blacklisted.") + .version("2.4.1") .timeConf(TimeUnit.SECONDS) .checkValue(v => v >= 0, "The value should be a non negative time value.") .createWithDefault(120) @@ -1405,6 +1409,7 @@ package object config { "configured max failure times for a job then fail current job submission. Note this " + "config only applies to jobs that contain one or more barrier stages, we won't perform " + "the check on non-barrier jobs.") + .version("2.4.0") .timeConf(TimeUnit.SECONDS) .createWithDefaultString("15s") @@ -1418,6 +1423,7 @@ package object config { "max failure times for a job then fail current job submission. 
Note this config only " + "applies to jobs that contain one or more barrier stages, we won't perform the check on " + "non-barrier jobs.") + .version("2.4.0") .intConf .checkValue(v => v > 0, "The max failures should be a positive value.") .createWithDefault(40) @@ -1425,18 +1431,21 @@ package object config { private[spark] val UNSAFE_EXCEPTION_ON_MEMORY_LEAK = ConfigBuilder("spark.unsafe.exceptionOnMemoryLeak") .internal() + .version("1.4.0") .booleanConf .createWithDefault(false) private[spark] val UNSAFE_SORTER_SPILL_READ_AHEAD_ENABLED = ConfigBuilder("spark.unsafe.sorter.spill.read.ahead.enabled") .internal() + .version("2.3.0") .booleanConf .createWithDefault(true) private[spark] val UNSAFE_SORTER_SPILL_READER_BUFFER_SIZE = ConfigBuilder("spark.unsafe.sorter.spill.reader.buffer.size") .internal() + .version("2.1.0") .bytesConf(ByteUnit.BYTE) .checkValue(v => 1024 * 1024 <= v && v <= MAX_BUFFER_SIZE_BYTES, s"The value must be in allowed range [1,048,576, ${MAX_BUFFER_SIZE_BYTES}].") @@ -1449,63 +1458,83 @@ package object config { .withPrepended(DEFAULT_PLUGINS_LIST, separator = ",") .doc("Comma-separated list of class names implementing " + "org.apache.spark.api.plugin.SparkPlugin to load into the application.") + .version("3.0.0") .stringConf .toSequence .createWithDefault(Nil) private[spark] val CLEANER_PERIODIC_GC_INTERVAL = ConfigBuilder("spark.cleaner.periodicGC.interval") + .version("1.6.0") .timeConf(TimeUnit.SECONDS) .createWithDefaultString("30min") private[spark] val CLEANER_REFERENCE_TRACKING = ConfigBuilder("spark.cleaner.referenceTracking") + .version("1.0.0") .booleanConf .createWithDefault(true) private[spark] val CLEANER_REFERENCE_TRACKING_BLOCKING = ConfigBuilder("spark.cleaner.referenceTracking.blocking") + .version("1.0.0") .booleanConf .createWithDefault(true) private[spark] val CLEANER_REFERENCE_TRACKING_BLOCKING_SHUFFLE = ConfigBuilder("spark.cleaner.referenceTracking.blocking.shuffle") + .version("1.1.1") .booleanConf 
.createWithDefault(false) private[spark] val CLEANER_REFERENCE_TRACKING_CLEAN_CHECKPOINTS = ConfigBuilder("spark.cleaner.referenceTracking.cleanCheckpoints") + .version("1.4.0") .booleanConf .createWithDefault(false) private[spark] val EXECUTOR_LOGS_ROLLING_STRATEGY = - ConfigBuilder("spark.executor.logs.rolling.strategy").stringConf.createWithDefault("") + ConfigBuilder("spark.executor.logs.rolling.strategy") + .version("1.1.0") + .stringConf + .createWithDefault("") private[spark] val EXECUTOR_LOGS_ROLLING_TIME_INTERVAL = - ConfigBuilder("spark.executor.logs.rolling.time.interval").stringConf.createWithDefault("daily") + ConfigBuilder("spark.executor.logs.rolling.time.interval") + .version("1.1.0") + .stringConf + .createWithDefault("daily") private[spark] val EXECUTOR_LOGS_ROLLING_MAX_SIZE = ConfigBuilder("spark.executor.logs.rolling.maxSize") + .version("1.4.0") .stringConf .createWithDefault((1024 * 1024).toString) private[spark] val EXECUTOR_LOGS_ROLLING_MAX_RETAINED_FILES = - ConfigBuilder("spark.executor.logs.rolling.maxRetainedFiles").intConf.createWithDefault(-1) + ConfigBuilder("spark.executor.logs.rolling.maxRetainedFiles") + .version("1.1.0") + .intConf + .createWithDefault(-1) private[spark] val EXECUTOR_LOGS_ROLLING_ENABLE_COMPRESSION = ConfigBuilder("spark.executor.logs.rolling.enableCompression") + .version("2.0.2") .booleanConf .createWithDefault(false) private[spark] val MASTER_REST_SERVER_ENABLED = ConfigBuilder("spark.master.rest.enabled") + .version("1.3.0") .booleanConf .createWithDefault(false) private[spark] val MASTER_REST_SERVER_PORT = ConfigBuilder("spark.master.rest.port") + .version("1.3.0") .intConf .createWithDefault(6066) private[spark] val MASTER_UI_PORT = ConfigBuilder("spark.master.ui.port") + .version("1.1.0") .intConf .createWithDefault(8080) @@ -1514,6 +1543,7 @@ package object config { .doc("Block size in bytes used in Snappy compression, in the case when " + "Snappy compression codec is used. 
Lowering this block size " + "will also lower shuffle memory usage when Snappy is used") + .version("1.4.0") .bytesConf(ByteUnit.BYTE) .createWithDefaultString("32k") @@ -1522,6 +1552,7 @@ package object config { .doc("Block size in bytes used in LZ4 compression, in the case when LZ4 compression" + "codec is used. Lowering this block size will also lower shuffle memory " + "usage when LZ4 is used.") + .version("1.4.0") .bytesConf(ByteUnit.BYTE) .createWithDefaultString("32k") @@ -1531,6 +1562,7 @@ package object config { "broadcast variables and shuffle outputs. By default, Spark provides four codecs: " + "lz4, lzf, snappy, and zstd. You can also use fully qualified class names to specify " + "the codec") + .version("0.8.0") .stringConf .createWithDefaultString("lz4") @@ -1540,6 +1572,7 @@ package object config { "compression codec is used. Lowering this size will lower the shuffle " + "memory usage when Zstd is used, but it might increase the compression " + "cost because of excessive JNI call overhead") + .version("2.3.0") .bytesConf(ByteUnit.BYTE) .createWithDefaultString("32k") @@ -1547,6 +1580,7 @@ package object config { ConfigBuilder("spark.io.compression.zstd.level") .doc("Compression level for Zstd compression codec. Increasing the compression " + "level will result in better compression at the expense of more CPU and memory") + .version("2.3.0") .intConf .createWithDefault(1) @@ -1555,6 +1589,7 @@ package object config { .internal() .doc("If the size in bytes of a file loaded by Spark exceeds this threshold, " + "a warning is logged with the possible reasons.") + .version("3.0.0") .bytesConf(ByteUnit.BYTE) .createWithDefault(1024 * 1024 * 1024) @@ -1563,28 +1598,34 @@ package object config { .doc("The codec used to compress event log. By default, Spark provides four codecs: " + "lz4, lzf, snappy, and zstd. You can also use fully qualified class names to specify " + "the codec. 
If this is not given, spark.io.compression.codec will be used.") + .version("3.0.0") .fallbackConf(IO_COMPRESSION_CODEC) private[spark] val BUFFER_SIZE = ConfigBuilder("spark.buffer.size") + .version("0.5.0") .intConf .checkValue(_ >= 0, "The buffer size must not be negative") .createWithDefault(65536) private[spark] val LOCALITY_WAIT_PROCESS = ConfigBuilder("spark.locality.wait.process") + .version("0.8.0") .fallbackConf(LOCALITY_WAIT) private[spark] val LOCALITY_WAIT_NODE = ConfigBuilder("spark.locality.wait.node") + .version("0.8.0") .fallbackConf(LOCALITY_WAIT) private[spark] val LOCALITY_WAIT_RACK = ConfigBuilder("spark.locality.wait.rack") + .version("0.8.0") .fallbackConf(LOCALITY_WAIT) - private[spark] val REDUCER_MAX_SIZE_IN_FLIGHT = ConfigBuilder("spark.reducer.maxSizeInFlight") + private[spark] val REDUCER_MAX_SIZE_IN_FLIGHT = ConfigBuilder("spark.reducer.maxSizeInFlight") .doc("Maximum size of map outputs to fetch simultaneously from each reduce task, " + "in MiB unless otherwise specified. Since each output requires us to create a " + "buffer to receive it, this represents a fixed memory overhead per reduce task, " + "so keep it small unless you have a large amount of memory") + .version("1.4.0") .bytesConf(ByteUnit.MiB) .createWithDefaultString("48m") @@ -1594,12 +1635,14 @@ package object config { "it might lead to very large number of inbound connections to one or more nodes, " + "causing the workers to fail under load. By allowing it to limit the number of " + "fetch requests, this scenario can be mitigated") + .version("2.0.0") .intConf .createWithDefault(Int.MaxValue) private[spark] val BROADCAST_COMPRESS = ConfigBuilder("spark.broadcast.compress") .doc("Whether to compress broadcast variables before sending them. " + "Generally a good idea. 
Compression will use spark.io.compression.codec") + .version("0.6.0") .booleanConf.createWithDefault(true) private[spark] val BROADCAST_BLOCKSIZE = ConfigBuilder("spark.broadcast.blockSize") @@ -1607,6 +1650,7 @@ package object config { "KiB unless otherwise specified. Too large a value decreases " + "parallelism during broadcast (makes it slower); however, " + "if it is too small, BlockManager might take a performance hit") + .version("0.5.0") .bytesConf(ByteUnit.KiB) .createWithDefaultString("4m") @@ -1616,12 +1660,15 @@ package object config { "corrupted blocks, at the cost of computing and sending a little " + "more data. It's possible to disable it if the network has other " + "mechanisms to guarantee data won't be corrupted during broadcast") - .booleanConf.createWithDefault(true) + .version("2.1.1") + .booleanConf + .createWithDefault(true) private[spark] val BROADCAST_FOR_UDF_COMPRESSION_THRESHOLD = ConfigBuilder("spark.broadcast.UDFCompressionThreshold") .doc("The threshold at which user-defined functions (UDFs) and Python RDD commands " + "are compressed by broadcast in bytes unless otherwise specified") + .version("3.0.0") .bytesConf(ByteUnit.BYTE) .checkValue(v => v >= 0, "The threshold should be non-negative.") .createWithDefault(1L * 1024 * 1024) @@ -1632,92 +1679,112 @@ package object config { "or StorageLevel.MEMORY_ONLY in Python). Can save substantial " + "space at the cost of some extra CPU time. 
" + "Compression will use spark.io.compression.codec") - .booleanConf.createWithDefault(false) + .version("0.6.0") + .booleanConf + .createWithDefault(false) private[spark] val RDD_PARALLEL_LISTING_THRESHOLD = ConfigBuilder("spark.rdd.parallelListingThreshold") + .version("2.0.0") .intConf .createWithDefault(10) private[spark] val RDD_LIMIT_SCALE_UP_FACTOR = ConfigBuilder("spark.rdd.limit.scaleUpFactor") + .version("2.1.0") .intConf .createWithDefault(4) private[spark] val SERIALIZER = ConfigBuilder("spark.serializer") + .version("0.5.0") .stringConf .createWithDefault("org.apache.spark.serializer.JavaSerializer") private[spark] val SERIALIZER_OBJECT_STREAM_RESET = ConfigBuilder("spark.serializer.objectStreamReset") + .version("1.0.0") .intConf .createWithDefault(100) private[spark] val SERIALIZER_EXTRA_DEBUG_INFO = ConfigBuilder("spark.serializer.extraDebugInfo") + .version("1.3.0") .booleanConf .createWithDefault(true) private[spark] val JARS = ConfigBuilder("spark.jars") + .version("0.9.0") .stringConf .toSequence .createWithDefault(Nil) private[spark] val FILES = ConfigBuilder("spark.files") + .version("1.0.0") .stringConf .toSequence .createWithDefault(Nil) private[spark] val SUBMIT_DEPLOY_MODE = ConfigBuilder("spark.submit.deployMode") + .version("1.5.0") .stringConf .createWithDefault("client") private[spark] val SUBMIT_PYTHON_FILES = ConfigBuilder("spark.submit.pyFiles") + .version("1.0.1") .stringConf .toSequence .createWithDefault(Nil) private[spark] val SCHEDULER_ALLOCATION_FILE = ConfigBuilder("spark.scheduler.allocation.file") + .version("0.8.1") .stringConf .createOptional private[spark] val SCHEDULER_MIN_REGISTERED_RESOURCES_RATIO = ConfigBuilder("spark.scheduler.minRegisteredResourcesRatio") + .version("1.1.1") .doubleConf .createOptional private[spark] val SCHEDULER_MAX_REGISTERED_RESOURCE_WAITING_TIME = ConfigBuilder("spark.scheduler.maxRegisteredResourcesWaitingTime") + .version("1.1.1") .timeConf(TimeUnit.MILLISECONDS) 
.createWithDefaultString("30s") private[spark] val SCHEDULER_MODE = ConfigBuilder("spark.scheduler.mode") + .version("0.8.0") .stringConf .createWithDefault(SchedulingMode.FIFO.toString) private[spark] val SCHEDULER_REVIVE_INTERVAL = ConfigBuilder("spark.scheduler.revive.interval") + .version("0.8.1") .timeConf(TimeUnit.MILLISECONDS) .createOptional private[spark] val SPECULATION_ENABLED = ConfigBuilder("spark.speculation") + .version("0.6.0") .booleanConf .createWithDefault(false) private[spark] val SPECULATION_INTERVAL = ConfigBuilder("spark.speculation.interval") + .version("0.6.0") .timeConf(TimeUnit.MILLISECONDS) .createWithDefault(100) private[spark] val SPECULATION_MULTIPLIER = ConfigBuilder("spark.speculation.multiplier") + .version("0.6.0") .doubleConf .createWithDefault(1.5) private[spark] val SPECULATION_QUANTILE = ConfigBuilder("spark.speculation.quantile") + .version("0.6.0") .doubleConf .createWithDefault(0.75) @@ -1731,16 +1798,19 @@ package object config { "large enough. E.g. tasks might be re-launched if there are enough successful runs " + "even though the threshold hasn't been reached. 
The number of slots is computed based " + "on the conf values of spark.executor.cores and spark.task.cpus minimum 1.") + .version("3.0.0") .timeConf(TimeUnit.MILLISECONDS) .createOptional private[spark] val STAGING_DIR = ConfigBuilder("spark.yarn.stagingDir") .doc("Staging directory used while submitting applications.") + .version("2.0.0") .stringConf .createOptional private[spark] val BUFFER_PAGESIZE = ConfigBuilder("spark.buffer.pageSize") .doc("The amount of memory used per page in bytes") + .version("1.5.0") .bytesConf(ByteUnit.BYTE) .createOptional diff --git a/docs/configuration.md b/docs/configuration.md index e4ea25c..6d01897 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -361,6 +361,7 @@ of the most common options to set are: Which means to launch driver program locally ("client") or remotely ("cluster") on one of the nodes inside the cluster. </td> + <td>1.5.0</td> </tr> <tr> <td><code>spark.log.callerContext</code></td> @@ -575,6 +576,7 @@ Apart from these, the following properties are also available, and may be useful Sets the number of latest rolling log files that are going to be retained by the system. Older log files will be deleted. Disabled by default. </td> + <td>1.1.0</td> </tr> <tr> <td><code>spark.executor.logs.rolling.enableCompression</code></td> @@ -583,6 +585,7 @@ Apart from these, the following properties are also available, and may be useful Enable executor log compression. If it is enabled, the rolled executor logs will be compressed. Disabled by default. </td> + <td>2.0.2</td> </tr> <tr> <td><code>spark.executor.logs.rolling.maxSize</code></td> @@ -592,6 +595,7 @@ Apart from these, the following properties are also available, and may be useful Rolling is disabled by default. See <code>spark.executor.logs.rolling.maxRetainedFiles</code> for automatic cleaning of old logs. 
</td> + <td>1.4.0</td> </tr> <tr> <td><code>spark.executor.logs.rolling.strategy</code></td> @@ -603,6 +607,7 @@ Apart from these, the following properties are also available, and may be useful For "size", use <code>spark.executor.logs.rolling.maxSize</code> to set the maximum file size for rolling. </td> + <td>1.1.0</td> </tr> <tr> <td><code>spark.executor.logs.rolling.time.interval</code></td> @@ -613,6 +618,7 @@ Apart from these, the following properties are also available, and may be useful any interval in seconds. See <code>spark.executor.logs.rolling.maxRetainedFiles</code> for automatic cleaning of old logs. </td> + <td>1.1.0</td> </tr> <tr> <td><code>spark.executor.userClassPathFirst</code></td> @@ -692,6 +698,7 @@ Apart from these, the following properties are also available, and may be useful <td> Comma-separated list of files to be placed in the working directory of each executor. Globs are allowed. </td> + <td>1.0.0</td> </tr> <tr> <td><code>spark.submit.pyFiles</code></td> @@ -699,6 +706,7 @@ Apart from these, the following properties are also available, and may be useful <td> Comma-separated list of .zip, .egg, or .py files to place on the PYTHONPATH for Python apps. Globs are allowed. </td> + <td>1.0.1</td> </tr> <tr> <td><code>spark.jars</code></td> @@ -706,6 +714,7 @@ Apart from these, the following properties are also available, and may be useful <td> Comma-separated list of jars to include on the driver and executor classpaths. Globs are allowed. 
</td> + <td>0.9.0</td> </tr> <tr> <td><code>spark.jars.packages</code></td> @@ -778,7 +787,7 @@ Apart from these, the following properties are also available, and may be useful ### Shuffle Behavior <table class="table"> -<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr> +<tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr> <tr> <td><code>spark.reducer.maxSizeInFlight</code></td> <td>48m</td> @@ -788,6 +797,7 @@ Apart from these, the following properties are also available, and may be useful represents a fixed memory overhead per reduce task, so keep it small unless you have a large amount of memory. </td> + <td>1.4.0</td> </tr> <tr> <td><code>spark.reducer.maxReqsInFlight</code></td> @@ -798,6 +808,7 @@ Apart from these, the following properties are also available, and may be useful of inbound connections to one or more nodes, causing the workers to fail under load. By allowing it to limit the number of fetch requests, this scenario can be mitigated. </td> + <td>2.0.0</td> </tr> <tr> <td><code>spark.reducer.maxBlocksInFlightPerAddress</code></td> @@ -998,6 +1009,7 @@ Apart from these, the following properties are also available, and may be useful The codec to compress logged events. If this is not given, <code>spark.io.compression.codec</code> will be used. </td> + <td>3.0.0</td> </tr> <tr> <td><code>spark.eventLog.erasureCoding.enabled</code></td> @@ -1278,6 +1290,7 @@ Apart from these, the following properties are also available, and may be useful Whether to compress broadcast variables before sending them. Generally a good idea. Compression will use <code>spark.io.compression.codec</code>. </td> + <td>0.6.0</td> </tr> <tr> <td><code>spark.checkpoint.compress</code></td> @@ -1301,6 +1314,7 @@ Apart from these, the following properties are also available, and may be useful <code>org.apache.spark.io.SnappyCompressionCodec</code>, and <code>org.apache.spark.io.ZStdCompressionCodec</code>. 
</td> + <td>0.8.0</td> </tr> <tr> <td><code>spark.io.compression.lz4.blockSize</code></td> @@ -1310,6 +1324,7 @@ Apart from these, the following properties are also available, and may be useful is used. Lowering this block size will also lower shuffle memory usage when LZ4 is used. Default unit is bytes, unless otherwise specified. </td> + <td>1.4.0</td> </tr> <tr> <td><code>spark.io.compression.snappy.blockSize</code></td> @@ -1319,6 +1334,7 @@ Apart from these, the following properties are also available, and may be useful Lowering this block size will also lower shuffle memory usage when Snappy is used. Default unit is bytes, unless otherwise specified. </td> + <td>1.4.0</td> </tr> <tr> <td><code>spark.io.compression.zstd.level</code></td> @@ -1327,6 +1343,7 @@ Apart from these, the following properties are also available, and may be useful Compression level for Zstd compression codec. Increasing the compression level will result in better compression at the expense of more CPU and memory. </td> + <td>2.3.0</td> </tr> <tr> <td><code>spark.io.compression.zstd.bufferSize</code></td> @@ -1336,6 +1353,7 @@ Apart from these, the following properties are also available, and may be useful is used. Lowering this size will lower the shuffle memory usage when Zstd is used, but it might increase the compression cost because of excessive JNI call overhead. </td> + <td>2.3.0</td> </tr> <tr> <td><code>spark.kryo.classesToRegister</code></td> @@ -1345,7 +1363,7 @@ Apart from these, the following properties are also available, and may be useful with Kryo. See the <a href="tuning.html#data-serialization">tuning guide</a> for more details. </td> - <th>1.2.0</th> + <td>1.2.0</td> </tr> <tr> <td><code>spark.kryo.referenceTracking</code></td> @@ -1356,7 +1374,7 @@ Apart from these, the following properties are also available, and may be useful copies of the same object. Can be disabled to improve performance if you know this is not the case. 
</td> - <th>0.8.0</th> + <td>0.8.0</td> </tr> <tr> <td><code>spark.kryo.registrationRequired</code></td> @@ -1368,7 +1386,7 @@ Apart from these, the following properties are also available, and may be useful significant performance overhead, so enabling this option can enforce strictly that a user has not omitted classes from registration. </td> - <th>1.1.0</th> + <td>1.1.0</td> </tr> <tr> <td><code>spark.kryo.registrator</code></td> @@ -1382,7 +1400,7 @@ Apart from these, the following properties are also available, and may be useful <code>KryoRegistrator</code></a>. See the <a href="tuning.html#data-serialization">tuning guide</a> for more details. </td> - <th>0.5.0</th> + <td>0.5.0</td> </tr> <tr> <td><code>spark.kryo.unsafe</code></td> @@ -1391,7 +1409,7 @@ Apart from these, the following properties are also available, and may be useful Whether to use unsafe based Kryo serializer. Can be substantially faster by using Unsafe Based IO. </td> - <th>2.1.0</th> + <td>2.1.0</td> </tr> <tr> <td><code>spark.kryoserializer.buffer.max</code></td> @@ -1401,7 +1419,7 @@ Apart from these, the following properties are also available, and may be useful This must be larger than any object you attempt to serialize and must be less than 2048m. Increase this if you get a "buffer limit exceeded" exception inside Kryo. </td> - <th>1.4.0</th> + <td>1.4.0</td> </tr> <tr> <td><code>spark.kryoserializer.buffer</code></td> @@ -1411,7 +1429,7 @@ Apart from these, the following properties are also available, and may be useful Note that there will be one buffer <i>per core</i> on each worker. This buffer will grow up to <code>spark.kryoserializer.buffer.max</code> if needed. </td> - <th>1.4.0</th> + <td>1.4.0</td> </tr> <tr> <td><code>spark.rdd.compress</code></td> @@ -1423,6 +1441,7 @@ Apart from these, the following properties are also available, and may be useful Can save substantial space at the cost of some extra CPU time. Compression will use <code>spark.io.compression.codec</code>. 
</td> + <td>0.6.0</td> </tr> <tr> <td><code>spark.serializer</code></td> @@ -1438,6 +1457,7 @@ Apart from these, the following properties are also available, and may be useful <a href="api/scala/org/apache/spark/serializer/Serializer.html"> <code>org.apache.spark.Serializer</code></a>. </td> + <td>0.5.0</td> </tr> <tr> <td><code>spark.serializer.objectStreamReset</code></td> @@ -1449,13 +1469,14 @@ Apart from these, the following properties are also available, and may be useful objects to be collected. To turn off this periodic reset set it to -1. By default it will reset the serializer every 100 objects. </td> + <td>1.0.0</td> </tr> </table> ### Memory Management <table class="table"> -<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr> +<tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr> <tr> <td><code>spark.memory.fraction</code></td> <td>0.6</td> @@ -1522,6 +1543,7 @@ Apart from these, the following properties are also available, and may be useful on the driver, this may happen very occasionally or not at all. Not cleaning at all may lead to executors running out of disk space after a while. </td> + <td>1.6.0</td> </tr> <tr> <td><code>spark.cleaner.referenceTracking</code></td> @@ -1529,6 +1551,7 @@ Apart from these, the following properties are also available, and may be useful <td> Enables or disables context cleaning. </td> + <td>1.0.0</td> </tr> <tr> <td><code>spark.cleaner.referenceTracking.blocking</code></td> @@ -1537,6 +1560,7 @@ Apart from these, the following properties are also available, and may be useful Controls whether the cleaning thread should block on cleanup tasks (other than shuffle, which is controlled by <code>spark.cleaner.referenceTracking.blocking.shuffle</code> Spark property). 
</td> + <td>1.0.0</td> </tr> <tr> <td><code>spark.cleaner.referenceTracking.blocking.shuffle</code></td> @@ -1544,6 +1568,7 @@ Apart from these, the following properties are also available, and may be useful <td> Controls whether the cleaning thread should block on shuffle cleanup tasks. </td> + <td>1.1.1</td> </tr> <tr> <td><code>spark.cleaner.referenceTracking.cleanCheckpoints</code></td> @@ -1551,13 +1576,14 @@ Apart from these, the following properties are also available, and may be useful <td> Controls whether to clean checkpoint files if the reference is out of scope. </td> + <td>1.4.0</td> </tr> </table> ### Execution Behavior <table class="table"> -<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr> +<tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr> <tr> <td><code>spark.broadcast.blockSize</code></td> <td>4m</td> @@ -1566,6 +1592,7 @@ Apart from these, the following properties are also available, and may be useful specified. Too large a value decreases parallelism during broadcast (makes it slower); however, if it is too small, <code>BlockManager</code> might take a performance hit. </td> + <td>0.5.0</td> </tr> <tr> <td><code>spark.broadcast.checksum</code></td> @@ -1575,6 +1602,7 @@ Apart from these, the following properties are also available, and may be useful help detect corrupted blocks, at the cost of computing and sending a little more data. It's possible to disable it if the network has other mechanisms to guarantee data won't be corrupted during broadcast. </td> + <td>2.1.1</td> </tr> <tr> <td><code>spark.executor.cores</code></td> @@ -1905,14 +1933,14 @@ Apart from these, the following properties are also available, and may be useful For users who enabled external shuffle service, this feature can only work when external shuffle service is at least 2.3.0. 
</td> - <td></td> + <td>3.0.0</td> </tr> </table> ### Scheduling <table class="table"> -<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr> +<tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr> <tr> <td><code>spark.cores.max</code></td> <td>(not set)</td> @@ -1946,6 +1974,7 @@ Apart from these, the following properties are also available, and may be useful Customize the locality wait for node locality. For example, you can set this to 0 to skip node locality and search immediately for rack locality (if your cluster has rack information). </td> + <td>0.8.0</td> </tr> <tr> <td><code>spark.locality.wait.process</code></td> @@ -1954,6 +1983,7 @@ Apart from these, the following properties are also available, and may be useful Customize the locality wait for process locality. This affects tasks that attempt to access cached data in a particular executor process. </td> + <td>0.8.0</td> </tr> <tr> <td><code>spark.locality.wait.rack</code></td> @@ -1961,6 +1991,7 @@ Apart from these, the following properties are also available, and may be useful <td> Customize the locality wait for rack locality. </td> + <td>0.8.0</td> </tr> <tr> <td><code>spark.scheduler.maxRegisteredResourcesWaitingTime</code></td> @@ -1968,6 +1999,7 @@ Apart from these, the following properties are also available, and may be useful <td> Maximum amount of time to wait for resources to register before scheduling begins. </td> + <td>1.1.1</td> </tr> <tr> <td><code>spark.scheduler.minRegisteredResourcesRatio</code></td> @@ -1981,6 +2013,7 @@ Apart from these, the following properties are also available, and may be useful the maximum amount of time it will wait before scheduling begins is controlled by config <code>spark.scheduler.maxRegisteredResourcesWaitingTime</code>. 
</td> + <td>1.1.1</td> </tr> <tr> <td><code>spark.scheduler.mode</code></td> @@ -1991,6 +2024,7 @@ Apart from these, the following properties are also available, and may be useful to use fair sharing instead of queueing jobs one after another. Useful for multi-user services. </td> + <td>0.8.0</td> </tr> <tr> <td><code>spark.scheduler.revive.interval</code></td> @@ -1998,6 +2032,7 @@ Apart from these, the following properties are also available, and may be useful <td> The interval length for the scheduler to revive the worker resource offers to run tasks. </td> + <td>0.8.1</td> </tr> <tr> <td><code>spark.scheduler.listenerbus.eventqueue.capacity</code></td> @@ -2063,6 +2098,7 @@ Apart from these, the following properties are also available, and may be useful The timeout in seconds to wait to acquire a new executor and schedule a task before aborting a TaskSet which is unschedulable because of being completely blacklisted. </td> + <td>2.4.1</td> </tr> <tr> <td><code>spark.blacklist.enabled</code></td> @@ -2173,6 +2209,7 @@ Apart from these, the following properties are also available, and may be useful If set to "true", performs speculative execution of tasks. This means if one or more tasks are running slowly in a stage, they will be re-launched. </td> + <td>0.6.0</td> </tr> <tr> <td><code>spark.speculation.interval</code></td> @@ -2180,6 +2217,7 @@ Apart from these, the following properties are also available, and may be useful <td> How often Spark will check for tasks to speculate. </td> + <td>0.6.0</td> </tr> <tr> <td><code>spark.speculation.multiplier</code></td> @@ -2187,6 +2225,7 @@ Apart from these, the following properties are also available, and may be useful <td> How many times slower a task is than the median to be considered for speculation. 
</td> + <td>0.6.0</td> </tr> <tr> <td><code>spark.speculation.quantile</code></td> @@ -2194,6 +2233,7 @@ Apart from these, the following properties are also available, and may be useful <td> Fraction of tasks which must be complete before speculation is enabled for a particular stage. </td> + <td>0.6.0</td> </tr> <tr> <td><code>spark.speculation.task.duration.threshold</code></td> @@ -2208,6 +2248,7 @@ Apart from these, the following properties are also available, and may be useful the conf values of spark.executor.cores and spark.task.cpus minimum 1. Default unit is bytes, unless otherwise specified. </td> + <td>3.0.0</td> </tr> <tr> <td><code>spark.task.cpus</code></td> @@ -2301,7 +2342,7 @@ Apart from these, the following properties are also available, and may be useful ### Barrier Execution Mode <table class="table"> -<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr> +<tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr> <tr> <td><code>spark.barrier.sync.timeout</code></td> <td>365d</td> @@ -2311,6 +2352,7 @@ Apart from these, the following properties are also available, and may be useful configured time, throw a SparkException to fail all the tasks. The default value is set to 31536000(3600 * 24 * 365) so the <code>barrier()</code> call shall wait for one year. </td> + <td>2.4.0</td> </tr> <tr> <td><code>spark.scheduler.barrier.maxConcurrentTasksCheck.interval</code></td> @@ -2325,6 +2367,7 @@ Apart from these, the following properties are also available, and may be useful config only applies to jobs that contain one or more barrier stages, we won't perform the check on non-barrier jobs. </td> + <td>2.4.0</td> </tr> <tr> <td><code>spark.scheduler.barrier.maxConcurrentTasksCheck.maxFailures</code></td> @@ -2339,13 +2382,14 @@ Apart from these, the following properties are also available, and may be useful applies to jobs that contain one or more barrier stages, we won't perform the check on non-barrier jobs. 
</td> + <td>2.4.0</td> </tr> </table> ### Dynamic Allocation <table class="table"> -<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr> +<tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr> <tr> <td><code>spark.dynamicAllocation.enabled</code></td> <td>false</td> --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
