This is an automated email from the ASF dual-hosted git repository.
ethanfeng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/celeborn.git
The following commit(s) were added to refs/heads/main by this push:
new 1455b6e2f [CELEBORN-1860] Remove unused
celeborn.<module>.io.enableVerboseMetrics option
1455b6e2f is described below
commit 1455b6e2f3ec75973adbb815a000e200fbe264b9
Author: Nicholas Jiang <[email protected]>
AuthorDate: Wed Feb 12 11:42:26 2025 +0800
[CELEBORN-1860] Remove unused celeborn.<module>.io.enableVerboseMetrics
option
### What changes were proposed in this pull request?
Remove the unused `celeborn.<module>.io.enableVerboseMetrics` option.
### Why are the changes needed?
The `celeborn.<module>.io.enableVerboseMetrics` option is unused and can be
replaced by `celeborn.network.memory.allocator.verbose.metric`.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
`CelebornConfSuite`
Closes #3094 from SteNicholas/CELEBORN-1860.
Authored-by: Nicholas Jiang <[email protected]>
Signed-off-by: mingji <[email protected]>
---
.../apache/celeborn/common/network/util/TransportConf.java | 8 --------
.../scala/org/apache/celeborn/common/CelebornConf.scala | 12 ------------
.../org/apache/celeborn/common/CelebornConfSuite.scala | 13 +++++--------
docs/configuration/network.md | 1 -
4 files changed, 5 insertions(+), 29 deletions(-)
diff --git
a/common/src/main/java/org/apache/celeborn/common/network/util/TransportConf.java
b/common/src/main/java/org/apache/celeborn/common/network/util/TransportConf.java
index 36fcf363a..145060805 100644
---
a/common/src/main/java/org/apache/celeborn/common/network/util/TransportConf.java
+++
b/common/src/main/java/org/apache/celeborn/common/network/util/TransportConf.java
@@ -136,14 +136,6 @@ public class TransportConf {
return celebornConf.networkIoLazyFileDescriptor(module);
}
- /**
- * Whether to track Netty memory detailed metrics. If true, the detailed
metrics of Netty
- * PoolByteBufAllocator will be gotten, otherwise only general memory usage
will be tracked.
- */
- public boolean verboseMetrics() {
- return celebornConf.networkIoVerboseMetrics(module);
- }
-
public CelebornConf getCelebornConf() {
return celebornConf;
}
diff --git
a/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
b/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
index 144de1dac..2359c57a2 100644
--- a/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
+++ b/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
@@ -601,10 +601,6 @@ class CelebornConf(loadDefaults: Boolean) extends
Cloneable with Logging with Se
getTransportConfBoolean(module, NETWORK_IO_LAZY_FD)
}
- def networkIoVerboseMetrics(module: String): Boolean = {
- getTransportConfBoolean(module, NETWORK_VERBOSE_METRICS)
- }
-
def networkShareMemoryAllocator: Boolean =
get(NETWORK_MEMORY_ALLOCATOR_SHARE)
def networkMemoryAllocatorAllowCache: Boolean =
@@ -2163,14 +2159,6 @@ object CelebornConf extends Logging {
.booleanConf
.createWithDefault(true)
- val NETWORK_VERBOSE_METRICS: ConfigEntry[Boolean] =
- buildConf("celeborn.<module>.io.enableVerboseMetrics")
- .categories("network")
- .doc("Whether to track Netty memory detailed metrics. If true, the
detailed metrics of Netty " +
- "PoolByteBufAllocator will be gotten, otherwise only general memory
usage will be tracked.")
- .booleanConf
- .createWithDefault(false)
-
val NETWORK_IO_STORAGE_MEMORY_MAP_THRESHOLD: ConfigEntry[Long] =
buildConf("celeborn.<module>.storage.memoryMapThreshold")
.withAlternative("celeborn.storage.memoryMapThreshold")
diff --git
a/common/src/test/scala/org/apache/celeborn/common/CelebornConfSuite.scala
b/common/src/test/scala/org/apache/celeborn/common/CelebornConfSuite.scala
index 42783cb6d..9d96c4fb3 100644
--- a/common/src/test/scala/org/apache/celeborn/common/CelebornConfSuite.scala
+++ b/common/src/test/scala/org/apache/celeborn/common/CelebornConfSuite.scala
@@ -293,7 +293,6 @@ class CelebornConfSuite extends CelebornFunSuite {
private val transportTestNetworkIoStorageMemoryMapThreshold =
NETWORK_IO_STORAGE_MEMORY_MAP_THRESHOLD.defaultValue.get + 5
private val transportTestNetworkIoLazyFd =
!NETWORK_IO_LAZY_FD.defaultValue.get
- private val transportTestNetworkVerboseMetrics =
!NETWORK_VERBOSE_METRICS.defaultValue.get
private val transportTestChannelHeartbeatInterval =
CHANNEL_HEARTBEAT_INTERVAL.defaultValue.get + 5
private val transportTestPushTimeoutCheckThreads =
PUSH_TIMEOUT_CHECK_THREADS.defaultValue.get + 5
@@ -334,7 +333,6 @@ class CelebornConfSuite extends CelebornFunSuite {
moduleKey(NETWORK_IO_STORAGE_MEMORY_MAP_THRESHOLD),
transportTestNetworkIoStorageMemoryMapThreshold.toString)
conf.set(moduleKey(NETWORK_IO_LAZY_FD),
transportTestNetworkIoLazyFd.toString)
- conf.set(moduleKey(NETWORK_VERBOSE_METRICS),
transportTestNetworkVerboseMetrics.toString)
conf.set(moduleKey(CHANNEL_HEARTBEAT_INTERVAL),
transportTestChannelHeartbeatInterval.toString)
conf.set(moduleKey(PUSH_TIMEOUT_CHECK_THREADS),
transportTestPushTimeoutCheckThreads.toString)
conf.set(moduleKey(PUSH_TIMEOUT_CHECK_INTERVAL),
transportTestPushTimeoutCheckInterval.toString)
@@ -347,7 +345,7 @@ class CelebornConfSuite extends CelebornFunSuite {
conf
}
- private def validateDefauitTransportConfValue(conf: CelebornConf, module:
String): Unit = {
+ private def validateDefaultTransportConfValue(conf: CelebornConf, module:
String): Unit = {
assert(transportTestNetworkIoMode == conf.networkIoMode(module))
assert(transportTestNetworkIoPreferDirectBufs ==
conf.networkIoPreferDirectBufs(module))
assert(transportTestNetworkIoConnectTimeout ==
conf.networkIoConnectTimeoutMs(module))
@@ -363,7 +361,6 @@ class CelebornConfSuite extends CelebornFunSuite {
assert(transportTestNetworkIoRetryWait ==
conf.networkIoRetryWaitMs(module))
assert(transportTestNetworkIoStorageMemoryMapThreshold ==
conf.networkIoMemoryMapBytes(module))
assert(transportTestNetworkIoLazyFd ==
conf.networkIoLazyFileDescriptor(module))
- assert(transportTestNetworkVerboseMetrics ==
conf.networkIoVerboseMetrics(module))
assert(transportTestChannelHeartbeatInterval ==
conf.clientHeartbeatInterval(module))
assert(transportTestPushTimeoutCheckThreads ==
conf.pushDataTimeoutCheckerThreads(module))
assert(transportTestPushTimeoutCheckInterval ==
conf.pushDataTimeoutCheckInterval(module))
@@ -374,20 +371,20 @@ class CelebornConfSuite extends CelebornFunSuite {
test("Basic fetch module config") {
val conf = setupCelebornConfForTransportTests("test")
- validateDefauitTransportConfValue(conf, "test")
+ validateDefaultTransportConfValue(conf, "test")
}
test("Fallback to parent module's config for transport conf when not defined
for module") {
val conf = setupCelebornConfForTransportTests("test_parent_module")
// set in parent, but should work in child
- validateDefauitTransportConfValue(conf, "test_child_module")
+ validateDefaultTransportConfValue(conf, "test_child_module")
}
test("rpc_service and rpc_client should default to rpc if not configured") {
val conf = setupCelebornConfForTransportTests("rpc")
// set in rpc, so should work for specific rpc servers
- validateDefauitTransportConfValue(conf, "rpc_service")
- validateDefauitTransportConfValue(conf, "rpc_app")
+ validateDefaultTransportConfValue(conf, "rpc_service")
+ validateDefaultTransportConfValue(conf, "rpc_app")
}
test("Test fallback config works even with parent") {
diff --git a/docs/configuration/network.md b/docs/configuration/network.md
index 3f9c0bc9a..c958451ba 100644
--- a/docs/configuration/network.md
+++ b/docs/configuration/network.md
@@ -27,7 +27,6 @@ license: |
| celeborn.<module>.io.conflictAvoidChooser.enable | false | false |
Whether to use conflict avoid event executor chooser in the client thread pool.
If setting <module> to `rpc_app`, works for shuffle client. If setting <module>
to `rpc_service`, works for master or worker. If setting <module> to `data`, it
works for shuffle client push and fetch data. If setting <module> to
`replicate`, it works for replicate client of worker replicating data to peer
worker. | | |
| celeborn.<module>.io.connectTimeout | <value of
celeborn.network.connect.timeout> | false | Socket connect timeout. If
setting <module> to `rpc_app`, works for shuffle client. If setting <module> to
`rpc_service`, works for master or worker. If setting <module> to `data`, it
works for shuffle client push and fetch data. If setting <module> to
`replicate`, it works for the replicate client of worker replicating data to
peer worker. | | |
| celeborn.<module>.io.connectionTimeout | <value of
celeborn.network.timeout> | false | Connection active timeout. If setting
<module> to `rpc_app`, works for shuffle client. If setting <module> to
`rpc_service`, works for master or worker. If setting <module> to `data`, it
works for shuffle client push and fetch data. If setting <module> to `push`, it
works for worker receiving push data. If setting <module> to `replicate`, it
works for replicate server or client of worker [...]
-| celeborn.<module>.io.enableVerboseMetrics | false | false | Whether to
track Netty memory detailed metrics. If true, the detailed metrics of Netty
PoolByteBufAllocator will be gotten, otherwise only general memory usage will
be tracked. | | |
| celeborn.<module>.io.lazyFD | true | false | Whether to initialize
FileDescriptor lazily or not. If true, file descriptors are created only when
data is going to be transferred. This can reduce the number of open files. If
setting <module> to `fetch`, it works for worker fetch server. | | |
| celeborn.<module>.io.maxRetries | 3 | false | Max number of times we
will try IO exceptions (such as connection timeouts) per request. If set to 0,
we will not do any retries. If setting <module> to `data`, it works for shuffle
client push and fetch data. If setting <module> to `replicate`, it works for
replicate client of worker replicating data to peer worker. If setting <module>
to `push`, it works for Flink shuffle client push data. | | |
| celeborn.<module>.io.mode | <undefined> | false | Netty
EventLoopGroup backend, available options: NIO, EPOLL. If epoll mode is
available, the default IO mode is EPOLL; otherwise, the default is NIO. | | |