This is an automated email from the ASF dual-hosted git repository.
yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new f30bf06540af [SPARK-55051][CORE] Byte string accepts KiB, MiB, GiB,
TiB, PiB
f30bf06540af is described below
commit f30bf06540af08e675cfdcf1e5c49e1a47b84586
Author: Cheng Pan <[email protected]>
AuthorDate: Mon Jan 19 11:30:21 2026 +0800
[SPARK-55051][CORE] Byte string accepts KiB, MiB, GiB, TiB, PiB
### What changes were proposed in this pull request?
This PR enhances `JavaUtils.byteStringAs` to parse input strings with the
suffixes `Ki`, `KiB`, `Mi`, `MiB`, and so on, which allows users to specify,
for example, `2GiB` as the value of byte-type configurations.
### Why are the changes needed?
Strictly speaking, `1KB = 1000B` and `1KiB = 1024B`, while currently, Spark
only accepts `1K` or `1KB` and interprets it as `1KiB`.
I'm not intending to "correct" it, but I think it should at least accept
`1Ki` or `1KiB` as input. The lack of this support often draws complaints from
users who are familiar with K8s, as suffixes such as `Mi` and `GiB` are widely
used in the K8s ecosystem.
### Does this PR introduce _any_ user-facing change?
Yes, users can now use `1Ki`, `2MiB`, etc. as the value of byte-type
configurations.
### How was this patch tested?
UTs are added.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #53816 from pan3793/SPARK-55051.
Authored-by: Cheng Pan <[email protected]>
Signed-off-by: yangjie01 <[email protected]>
---
.../java/org/apache/spark/network/util/JavaUtils.java | 12 +++++++++++-
.../src/test/scala/org/apache/spark/util/UtilsSuite.scala | 15 +++++++++++++++
2 files changed, 26 insertions(+), 1 deletion(-)
diff --git
a/common/utils-java/src/main/java/org/apache/spark/network/util/JavaUtils.java
b/common/utils-java/src/main/java/org/apache/spark/network/util/JavaUtils.java
index b222a936531d..b106ad001d93 100644
---
a/common/utils-java/src/main/java/org/apache/spark/network/util/JavaUtils.java
+++
b/common/utils-java/src/main/java/org/apache/spark/network/util/JavaUtils.java
@@ -401,15 +401,25 @@ public class JavaUtils {
byteSuffixes = Map.ofEntries(
Map.entry("b", ByteUnit.BYTE),
Map.entry("k", ByteUnit.KiB),
+ Map.entry("ki", ByteUnit.KiB),
Map.entry("kb", ByteUnit.KiB),
+ Map.entry("kib", ByteUnit.KiB),
Map.entry("m", ByteUnit.MiB),
+ Map.entry("mi", ByteUnit.MiB),
Map.entry("mb", ByteUnit.MiB),
+ Map.entry("mib", ByteUnit.MiB),
Map.entry("g", ByteUnit.GiB),
+ Map.entry("gi", ByteUnit.GiB),
Map.entry("gb", ByteUnit.GiB),
+ Map.entry("gib", ByteUnit.GiB),
Map.entry("t", ByteUnit.TiB),
+ Map.entry("ti", ByteUnit.TiB),
Map.entry("tb", ByteUnit.TiB),
+ Map.entry("tib", ByteUnit.TiB),
Map.entry("p", ByteUnit.PiB),
- Map.entry("pb", ByteUnit.PiB));
+ Map.entry("pi", ByteUnit.PiB),
+ Map.entry("pb", ByteUnit.PiB),
+ Map.entry("pib", ByteUnit.PiB));
}
private static final Pattern TIME_STRING_PATTERN =
Pattern.compile("(-?[0-9]+)([a-z]+)?");
diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index 61952c401853..1cc16f206146 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -134,10 +134,25 @@ class UtilsSuite extends SparkFunSuite with
ResetSystemProperties {
assert(Utils.byteStringAsBytes("1") === 1)
assert(Utils.byteStringAsBytes("1k") === ByteUnit.KiB.toBytes(1))
+ assert(Utils.byteStringAsBytes("1Ki") === ByteUnit.KiB.toBytes(1))
+ assert(Utils.byteStringAsBytes("1KB") === ByteUnit.KiB.toBytes(1))
+ assert(Utils.byteStringAsBytes("1KiB") === ByteUnit.KiB.toBytes(1))
assert(Utils.byteStringAsBytes("1m") === ByteUnit.MiB.toBytes(1))
+ assert(Utils.byteStringAsBytes("1Mi") === ByteUnit.MiB.toBytes(1))
+ assert(Utils.byteStringAsBytes("1Mb") === ByteUnit.MiB.toBytes(1))
+ assert(Utils.byteStringAsBytes("1MiB") === ByteUnit.MiB.toBytes(1))
assert(Utils.byteStringAsBytes("1g") === ByteUnit.GiB.toBytes(1))
+ assert(Utils.byteStringAsBytes("1GI") === ByteUnit.GiB.toBytes(1))
+ assert(Utils.byteStringAsBytes("1gb") === ByteUnit.GiB.toBytes(1))
+ assert(Utils.byteStringAsBytes("1giB") === ByteUnit.GiB.toBytes(1))
assert(Utils.byteStringAsBytes("1t") === ByteUnit.TiB.toBytes(1))
+ assert(Utils.byteStringAsBytes("1Ti") === ByteUnit.TiB.toBytes(1))
+ assert(Utils.byteStringAsBytes("1tb") === ByteUnit.TiB.toBytes(1))
+ assert(Utils.byteStringAsBytes("1Tib") === ByteUnit.TiB.toBytes(1))
assert(Utils.byteStringAsBytes("1p") === ByteUnit.PiB.toBytes(1))
+ assert(Utils.byteStringAsBytes("1pi") === ByteUnit.PiB.toBytes(1))
+ assert(Utils.byteStringAsBytes("1pb") === ByteUnit.PiB.toBytes(1))
+ assert(Utils.byteStringAsBytes("1pib") === ByteUnit.PiB.toBytes(1))
// Overflow handling, 1073741824p exceeds Long.MAX_VALUE if converted
straight to Bytes
// This demonstrates that we can have e.g 1024^3 PiB without overflowing.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]