This is an automated email from the ASF dual-hosted git repository.

yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new f30bf06540af [SPARK-55051][CORE] Byte string accepts KiB, MiB, GiB, 
TiB, PiB
f30bf06540af is described below

commit f30bf06540af08e675cfdcf1e5c49e1a47b84586
Author: Cheng Pan <[email protected]>
AuthorDate: Mon Jan 19 11:30:21 2026 +0800

    [SPARK-55051][CORE] Byte string accepts KiB, MiB, GiB, TiB, PiB
    
    ### What changes were proposed in this pull request?
    
    This PR enhances `JavaUtils.byteStringAs` to support parsing input 
strings with the suffixes `Ki`, `KiB`, `Mi`, `MiB`, and so on, which allows 
users to use, for example, `2GiB` as the value of byte-type configurations.
    
    ### Why are the changes needed?
    
    Strictly speaking, `1KB = 1000B` and `1KiB = 1024B`, while currently, Spark 
only accepts `1K` or `1KB` and interprets it as `1KiB`.
    
    I'm not intending to "correct" it, but I think it should at least accept 
`1Ki` or `1KiB` as input. The lack of this support is a common complaint from 
users who are familiar with K8s, as suffixes such as `Mi` and `GiB` are widely 
used in the K8s ecosystem.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, users are allowed to use `1Ki`, `2MiB`, etc. as the value of byte type 
configurations.
    
    ### How was this patch tested?
    
    UTs are added.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #53816 from pan3793/SPARK-55051.
    
    Authored-by: Cheng Pan <[email protected]>
    Signed-off-by: yangjie01 <[email protected]>
---
 .../java/org/apache/spark/network/util/JavaUtils.java     | 12 +++++++++++-
 .../src/test/scala/org/apache/spark/util/UtilsSuite.scala | 15 +++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git 
a/common/utils-java/src/main/java/org/apache/spark/network/util/JavaUtils.java 
b/common/utils-java/src/main/java/org/apache/spark/network/util/JavaUtils.java
index b222a936531d..b106ad001d93 100644
--- 
a/common/utils-java/src/main/java/org/apache/spark/network/util/JavaUtils.java
+++ 
b/common/utils-java/src/main/java/org/apache/spark/network/util/JavaUtils.java
@@ -401,15 +401,25 @@ public class JavaUtils {
     byteSuffixes = Map.ofEntries(
       Map.entry("b", ByteUnit.BYTE),
       Map.entry("k", ByteUnit.KiB),
+      Map.entry("ki", ByteUnit.KiB),
       Map.entry("kb", ByteUnit.KiB),
+      Map.entry("kib", ByteUnit.KiB),
       Map.entry("m", ByteUnit.MiB),
+      Map.entry("mi", ByteUnit.MiB),
       Map.entry("mb", ByteUnit.MiB),
+      Map.entry("mib", ByteUnit.MiB),
       Map.entry("g", ByteUnit.GiB),
+      Map.entry("gi", ByteUnit.GiB),
       Map.entry("gb", ByteUnit.GiB),
+      Map.entry("gib", ByteUnit.GiB),
       Map.entry("t", ByteUnit.TiB),
+      Map.entry("ti", ByteUnit.TiB),
       Map.entry("tb", ByteUnit.TiB),
+      Map.entry("tib", ByteUnit.TiB),
       Map.entry("p", ByteUnit.PiB),
-      Map.entry("pb", ByteUnit.PiB));
+      Map.entry("pi", ByteUnit.PiB),
+      Map.entry("pb", ByteUnit.PiB),
+      Map.entry("pib", ByteUnit.PiB));
   }
 
   private static final Pattern TIME_STRING_PATTERN = 
Pattern.compile("(-?[0-9]+)([a-z]+)?");
diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala 
b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index 61952c401853..1cc16f206146 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -134,10 +134,25 @@ class UtilsSuite extends SparkFunSuite with 
ResetSystemProperties {
 
     assert(Utils.byteStringAsBytes("1") === 1)
     assert(Utils.byteStringAsBytes("1k") === ByteUnit.KiB.toBytes(1))
+    assert(Utils.byteStringAsBytes("1Ki") === ByteUnit.KiB.toBytes(1))
+    assert(Utils.byteStringAsBytes("1KB") === ByteUnit.KiB.toBytes(1))
+    assert(Utils.byteStringAsBytes("1KiB") === ByteUnit.KiB.toBytes(1))
     assert(Utils.byteStringAsBytes("1m") === ByteUnit.MiB.toBytes(1))
+    assert(Utils.byteStringAsBytes("1Mi") === ByteUnit.MiB.toBytes(1))
+    assert(Utils.byteStringAsBytes("1Mb") === ByteUnit.MiB.toBytes(1))
+    assert(Utils.byteStringAsBytes("1MiB") === ByteUnit.MiB.toBytes(1))
     assert(Utils.byteStringAsBytes("1g") === ByteUnit.GiB.toBytes(1))
+    assert(Utils.byteStringAsBytes("1GI") === ByteUnit.GiB.toBytes(1))
+    assert(Utils.byteStringAsBytes("1gb") === ByteUnit.GiB.toBytes(1))
+    assert(Utils.byteStringAsBytes("1giB") === ByteUnit.GiB.toBytes(1))
     assert(Utils.byteStringAsBytes("1t") === ByteUnit.TiB.toBytes(1))
+    assert(Utils.byteStringAsBytes("1Ti") === ByteUnit.TiB.toBytes(1))
+    assert(Utils.byteStringAsBytes("1tb") === ByteUnit.TiB.toBytes(1))
+    assert(Utils.byteStringAsBytes("1Tib") === ByteUnit.TiB.toBytes(1))
     assert(Utils.byteStringAsBytes("1p") === ByteUnit.PiB.toBytes(1))
+    assert(Utils.byteStringAsBytes("1pi") === ByteUnit.PiB.toBytes(1))
+    assert(Utils.byteStringAsBytes("1pb") === ByteUnit.PiB.toBytes(1))
+    assert(Utils.byteStringAsBytes("1pib") === ByteUnit.PiB.toBytes(1))
 
     // Overflow handling, 1073741824p exceeds Long.MAX_VALUE if converted 
straight to Bytes
     // This demonstrates that we can have e.g 1024^3 PiB without overflowing.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to