[GitHub] [druid] FrankChen021 commented on a change in pull request #10635: Add SQL functions to format numbers into human readable format

GitBox Mon, 04 Jan 2021 21:41:23 -0800


FrankChen021 commented on a change in pull request #10635:
URL: https://github.com/apache/druid/pull/10635#discussion_r551727213




##########
File path: core/src/test/java/org/apache/druid/math/expr/FunctionTest.java
##########
@@ -519,6 +525,203 @@ public void testLeast()
     assertExpr("least(1, null, 'A')", "1");
   }
 
+  @Test
+  public void testSizeFormat()
+  {
+    assertExpr("binary_byte_format(-1024)", "-1.00 KiB");
+    assertExpr("binary_byte_format(1024)", "1.00 KiB");
+    assertExpr("binary_byte_format(1024*1024)", "1.00 MiB");
+    assertExpr("binary_byte_format(1024*1024*1024)", "1.00 GiB");
+    assertExpr("binary_byte_format(1024*1024*1024*1024)", "1.00 TiB");
+    assertExpr("binary_byte_format(1024*1024*1024*1024*1024)", "1.00 PiB");
+
+    assertExpr("decimal_byte_format(-1000)", "-1.00 KB");
+    assertExpr("decimal_byte_format(1000)", "1.00 KB");
+    assertExpr("decimal_byte_format(1000*1000)", "1.00 MB");
+    assertExpr("decimal_byte_format(1000*1000*1000)", "1.00 GB");
+    assertExpr("decimal_byte_format(1000*1000*1000*1000)", "1.00 TB");
+
+    assertExpr("decimal_format(-1000)", "-1.00 K");
+    assertExpr("decimal_format(1000)", "1.00 K");
+    assertExpr("decimal_format(1000*1000)", "1.00 M");
+    assertExpr("decimal_format(1000*1000*1000)", "1.00 G");
+    assertExpr("decimal_format(1000*1000*1000*1000)", "1.00 T");
+  }
+
+  @Test
+  public void testSizeFormatWithDifferentPrecision()
+  {
+    assertExpr("binary_byte_format(1024, 0)", "1 KiB");
+    assertExpr("binary_byte_format(1024*1024, 1)", "1.0 MiB");
+    assertExpr("binary_byte_format(1024*1024*1024, 2)", "1.00 GiB");
+    assertExpr("binary_byte_format(1024*1024*1024*1024, 3)", "1.000 TiB");
+
+    assertExpr("decimal_byte_format(1234, 0)", "1 KB");
+    assertExpr("decimal_byte_format(1234*1000, 1)", "1.2 MB");
+    assertExpr("decimal_byte_format(1234*1000*1000, 2)", "1.23 GB");
+    assertExpr("decimal_byte_format(1234*1000*1000*1000, 3)", "1.234 TB");
+
+    assertExpr("decimal_format(1234, 0)", "1 K");
+    assertExpr("decimal_format(1234*1000,1)", "1.2 M");
+    assertExpr("decimal_format(1234*1000*1000,2)", "1.23 G");
+    assertExpr("decimal_format(1234*1000*1000*1000,3)", "1.234 T");
+  }
+
+  @Test
+  public void testSizeFormatWithEdgeCases()
+  {
+    //a nonexist value is null which is treated as 0
+    assertExpr("binary_byte_format(nonexist)", "0 B");
+
+    //f = 12.34
+    assertExpr("binary_byte_format(f)", "12 B");
+
+    //nan is Double.NaN
+    assertExpr("binary_byte_format(nan)", "0 B");
+
+    //inf = Double.POSITIVE_INFINITY
+    assertExpr("binary_byte_format(inf)", "8.00 EiB");
+
+    //inf = Double.NEGATIVE_INFINITY
+    assertExpr("binary_byte_format(-inf)", "-8.00 EiB");
+
+    // o = 0
+    assertExpr("binary_byte_format(o)", "0 B");
+
+    // od = 0D
+    assertExpr("binary_byte_format(od)", "0 B");
+
+    // of = 0F
+    assertExpr("binary_byte_format(of)", "0 B");
+  }
+
+  @Test
+  public void testSizeForatInvalidArgumentType()
+  {
+    try {
+      //x = "foo"
+      Parser.parse("binary_byte_format(x)", ExprMacroTable.nil())
+            .eval(bindings);
+
+      //must not go to here
+      Assert.assertTrue(false);
+    }
+    catch (IAE e) {
+      Assert.assertEquals("Function[binary_byte_format] needs a number as its 
first argument", e.getMessage());
+    }
+
+    try {
+      //x = "foo"
+      Parser.parse("binary_byte_format(1024, x)", ExprMacroTable.nil())
+            .eval(bindings);
+
+      //must not go to here
+      Assert.assertTrue(false);
+    }
+    catch (IAE e) {
+      Assert.assertEquals("Function[binary_byte_format] needs an integer as 
its second argument", e.getMessage());
+    }
+
+    try {
+      //of = 0F
+      Parser.parse("binary_byte_format(1024, of)", ExprMacroTable.nil())
+            .eval(bindings);
+
+      //must not go to here
+      Assert.assertTrue(false);
+    }
+    catch (IAE e) {
+      Assert.assertEquals("Function[binary_byte_format] needs an integer as 
its second argument", e.getMessage());
+    }
+
+    try {
+      //of = 0F
+      Parser.parse("binary_byte_format(1024, nonexist)", ExprMacroTable.nil())
+            .eval(bindings);
+
+      //must not go to here
+      Assert.assertTrue(false);
+    }
+    catch (IAE e) {
+      Assert.assertEquals("Function[binary_byte_format] needs an integer as 
its second argument", e.getMessage());
+    }
+  }
+
+  @Test
+  public void testSizeFormatInvalidPrecision()
+  {
+    try {
+      Parser.parse("binary_byte_format(1024, maxLong)", ExprMacroTable.nil())
+            .eval(bindings);
+      Assert.assertTrue(false);
+    }
+    catch (IAE e) {
+      Assert.assertEquals("Given precision[9223372036854775807] of 
Function[binary_byte_format] must be in the range of [0,3]", e.getMessage());
+    }
+
+    try {
+      Parser.parse("binary_byte_format(1024, minLong)", ExprMacroTable.nil())
+            .eval(bindings);
+      Assert.assertTrue(false);
+    }
+    catch (IAE e) {
+      Assert.assertEquals("Given precision[-9223372036854775808] of 
Function[binary_byte_format] must be in the range of [0,3]", e.getMessage());
+    }
+
+    try {
+      Parser.parse("binary_byte_format(1024, -1)", ExprMacroTable.nil())
+            .eval(bindings);
+      Assert.assertTrue(false);
+    }
+    catch (IAE e) {
+      Assert.assertEquals("Given precision[-1] of Function[binary_byte_format] 
must be in the range of [0,3]", e.getMessage());
+    }
+
+    try {
+      Parser.parse("binary_byte_format(1024, 4)", ExprMacroTable.nil())
+            .eval(bindings);
+      Assert.assertTrue(false);
+    }
+    catch (IAE e) {
+      Assert.assertEquals("Given precision[4] of Function[binary_byte_format] 
must be in the range of [0,3]", e.getMessage());
+    }
+  }
+
+  @Test
+  public void testSizeFormatInvalidArgumentSize()
+  {
+    expectedException.expect(IAE.class);
+    expectedException.expectMessage("Function[binary_byte_format] needs 1 or 2 
arguments");
+    Parser.parse("binary_byte_format(1024, 2, 3)", ExprMacroTable.nil())
+          .eval(bindings);
+  }
+
+  @Test
+  public void testSizeFormatWithNoDefaultValueForNull()
+  {
+    NullHandling.updateForTests(false);

Review comment:
       MySQL ships a similar function 
[format_bytes](https://dev.mysql.com/doc/refman/5.7/en/sys-format-bytes.html)
   
   I struggled the two different approaches at first. And at last I chose to do 
it by 3 different functions. The reasons are,
   
   1. different function names are more meaningful than different arguments for 
one function. Since there're 3 different unit systems in this PR, how to name 
them in a short enough way and without ambiguity is a big challenge. For 
example,  `FORMAT(number, 'si')`, `FORMAT(number, 'dec')`, `si` and `dec` are 
standard abbreviation and short enough but they're hard to understand; 
`FORMAT(number, 'binary_byte')`, it's clear enough, but it's not so simple 
compared to `binary_byte_format(number)`
   
   2. at the underlying layer, there are always different format functions, and 
if we provide one function at the user side, we have to do some checks on the 
format specifier and dispatch calls to those different functions. It's a little 
bit simple if different functions are provided.
   
   But as you mentioned, there are also some drawbacks in this way. If the 
standard is to keep consistent with other databases or keep less numbers of 
functions exposed to users, maybe we need to combine these functions together.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] [druid] FrankChen021 commented on a change in pull request #10635: Add SQL functions to format numbers into human readable format

Reply via email to