FrankChen021 commented on a change in pull request #10635:
URL: https://github.com/apache/druid/pull/10635#discussion_r551727213
##########
File path: core/src/test/java/org/apache/druid/math/expr/FunctionTest.java
##########
@@ -519,6 +525,203 @@ public void testLeast()
assertExpr("least(1, null, 'A')", "1");
}
+ @Test
+ public void testSizeFormat()
+ {
+ assertExpr("binary_byte_format(-1024)", "-1.00 KiB");
+ assertExpr("binary_byte_format(1024)", "1.00 KiB");
+ assertExpr("binary_byte_format(1024*1024)", "1.00 MiB");
+ assertExpr("binary_byte_format(1024*1024*1024)", "1.00 GiB");
+ assertExpr("binary_byte_format(1024*1024*1024*1024)", "1.00 TiB");
+ assertExpr("binary_byte_format(1024*1024*1024*1024*1024)", "1.00 PiB");
+
+ assertExpr("decimal_byte_format(-1000)", "-1.00 KB");
+ assertExpr("decimal_byte_format(1000)", "1.00 KB");
+ assertExpr("decimal_byte_format(1000*1000)", "1.00 MB");
+ assertExpr("decimal_byte_format(1000*1000*1000)", "1.00 GB");
+ assertExpr("decimal_byte_format(1000*1000*1000*1000)", "1.00 TB");
+
+ assertExpr("decimal_format(-1000)", "-1.00 K");
+ assertExpr("decimal_format(1000)", "1.00 K");
+ assertExpr("decimal_format(1000*1000)", "1.00 M");
+ assertExpr("decimal_format(1000*1000*1000)", "1.00 G");
+ assertExpr("decimal_format(1000*1000*1000*1000)", "1.00 T");
+ }
+
+ @Test
+ public void testSizeFormatWithDifferentPrecision()
+ {
+ assertExpr("binary_byte_format(1024, 0)", "1 KiB");
+ assertExpr("binary_byte_format(1024*1024, 1)", "1.0 MiB");
+ assertExpr("binary_byte_format(1024*1024*1024, 2)", "1.00 GiB");
+ assertExpr("binary_byte_format(1024*1024*1024*1024, 3)", "1.000 TiB");
+
+ assertExpr("decimal_byte_format(1234, 0)", "1 KB");
+ assertExpr("decimal_byte_format(1234*1000, 1)", "1.2 MB");
+ assertExpr("decimal_byte_format(1234*1000*1000, 2)", "1.23 GB");
+ assertExpr("decimal_byte_format(1234*1000*1000*1000, 3)", "1.234 TB");
+
+ assertExpr("decimal_format(1234, 0)", "1 K");
+ assertExpr("decimal_format(1234*1000,1)", "1.2 M");
+ assertExpr("decimal_format(1234*1000*1000,2)", "1.23 G");
+ assertExpr("decimal_format(1234*1000*1000*1000,3)", "1.234 T");
+ }
+
+ @Test
+ public void testSizeFormatWithEdgeCases()
+ {
+ //a nonexist value is null which is treated as 0
+ assertExpr("binary_byte_format(nonexist)", "0 B");
+
+ //f = 12.34
+ assertExpr("binary_byte_format(f)", "12 B");
+
+ //nan is Double.NaN
+ assertExpr("binary_byte_format(nan)", "0 B");
+
+ //inf = Double.POSITIVE_INFINITY
+ assertExpr("binary_byte_format(inf)", "8.00 EiB");
+
+ //inf = Double.NEGATIVE_INFINITY
+ assertExpr("binary_byte_format(-inf)", "-8.00 EiB");
+
+ // o = 0
+ assertExpr("binary_byte_format(o)", "0 B");
+
+ // od = 0D
+ assertExpr("binary_byte_format(od)", "0 B");
+
+ // of = 0F
+ assertExpr("binary_byte_format(of)", "0 B");
+ }
+
+ @Test
+ public void testSizeForatInvalidArgumentType()
+ {
+ try {
+ //x = "foo"
+ Parser.parse("binary_byte_format(x)", ExprMacroTable.nil())
+ .eval(bindings);
+
+ //must not go to here
+ Assert.assertTrue(false);
+ }
+ catch (IAE e) {
+ Assert.assertEquals("Function[binary_byte_format] needs a number as its
first argument", e.getMessage());
+ }
+
+ try {
+ //x = "foo"
+ Parser.parse("binary_byte_format(1024, x)", ExprMacroTable.nil())
+ .eval(bindings);
+
+ //must not go to here
+ Assert.assertTrue(false);
+ }
+ catch (IAE e) {
+ Assert.assertEquals("Function[binary_byte_format] needs an integer as
its second argument", e.getMessage());
+ }
+
+ try {
+ //of = 0F
+ Parser.parse("binary_byte_format(1024, of)", ExprMacroTable.nil())
+ .eval(bindings);
+
+ //must not go to here
+ Assert.assertTrue(false);
+ }
+ catch (IAE e) {
+ Assert.assertEquals("Function[binary_byte_format] needs an integer as
its second argument", e.getMessage());
+ }
+
+ try {
+ //of = 0F
+ Parser.parse("binary_byte_format(1024, nonexist)", ExprMacroTable.nil())
+ .eval(bindings);
+
+ //must not go to here
+ Assert.assertTrue(false);
+ }
+ catch (IAE e) {
+ Assert.assertEquals("Function[binary_byte_format] needs an integer as
its second argument", e.getMessage());
+ }
+ }
+
+ @Test
+ public void testSizeFormatInvalidPrecision()
+ {
+ try {
+ Parser.parse("binary_byte_format(1024, maxLong)", ExprMacroTable.nil())
+ .eval(bindings);
+ Assert.assertTrue(false);
+ }
+ catch (IAE e) {
+ Assert.assertEquals("Given precision[9223372036854775807] of
Function[binary_byte_format] must be in the range of [0,3]", e.getMessage());
+ }
+
+ try {
+ Parser.parse("binary_byte_format(1024, minLong)", ExprMacroTable.nil())
+ .eval(bindings);
+ Assert.assertTrue(false);
+ }
+ catch (IAE e) {
+ Assert.assertEquals("Given precision[-9223372036854775808] of
Function[binary_byte_format] must be in the range of [0,3]", e.getMessage());
+ }
+
+ try {
+ Parser.parse("binary_byte_format(1024, -1)", ExprMacroTable.nil())
+ .eval(bindings);
+ Assert.assertTrue(false);
+ }
+ catch (IAE e) {
+ Assert.assertEquals("Given precision[-1] of Function[binary_byte_format]
must be in the range of [0,3]", e.getMessage());
+ }
+
+ try {
+ Parser.parse("binary_byte_format(1024, 4)", ExprMacroTable.nil())
+ .eval(bindings);
+ Assert.assertTrue(false);
+ }
+ catch (IAE e) {
+ Assert.assertEquals("Given precision[4] of Function[binary_byte_format]
must be in the range of [0,3]", e.getMessage());
+ }
+ }
+
+ @Test
+ public void testSizeFormatInvalidArgumentSize()
+ {
+ expectedException.expect(IAE.class);
+ expectedException.expectMessage("Function[binary_byte_format] needs 1 or 2
arguments");
+ Parser.parse("binary_byte_format(1024, 2, 3)", ExprMacroTable.nil())
+ .eval(bindings);
+ }
+
+ @Test
+ public void testSizeFormatWithNoDefaultValueForNull()
+ {
+ NullHandling.updateForTests(false);
Review comment:
MySQL ships a similar function
[format_bytes](https://dev.mysql.com/doc/refman/5.7/en/sys-format-bytes.html)
I struggled the two different approaches at first. And at last I chose to do
it by 3 different functions. The reasons are,
1. different function names are more meaningful than different arguments for
one function. Since there're 3 different unit systems in this PR, how to name
them in a short enough way and without ambiguity is a big challenge. For
example, `FORMAT(number, 'si')`, `FORMAT(number, 'dec')`, `si` and `dec` are
standard abbreviation and short enough but they're hard to understand;
`FORMAT(number, 'binary_byte')`, it's clear enough, but it's not so simple
compared to `binary_byte_format(number)`
2. at the underlying layer, there are always different format functions, and
if we provide one function at the user side, we have to do some checks on the
format specifier and dispatch calls to those different functions. It's a little
bit simple if different functions are provided.
But as you mentioned, there are also some drawbacks in this way. If the
standard is to keep consistent with other databases or keep less numbers of
functions exposed to users, maybe we need to combine these functions together.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]