This is an automated email from the ASF dual-hosted git repository.
zhehu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new e59ed9537e [CALCITE-6815] Support bin function for Spark and Hive
Library
e59ed9537e is described below
commit e59ed9537e50a4d55ee1c23209102983882a1f49
Author: xuyu <[email protected]>
AuthorDate: Thu Feb 6 10:19:48 2025 +0800
[CALCITE-6815] Support bin function for Spark and Hive Library
---
.../calcite/adapter/enumerable/RexImpTable.java | 2 ++
.../org/apache/calcite/runtime/SqlFunctions.java | 17 +++++++++
.../calcite/sql/fun/SqlLibraryOperators.java | 7 ++++
.../org/apache/calcite/util/BuiltInMethod.java | 1 +
site/_docs/reference.md | 5 +--
.../org/apache/calcite/test/SqlOperatorTest.java | 41 ++++++++++++++++++++++
6 files changed, 71 insertions(+), 2 deletions(-)
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index ebc9c6d9da..5a9d4181c7 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -159,6 +159,7 @@
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ATAND;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ATANH;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.BASE64;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.BIN;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.BITAND_AGG;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.BITOR_AGG;
import static
org.apache.calcite.sql.fun.SqlLibraryOperators.BIT_COUNT_BIG_QUERY;
@@ -694,6 +695,7 @@ void populate1() {
defineMethod(HEX, BuiltInMethod.HEX.method, NullPolicy.STRICT);
defineMethod(TO_HEX, BuiltInMethod.TO_HEX.method, NullPolicy.STRICT);
defineMethod(FROM_HEX, BuiltInMethod.FROM_HEX.method, NullPolicy.STRICT);
+ defineMethod(BIN, BuiltInMethod.BIN.method, NullPolicy.STRICT);
defineMethod(MD5, BuiltInMethod.MD5.method, NullPolicy.STRICT);
defineMethod(SHA1, BuiltInMethod.SHA1.method, NullPolicy.STRICT);
defineMethod(SHA256, BuiltInMethod.SHA256.method, NullPolicy.STRICT);
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index eeee51eecd..e7ea146118 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -369,6 +369,23 @@ public static String hex(String value) {
return Hex.encodeHexString(value.getBytes(UTF_8));
}
+ /** SQL BIN(long) function. */
+ public static String bin(long value) {
+ int zeros = Long.numberOfLeadingZeros(value);
+ if (zeros == Long.SIZE) {
+ return "0";
+ } else {
+ int length = Long.SIZE - zeros;
+ byte[] bytes = new byte[length];
+ for (int index = length - 1; index >= 0; index--) {
+ bytes[index] = (byte) ((value & 0x1) == 1 ? '1' : '0');
+ value >>>= 1;
+ }
+ //CHECKSTYLE: IGNORE 1
+ return new String(bytes, UTF_8);
+ }
+ }
+
/** SQL MD5(string) function. */
public static String md5(String string) {
return DigestUtils.md5Hex(string.getBytes(UTF_8));
diff --git
a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index b3fa3585aa..a1faaba871 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -2504,6 +2504,13 @@ private static RelDataType
deriveTypeMapFromEntries(SqlOperatorBinding opBinding
OperandTypes.INTEGER,
SqlFunctionCategory.NUMERIC);
+ @LibraryOperator(libraries = {SPARK, HIVE})
+ public static final SqlFunction BIN =
+ SqlBasicFunction.create("BIN",
+ ReturnTypes.VARCHAR_NULLABLE,
+ OperandTypes.NUMERIC,
+ SqlFunctionCategory.STRING);
+
@LibraryOperator(libraries = {BIG_QUERY, MYSQL, POSTGRESQL, SPARK, HIVE})
public static final SqlFunction MD5 =
SqlBasicFunction.create("MD5",
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index ba6b13b457..82be1a9fe6 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -438,6 +438,7 @@ public enum BuiltInMethod {
HEX(SqlFunctions.class, "hex", String.class),
TO_HEX(SqlFunctions.class, "toHex", ByteString.class),
FROM_HEX(SqlFunctions.class, "fromHex", String.class),
+ BIN(SqlFunctions.class, "bin", long.class),
MD5(SqlFunctions.class, "md5", String.class),
SHA1(SqlFunctions.class, "sha1", String.class),
SHA256(SqlFunctions.class, "sha256", String.class),
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index 2fd932e57f..99f3b775bf 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -2917,7 +2917,7 @@ ### Dialect-specific Operators
| p | LOG([, base ], numeric1 ) | Returns the logarithm
of *numeric1* to base *base*, or base 10 if *base* is not present, or error
if *numeric1* is 0 or negative
| m s | LOG2(numeric) | Returns the base 2
logarithm of *numeric*
| s | LOG1P(numeric) | Returns the natural
logarithm of 1 plus *numeric*
-| b o p r s h | LPAD(string, length [, pattern ]) | Returns a string or
bytes value that consists of *string* prepended to *length* with *pattern*
+| b o p r s h | LPAD(string, length [, pattern ]) | Returns a string or
bytes value that consists of *string* prepended to *length* with *pattern*
| b | TO_BASE32(string) | Converts the *string*
to base-32 encoded form and returns an encoded string
| b | FROM_BASE32(string) | Returns the decoded
result of a base-32 *string* as a string
| m | TO_BASE64(string) | Converts the *string*
to base-64 encoded form and returns an encoded string
@@ -2927,6 +2927,7 @@ ### Dialect-specific Operators
| h s | HEX(string) | Converts *string* into
a hexadecimal varchar
| b | TO_HEX(binary) | Converts *binary* into
a hexadecimal varchar
| b | FROM_HEX(varchar) | Converts a
hexadecimal-encoded *varchar* into bytes
+| s h | BIN(BIGINT) | Converts a *bigint*
into its binary (base-2) string representation; negative values are rendered as 64-bit two's complement
| b o p r s h | LTRIM(string) | Returns *string* with
all blanks removed from the start
| s | MAP() | Returns an empty map
| s | MAP(key, value [, key, value]*) | Returns a map with the
given *key*/*value* pairs
@@ -2939,7 +2940,7 @@ ### Dialect-specific Operators
| s | MAP_FROM_ENTRIES(arrayOfRows) | Returns a map created
from an array of rows with two fields. Note that the number of fields in a row
must be 2. Note that calcite is using the LAST_WIN strategy
| s | STR_TO_MAP(string [, stringDelimiter [, keyValueDelimiter]]) | Returns a
map after splitting the *string* into key/value pairs using delimiters. Default
delimiters are ',' for *stringDelimiter* and ':' for *keyValueDelimiter*. Note
that calcite is using the LAST_WIN strategy
| s | SUBSTRING_INDEX(string, delim, count) | Returns the substring
from *string* before *count* occurrences of the delimiter *delim*. If *count*
is positive, everything to the left of the final delimiter (counting from the
left) is returned. If *count* is negative, everything to the right of the final
delimiter (counting from the right) is returned. The function substring_index
performs a case-sensitive match when searching for *delim*.
-| b m p r s h | MD5(string) | Calculates an MD5
128-bit checksum of *string* and returns it as a hex string
+| b m p r s h | MD5(string) | Calculates an MD5
128-bit checksum of *string* and returns it as a hex string
| m | MONTHNAME(date) | Returns the name, in
the connection's locale, of the month in *datetime*; for example, for a locale
of en, it will return 'February' for both DATE '2020-02-10' and TIMESTAMP
'2020-02-10 10:10:10', and for a locale of zh, it will return '二月'
| o r s | NVL(value1, value2) | Returns *value1* if
*value1* is not null, otherwise *value2*
| o r s | NVL2(value1, value2, value3) | Returns *value2* if
*value1* is not null, otherwise *value3*
diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
index dab42ffdac..5b2521112f 100644
--- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
+++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
@@ -5756,6 +5756,47 @@ void testBitGetFunc(SqlOperatorFixture f, String
functionName) {
f.checkNull("from_base32(cast (null as varchar))");
}
+ /** Test case for
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-6815">[CALCITE-6815]
+ * Add bin function (enabled in Hive and Spark library)</a>. */
+ @Test void testBin() {
+// final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.BIN);
+ final SqlOperatorFixture f0 =
Fixtures.forOperators(true).setFor(SqlLibraryOperators.BIN);
+ f0.checkFails("^bin(x'')^",
+ "No match found for function signature BIN\\(<BINARY>\\)",
+ false);
+ final List<SqlLibrary> libraries =
+ ImmutableList.of(SqlLibrary.SPARK, SqlLibrary.HIVE);
+ final Consumer<SqlOperatorFixture> consumer = f -> {
+ f.checkString("bin(12)",
+ "1100",
+ "VARCHAR NOT NULL");
+ f.checkString("bin(1)",
+ "1",
+ "VARCHAR NOT NULL");
+ f.checkString("bin(01)",
+ "1",
+ "VARCHAR NOT NULL");
+ f.checkString("bin(000)",
+ "0",
+ "VARCHAR NOT NULL");
+ f.checkString("bin(-000)",
+ "0",
+ "VARCHAR NOT NULL");
+ f.checkString("bin(-11)",
+ "1111111111111111111111111111111111111111111111111111111111110101",
+ "VARCHAR NOT NULL");
+ f.checkString("bin(-1)",
+ "1111111111111111111111111111111111111111111111111111111111111111",
+ "VARCHAR NOT NULL");
+ f.checkString("bin(-001)",
+ "1111111111111111111111111111111111111111111111111111111111111111",
+ "VARCHAR NOT NULL");
+ f.checkNull("bin(null)");
+ };
+ f0.forEachLibrary(libraries, consumer);
+ }
+
@Test void testMd5() {
final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.MD5);
f0.checkFails("^md5(x'')^",