This is an automated email from the ASF dual-hosted git repository.
ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new c960fb5f1f ARROW-15661: [Gandiva][C++] Add SHA512 function (#12404)
c960fb5f1f is described below
commit c960fb5f1f6afbf024b83015f4a0038bedd64d1e
Author: Johnnathan Almeida <[email protected]>
AuthorDate: Wed Jun 22 09:40:30 2022 -0300
ARROW-15661: [Gandiva][C++] Add SHA512 function (#12404)
This PR implements the SHA512 hash functions on the Gandiva side.
It adds the following signatures:
FunctionSignature{name =hashSHA512, return type =String, param types
=[int8]}
FunctionSignature{name =hashSHA512, return type =String, param types
=[int16]}
FunctionSignature{name =hashSHA512, return type =String, param types
=[int32]}
FunctionSignature{name =hashSHA512, return type =String, param types
=[int64]}
FunctionSignature{name =hashSHA512, return type =String, param types
=[uint8]}
FunctionSignature{name =hashSHA512, return type =String, param types
=[uint16]}
FunctionSignature{name =hashSHA512, return type =String, param types
=[uint32]}
FunctionSignature{name =hashSHA512, return type =String, param types
=[uint64]}
FunctionSignature{name =hashSHA512, return type =String, param types
=[float]}
FunctionSignature{name =hashSHA512, return type =String, param types
=[double]}
FunctionSignature{name =hashSHA512, return type =String, param types
=[boolean]}
FunctionSignature{name =hashSHA512, return type =String, param types
=[date64]}
FunctionSignature{name =hashSHA512, return type =String, param types
=[date32]}
FunctionSignature{name =hashSHA512, return type =String, param types
=[time32]}
FunctionSignature{name =hashSHA512, return type =String, param types
=[timestamp]}
FunctionSignature{name =hashSHA512, return type =String, param types
=[string]}
Authored-by: Johnnathan <[email protected]>
Signed-off-by: Pindikura Ravindra <[email protected]>
---
cpp/src/gandiva/function_registry_common.h | 10 ++
cpp/src/gandiva/function_registry_hash.cc | 5 +
cpp/src/gandiva/gdv_function_stubs.cc | 1 -
cpp/src/gandiva/gdv_function_stubs.h | 133 ++++++++++++++++
cpp/src/gandiva/gdv_hash_function_stubs.cc | 247 +++++++++++++++++++++++++++++
cpp/src/gandiva/hash_utils.cc | 10 ++
cpp/src/gandiva/hash_utils.h | 5 +
cpp/src/gandiva/hash_utils_test.cc | 86 ++++++++++
cpp/src/gandiva/tests/hash_test.cc | 227 ++++++++++++++++++++++++++
9 files changed, 723 insertions(+), 1 deletion(-)
diff --git a/cpp/src/gandiva/function_registry_common.h
b/cpp/src/gandiva/function_registry_common.h
index 9ec9be1b54..6fa51b498d 100644
--- a/cpp/src/gandiva/function_registry_common.h
+++ b/cpp/src/gandiva/function_registry_common.h
@@ -235,6 +235,16 @@ typedef std::unordered_map<const FunctionSignature*, const
NativeFunction*, KeyH
kResultNullNever, ARROW_STRINGIFY(gdv_fn_sha1_##TYPE), \
NativeFunction::kNeedsContext |
NativeFunction::kCanReturnErrors)
+// HashSHA512 functions that :
+// - NULL handling is of type NULL_NEVER
+// - can return errors
+//
+// The function name includes the base name & input type name.
gdv_fn_sha512_float64
+#define HASH_SHA512_NULL_NEVER(NAME, ALIASES, TYPE)
\
+ NativeFunction(#NAME, {"sha512"}, DataTypeVector{TYPE()}, utf8(),
kResultNullNever, \
+ ARROW_STRINGIFY(gdv_fn_sha512_##TYPE),
\
+ NativeFunction::kNeedsContext |
NativeFunction::kCanReturnErrors)
+
// HashSHA256 functions that :
// - NULL handling is of type NULL_NEVER
// - can return errors
diff --git a/cpp/src/gandiva/function_registry_hash.cc
b/cpp/src/gandiva/function_registry_hash.cc
index b12ac14725..7fd8751f40 100644
--- a/cpp/src/gandiva/function_registry_hash.cc
+++ b/cpp/src/gandiva/function_registry_hash.cc
@@ -38,6 +38,9 @@ namespace gandiva {
#define HASH_SHA256_NULL_NEVER_FN(name, ALIASES) \
NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH_SHA256_NULL_NEVER, name, ALIASES)
+#define HASH_SHA512_NULL_NEVER_FN(name, ALIASES) \
+ NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH_SHA512_NULL_NEVER, name, ALIASES)
+
#define HASH_MD5_NULL_NEVER_FN(name, ALIASES) \
NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH_MD5_NULL_NEVER, name, ALIASES)
@@ -60,6 +63,8 @@ std::vector<NativeFunction> GetHashFunctionRegistry() {
HASH_SHA256_NULL_NEVER_FN(hashSHA256, {}),
+ HASH_SHA512_NULL_NEVER_FN(hashSHA512, {}),
+
HASH_MD5_NULL_NEVER_FN(hashMD5, {})};
return hash_fn_registry_;
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc
b/cpp/src/gandiva/gdv_function_stubs.cc
index cf1daba81b..27900c14af 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -26,7 +26,6 @@
#include "arrow/util/base64.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/double_conversion.h"
-#include "arrow/util/string_view.h"
#include "arrow/util/value_parsing.h"
#include "gandiva/encrypt_utils.h"
diff --git a/cpp/src/gandiva/gdv_function_stubs.h
b/cpp/src/gandiva/gdv_function_stubs.h
index d39d294042..a8ce58698e 100644
--- a/cpp/src/gandiva/gdv_function_stubs.h
+++ b/cpp/src/gandiva/gdv_function_stubs.h
@@ -105,6 +105,11 @@ GANDIVA_EXPORT
const char* gdv_fn_castVARBINARY_int64_int64(int64_t context, gdv_int64 value,
int64_t out_len, int32_t*
out_length);
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_decimal128(int64_t context, int64_t x_high, uint64_t
x_low,
+ int32_t x_precision, int32_t x_scale,
+ gdv_boolean x_isvalid, int32_t*
out_length);
+
GANDIVA_EXPORT
const char* gdv_fn_sha256_decimal128(int64_t context, int64_t x_high, uint64_t
x_low,
int32_t x_precision, int32_t x_scale,
@@ -200,4 +205,132 @@ GANDIVA_EXPORT
const char* gdv_mask_last_n_utf8_int32(int64_t context, const char* data,
int32_t data_len, int32_t n_to_mask,
int32_t* out_len);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_int8(int64_t context, gdv_int8 value, bool validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_int16(int64_t context, gdv_int16 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_int32(int64_t context, gdv_int32 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_int64(int64_t context, gdv_int64 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_uint8(int64_t context, gdv_uint8 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_uint16(int64_t context, gdv_uint16 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_uint32(int64_t context, gdv_uint32 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_uint64(int64_t context, gdv_uint64 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_float32(int64_t context, gdv_float32 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_float64(int64_t context, gdv_float64 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_boolean(int64_t context, gdv_boolean value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_date64(int64_t context, gdv_date64 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_date32(int64_t context, gdv_date32 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_time32(int64_t context, gdv_time32 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_timestamp(int64_t context, gdv_timestamp value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_utf8(int64_t context, gdv_utf8 value, int32_t
value_length,
+ bool value_validity, int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_int8(int64_t context, gdv_int8 value, bool validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_int16(int64_t context, gdv_int16 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_int32(int64_t context, gdv_int32 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_int64(int64_t context, gdv_int64 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_uint8(int64_t context, gdv_uint8 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_uint16(int64_t context, gdv_uint16 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_uint32(int64_t context, gdv_uint32 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_uint64(int64_t context, gdv_uint64 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_float32(int64_t context, gdv_float32 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_float64(int64_t context, gdv_float64 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_boolean(int64_t context, gdv_boolean value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_date64(int64_t context, gdv_date64 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_date32(int64_t context, gdv_date32 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_time32(int64_t context, gdv_time32 value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_timestamp(int64_t context, gdv_timestamp value, bool
validity,
+ int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_utf8(int64_t context, gdv_utf8 value, int32_t
value_length,
+ bool value_validity, int32_t* out_length);
}
diff --git a/cpp/src/gandiva/gdv_hash_function_stubs.cc
b/cpp/src/gandiva/gdv_hash_function_stubs.cc
index 235e8a8961..018b0fbb70 100644
--- a/cpp/src/gandiva/gdv_hash_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_hash_function_stubs.cc
@@ -76,6 +76,31 @@ extern "C" {
return gandiva::gdv_sha1_hash(context, value, value_length, out_length); \
}
+#define SHA512_HASH_FUNCTION(TYPE)
\
+ GANDIVA_EXPORT
\
+ const char* gdv_fn_sha512_##TYPE(int64_t context, gdv_##TYPE value, bool
validity, \
+ int32_t* out_length) {
\
+ if (!validity) {
\
+ return gandiva::gdv_sha512_hash(context, NULLPTR, 0, out_length);
\
+ }
\
+ auto value_as_long = gandiva::gdv_double_to_long((double)value);
\
+ const char* result = gandiva::gdv_sha512_hash(context, &value_as_long,
\
+ sizeof(value_as_long),
out_length); \
+ return result;
\
+ }
+
+#define SHA512_HASH_FUNCTION_BUF(TYPE)
\
+ GANDIVA_EXPORT
\
+ const char* gdv_fn_sha512_##TYPE(int64_t context, gdv_##TYPE value,
\
+ int32_t value_length, bool value_validity,
\
+ int32_t* out_length) {
\
+ if (!value_validity) {
\
+ return gandiva::gdv_sha512_hash(context, NULLPTR, 0, out_length);
\
+ }
\
+
\
+ return gandiva::gdv_sha512_hash(context, value, value_length, out_length);
\
+ }
+
#define SHA256_HASH_FUNCTION(TYPE)
\
GANDIVA_EXPORT
\
const char* gdv_fn_sha256_##TYPE(int64_t context, gdv_##TYPE value, bool
validity, \
@@ -127,6 +152,9 @@ extern "C" {
SHA_NUMERIC_BOOL_DATE_PARAMS(MD5_HASH_FUNCTION)
SHA_VAR_LEN_PARAMS(MD5_HASH_FUNCTION_BUF)
+SHA_NUMERIC_BOOL_DATE_PARAMS(SHA512_HASH_FUNCTION)
+SHA_VAR_LEN_PARAMS(SHA512_HASH_FUNCTION_BUF)
+
SHA_NUMERIC_BOOL_DATE_PARAMS(SHA256_HASH_FUNCTION)
SHA_VAR_LEN_PARAMS(SHA256_HASH_FUNCTION_BUF)
@@ -149,6 +177,18 @@ const char* gdv_fn_md5_decimal128(int64_t context, int64_t
x_high, uint64_t x_lo
return gandiva::gdv_md5_hash(context, decimal_128.ToBytes().data(), 16,
out_length);
}
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_decimal128(int64_t context, int64_t x_high, uint64_t
x_low,
+ int32_t /*x_precision*/, int32_t
/*x_scale*/,
+ gdv_boolean x_isvalid, int32_t*
out_length) {
+ if (!x_isvalid) {
+ return gandiva::gdv_sha512_hash(context, NULLPTR, 0, out_length);
+ }
+
+ const gandiva::BasicDecimal128 decimal_128(x_high, x_low);
+ return gandiva::gdv_sha512_hash(context, decimal_128.ToBytes().data(), 16,
out_length);
+}
+
GANDIVA_EXPORT
const char* gdv_fn_sha256_decimal128(int64_t context, int64_t x_high, uint64_t
x_low,
int32_t /*x_precision*/, int32_t
/*x_scale*/,
@@ -560,6 +600,197 @@ void ExportedHashFunctions::AddMappings(Engine* engine)
const {
types->i8_ptr_type() /*return_type*/, args,
reinterpret_cast<void*>(gdv_fn_sha1_binary));
+ // gdv_fn_sha512_int8
+ args = {
+ types->i64_type(), // context
+ types->i8_type(), // value
+ types->i1_type(), // validity
+ types->i32_ptr_type() // out_length
+ };
+ engine->AddGlobalMappingForFunc("gdv_fn_sha512_int8",
+ types->i8_ptr_type() /*return_type*/, args,
+ reinterpret_cast<void*>(gdv_fn_sha512_int8));
+
+ // gdv_fn_sha512_int16
+ args = {
+ types->i64_type(), // context
+ types->i16_type(), // value
+ types->i1_type(), // validity
+ types->i32_ptr_type() // out_length
+ };
+ engine->AddGlobalMappingForFunc("gdv_fn_sha512_int16",
+ types->i8_ptr_type() /*return_type*/, args,
+
reinterpret_cast<void*>(gdv_fn_sha512_int16));
+
+ // gdv_fn_sha512_int32
+ args = {
+ types->i64_type(), // context
+ types->i32_type(), // value
+ types->i1_type(), // validity
+ types->i32_ptr_type() // out_length
+ };
+ engine->AddGlobalMappingForFunc("gdv_fn_sha512_int32",
+ types->i8_ptr_type() /*return_type*/, args,
+
reinterpret_cast<void*>(gdv_fn_sha512_int32));
+
+ // gdv_fn_sha512_int64
+ args = {
+ types->i64_type(), // context
+ types->i64_type(), // value
+ types->i1_type(), // validity
+ types->i32_ptr_type() // out_length
+ };
+ engine->AddGlobalMappingForFunc("gdv_fn_sha512_int64",
+ types->i8_ptr_type() /*return_type*/, args,
+
reinterpret_cast<void*>(gdv_fn_sha512_int64));
+
+ // gdv_fn_sha512_uint8
+ args = {
+ types->i64_type(), // context
+ types->i8_type(), // value
+ types->i1_type(), // validity
+ types->i32_ptr_type() // out_length
+ };
+ engine->AddGlobalMappingForFunc("gdv_fn_sha512_uint8",
+ types->i8_ptr_type() /*return_type*/, args,
+
reinterpret_cast<void*>(gdv_fn_sha512_uint8));
+
+ // gdv_fn_sha512_uint16
+ args = {
+ types->i64_type(), // context
+ types->i16_type(), // value
+ types->i1_type(), // validity
+ types->i32_ptr_type() // out_length
+ };
+ engine->AddGlobalMappingForFunc("gdv_fn_sha512_uint16",
+ types->i8_ptr_type() /*return_type*/, args,
+
reinterpret_cast<void*>(gdv_fn_sha512_uint16));
+
+ // gdv_fn_sha512_uint32
+ args = {
+ types->i64_type(), // context
+ types->i32_type(), // value
+ types->i1_type(), // validity
+ types->i32_ptr_type() // out_length
+ };
+ engine->AddGlobalMappingForFunc("gdv_fn_sha512_uint32",
+ types->i8_ptr_type() /*return_type*/, args,
+
reinterpret_cast<void*>(gdv_fn_sha512_uint32));
+
+ // gdv_fn_sha512_uint64
+ args = {
+ types->i64_type(), // context
+ types->i64_type(), // value
+ types->i1_type(), // validity
+ types->i32_ptr_type() // out_length
+ };
+ engine->AddGlobalMappingForFunc("gdv_fn_sha512_uint64",
+ types->i8_ptr_type() /*return_type*/, args,
+
reinterpret_cast<void*>(gdv_fn_sha512_uint64));
+
+ // gdv_fn_sha512_float32
+ args = {
+ types->i64_type(), // context
+ types->float_type(), // value
+ types->i1_type(), // validity
+ types->i32_ptr_type() // out_length
+ };
+ engine->AddGlobalMappingForFunc("gdv_fn_sha512_float32",
+ types->i8_ptr_type() /*return_type*/, args,
+
reinterpret_cast<void*>(gdv_fn_sha512_float32));
+
+ // gdv_fn_sha512_float64
+ args = {
+ types->i64_type(), // context
+ types->double_type(), // value
+ types->i1_type(), // validity
+ types->i32_ptr_type() // out_length
+ };
+ engine->AddGlobalMappingForFunc("gdv_fn_sha512_float64",
+ types->i8_ptr_type() /*return_type*/, args,
+
reinterpret_cast<void*>(gdv_fn_sha512_float64));
+
+ // gdv_fn_sha512_boolean
+ args = {
+ types->i64_type(), // context
+ types->i1_type(), // value
+ types->i1_type(), // validity
+ types->i32_ptr_type() // out_length
+ };
+ engine->AddGlobalMappingForFunc("gdv_fn_sha512_boolean",
+ types->i8_ptr_type() /*return_type*/, args,
+
reinterpret_cast<void*>(gdv_fn_sha512_boolean));
+
+ // gdv_fn_sha512_date64
+ args = {
+ types->i64_type(), // context
+ types->i64_type(), // value
+ types->i1_type(), // validity
+ types->i32_ptr_type() // out_length
+ };
+ engine->AddGlobalMappingForFunc("gdv_fn_sha512_date64",
+ types->i8_ptr_type() /*return_type*/, args,
+
reinterpret_cast<void*>(gdv_fn_sha512_date64));
+
+ // gdv_fn_sha512_date32
+ args = {
+ types->i64_type(), // context
+ types->i32_type(), // value
+ types->i1_type(), // validity
+ types->i32_ptr_type() // out_length
+ };
+ engine->AddGlobalMappingForFunc("gdv_fn_sha512_date32",
+ types->i8_ptr_type() /*return_type*/, args,
+
reinterpret_cast<void*>(gdv_fn_sha512_date32));
+
+ // gdv_fn_sha512_time32
+ args = {
+ types->i64_type(), // context
+ types->i32_type(), // value
+ types->i1_type(), // validity
+ types->i32_ptr_type() // out_length
+ };
+ engine->AddGlobalMappingForFunc("gdv_fn_sha512_time32",
+ types->i8_ptr_type() /*return_type*/, args,
+
reinterpret_cast<void*>(gdv_fn_sha512_time32));
+
+ // gdv_fn_sha512_timestamp
+ args = {
+ types->i64_type(), // context
+ types->i64_type(), // value
+ types->i1_type(), // validity
+ types->i32_ptr_type() // out_length
+ };
+ engine->AddGlobalMappingForFunc("gdv_fn_sha512_timestamp",
+ types->i8_ptr_type() /*return_type*/, args,
+
reinterpret_cast<void*>(gdv_fn_sha512_timestamp));
+
+ // gdv_fn_sha512_utf8
+ args = {
+ types->i64_type(), // context
+ types->i8_ptr_type(), // const char*
+ types->i32_type(), // value_length
+ types->i1_type(), // validity
+ types->i32_ptr_type() // out
+ };
+
+ engine->AddGlobalMappingForFunc("gdv_fn_sha512_utf8",
+ types->i8_ptr_type() /*return_type*/, args,
+ reinterpret_cast<void*>(gdv_fn_sha512_utf8));
+
+ // gdv_fn_sha512_binary
+ args = {
+ types->i64_type(), // context
+ types->i8_ptr_type(), // const char*
+ types->i32_type(), // value_length
+ types->i1_type(), // validity
+ types->i32_ptr_type() // out
+ };
+
+ engine->AddGlobalMappingForFunc("gdv_fn_sha512_binary",
+ types->i8_ptr_type() /*return_type*/, args,
+
reinterpret_cast<void*>(gdv_fn_sha512_binary));
+
// gdv_fn_sha256_int8
args = {
types->i64_type(), // context
@@ -765,6 +996,22 @@ void ExportedHashFunctions::AddMappings(Engine* engine)
const {
engine->AddGlobalMappingForFunc("gdv_fn_sha1_decimal128",
types->i8_ptr_type() /*return_type*/, args,
reinterpret_cast<void*>(gdv_fn_sha1_decimal128));
+
+ // gdv_fn_sha512_decimal128
+ args = {
+ types->i64_type(), // context
+ types->i64_type(), // high_bits
+ types->i64_type(), // low_bits
+ types->i32_type(), // precision
+ types->i32_type(), // scale
+ types->i1_type(), // validity
+ types->i32_ptr_type() // out length
+ };
+
+ engine->AddGlobalMappingForFunc("gdv_fn_sha512_decimal128",
+ types->i8_ptr_type() /*return_type*/, args,
+
reinterpret_cast<void*>(gdv_fn_sha512_decimal128));
+
// gdv_fn_sha256_decimal128
args = {
types->i64_type(), // context
diff --git a/cpp/src/gandiva/hash_utils.cc b/cpp/src/gandiva/hash_utils.cc
index 493eec48c2..5019d45ca8 100644
--- a/cpp/src/gandiva/hash_utils.cc
+++ b/cpp/src/gandiva/hash_utils.cc
@@ -22,6 +22,16 @@
#include "openssl/evp.h"
namespace gandiva {
+
+/// Hashes a generic message using the SHA512 algorithm
+GANDIVA_EXPORT
+const char* gdv_sha512_hash(int64_t context, const void* message, size_t
message_length,
+ int32_t* out_length) {
+ constexpr int sha512_result_length = 128;
+ return gdv_hash_using_openssl(context, message, message_length, EVP_sha512(),
+ sha512_result_length, out_length);
+}
+
/// Hashes a generic message using the SHA256 algorithm
GANDIVA_EXPORT
const char* gdv_sha256_hash(int64_t context, const void* message, size_t
message_length,
diff --git a/cpp/src/gandiva/hash_utils.h b/cpp/src/gandiva/hash_utils.h
index a7d3b48c30..06e988496b 100644
--- a/cpp/src/gandiva/hash_utils.h
+++ b/cpp/src/gandiva/hash_utils.h
@@ -24,6 +24,11 @@
#include "openssl/evp.h"
namespace gandiva {
+
+GANDIVA_EXPORT
+const char* gdv_sha512_hash(int64_t context, const void* message, size_t
message_length,
+ int32_t* out_length);
+
GANDIVA_EXPORT
const char* gdv_sha256_hash(int64_t context, const void* message, size_t
message_length,
int32_t* out_length);
diff --git a/cpp/src/gandiva/hash_utils_test.cc
b/cpp/src/gandiva/hash_utils_test.cc
index b4d66f1aa6..96f9819e53 100644
--- a/cpp/src/gandiva/hash_utils_test.cc
+++ b/cpp/src/gandiva/hash_utils_test.cc
@@ -56,6 +56,41 @@ TEST(TestShaHashUtils, TestSha1Numeric) {
}
}
+TEST(TestShaHashUtils, TestSha512Numeric) {
+ gandiva::ExecutionContext ctx;
+
+ auto ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+
+ std::vector<uint64_t> values_to_be_hashed;
+
+ // Generate a list of values for which to obtain the SHA512 hash
+ values_to_be_hashed.push_back(gandiva::gdv_double_to_long(0.0));
+ values_to_be_hashed.push_back(gandiva::gdv_double_to_long(0.1));
+ values_to_be_hashed.push_back(gandiva::gdv_double_to_long(0.2));
+ values_to_be_hashed.push_back(gandiva::gdv_double_to_long(-0.10000001));
+ values_to_be_hashed.push_back(gandiva::gdv_double_to_long(-0.0000001));
+ values_to_be_hashed.push_back(gandiva::gdv_double_to_long(1.000000));
+ values_to_be_hashed.push_back(gandiva::gdv_double_to_long(-0.0000002));
+ values_to_be_hashed.push_back(gandiva::gdv_double_to_long(0.999999));
+
+ // Checks if the hash value is different for each one of the values
+ std::unordered_set<std::string> sha_values;
+
+ int sha512_size = 128;
+
+ for (auto value : values_to_be_hashed) {
+ int out_length;
+ const char* sha_512 =
+ gandiva::gdv_sha512_hash(ctx_ptr, &value, sizeof(value), &out_length);
+ std::string sha512_as_str(sha_512, out_length);
+ EXPECT_EQ(sha512_as_str.size(), sha512_size);
+
+ // The hash must not already exist in the set of hash results
+ EXPECT_EQ(sha_values.find(sha512_as_str), sha_values.end());
+ sha_values.insert(sha512_as_str);
+ }
+}
+
TEST(TestShaHashUtils, TestSha256Numeric) {
gandiva::ExecutionContext ctx;
@@ -160,6 +195,57 @@ TEST(TestShaHashUtils, TestSha1Varlen) {
EXPECT_EQ(sha2_as_str, expected_second_result);
}
+TEST(TestShaHashUtils, TestSha512Varlen) {
+ gandiva::ExecutionContext ctx;
+
+ auto ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+
+ std::string first_string =
+ "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY [ˈʏpsilɔn], "
+ "Yen [jɛn], Yoga [ˈjoːgɑ]";
+
+ std::string second_string =
+ "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY [ˈʏpsilɔn], "
+ "Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ";
+
+ std::string third_string = "0";
+
+ // The expected hashes of the strings are obtained by executing the
following shell command:
+ // echo -n <input-string> | openssl dgst -sha512
+ std::string expected_first_result =
+
"ea11714806203ca486cbb13783c2f4c52b962072ad69cb1dbc8f2960f0fc7ff5996316fea8607bd1af"
+ "0f1f13542fef677a01f4cec3cbeb1c4a89e8567d366b0e";
+ std::string expected_second_result =
+
"a5446a30e173baf3aa27800a7d304d16a68b87800723973156ad4362cbe4c136e4b12c950a603f25fc"
+ "3b2e1ea778a1936ee2dbf71d27a3bc0f81498df3ce060c";
+
+ std::string expected_third_result =
+
"31bca02094eb78126a517b206a88c73cfa9ec6f704c7030d18212cace820f025f00bf0ea68dbf3f3a5"
+ "436ca63b53bf7bf80ad8d5de7d8359d0b7fed9dbc3ab99";
+
+ // Generate the hashes and compare with expected outputs
+ const int sha512_size = 128;
+ int out_length;
+
+ const char* sha_1 = gandiva::gdv_sha512_hash(ctx_ptr, first_string.c_str(),
+ first_string.size(),
&out_length);
+ std::string sha1_as_str(sha_1, out_length);
+ EXPECT_EQ(sha1_as_str.size(), sha512_size);
+ EXPECT_EQ(sha1_as_str, expected_first_result);
+
+ const char* sha_2 = gandiva::gdv_sha512_hash(ctx_ptr, second_string.c_str(),
+ second_string.size(),
&out_length);
+ std::string sha2_as_str(sha_2, out_length);
+ EXPECT_EQ(sha2_as_str.size(), sha512_size);
+ EXPECT_EQ(sha2_as_str, expected_second_result);
+
+ const char* sha_3 = gandiva::gdv_sha512_hash(ctx_ptr, third_string.c_str(),
+ third_string.size(),
&out_length);
+ std::string sha3_as_str(sha_3, out_length);
+ EXPECT_EQ(sha3_as_str.size(), sha512_size);
+ EXPECT_EQ(sha3_as_str, expected_third_result);
+}
+
TEST(TestShaHashUtils, TestSha256Varlen) {
gandiva::ExecutionContext ctx;
diff --git a/cpp/src/gandiva/tests/hash_test.cc
b/cpp/src/gandiva/tests/hash_test.cc
index 0a574f3267..de418f4dd7 100644
--- a/cpp/src/gandiva/tests/hash_test.cc
+++ b/cpp/src/gandiva/tests/hash_test.cc
@@ -149,6 +149,146 @@ TEST_F(TestHash, TestBuf) {
}
}
+TEST_F(TestHash, TestSha512Simple) {
+ // schema for input fields
+ auto field_a = field("a", int32());
+ auto field_b = field("b", int64());
+ auto field_c = field("c", float32());
+ auto field_d = field("d", float64());
+ auto schema = arrow::schema({field_a, field_b, field_c, field_d});
+
+ // output fields
+ auto res_0 = field("res0", utf8());
+ auto res_1 = field("res1", utf8());
+ auto res_2 = field("res2", utf8());
+ auto res_3 = field("res3", utf8());
+
+ // build expressions.
+ // hashSHA512(a)
+ auto node_a = TreeExprBuilder::MakeField(field_a);
+ auto hashSha512_1 = TreeExprBuilder::MakeFunction("hashSHA512", {node_a},
utf8());
+ auto expr_0 = TreeExprBuilder::MakeExpression(hashSha512_1, res_0);
+
+ auto node_b = TreeExprBuilder::MakeField(field_b);
+ auto hashSha512_2 = TreeExprBuilder::MakeFunction("hashSHA512", {node_b},
utf8());
+ auto expr_1 = TreeExprBuilder::MakeExpression(hashSha512_2, res_1);
+
+ auto node_c = TreeExprBuilder::MakeField(field_c);
+ auto hashSha512_3 = TreeExprBuilder::MakeFunction("hashSHA512", {node_c},
utf8());
+ auto expr_2 = TreeExprBuilder::MakeExpression(hashSha512_3, res_2);
+
+ auto node_d = TreeExprBuilder::MakeField(field_d);
+ auto hashSha512_4 = TreeExprBuilder::MakeFunction("hashSHA512", {node_d},
utf8());
+ auto expr_3 = TreeExprBuilder::MakeExpression(hashSha512_4, res_3);
+
+ // Build a projector for the expressions.
+ std::shared_ptr<Projector> projector;
+ auto status = Projector::Make(schema, {expr_0, expr_1, expr_2, expr_3},
+ TestConfiguration(), &projector);
+ ASSERT_OK(status) << status.message();
+
+ // Create a row-batch with some sample data
+ int num_records = 2;
+ auto validity_array = {false, true};
+
+ auto array_int32 = MakeArrowArrayInt32({1, 0}, validity_array);
+
+ auto array_int64 = MakeArrowArrayInt64({1, 0}, validity_array);
+
+ auto array_float32 = MakeArrowArrayFloat32({1.0, 0.0}, validity_array);
+
+ auto array_float64 = MakeArrowArrayFloat64({1.0, 0.0}, validity_array);
+
+ // prepare input record batch
+ auto in_batch = arrow::RecordBatch::Make(
+ schema, num_records, {array_int32, array_int64, array_float32,
array_float64});
+
+ // Evaluate expression
+ arrow::ArrayVector outputs;
+ status = projector->Evaluate(*in_batch, pool_, &outputs);
+ ASSERT_OK(status);
+
+ auto response_int32 = outputs.at(0);
+ auto response_int64 = outputs.at(1);
+ auto response_float32 = outputs.at(2);
+ auto response_float64 = outputs.at(3);
+
+ // Checks if the null and zero representation for numeric values
+ // are consistent between the types
+ EXPECT_ARROW_ARRAY_EQUALS(response_int32, response_int64);
+ EXPECT_ARROW_ARRAY_EQUALS(response_int64, response_float32);
+ EXPECT_ARROW_ARRAY_EQUALS(response_float32, response_float64);
+
+ const int sha512_hash_size = 128;
+
+ // Checks if the hash size in response is correct
+ for (int i = 1; i < num_records; ++i) {
+ const auto& value_at_position =
response_int32->GetScalar(i).ValueOrDie()->ToString();
+
+ EXPECT_EQ(value_at_position.size(), sha512_hash_size);
+ EXPECT_NE(value_at_position,
+ response_int32->GetScalar(i - 1).ValueOrDie()->ToString());
+ }
+}
+
+TEST_F(TestHash, TestSha512Varlen) {
+ // schema for input fields
+ auto field_a = field("a", utf8());
+ auto schema = arrow::schema({field_a});
+
+ // output fields
+ auto res_0 = field("res0", utf8());
+
+ // build expressions.
+ // hashSHA512(a)
+ auto node_a = TreeExprBuilder::MakeField(field_a);
+ auto hashSha512 = TreeExprBuilder::MakeFunction("hashSHA512", {node_a},
utf8());
+ auto expr_0 = TreeExprBuilder::MakeExpression(hashSha512, res_0);
+
+ // Build a projector for the expressions.
+ std::shared_ptr<Projector> projector;
+ auto status = Projector::Make(schema, {expr_0}, TestConfiguration(),
&projector);
+ EXPECT_TRUE(status.ok()) << status.message();
+
+ // Create a row-batch with some sample data
+ int num_records = 3;
+
+ std::string first_string =
+ "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY "
+ "[ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]";
+ std::string second_string =
+ "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY "
+ "[ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ";
+
+ auto array_a =
+ MakeArrowArrayUtf8({"foo", first_string, second_string}, {false, true,
true});
+
+ // prepare input record batch
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
+
+ // Evaluate expression
+ arrow::ArrayVector outputs;
+ status = projector->Evaluate(*in_batch, pool_, &outputs);
+ ASSERT_OK(status);
+
+ auto response = outputs.at(0);
+ const int sha512_hash_size = 128;
+
+ EXPECT_EQ(response->null_count(), 0);
+
+ // Checks that the null value was hashed
+ EXPECT_NE(response->GetScalar(0).ValueOrDie()->ToString(), "");
+ EXPECT_EQ(response->GetScalar(0).ValueOrDie()->ToString().size(),
sha512_hash_size);
+
+ // Check that all generated hashes were different
+ for (int i = 1; i < num_records; ++i) {
+ const auto& value_at_position =
response->GetScalar(i).ValueOrDie()->ToString();
+
+ EXPECT_EQ(value_at_position.size(), sha512_hash_size);
+ EXPECT_NE(value_at_position, response->GetScalar(i -
1).ValueOrDie()->ToString());
+ }
+}
+
TEST_F(TestHash, TestSha256Simple) {
// schema for input fields
auto field_a = field("a", int32());
@@ -527,6 +667,92 @@ TEST_F(TestHash, TestSha1FunctionsAlias) {
EXPECT_ARROW_ARRAY_EQUALS(outputs.at(7), outputs.at(8)); // sha and sha1
responses
}
+TEST_F(TestHash, TestSha512FunctionsAlias) {
+ // schema for input fields
+ auto field_a = field("a", utf8());
+ auto field_b = field("c", int64());
+ auto field_c = field("e", float64());
+ auto schema = arrow::schema({field_a, field_b, field_c});
+
+ // output fields
+ auto res_0 = field("res0", utf8());
+ auto res_0_sha512 = field("res0sha512", utf8());
+
+ auto res_1 = field("res1", utf8());
+ auto res_1_sha512 = field("res1sha512", utf8());
+
+ auto res_2 = field("res2", utf8());
+ auto res_2_sha512 = field("res2_sha512", utf8());
+
+ // build expressions.
+ // hashSHA512(a)
+ auto node_a = TreeExprBuilder::MakeField(field_a);
+ auto hashSha2 = TreeExprBuilder::MakeFunction("hashSHA512", {node_a},
utf8());
+ auto expr_0 = TreeExprBuilder::MakeExpression(hashSha2, res_0);
+ auto sha512 = TreeExprBuilder::MakeFunction("sha512", {node_a}, utf8());
+ auto expr_0_sha512 = TreeExprBuilder::MakeExpression(sha512, res_0_sha512);
+
+ auto node_b = TreeExprBuilder::MakeField(field_b);
+ auto hashSha2_1 = TreeExprBuilder::MakeFunction("hashSHA512", {node_b},
utf8());
+ auto expr_1 = TreeExprBuilder::MakeExpression(hashSha2_1, res_1);
+ auto sha512_1 = TreeExprBuilder::MakeFunction("sha512", {node_b}, utf8());
+ auto expr_1_sha512 = TreeExprBuilder::MakeExpression(sha512_1, res_1_sha512);
+
+ auto node_c = TreeExprBuilder::MakeField(field_c);
+ auto hashSha2_2 = TreeExprBuilder::MakeFunction("hashSHA512", {node_c},
utf8());
+ auto expr_2 = TreeExprBuilder::MakeExpression(hashSha2_2, res_2);
+ auto sha512_2 = TreeExprBuilder::MakeFunction("sha512", {node_c}, utf8());
+ auto expr_2_sha512 = TreeExprBuilder::MakeExpression(sha512_2, res_2_sha512);
+
+ // Build a projector for the expressions.
+ std::shared_ptr<Projector> projector;
+ auto status = Projector::Make(
+ schema, {expr_0, expr_0_sha512, expr_1, expr_1_sha512, expr_2,
expr_2_sha512},
+ TestConfiguration(), &projector);
+ ASSERT_OK(status) << status.message();
+
+ // Create a row-batch with some sample data
+ int32_t num_records = 3;
+
+ std::string first_string =
+ "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY [ˈʏpsilɔn], "
+ "Yen [jɛn], Yoga [ˈjoːgɑ]";
+ std::string second_string =
+ "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY [ˈʏpsilɔn], "
+ "Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ";
+
+ auto array_utf8 =
+ MakeArrowArrayUtf8({"", first_string, second_string}, {false, true,
true});
+
+ auto validity_array = {false, true, true};
+
+ auto array_int64 = MakeArrowArrayInt64({1, 0, 32423}, validity_array);
+
+ auto array_float64 = MakeArrowArrayFloat64({1.0, 0.0, 324893.3849},
validity_array);
+
+ // prepare input record batch
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records,
+ {array_utf8, array_int64,
array_float64});
+
+ // Evaluate expression
+ arrow::ArrayVector outputs;
+ status = projector->Evaluate(*in_batch, pool_, &outputs);
+ ASSERT_OK(status);
+
+ // Checks that the responses for hashSHA512 and sha512 are equal for
the first
+ // field of utf8 type
+ EXPECT_ARROW_ARRAY_EQUALS(outputs.at(0), outputs.at(1)); // hashSha2 and
sha512
+
+ // Checks that the responses for hashSHA512 and sha512 are equal for
the second
+ // field of int64 type
+ EXPECT_ARROW_ARRAY_EQUALS(outputs.at(2), outputs.at(3)); // hashSha2 and
sha512
+
+ // Checks that the responses for hashSHA512 and sha512 are equal for
the third
+ // field of float64 type
+ EXPECT_ARROW_ARRAY_EQUALS(outputs.at(4),
+ outputs.at(5)); // hashSha2 and sha512 responses
+}
+
TEST_F(TestHash, TestSha256FunctionsAlias) {
// schema for input fields
auto field_a = field("a", utf8());
@@ -752,4 +978,5 @@ TEST_F(TestHash, TestMD5Varlen) {
EXPECT_NE(value_at_position, response->GetScalar(i -
1).ValueOrDie()->ToString());
}
}
+
} // namespace gandiva