This is an automated email from the ASF dual-hosted git repository.

ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new c960fb5f1f ARROW-15661: [Gandiva][C++] Add SHA512 function (#12404)
c960fb5f1f is described below

commit c960fb5f1f6afbf024b83015f4a0038bedd64d1e
Author: Johnnathan Almeida <[email protected]>
AuthorDate: Wed Jun 22 09:40:30 2022 -0300

    ARROW-15661: [Gandiva][C++] Add SHA512 function (#12404)
    
    This PR implements the SHA512 hash functions on the Gandiva side.
    
    This PR implements the following signatures:
    
    FunctionSignature{name =hashSHA512, return type =String, param types 
=[int8]}
    FunctionSignature{name =hashSHA512, return type =String, param types 
=[int16]}
    FunctionSignature{name =hashSHA512, return type =String, param types 
=[int32]}
    FunctionSignature{name =hashSHA512, return type =String, param types 
=[int64]}
    
    FunctionSignature{name =hashSHA512, return type =String, param types 
=[uint8]}
    FunctionSignature{name =hashSHA512, return type =String, param types 
=[uint16]}
    FunctionSignature{name =hashSHA512, return type =String, param types 
=[uint32]}
    FunctionSignature{name =hashSHA512, return type =String, param types 
=[uint64]}
    
    FunctionSignature{name =hashSHA512, return type =String, param types 
=[float]}
    FunctionSignature{name =hashSHA512, return type =String, param types 
=[double]}
    
    FunctionSignature{name =hashSHA512, return type =String, param types 
=[boolean]}
    
    FunctionSignature{name =hashSHA512, return type =String, param types 
=[date64]}
    FunctionSignature{name =hashSHA512, return type =String, param types 
=[date32]}
    
    FunctionSignature{name =hashSHA512, return type =String, param types 
=[time32]}
    FunctionSignature{name =hashSHA512, return type =String, param types 
=[timestamp]}
    
    FunctionSignature{name =hashSHA512, return type =String, param types 
=[string]}
    
    Authored-by: Johnnathan <[email protected]>
    Signed-off-by: Pindikura Ravindra <[email protected]>
---
 cpp/src/gandiva/function_registry_common.h |  10 ++
 cpp/src/gandiva/function_registry_hash.cc  |   5 +
 cpp/src/gandiva/gdv_function_stubs.cc      |   1 -
 cpp/src/gandiva/gdv_function_stubs.h       | 133 ++++++++++++++++
 cpp/src/gandiva/gdv_hash_function_stubs.cc | 247 +++++++++++++++++++++++++++++
 cpp/src/gandiva/hash_utils.cc              |  10 ++
 cpp/src/gandiva/hash_utils.h               |   5 +
 cpp/src/gandiva/hash_utils_test.cc         |  86 ++++++++++
 cpp/src/gandiva/tests/hash_test.cc         | 227 ++++++++++++++++++++++++++
 9 files changed, 723 insertions(+), 1 deletion(-)

diff --git a/cpp/src/gandiva/function_registry_common.h 
b/cpp/src/gandiva/function_registry_common.h
index 9ec9be1b54..6fa51b498d 100644
--- a/cpp/src/gandiva/function_registry_common.h
+++ b/cpp/src/gandiva/function_registry_common.h
@@ -235,6 +235,16 @@ typedef std::unordered_map<const FunctionSignature*, const 
NativeFunction*, KeyH
                  kResultNullNever, ARROW_STRINGIFY(gdv_fn_sha1_##TYPE),  \
                  NativeFunction::kNeedsContext | 
NativeFunction::kCanReturnErrors)
 
+// HashSHA512 functions that :
+// - NULL handling is of type NULL_NEVER
+// - can return errors
+//
+// The function name includes the base name & input type name. 
gdv_fn_sha512_float64
+#define HASH_SHA512_NULL_NEVER(NAME, ALIASES, TYPE)                            
       \
+  NativeFunction(#NAME, {"sha512"}, DataTypeVector{TYPE()}, utf8(), 
kResultNullNever, \
+                 ARROW_STRINGIFY(gdv_fn_sha512_##TYPE),                        
       \
+                 NativeFunction::kNeedsContext | 
NativeFunction::kCanReturnErrors)
+
 // HashSHA256 functions that :
 // - NULL handling is of type NULL_NEVER
 // - can return errors
diff --git a/cpp/src/gandiva/function_registry_hash.cc 
b/cpp/src/gandiva/function_registry_hash.cc
index b12ac14725..7fd8751f40 100644
--- a/cpp/src/gandiva/function_registry_hash.cc
+++ b/cpp/src/gandiva/function_registry_hash.cc
@@ -38,6 +38,9 @@ namespace gandiva {
 #define HASH_SHA256_NULL_NEVER_FN(name, ALIASES) \
   NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH_SHA256_NULL_NEVER, name, ALIASES)
 
+#define HASH_SHA512_NULL_NEVER_FN(name, ALIASES) \
+  NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH_SHA512_NULL_NEVER, name, ALIASES)
+
 #define HASH_MD5_NULL_NEVER_FN(name, ALIASES) \
   NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH_MD5_NULL_NEVER, name, ALIASES)
 
@@ -60,6 +63,8 @@ std::vector<NativeFunction> GetHashFunctionRegistry() {
 
       HASH_SHA256_NULL_NEVER_FN(hashSHA256, {}),
 
+      HASH_SHA512_NULL_NEVER_FN(hashSHA512, {}),
+
       HASH_MD5_NULL_NEVER_FN(hashMD5, {})};
 
   return hash_fn_registry_;
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc 
b/cpp/src/gandiva/gdv_function_stubs.cc
index cf1daba81b..27900c14af 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -26,7 +26,6 @@
 #include "arrow/util/base64.h"
 #include "arrow/util/bit_util.h"
 #include "arrow/util/double_conversion.h"
-#include "arrow/util/string_view.h"
 #include "arrow/util/value_parsing.h"
 
 #include "gandiva/encrypt_utils.h"
diff --git a/cpp/src/gandiva/gdv_function_stubs.h 
b/cpp/src/gandiva/gdv_function_stubs.h
index d39d294042..a8ce58698e 100644
--- a/cpp/src/gandiva/gdv_function_stubs.h
+++ b/cpp/src/gandiva/gdv_function_stubs.h
@@ -105,6 +105,11 @@ GANDIVA_EXPORT
 const char* gdv_fn_castVARBINARY_int64_int64(int64_t context, gdv_int64 value,
                                              int64_t out_len, int32_t* 
out_length);
 
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_decimal128(int64_t context, int64_t x_high, uint64_t 
x_low,
+                                     int32_t x_precision, int32_t x_scale,
+                                     gdv_boolean x_isvalid, int32_t* 
out_length);
+
 GANDIVA_EXPORT
 const char* gdv_fn_sha256_decimal128(int64_t context, int64_t x_high, uint64_t 
x_low,
                                      int32_t x_precision, int32_t x_scale,
@@ -200,4 +205,132 @@ GANDIVA_EXPORT
 const char* gdv_mask_last_n_utf8_int32(int64_t context, const char* data,
                                        int32_t data_len, int32_t n_to_mask,
                                        int32_t* out_len);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_int8(int64_t context, gdv_int8 value, bool validity,
+                               int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_int16(int64_t context, gdv_int16 value, bool 
validity,
+                                int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_int32(int64_t context, gdv_int32 value, bool 
validity,
+                                int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_int64(int64_t context, gdv_int64 value, bool 
validity,
+                                int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_uint8(int64_t context, gdv_uint8 value, bool 
validity,
+                                int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_uint16(int64_t context, gdv_uint16 value, bool 
validity,
+                                 int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_uint32(int64_t context, gdv_uint32 value, bool 
validity,
+                                 int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_uint64(int64_t context, gdv_uint64 value, bool 
validity,
+                                 int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_float32(int64_t context, gdv_float32 value, bool 
validity,
+                                  int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_float64(int64_t context, gdv_float64 value, bool 
validity,
+                                  int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_boolean(int64_t context, gdv_boolean value, bool 
validity,
+                                  int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_date64(int64_t context, gdv_date64 value, bool 
validity,
+                                 int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_date32(int64_t context, gdv_date32 value, bool 
validity,
+                                 int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_time32(int64_t context, gdv_time32 value, bool 
validity,
+                                 int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_timestamp(int64_t context, gdv_timestamp value, bool 
validity,
+                                    int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_utf8(int64_t context, gdv_utf8 value, int32_t 
value_length,
+                               bool value_validity, int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_int8(int64_t context, gdv_int8 value, bool validity,
+                               int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_int16(int64_t context, gdv_int16 value, bool 
validity,
+                                int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_int32(int64_t context, gdv_int32 value, bool 
validity,
+                                int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_int64(int64_t context, gdv_int64 value, bool 
validity,
+                                int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_uint8(int64_t context, gdv_uint8 value, bool 
validity,
+                                int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_uint16(int64_t context, gdv_uint16 value, bool 
validity,
+                                 int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_uint32(int64_t context, gdv_uint32 value, bool 
validity,
+                                 int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_uint64(int64_t context, gdv_uint64 value, bool 
validity,
+                                 int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_float32(int64_t context, gdv_float32 value, bool 
validity,
+                                  int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_float64(int64_t context, gdv_float64 value, bool 
validity,
+                                  int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_boolean(int64_t context, gdv_boolean value, bool 
validity,
+                                  int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_date64(int64_t context, gdv_date64 value, bool 
validity,
+                                 int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_date32(int64_t context, gdv_date32 value, bool 
validity,
+                                 int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_time32(int64_t context, gdv_time32 value, bool 
validity,
+                                 int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_timestamp(int64_t context, gdv_timestamp value, bool 
validity,
+                                    int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_sha256_utf8(int64_t context, gdv_utf8 value, int32_t 
value_length,
+                               bool value_validity, int32_t* out_length);
 }
diff --git a/cpp/src/gandiva/gdv_hash_function_stubs.cc 
b/cpp/src/gandiva/gdv_hash_function_stubs.cc
index 235e8a8961..018b0fbb70 100644
--- a/cpp/src/gandiva/gdv_hash_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_hash_function_stubs.cc
@@ -76,6 +76,31 @@ extern "C" {
     return gandiva::gdv_sha1_hash(context, value, value_length, out_length); \
   }
 
+#define SHA512_HASH_FUNCTION(TYPE)                                             
       \
+  GANDIVA_EXPORT                                                               
       \
+  const char* gdv_fn_sha512_##TYPE(int64_t context, gdv_##TYPE value, bool 
validity,  \
+                                   int32_t* out_length) {                      
       \
+    if (!validity) {                                                           
       \
+      return gandiva::gdv_sha512_hash(context, NULLPTR, 0, out_length);        
       \
+    }                                                                          
       \
+    auto value_as_long = gandiva::gdv_double_to_long((double)value);           
       \
+    const char* result = gandiva::gdv_sha512_hash(context, &value_as_long,     
       \
+                                                  sizeof(value_as_long), 
out_length); \
+    return result;                                                             
       \
+  }
+
+#define SHA512_HASH_FUNCTION_BUF(TYPE)                                         
\
+  GANDIVA_EXPORT                                                               
\
+  const char* gdv_fn_sha512_##TYPE(int64_t context, gdv_##TYPE value,          
\
+                                   int32_t value_length, bool value_validity,  
\
+                                   int32_t* out_length) {                      
\
+    if (!value_validity) {                                                     
\
+      return gandiva::gdv_sha512_hash(context, NULLPTR, 0, out_length);        
\
+    }                                                                          
\
+                                                                               
\
+    return gandiva::gdv_sha512_hash(context, value, value_length, out_length); 
\
+  }
+
 #define SHA256_HASH_FUNCTION(TYPE)                                             
       \
   GANDIVA_EXPORT                                                               
       \
   const char* gdv_fn_sha256_##TYPE(int64_t context, gdv_##TYPE value, bool 
validity,  \
@@ -127,6 +152,9 @@ extern "C" {
 SHA_NUMERIC_BOOL_DATE_PARAMS(MD5_HASH_FUNCTION)
 SHA_VAR_LEN_PARAMS(MD5_HASH_FUNCTION_BUF)
 
+SHA_NUMERIC_BOOL_DATE_PARAMS(SHA512_HASH_FUNCTION)
+SHA_VAR_LEN_PARAMS(SHA512_HASH_FUNCTION_BUF)
+
 SHA_NUMERIC_BOOL_DATE_PARAMS(SHA256_HASH_FUNCTION)
 SHA_VAR_LEN_PARAMS(SHA256_HASH_FUNCTION_BUF)
 
@@ -149,6 +177,18 @@ const char* gdv_fn_md5_decimal128(int64_t context, int64_t 
x_high, uint64_t x_lo
   return gandiva::gdv_md5_hash(context, decimal_128.ToBytes().data(), 16, 
out_length);
 }
 
+GANDIVA_EXPORT
+const char* gdv_fn_sha512_decimal128(int64_t context, int64_t x_high, uint64_t 
x_low,
+                                     int32_t /*x_precision*/, int32_t 
/*x_scale*/,
+                                     gdv_boolean x_isvalid, int32_t* 
out_length) {
+  if (!x_isvalid) {
+    return gandiva::gdv_sha512_hash(context, NULLPTR, 0, out_length);
+  }
+
+  const gandiva::BasicDecimal128 decimal_128(x_high, x_low);
+  return gandiva::gdv_sha512_hash(context, decimal_128.ToBytes().data(), 16, 
out_length);
+}
+
 GANDIVA_EXPORT
 const char* gdv_fn_sha256_decimal128(int64_t context, int64_t x_high, uint64_t 
x_low,
                                      int32_t /*x_precision*/, int32_t 
/*x_scale*/,
@@ -560,6 +600,197 @@ void ExportedHashFunctions::AddMappings(Engine* engine) 
const {
                                   types->i8_ptr_type() /*return_type*/, args,
                                   reinterpret_cast<void*>(gdv_fn_sha1_binary));
 
+  // gdv_fn_sha512_int8
+  args = {
+      types->i64_type(),     // context
+      types->i8_type(),      // value
+      types->i1_type(),      // validity
+      types->i32_ptr_type()  // out_length
+  };
+  engine->AddGlobalMappingForFunc("gdv_fn_sha512_int8",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  reinterpret_cast<void*>(gdv_fn_sha512_int8));
+
+  // gdv_fn_sha512_int16
+  args = {
+      types->i64_type(),     // context
+      types->i16_type(),     // value
+      types->i1_type(),      // validity
+      types->i32_ptr_type()  // out_length
+  };
+  engine->AddGlobalMappingForFunc("gdv_fn_sha512_int16",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  
reinterpret_cast<void*>(gdv_fn_sha512_int16));
+
+  // gdv_fn_sha512_int32
+  args = {
+      types->i64_type(),     // context
+      types->i32_type(),     // value
+      types->i1_type(),      // validity
+      types->i32_ptr_type()  // out_length
+  };
+  engine->AddGlobalMappingForFunc("gdv_fn_sha512_int32",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  
reinterpret_cast<void*>(gdv_fn_sha512_int32));
+
+  // gdv_fn_sha512_int64
+  args = {
+      types->i64_type(),     // context
+      types->i64_type(),     // value
+      types->i1_type(),      // validity
+      types->i32_ptr_type()  // out_length
+  };
+  engine->AddGlobalMappingForFunc("gdv_fn_sha512_int64",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  
reinterpret_cast<void*>(gdv_fn_sha512_int64));
+
+  // gdv_fn_sha512_uint8
+  args = {
+      types->i64_type(),     // context
+      types->i8_type(),      // value
+      types->i1_type(),      // validity
+      types->i32_ptr_type()  // out_length
+  };
+  engine->AddGlobalMappingForFunc("gdv_fn_sha512_uint8",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  
reinterpret_cast<void*>(gdv_fn_sha512_uint8));
+
+  // gdv_fn_sha512_uint16
+  args = {
+      types->i64_type(),     // context
+      types->i16_type(),     // value
+      types->i1_type(),      // validity
+      types->i32_ptr_type()  // out_length
+  };
+  engine->AddGlobalMappingForFunc("gdv_fn_sha512_uint16",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  
reinterpret_cast<void*>(gdv_fn_sha512_uint16));
+
+  // gdv_fn_sha512_uint32
+  args = {
+      types->i64_type(),     // context
+      types->i32_type(),     // value
+      types->i1_type(),      // validity
+      types->i32_ptr_type()  // out_length
+  };
+  engine->AddGlobalMappingForFunc("gdv_fn_sha512_uint32",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  
reinterpret_cast<void*>(gdv_fn_sha512_uint32));
+
+  // gdv_fn_sha512_uint64
+  args = {
+      types->i64_type(),     // context
+      types->i64_type(),     // value
+      types->i1_type(),      // validity
+      types->i32_ptr_type()  // out_length
+  };
+  engine->AddGlobalMappingForFunc("gdv_fn_sha512_uint64",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  
reinterpret_cast<void*>(gdv_fn_sha512_uint64));
+
+  // gdv_fn_sha512_float32
+  args = {
+      types->i64_type(),     // context
+      types->float_type(),   // value
+      types->i1_type(),      // validity
+      types->i32_ptr_type()  // out_length
+  };
+  engine->AddGlobalMappingForFunc("gdv_fn_sha512_float32",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  
reinterpret_cast<void*>(gdv_fn_sha512_float32));
+
+  // gdv_fn_sha512_float64
+  args = {
+      types->i64_type(),     // context
+      types->double_type(),  // value
+      types->i1_type(),      // validity
+      types->i32_ptr_type()  // out_length
+  };
+  engine->AddGlobalMappingForFunc("gdv_fn_sha512_float64",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  
reinterpret_cast<void*>(gdv_fn_sha512_float64));
+
+  // gdv_fn_sha512_boolean
+  args = {
+      types->i64_type(),     // context
+      types->i1_type(),      // value
+      types->i1_type(),      // validity
+      types->i32_ptr_type()  // out_length
+  };
+  engine->AddGlobalMappingForFunc("gdv_fn_sha512_boolean",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  
reinterpret_cast<void*>(gdv_fn_sha512_boolean));
+
+  // gdv_fn_sha512_date64
+  args = {
+      types->i64_type(),     // context
+      types->i64_type(),     // value
+      types->i1_type(),      // validity
+      types->i32_ptr_type()  // out_length
+  };
+  engine->AddGlobalMappingForFunc("gdv_fn_sha512_date64",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  
reinterpret_cast<void*>(gdv_fn_sha512_date64));
+
+  // gdv_fn_sha512_date32
+  args = {
+      types->i64_type(),     // context
+      types->i32_type(),     // value
+      types->i1_type(),      // validity
+      types->i32_ptr_type()  // out_length
+  };
+  engine->AddGlobalMappingForFunc("gdv_fn_sha512_date32",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  
reinterpret_cast<void*>(gdv_fn_sha512_date32));
+
+  // gdv_fn_sha512_time32
+  args = {
+      types->i64_type(),     // context
+      types->i32_type(),     // value
+      types->i1_type(),      // validity
+      types->i32_ptr_type()  // out_length
+  };
+  engine->AddGlobalMappingForFunc("gdv_fn_sha512_time32",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  
reinterpret_cast<void*>(gdv_fn_sha512_time32));
+
+  // gdv_fn_sha512_timestamp
+  args = {
+      types->i64_type(),     // context
+      types->i64_type(),     // value
+      types->i1_type(),      // validity
+      types->i32_ptr_type()  // out_length
+  };
+  engine->AddGlobalMappingForFunc("gdv_fn_sha512_timestamp",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  
reinterpret_cast<void*>(gdv_fn_sha512_timestamp));
+
+  // gdv_fn_hash_sha512_from_utf8
+  args = {
+      types->i64_type(),     // context
+      types->i8_ptr_type(),  // const char*
+      types->i32_type(),     // value_length
+      types->i1_type(),      // validity
+      types->i32_ptr_type()  // out
+  };
+
+  engine->AddGlobalMappingForFunc("gdv_fn_sha512_utf8",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  reinterpret_cast<void*>(gdv_fn_sha512_utf8));
+
+  // gdv_fn_hash_sha512_from_binary
+  args = {
+      types->i64_type(),     // context
+      types->i8_ptr_type(),  // const char*
+      types->i32_type(),     // value_length
+      types->i1_type(),      // validity
+      types->i32_ptr_type()  // out
+  };
+
+  engine->AddGlobalMappingForFunc("gdv_fn_sha512_binary",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  
reinterpret_cast<void*>(gdv_fn_sha512_binary));
+
   // gdv_fn_sha256_int8
   args = {
       types->i64_type(),     // context
@@ -765,6 +996,22 @@ void ExportedHashFunctions::AddMappings(Engine* engine) 
const {
   engine->AddGlobalMappingForFunc("gdv_fn_sha1_decimal128",
                                   types->i8_ptr_type() /*return_type*/, args,
                                   
reinterpret_cast<void*>(gdv_fn_sha1_decimal128));
+
+  // gdv_fn_sha512_decimal128
+  args = {
+      types->i64_type(),     // context
+      types->i64_type(),     // high_bits
+      types->i64_type(),     // low_bits
+      types->i32_type(),     // precision
+      types->i32_type(),     // scale
+      types->i1_type(),      // validity
+      types->i32_ptr_type()  // out length
+  };
+
+  engine->AddGlobalMappingForFunc("gdv_fn_sha512_decimal128",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  
reinterpret_cast<void*>(gdv_fn_sha512_decimal128));
+
   // gdv_fn_sha256_decimal128
   args = {
       types->i64_type(),     // context
diff --git a/cpp/src/gandiva/hash_utils.cc b/cpp/src/gandiva/hash_utils.cc
index 493eec48c2..5019d45ca8 100644
--- a/cpp/src/gandiva/hash_utils.cc
+++ b/cpp/src/gandiva/hash_utils.cc
@@ -22,6 +22,16 @@
 #include "openssl/evp.h"
 
 namespace gandiva {
+
+/// Hashes a generic message using the SHA512 algorithm
+GANDIVA_EXPORT
+const char* gdv_sha512_hash(int64_t context, const void* message, size_t 
message_length,
+                            int32_t* out_length) {
+  constexpr int sha512_result_length = 128;
+  return gdv_hash_using_openssl(context, message, message_length, EVP_sha512(),
+                                sha512_result_length, out_length);
+}
+
 /// Hashes a generic message using the SHA256 algorithm
 GANDIVA_EXPORT
 const char* gdv_sha256_hash(int64_t context, const void* message, size_t 
message_length,
diff --git a/cpp/src/gandiva/hash_utils.h b/cpp/src/gandiva/hash_utils.h
index a7d3b48c30..06e988496b 100644
--- a/cpp/src/gandiva/hash_utils.h
+++ b/cpp/src/gandiva/hash_utils.h
@@ -24,6 +24,11 @@
 #include "openssl/evp.h"
 
 namespace gandiva {
+
+GANDIVA_EXPORT
+const char* gdv_sha512_hash(int64_t context, const void* message, size_t 
message_length,
+                            int32_t* out_length);
+
 GANDIVA_EXPORT
 const char* gdv_sha256_hash(int64_t context, const void* message, size_t 
message_length,
                             int32_t* out_length);
diff --git a/cpp/src/gandiva/hash_utils_test.cc 
b/cpp/src/gandiva/hash_utils_test.cc
index b4d66f1aa6..96f9819e53 100644
--- a/cpp/src/gandiva/hash_utils_test.cc
+++ b/cpp/src/gandiva/hash_utils_test.cc
@@ -56,6 +56,41 @@ TEST(TestShaHashUtils, TestSha1Numeric) {
   }
 }
 
+TEST(TestShaHashUtils, TestSha512Numeric) {
+  gandiva::ExecutionContext ctx;
+
+  auto ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+
+  std::vector<uint64_t> values_to_be_hashed;
+
+  // Generate a list of values for which to obtain the SHA512 hash
+  values_to_be_hashed.push_back(gandiva::gdv_double_to_long(0.0));
+  values_to_be_hashed.push_back(gandiva::gdv_double_to_long(0.1));
+  values_to_be_hashed.push_back(gandiva::gdv_double_to_long(0.2));
+  values_to_be_hashed.push_back(gandiva::gdv_double_to_long(-0.10000001));
+  values_to_be_hashed.push_back(gandiva::gdv_double_to_long(-0.0000001));
+  values_to_be_hashed.push_back(gandiva::gdv_double_to_long(1.000000));
+  values_to_be_hashed.push_back(gandiva::gdv_double_to_long(-0.0000002));
+  values_to_be_hashed.push_back(gandiva::gdv_double_to_long(0.999999));
+
+  // Checks if the hash value is different for each one of the values
+  std::unordered_set<std::string> sha_values;
+
+  int sha512_size = 128;
+
+  for (auto value : values_to_be_hashed) {
+    int out_length;
+    const char* sha_512 =
+        gandiva::gdv_sha512_hash(ctx_ptr, &value, sizeof(value), &out_length);
+    std::string sha512_as_str(sha_512, out_length);
+    EXPECT_EQ(sha512_as_str.size(), sha512_size);
+
+    // The value must not already exist in the set of hash results
+    EXPECT_EQ(sha_values.find(sha512_as_str), sha_values.end());
+    sha_values.insert(sha512_as_str);
+  }
+}
+
 TEST(TestShaHashUtils, TestSha256Numeric) {
   gandiva::ExecutionContext ctx;
 
@@ -160,6 +195,57 @@ TEST(TestShaHashUtils, TestSha1Varlen) {
   EXPECT_EQ(sha2_as_str, expected_second_result);
 }
 
+TEST(TestShaHashUtils, TestSha512Varlen) {
+  gandiva::ExecutionContext ctx;
+
+  auto ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+
+  std::string first_string =
+      "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY [ˈʏpsilɔn], "
+      "Yen [jɛn], Yoga [ˈjoːgɑ]";
+
+  std::string second_string =
+      "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY [ˈʏpsilɔn], "
+      "Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ";
+
+  std::string third_string = "0";
+
+  // The expected hashes were obtained by running the following shell command:
+  // echo -n <output-string> | openssl dgst -sha512
+  std::string expected_first_result =
+      
"ea11714806203ca486cbb13783c2f4c52b962072ad69cb1dbc8f2960f0fc7ff5996316fea8607bd1af"
+      "0f1f13542fef677a01f4cec3cbeb1c4a89e8567d366b0e";
+  std::string expected_second_result =
+      
"a5446a30e173baf3aa27800a7d304d16a68b87800723973156ad4362cbe4c136e4b12c950a603f25fc"
+      "3b2e1ea778a1936ee2dbf71d27a3bc0f81498df3ce060c";
+
+  std::string expected_third_result =
+      
"31bca02094eb78126a517b206a88c73cfa9ec6f704c7030d18212cace820f025f00bf0ea68dbf3f3a5"
+      "436ca63b53bf7bf80ad8d5de7d8359d0b7fed9dbc3ab99";
+
+  // Generate the hashes and compare with expected outputs
+  const int sha512_size = 128;
+  int out_length;
+
+  const char* sha_1 = gandiva::gdv_sha512_hash(ctx_ptr, first_string.c_str(),
+                                               first_string.size(), 
&out_length);
+  std::string sha1_as_str(sha_1, out_length);
+  EXPECT_EQ(sha1_as_str.size(), sha512_size);
+  EXPECT_EQ(sha1_as_str, expected_first_result);
+
+  const char* sha_2 = gandiva::gdv_sha512_hash(ctx_ptr, second_string.c_str(),
+                                               second_string.size(), 
&out_length);
+  std::string sha2_as_str(sha_2, out_length);
+  EXPECT_EQ(sha2_as_str.size(), sha512_size);
+  EXPECT_EQ(sha2_as_str, expected_second_result);
+
+  const char* sha_3 = gandiva::gdv_sha512_hash(ctx_ptr, third_string.c_str(),
+                                               third_string.size(), 
&out_length);
+  std::string sha3_as_str(sha_3, out_length);
+  EXPECT_EQ(sha3_as_str.size(), sha512_size);
+  EXPECT_EQ(sha3_as_str, expected_third_result);
+}
+
 TEST(TestShaHashUtils, TestSha256Varlen) {
   gandiva::ExecutionContext ctx;
 
diff --git a/cpp/src/gandiva/tests/hash_test.cc 
b/cpp/src/gandiva/tests/hash_test.cc
index 0a574f3267..de418f4dd7 100644
--- a/cpp/src/gandiva/tests/hash_test.cc
+++ b/cpp/src/gandiva/tests/hash_test.cc
@@ -149,6 +149,146 @@ TEST_F(TestHash, TestBuf) {
   }
 }
 
+TEST_F(TestHash, TestSha512Simple) {
+  // schema for input fields
+  auto field_a = field("a", int32());
+  auto field_b = field("b", int64());
+  auto field_c = field("c", float32());
+  auto field_d = field("d", float64());
+  auto schema = arrow::schema({field_a, field_b, field_c, field_d});
+
+  // output fields
+  auto res_0 = field("res0", utf8());
+  auto res_1 = field("res1", utf8());
+  auto res_2 = field("res2", utf8());
+  auto res_3 = field("res3", utf8());
+
+  // build expressions.
+  // hashSHA512(a)
+  auto node_a = TreeExprBuilder::MakeField(field_a);
+  auto hashSha512_1 = TreeExprBuilder::MakeFunction("hashSHA512", {node_a}, 
utf8());
+  auto expr_0 = TreeExprBuilder::MakeExpression(hashSha512_1, res_0);
+
+  auto node_b = TreeExprBuilder::MakeField(field_b);
+  auto hashSha512_2 = TreeExprBuilder::MakeFunction("hashSHA512", {node_b}, 
utf8());
+  auto expr_1 = TreeExprBuilder::MakeExpression(hashSha512_2, res_1);
+
+  auto node_c = TreeExprBuilder::MakeField(field_c);
+  auto hashSha512_3 = TreeExprBuilder::MakeFunction("hashSHA512", {node_c}, 
utf8());
+  auto expr_2 = TreeExprBuilder::MakeExpression(hashSha512_3, res_2);
+
+  auto node_d = TreeExprBuilder::MakeField(field_d);
+  auto hashSha512_4 = TreeExprBuilder::MakeFunction("hashSHA512", {node_d}, 
utf8());
+  auto expr_3 = TreeExprBuilder::MakeExpression(hashSha512_4, res_3);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {expr_0, expr_1, expr_2, expr_3},
+                                TestConfiguration(), &projector);
+  ASSERT_OK(status) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 2;
+  auto validity_array = {false, true};
+
+  auto array_int32 = MakeArrowArrayInt32({1, 0}, validity_array);
+
+  auto array_int64 = MakeArrowArrayInt64({1, 0}, validity_array);
+
+  auto array_float32 = MakeArrowArrayFloat32({1.0, 0.0}, validity_array);
+
+  auto array_float64 = MakeArrowArrayFloat64({1.0, 0.0}, validity_array);
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(
+      schema, num_records, {array_int32, array_int64, array_float32, 
array_float64});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  ASSERT_OK(status);
+
+  auto response_int32 = outputs.at(0);
+  auto response_int64 = outputs.at(1);
+  auto response_float32 = outputs.at(2);
+  auto response_float64 = outputs.at(3);
+
+  // Checks if the null and zero representation for numeric values
+  // are consistent between the types
+  EXPECT_ARROW_ARRAY_EQUALS(response_int32, response_int64);
+  EXPECT_ARROW_ARRAY_EQUALS(response_int64, response_float32);
+  EXPECT_ARROW_ARRAY_EQUALS(response_float32, response_float64);
+
+  const int sha512_hash_size = 128;
+
+  // Checks if the hash size in response is correct
+  for (int i = 1; i < num_records; ++i) {
+    const auto& value_at_position = 
response_int32->GetScalar(i).ValueOrDie()->ToString();
+
+    EXPECT_EQ(value_at_position.size(), sha512_hash_size);
+    EXPECT_NE(value_at_position,
+              response_int32->GetScalar(i - 1).ValueOrDie()->ToString());
+  }
+}
+
+TEST_F(TestHash, TestSha512Varlen) {
+  // schema for input fields
+  auto field_a = field("a", utf8());
+  auto schema = arrow::schema({field_a});
+
+  // output fields
+  auto res_0 = field("res0", utf8());
+
+  // build expressions.
+  // hashSHA512(a)
+  auto node_a = TreeExprBuilder::MakeField(field_a);
+  auto hashSha512 = TreeExprBuilder::MakeFunction("hashSHA512", {node_a}, 
utf8());
+  auto expr_0 = TreeExprBuilder::MakeExpression(hashSha512, res_0);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {expr_0}, TestConfiguration(), 
&projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 3;
+
+  std::string first_string =
+      "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY "
+      "[ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]";
+  std::string second_string =
+      "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY "
+      "[ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ";
+
+  auto array_a =
+      MakeArrowArrayUtf8({"foo", first_string, second_string}, {false, true, 
true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  ASSERT_OK(status);
+
+  auto response = outputs.at(0);
+  const int sha512_hash_size = 128;
+
+  EXPECT_EQ(response->null_count(), 0);
+
+  // Checks that the null value was hashed
+  EXPECT_NE(response->GetScalar(0).ValueOrDie()->ToString(), "");
+  EXPECT_EQ(response->GetScalar(0).ValueOrDie()->ToString().size(), 
sha512_hash_size);
+
+  // Check that all generated hashes were different
+  for (int i = 1; i < num_records; ++i) {
+    const auto& value_at_position = 
response->GetScalar(i).ValueOrDie()->ToString();
+
+    EXPECT_EQ(value_at_position.size(), sha512_hash_size);
+    EXPECT_NE(value_at_position, response->GetScalar(i - 
1).ValueOrDie()->ToString());
+  }
+}
+
 TEST_F(TestHash, TestSha256Simple) {
   // schema for input fields
   auto field_a = field("a", int32());
@@ -527,6 +667,92 @@ TEST_F(TestHash, TestSha1FunctionsAlias) {
   EXPECT_ARROW_ARRAY_EQUALS(outputs.at(7), outputs.at(8));  // sha and sha1 
responses
 }
 
+TEST_F(TestHash, TestSha512FunctionsAlias) {
+  // schema for input fields
+  auto field_a = field("a", utf8());
+  auto field_b = field("c", int64());
+  auto field_c = field("e", float64());
+  auto schema = arrow::schema({field_a, field_b, field_c});
+
+  // output fields
+  auto res_0 = field("res0", utf8());
+  auto res_0_sha512 = field("res0sha512", utf8());
+
+  auto res_1 = field("res1", utf8());
+  auto res_1_sha512 = field("res1sha512", utf8());
+
+  auto res_2 = field("res2", utf8());
+  auto res_2_sha512 = field("res2_sha512", utf8());
+
+  // build expressions.
+  // hashSHA512(a) and its alias sha512(a)
+  auto node_a = TreeExprBuilder::MakeField(field_a);
+  auto hashSha2 = TreeExprBuilder::MakeFunction("hashSHA512", {node_a}, 
utf8());
+  auto expr_0 = TreeExprBuilder::MakeExpression(hashSha2, res_0);
+  auto sha512 = TreeExprBuilder::MakeFunction("sha512", {node_a}, utf8());
+  auto expr_0_sha512 = TreeExprBuilder::MakeExpression(sha512, res_0_sha512);
+
+  auto node_b = TreeExprBuilder::MakeField(field_b);
+  auto hashSha2_1 = TreeExprBuilder::MakeFunction("hashSHA512", {node_b}, 
utf8());
+  auto expr_1 = TreeExprBuilder::MakeExpression(hashSha2_1, res_1);
+  auto sha512_1 = TreeExprBuilder::MakeFunction("sha512", {node_b}, utf8());
+  auto expr_1_sha512 = TreeExprBuilder::MakeExpression(sha512_1, res_1_sha512);
+
+  auto node_c = TreeExprBuilder::MakeField(field_c);
+  auto hashSha2_2 = TreeExprBuilder::MakeFunction("hashSHA512", {node_c}, 
utf8());
+  auto expr_2 = TreeExprBuilder::MakeExpression(hashSha2_2, res_2);
+  auto sha512_2 = TreeExprBuilder::MakeFunction("sha512", {node_c}, utf8());
+  auto expr_2_sha512 = TreeExprBuilder::MakeExpression(sha512_2, res_2_sha512);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(
+      schema, {expr_0, expr_0_sha512, expr_1, expr_1_sha512, expr_2, 
expr_2_sha512},
+      TestConfiguration(), &projector);
+  ASSERT_OK(status) << status.message();
+
+  // Create a row-batch with some sample data
+  int32_t num_records = 3;
+
+  std::string first_string =
+      "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY [ˈʏpsilɔn], "
+      "Yen [jɛn], Yoga [ˈjoːgɑ]";
+  std::string second_string =
+      "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY [ˈʏpsilɔn], "
+      "Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ";
+
+  auto array_utf8 =
+      MakeArrowArrayUtf8({"", first_string, second_string}, {false, true, 
true});
+
+  auto validity_array = {false, true, true};
+
+  auto array_int64 = MakeArrowArrayInt64({1, 0, 32423}, validity_array);
+
+  auto array_float64 = MakeArrowArrayFloat64({1.0, 0.0, 324893.3849}, 
validity_array);
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records,
+                                           {array_utf8, array_int64, 
array_float64});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  ASSERT_OK(status);
+
+  // Checks that the hashSHA512 and sha512 responses are equal for 
the first
+  // field, of utf8 type
+  EXPECT_ARROW_ARRAY_EQUALS(outputs.at(0), outputs.at(1));  // hashSha2 and 
sha512
+
+  // Checks that the hashSHA512 and sha512 responses are equal for 
the second
+  // field, of int64 type
+  EXPECT_ARROW_ARRAY_EQUALS(outputs.at(2), outputs.at(3));  // hashSha2 and 
sha512
+
+  // Checks that the hashSHA512 and sha512 responses are equal for 
the third
+  // field, of float64 type
+  EXPECT_ARROW_ARRAY_EQUALS(outputs.at(4),
+                            outputs.at(5));  // hashSha2 and sha512 responses
+}
+
 TEST_F(TestHash, TestSha256FunctionsAlias) {
   // schema for input fields
   auto field_a = field("a", utf8());
@@ -752,4 +978,5 @@ TEST_F(TestHash, TestMD5Varlen) {
     EXPECT_NE(value_at_position, response->GetScalar(i - 
1).ValueOrDie()->ToString());
   }
 }
+
 }  // namespace gandiva

Reply via email to