projjal commented on a change in pull request #10195:
URL: https://github.com/apache/arrow/pull/10195#discussion_r630911301



##########
File path: cpp/src/gandiva/function_registry_string.cc
##########
@@ -236,6 +236,12 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
       NativeFunction("binary_string", {}, DataTypeVector{utf8()}, binary(),
                      kResultNullIfNull, "binary_string", 
NativeFunction::kNeedsContext),
 
+      NativeFunction("to_hex", {}, DataTypeVector{binary()}, utf8(), 
kResultNullIfNull,
+                     "to_hex_binary", NativeFunction::kNeedsContext),
+
+      NativeFunction("from_hex", {}, DataTypeVector{utf8()}, binary(), 
kResultNullIfNull,
+                     "from_hex", NativeFunction::kNeedsContext),

Review comment:
       nit: suggest naming this
   > from_hex_utf8

##########
File path: cpp/src/gandiva/precompiled/string_ops.cc
##########
@@ -1520,4 +1521,83 @@ const char* binary_string(gdv_int64 context, const char* 
text, gdv_int32 text_le
   return ret;
 }
 
+// Gets a binary object and returns its hexadecimal representation. That 
representation
+// maps each byte in the input to a 2-length string containing a hexadecimal 
number.
+// - Examples:
+//     - foo -> 666F6F = 66[f] 6F[o] 6F[o]
+//     - bar -> 626172 = 62[b] 61[a] 72[r]
+FORCE_INLINE
+const char* to_hex_binary(int64_t context, const char* text, int32_t text_len,
+                          int32_t* out_len) {
+  if (text_len == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  auto ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
text_len * 2));
+
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for 
output string");
+    *out_len = 0;
+    return "";
+  }
+
+  uint32_t ret_index = 0;
+  uint32_t max_len = static_cast<uint32_t>(text_len) * 2;
+  uint32_t max_char_to_write = 2 * max_len + 1;
+
+  for (gdv_int32 i = 0; i < text_len; i++) {
+    DCHECK(ret_index >= 0 && ret_index < max_len);
+
+    int32_t ch = static_cast<int32_t>(text[i]) & 0xFF;
+
+    ret_index += snprintf(ret + ret_index, max_char_to_write, "%02X", ch);
+  }
+
+  *out_len = static_cast<int32_t>(ret_index);
+  return ret;
+}
+
+FORCE_INLINE
+const char* from_hex(int64_t context, const char* text, int32_t text_len,
+                     int32_t* out_len) {
+  if (text_len == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  // the input string should have a length multiple of two
+  if (text_len % 2 != 0) {
+    gdv_fn_context_set_error_msg(
+        context, "Error parsing hex string, length was not a multiple of 
two.");
+    *out_len = 0;
+    return "";
+  }
+
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
text_len));

Review comment:
       Shouldn't this be text_len / 2? Every two hex characters decode to one byte.

##########
File path: cpp/src/gandiva/precompiled/string_ops.cc
##########
@@ -1520,4 +1521,42 @@ const char* binary_string(gdv_int64 context, const char* 
text, gdv_int32 text_le
   return ret;
 }
 
+// Gets a binary object and returns its hexadecimal representation. That 
representation
+// maps each byte in the input to a 2-length string containing a hexadecimal 
number.
+// - Examples:
+//     - foo -> 666F6F = 66[f] 6F[o] 6F[o]
+//     - bar -> 626172 = 62[b] 61[a] 72[r]
+FORCE_INLINE
+const char* to_hex_binary(gdv_int64 context, const char* text, gdv_int32 
text_len,
+                          gdv_int32* out_len) {
+  if (text_len == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  auto ret =
+      reinterpret_cast<gdv_utf8>(gdv_fn_context_arena_malloc(context, text_len 
* 2));
+
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for 
output string");
+    *out_len = 0;
+    return "";
+  }
+
+  gdv_uint32 ret_index = 0;
+  gdv_uint32 max_len = static_cast<gdv_uint32>(text_len) * 2;
+  gdv_uint32 max_char_to_write = 4;
+
+  for (gdv_int32 i = 0; i < text_len; i++) {
+    DCHECK(ret_index >= 0 && ret_index < max_len);
+
+    gdv_int32 ch = static_cast<gdv_int32>(text[i]) & 0xFF;
+
+    ret_index += snprintf(ret + ret_index, max_char_to_write, "%02X", ch);

Review comment:
       Why set 2 * max_len + 1 here? I meant that you should allocate a buffer of
size 2 * max_len + 1; otherwise you will be writing past the end of the buffer.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to