vvellanki commented on a change in pull request #11551:
URL: https://github.com/apache/arrow/pull/11551#discussion_r745316220
##########
File path: cpp/src/gandiva/gdv_function_stubs.cc
##########
@@ -794,6 +795,94 @@ const char* gdv_fn_initcap_utf8(int64_t context, const
char* data, int32_t data_
*out_len = out_idx;
return out;
}
+
+GANDIVA_EXPORT
+const char* gdv_fn_mask_first_n(int64_t context, const char* data, int32_t
data_len,
+ int32_t n_to_mask, int32_t* out_len) {
+ if (data_len <= 0) {
+ *out_len = 0;
+ return nullptr;
+ }
+
+ int32_t end_idx = data_len < n_to_mask ? data_len : n_to_mask;
+
+ *out_len = data_len;
+
+ char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context,
*out_len));
+ if (out == nullptr) {
+ gdv_fn_context_set_error_msg(context, "Could not allocate memory for
output string");
+ *out_len = 0;
+ return nullptr;
+ }
+
+ static char mask_array[] = {
Review comment:
This mask_array is the same for mask_first_n and mask_last_n. Can it be
moved to file scope instead of being defined inside this function?
##########
File path: cpp/src/gandiva/gdv_function_stubs.cc
##########
@@ -794,6 +795,94 @@ const char* gdv_fn_initcap_utf8(int64_t context, const
char* data, int32_t data_
*out_len = out_idx;
return out;
}
+
+GANDIVA_EXPORT
+const char* gdv_fn_mask_first_n(int64_t context, const char* data, int32_t
data_len,
+ int32_t n_to_mask, int32_t* out_len) {
+ if (data_len <= 0) {
+ *out_len = 0;
+ return nullptr;
+ }
+
+ int32_t end_idx = data_len < n_to_mask ? data_len : n_to_mask;
+
+ *out_len = data_len;
+
+ char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context,
*out_len));
+ if (out == nullptr) {
+ gdv_fn_context_set_error_msg(context, "Could not allocate memory for
output string");
+ *out_len = 0;
+ return nullptr;
+ }
+
+ static char mask_array[] = {
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
'\0', '\0',
Review comment:
Can you populate this for all 256 byte values (0-255)? The input need not be
< 128. Also, organise these into 16 values per line so that the table is easy
to read. As of now, there are 14 values per line - you can organise them as
16 lines of 16 values each.
##########
File path: cpp/src/gandiva/gdv_function_stubs.cc
##########
@@ -794,6 +795,94 @@ const char* gdv_fn_initcap_utf8(int64_t context, const
char* data, int32_t data_
*out_len = out_idx;
return out;
}
+
+GANDIVA_EXPORT
+const char* gdv_fn_mask_first_n(int64_t context, const char* data, int32_t
data_len,
+ int32_t n_to_mask, int32_t* out_len) {
+ if (data_len <= 0) {
+ *out_len = 0;
+ return nullptr;
+ }
+
+ int32_t end_idx = data_len < n_to_mask ? data_len : n_to_mask;
+
+ *out_len = data_len;
+
+ char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context,
*out_len));
+ if (out == nullptr) {
+ gdv_fn_context_set_error_msg(context, "Could not allocate memory for
output string");
+ *out_len = 0;
+ return nullptr;
+ }
+
+ static char mask_array[] = {
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
'\0', '\0',
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
'\0', '\0',
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
'\0', '\0',
+ '\0', '\0', '\0', '\0', '\0', '\0', 'n', 'n', 'n', 'n', 'n', 'n',
'n', 'n',
+ 'n', 'n', '\0', '\0', '\0', '\0', '\0', '\0', '\0', 'X', 'X', 'X',
'X', 'X',
+ 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X',
'X', 'X',
+ 'X', 'X', 'X', 'X', 'X', 'X', 'X', '\0', '\0', '\0', '\0', '\0',
'\0', 'x',
+ 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x',
+ 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', '\0'};
+
+ // do the masking
+ for (int i = 0; i < end_idx; ++i) {
+ if (mask_array[(unsigned char)data[i]] != '\0') {
Review comment:
Every if-condition hurts performance. Why not populate the output value
for every byte in the array, so that unmasked bytes map to themselves? E.g.
mask_array[1] = (char)1
This way, you can remove this if-condition.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]