rkavanap commented on a change in pull request #11049:
URL: https://github.com/apache/arrow/pull/11049#discussion_r741133878
##########
File path: cpp/src/gandiva/precompiled/string_ops.cc
##########
@@ -1762,6 +1762,41 @@ const char* replace_utf8_utf8_utf8(gdv_int64 context,
const char* text,
out_len);
}
+// Returns the quoted string (Includes escape character for any single quotes)
+// E.g. DONT -> 'DONT'
+// DON'T -> 'DON\'T'
+FORCE_INLINE
+const char* quote_utf8(gdv_int64 context, const char* in, gdv_int32 in_len,
+ gdv_int32* out_len) {
+ if (in_len <= 0) {
+ *out_len = 0;
+ return "";
+ }
+ // try to allocate double size output string (worst case)
+ auto out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context,
in_len * 2));
Review comment:
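Shouldn't this be in_len * 2 + 2 for the worst case of the loop? Every input
character may need an escape character, and the opening and closing quotes
take one byte each.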
##########
File path: cpp/src/gandiva/precompiled/string_ops_test.cc
##########
@@ -912,6 +912,33 @@ TEST(TestStringOps, TestReverse) {
ctx.Reset();
}
+TEST(TestStringOps, TestQuote) {
+ gandiva::ExecutionContext ctx;
+ uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+ gdv_int32 out_len = 0;
+ const char* out_str;
+
+ out_str = quote_utf8(ctx_ptr, "dont", 4, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "\'dont\'");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = quote_utf8(ctx_ptr, "abc", 3, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "\'abc\'");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = quote_utf8(ctx_ptr, "don't", 5, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "\'don\\'t\'");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = quote_utf8(ctx_ptr, "", 0, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = quote_utf8(ctx_ptr, "'", 1, &out_len);
Review comment:
This is a good test. Maybe add another one like it with more quotes, e.g.
"''''''''''''''''''''''"?
##########
File path: cpp/src/gandiva/precompiled/string_ops.cc
##########
@@ -1762,6 +1762,41 @@ const char* replace_utf8_utf8_utf8(gdv_int64 context,
const char* text,
out_len);
}
+// Returns the quoted string (Includes escape character for any single quotes)
+// E.g. DONT -> 'DONT'
+// DON'T -> 'DON\'T'
+FORCE_INLINE
+const char* quote_utf8(gdv_int64 context, const char* in, gdv_int32 in_len,
+ gdv_int32* out_len) {
+ if (in_len <= 0) {
+ *out_len = 0;
+ return "";
+ }
+ // try to allocate double size output string (worst case)
+ auto out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context,
in_len * 2));
+ if (out == nullptr) {
+ gdv_fn_context_set_error_msg(context, "Could not allocate memory for
output string");
+ *out_len = 0;
+ return "";
+ }
+ // The output string should start with a single quote
+ out[0] = '\'';
+ gdv_int32 counter = 1;
+ for (int i = 0; i < in_len; i++) {
+ if (memcmp(in + i, "'", 1) == 0) {
+ out[counter] = '\\';
+ counter++;
+ out[counter] = '\'';
+ } else {
+ out[counter] = in[i];
+ }
+ counter++;
+ }
+ out[counter] = '\'';
Review comment:
Won't this overflow when the input consists entirely of quotes? Say in_len is 2
and both characters are quotes: we allocate only 4 bytes, but this logic needs
6 bytes, because the output is '\'\'' (opening quote, two escaped quotes,
closing quote), which is 6 chars and not 4.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]