lidavidm commented on a change in pull request #10356: URL: https://github.com/apache/arrow/pull/10356#discussion_r635582443
########## File path: cpp/src/arrow/compute/kernels/scalar_string.cc ##########
@@ -494,6 +494,93 @@ const FunctionDoc match_substring_regex_doc( "position.\n" "Null inputs emit null. The pattern must be given in MatchSubstringOptions."), {"strings"}, "MatchSubstringOptions"); + +// SQL LIKE match + +/// Convert a SQL-style LIKE pattern (using '%' and '_') into a regex pattern +std::string MakeLikeRegex(const MatchSubstringOptions& options) { + // Allow . to match \n + std::string like_pattern = "(?s:^"; + like_pattern.reserve(options.pattern.size() + 7); + bool escaped = false; + for (const char c : options.pattern) { + if (!escaped && c == '%') { + like_pattern.append(".*"); + } else if (!escaped && c == '_') { + like_pattern.append("."); + } else if (!escaped && c == '\\') { + escaped = true; + } else { + switch (c) { + case '.': + case '?': + case '+': + case '*': + case '^': + case '$': + case '\\': + case '[': + case '{': + case '(': + case ')': + case '|': {
Review comment:
regex101.com lets you play around with RE2 (listed as 'Golang'). It seems escaping isn't necessary, but it doesn't hurt. Impala's authors may have wanted to be extra-safe.
--
This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org