This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 4577cab3e81fede477b6a9ec8868133bab325ba2
Author: Mihaly Szjatinya <msz...@pm.me>
AuthorDate: Thu Sep 4 18:01:11 2025 +0200

    IMPALA-13806: Avoid per-function std::locale creation
    
    A new std::locale is constructed at each mask*() function call while in
    UTF8_MODE. Instead use a static local object.
    
    Change-Id: I9a611ba1b175b0ab1c8f0d1de3b2439be70a68f7
    Reviewed-on: http://gerrit.cloudera.org:8080/23380
    Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    Reviewed-by: Quanlong Huang <huangquanl...@gmail.com>
---
 be/src/exprs/mask-functions-ir.cc | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/be/src/exprs/mask-functions-ir.cc b/be/src/exprs/mask-functions-ir.cc
index 689da79d0..fd40d3364 100644
--- a/be/src/exprs/mask-functions-ir.cc
+++ b/be/src/exprs/mask-functions-ir.cc
@@ -54,7 +54,7 @@ const static int UNMASKED_VAL = -1;
 /// Returns the masked code point.
 static inline uint32_t MaskTransform(uint32_t val, int masked_upper_char,
     int masked_lower_char, int masked_digit_char, int masked_other_char,
-    std::locale* loc = nullptr) {
+    const std::locale* loc = nullptr) {
   // Fast code path for masking ascii characters only.
   if (loc == nullptr) {
     if ('A' <= val && val <= 'Z') {
@@ -146,11 +146,9 @@ static StringVal MaskSubStrUtf8(FunctionContext* ctx, const StringVal& val,
   // Collect code points at range [start, end - 1) and mask them.
   vector<uint32_t> masked_code_points;
   // Create unicode locale for checking upper/lower cases or digits.
-  // TODO(quanlong): Avoid creating this everytime if this is time/resource-consuming.
-  boost::locale::generator gen;
-  unique_ptr<std::locale> loc = make_unique<std::locale>(gen("en_US.UTF-8"));
+  static const std::locale& loc = boost::locale::generator()("en_US.UTF-8");
   // Check facet existence to avoid predicates throws exception.
-  if (!std::has_facet<std::ctype<wchar_t>>(*loc)) {
+  if (!std::has_facet<std::ctype<wchar_t>>(loc)) {
     ctx->SetError("Cannot mask unicode strings since locale en_US.UTF-8 not found!");
     return StringVal();
   }
@@ -160,7 +158,7 @@ static StringVal MaskSubStrUtf8(FunctionContext* ctx, const StringVal& val,
     uint32_t codepoint = utf8_codecvt<char>::to_unicode(cvt_state, p, p_end);
     if (CheckAndWarnCodePoint(ctx, codepoint)) return StringVal::null();
     codepoint = MaskTransform(codepoint, masked_upper_char, masked_lower_char,
-        masked_digit_char, masked_other_char, loc.get());
+        masked_digit_char, masked_other_char, &loc);
     masked_code_points.push_back(codepoint);
     result_bytes += utf::utf_traits<char>::width(codepoint);
     ++char_cnt;

Reply via email to