This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 4577cab3e81fede477b6a9ec8868133bab325ba2 Author: Mihaly Szjatinya <msz...@pm.me> AuthorDate: Thu Sep 4 18:01:11 2025 +0200 IMPALA-13806: Avoid per-function std::locale creation A new std::locale is constructed at each mask*() function call while in UTF8_MODE. Instead use a static local object. Change-Id: I9a611ba1b175b0ab1c8f0d1de3b2439be70a68f7 Reviewed-on: http://gerrit.cloudera.org:8080/23380 Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Reviewed-by: Quanlong Huang <huangquanl...@gmail.com> --- be/src/exprs/mask-functions-ir.cc | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/be/src/exprs/mask-functions-ir.cc b/be/src/exprs/mask-functions-ir.cc index 689da79d0..fd40d3364 100644 --- a/be/src/exprs/mask-functions-ir.cc +++ b/be/src/exprs/mask-functions-ir.cc @@ -54,7 +54,7 @@ const static int UNMASKED_VAL = -1; /// Returns the masked code point. static inline uint32_t MaskTransform(uint32_t val, int masked_upper_char, int masked_lower_char, int masked_digit_char, int masked_other_char, - std::locale* loc = nullptr) { + const std::locale* loc = nullptr) { // Fast code path for masking ascii characters only. if (loc == nullptr) { if ('A' <= val && val <= 'Z') { @@ -146,11 +146,9 @@ static StringVal MaskSubStrUtf8(FunctionContext* ctx, const StringVal& val, // Collect code points at range [start, end - 1) and mask them. vector<uint32_t> masked_code_points; // Create unicode locale for checking upper/lower cases or digits. - // TODO(quanlong): Avoid creating this everytime if this is time/resource-consuming. - boost::locale::generator gen; - unique_ptr<std::locale> loc = make_unique<std::locale>(gen("en_US.UTF-8")); + static const std::locale& loc = boost::locale::generator()("en_US.UTF-8"); // Check facet existence to avoid predicates throws exception. - if (!std::has_facet<std::ctype<wchar_t>>(*loc)) { + if (!std::has_facet<std::ctype<wchar_t>>(loc)) { ctx->SetError("Cannot mask unicode strings since locale en_US.UTF-8 not found!"); return StringVal(); } @@ -160,7 +158,7 @@ static StringVal MaskSubStrUtf8(FunctionContext* ctx, const StringVal& val, uint32_t codepoint = utf8_codecvt<char>::to_unicode(cvt_state, p, p_end); if (CheckAndWarnCodePoint(ctx, codepoint)) return StringVal::null(); codepoint = MaskTransform(codepoint, masked_upper_char, masked_lower_char, - masked_digit_char, masked_other_char, loc.get()); + masked_digit_char, masked_other_char, &loc); masked_code_points.push_back(codepoint); result_bytes += utf::utf_traits<char>::width(codepoint); ++char_cnt;