[Chore](gutil) remove gutil/numbers util/sort_heap (#53803)

panxiaolei Wed, 23 Jul 2025 22:29:27 -0700

This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/master by this push:
     new 9e7ab8d0292 [Chore](gutil) remove gutil/numbers util/sort_heap (#53803)
9e7ab8d0292 is described below

commit 9e7ab8d0292ce54e31287e7288894a25ba2905df
Author: Pxl <[email protected]>
AuthorDate: Thu Jul 24 13:29:13 2025 +0800

    [Chore](gutil) remove gutil/numbers util/sort_heap (#53803)
    
    remove gutil/numbers util/sort_heap
---
 be/src/gutil/CMakeLists.txt                        |   1 -
 be/src/gutil/strings/numbers.cc                    | 195 ---------------------
 be/src/gutil/strings/numbers.h                     |  73 --------
 be/src/olap/types.h                                |   8 +-
 be/src/tools/meta_tool.cpp                         |   1 -
 be/src/util/mysql_row_buffer.cpp                   |   8 +-
 be/src/util/sort_heap.h                            | 121 -------------
 be/src/util/to_string.h                            |  74 ++++++++
 be/src/vec/data_types/data_type_number.cpp         |   8 +-
 be/src/vec/data_types/data_type_number_base.cpp    |  10 +-
 .../data_types/serde/data_type_number_serde.cpp    |   4 +-
 be/src/vec/exec/format/column_type_convert.h       |   8 +-
 be/test/gutil/strings/numbers_test.cpp             |  71 ++++----
 be/test/olap/delete_handler_test.cpp               |   1 -
 be/test/olap/rowid_conversion_test.cpp             |   1 -
 be/test/olap/tablet_test.cpp                       |   1 -
 be/test/testutil/test_util.cpp                     |   1 -
 be/test/util/sort_heap_test.cpp                    |  94 ----------
 be/test/vec/function/cast/cast_test.h              |  12 +-
 19 files changed, 130 insertions(+), 562 deletions(-)

diff --git a/be/src/gutil/CMakeLists.txt b/be/src/gutil/CMakeLists.txt
index 9c5a6f6231b..0a1b8375b3b 100644
--- a/be/src/gutil/CMakeLists.txt
+++ b/be/src/gutil/CMakeLists.txt
@@ -26,7 +26,6 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/gutil")
 SET(SOURCE_FILES
         hash/city.cc
         ref_counted.cc
-        strings/numbers.cc
         threading/thread_collision_warner.cc)
 
 if ("${CMAKE_BUILD_TARGET_ARCH}" STREQUAL "x86" OR "${CMAKE_BUILD_TARGET_ARCH}" STREQUAL "x86_64")
diff --git a/be/src/gutil/strings/numbers.cc b/be/src/gutil/strings/numbers.cc
deleted file mode 100644
index 347c57b994f..00000000000
--- a/be/src/gutil/strings/numbers.cc
+++ /dev/null
@@ -1,195 +0,0 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-// Refactored from contributions of various authors in strings/strutil.cc
-//
-// This file contains string processing functions related to
-// numeric values.
-
-#include "gutil/strings/numbers.h"
-
-#include <fmt/compile.h>
-#include <fmt/format.h>
-
-#include <cfloat>
-
-#include "absl/strings/ascii.h"
-#include "butil/macros.h"
-#include "common/logging.h"
-
-bool safe_strtof(const char* str, float* value) {
-    char* endptr;
-#ifdef _MSC_VER // has no strtof()
-    *value = strtod(str, &endptr);
-#else
-    *value = strtof(str, &endptr);
-#endif
-    if (endptr != str) {
-        while (absl::ascii_isspace(*endptr)) ++endptr;
-    }
-    // Ignore range errors from strtod/strtof.
-    // The values it returns on underflow and
-    // overflow are the right fallback in a
-    // robust setting.
-    return *str != '\0' && *endptr == '\0';
-}
-
-bool safe_strtod(const char* str, double* value) {
-    char* endptr;
-    *value = strtod(str, &endptr);
-    if (endptr != str) {
-        while (absl::ascii_isspace(*endptr)) ++endptr;
-    }
-    // Ignore range errors from strtod.  The values it
-    // returns on underflow and overflow are the right
-    // fallback in a robust setting.
-    return *str != '\0' && *endptr == '\0';
-}
-
-bool safe_strtof(const std::string& str, float* value) {
-    return safe_strtof(str.c_str(), value);
-}
-
-bool safe_strtod(const std::string& str, double* value) {
-    return safe_strtod(str.c_str(), value);
-}
-
-// ----------------------------------------------------------------------
-// SimpleDtoa()
-// SimpleFtoa()
-// DoubleToBuffer()
-// FloatToBuffer()
-//    We want to print the value without losing precision, but we also do
-//    not want to print more digits than necessary.  This turns out to be
-//    trickier than it sounds.  Numbers like 0.2 cannot be represented
-//    exactly in binary.  If we print 0.2 with a very large precision,
-//    e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
-//    On the other hand, if we set the precision too low, we lose
-//    significant digits when printing numbers that actually need them.
-//    It turns out there is no precision value that does the right thing
-//    for all numbers.
-//
-//    Our strategy is to first try printing with a precision that is never
-//    over-precise, then parse the result with strtod() to see if it
-//    matches.  If not, we print again with a precision that will always
-//    give a precise result, but may use more digits than necessary.
-//
-//    An arguably better strategy would be to use the algorithm described
-//    in "How to Print Floating-Point Numbers Accurately" by Steele &
-//    White, e.g. as implemented by David M. Gay's dtoa().  It turns out,
-//    however, that the following implementation is about as fast as
-//    DMG's code.  Furthermore, DMG's code locks mutexes, which means it
-//    will not scale well on multi-core machines.  DMG's code is slightly
-//    more accurate (in that it will never use more digits than
-//    necessary), but this is probably irrelevant for most users.
-//
-//    Rob Pike and Ken Thompson also have an implementation of dtoa() in
-//    third_party/fmt/fltfmt.cc.  Their implementation is similar to this
-//    one in that it makes guesses and then uses strtod() to check them.
-//    Their implementation is faster because they use their own code to
-//    generate the digits in the first place rather than use snprintf(),
-//    thus avoiding format string parsing overhead.  However, this makes
-//    it considerably more complicated than the following implementation,
-//    and it is embedded in a larger library.  If speed turns out to be
-//    an issue, we could re-implement this in terms of their
-//    implementation.
-// ----------------------------------------------------------------------
-int DoubleToBuffer(double value, int width, char* buffer) {
-    // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
-    // platforms these days.  Just in case some system exists where DBL_DIG
-    // is significantly larger -- and risks overflowing our buffer -- we have
-    // this assert.
-    COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
-
-    int snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG, value);
-
-    // The snprintf should never overflow because the buffer is significantly
-    // larger than the precision we asked for.
-    DCHECK(snprintf_result > 0 && snprintf_result < width);
-
-    if (strtod(buffer, nullptr) != value) {
-        snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG + 2, value);
-
-        // Should never overflow; see above.
-        DCHECK(snprintf_result > 0 && snprintf_result < width);
-    }
-
-    return snprintf_result;
-}
-
-int FloatToBuffer(float value, int width, char* buffer) {
-    // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
-    // platforms these days.  Just in case some system exists where FLT_DIG
-    // is significantly larger -- and risks overflowing our buffer -- we have
-    // this assert.
-    COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
-
-    int snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG, value);
-
-    // The snprintf should never overflow because the buffer is significantly
-    // larger than the precision we asked for.
-    DCHECK(snprintf_result > 0 && snprintf_result < width);
-
-    float parsed_value;
-    if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
-        snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG + 2, value);
-
-        // Should never overflow; see above.
-        DCHECK(snprintf_result > 0 && snprintf_result < width);
-    }
-
-    return snprintf_result;
-}
-
-// refer to: https://en.cppreference.com/w/cpp/types/numeric_limits/max_digits10.html
-int FastDoubleToBuffer(double value, char* buffer) {
-    char* end = nullptr;
-    // output NaN and Infinity to be compatible with most of the implementations
-    if (std::isnan(value)) {
-        static constexpr char nan_str[] = "NaN";
-        static constexpr int nan_str_len = sizeof(nan_str) - 1;
-        memcpy(buffer, nan_str, nan_str_len);
-        end = buffer + nan_str_len;
-    } else if (std::isinf(value)) {
-        static constexpr char inf_str[] = "Infinity";
-        static constexpr int inf_str_len = sizeof(inf_str) - 1;
-        static constexpr char neg_inf_str[] = "-Infinity";
-        static constexpr int neg_inf_str_len = sizeof(neg_inf_str) - 1;
-        if (value > 0) {
-            memcpy(buffer, inf_str, inf_str_len);
-            end = buffer + inf_str_len;
-        } else {
-            memcpy(buffer, neg_inf_str, neg_inf_str_len);
-            end = buffer + neg_inf_str_len;
-        }
-    } else {
-        end = fmt::format_to(buffer, FMT_COMPILE("{}"), value);
-    }
-    *end = '\0';
-    return end - buffer;
-}
-
-int FastFloatToBuffer(float value, char* buffer) {
-    char* end = nullptr;
-    // output NaN and Infinity to be compatible with most of the implementations
-    if (std::isnan(value)) {
-        static constexpr char nan_str[] = "NaN";
-        static constexpr int nan_str_len = sizeof(nan_str) - 1;
-        memcpy(buffer, nan_str, nan_str_len);
-        end = buffer + nan_str_len;
-    } else if (std::isinf(value)) {
-        static constexpr char inf_str[] = "Infinity";
-        static constexpr int inf_str_len = sizeof(inf_str) - 1;
-        static constexpr char neg_inf_str[] = "-Infinity";
-        static constexpr int neg_inf_str_len = sizeof(neg_inf_str) - 1;
-        if (value > 0) {
-            memcpy(buffer, inf_str, inf_str_len);
-            end = buffer + inf_str_len;
-        } else {
-            memcpy(buffer, neg_inf_str, neg_inf_str_len);
-            end = buffer + neg_inf_str_len;
-        }
-    } else {
-        end = fmt::format_to(buffer, FMT_COMPILE("{}"), value);
-    }
-    *end = '\0';
-    return end - buffer;
-}
diff --git a/be/src/gutil/strings/numbers.h b/be/src/gutil/strings/numbers.h
deleted file mode 100644
index a4faa745583..00000000000
--- a/be/src/gutil/strings/numbers.h
+++ /dev/null
@@ -1,73 +0,0 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-// Maintainer: [email protected] (Michael Chastain)
-//
-// Convert strings to numbers or numbers to strings.
-
-#pragma once
-
-#include <cstdint>
-
-// ----------------------------------------------------------------------
-// FastIntToBuffer()
-// FastHexToBuffer()
-// FastHex64ToBuffer()
-// FastHex32ToBuffer()
-// FastTimeToBuffer()
-//    These are intended for speed.  FastIntToBuffer() assumes the
-//    integer is non-negative.  FastHexToBuffer() puts output in
-//    hex rather than decimal.  FastTimeToBuffer() puts the output
-//    into RFC822 format.
-//
-//    FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format,
-//    padded to exactly 16 bytes (plus one byte for '\0')
-//
-//    FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format,
-//    padded to exactly 8 bytes (plus one byte for '\0')
-//
-//    All functions take the output buffer as an arg.  FastInt() uses
-//    at most 22 bytes, FastTime() uses exactly 30 bytes.  They all
-//    return a pointer to the beginning of the output, which for
-//    FastHex() may not be the beginning of the input buffer.  (For
-//    all others, we guarantee that it is.)
-//
-//    NOTE: In 64-bit land, sizeof(time_t) is 8, so it is possible
-//    to pass to FastTimeToBuffer() a time whose year cannot be
-//    represented in 4 digits. In this case, the output buffer
-//    will contain the string "Invalid:<value>"
-// ----------------------------------------------------------------------
-
-// Previously documented minimums -- the buffers provided must be at least this
-// long, though these numbers are subject to change:
-//     Int32, UInt32:        12 bytes
-//     Int64, UInt64, Hex:   22 bytes
-//     Time:                 30 bytes
-//     Hex32:                 9 bytes
-//     Hex64:                17 bytes
-// Use kFastToBufferSize rather than hardcoding constants.
-static const int kFastToBufferSize = 32;
-
-// ----------------------------------------------------------------------
-// SimpleDtoa()
-// SimpleFtoa()
-// DoubleToBuffer()
-// FloatToBuffer()
-//    Description: converts a double or float to a string which, if
-//    passed to strtod(), will produce the exact same original double
-//    (except in case of NaN; all NaNs are considered the same value).
-//    We try to keep the string short but it's not guaranteed to be as
-//    short as possible.
-//
-//    DoubleToBuffer() and FloatToBuffer() write the text to the given
-//    buffer and return it.  The buffer must be at least
-//    kDoubleToBufferSize bytes for doubles and kFloatToBufferSize
-//    bytes for floats.  kFastToBufferSize is also guaranteed to be large
-//    enough to hold either.
-//
-//    Return value: string
-// ----------------------------------------------------------------------
-
-int DoubleToBuffer(double i, int width, char* buffer);
-int FloatToBuffer(float i, int width, char* buffer);
-
-int FastDoubleToBuffer(double i, char* buffer);
-int FastFloatToBuffer(float i, char* buffer);
diff --git a/be/src/olap/types.h b/be/src/olap/types.h
index 03d6a2ecab1..b5b107d382c 100644
--- a/be/src/olap/types.h
+++ b/be/src/olap/types.h
@@ -36,7 +36,6 @@
 #include "common/config.h"
 #include "common/consts.h"
 #include "common/status.h"
-#include "gutil/strings/numbers.h"
 #include "olap/decimal12.h"
 #include "olap/olap_common.h"
 #include "olap/olap_define.h"
@@ -49,6 +48,7 @@
 #include "util/mysql_global.h"
 #include "util/slice.h"
 #include "util/string_parser.hpp"
+#include "util/to_string.h"
 #include "util/types.h"
 #include "vec/common/arena.h"
 #include "vec/core/extended_types.h"
@@ -1044,8 +1044,7 @@ struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_FLOAT>
     }
     static std::string to_string(const void* src) {
         char buf[1024] = {'\0'};
-        int length =
-                FloatToBuffer(*reinterpret_cast<const CppType*>(src), MAX_FLOAT_STR_LENGTH, buf);
+        int length = to_buffer(*reinterpret_cast<const CppType*>(src), MAX_FLOAT_STR_LENGTH, buf);
         DCHECK(length >= 0) << "gcvt float failed, float value="
                             << *reinterpret_cast<const CppType*>(src);
         return std::string(buf);
@@ -1066,8 +1065,7 @@ struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DOUBLE>
     }
     static std::string to_string(const void* src) {
         char buf[1024] = {'\0'};
-        int length =
-                DoubleToBuffer(*reinterpret_cast<const CppType*>(src), MAX_DOUBLE_STR_LENGTH, buf);
+        int length = to_buffer(*reinterpret_cast<const CppType*>(src), MAX_DOUBLE_STR_LENGTH, buf);
         DCHECK(length >= 0) << "gcvt float failed, float value="
                             << *reinterpret_cast<const CppType*>(src);
         return std::string(buf);
diff --git a/be/src/tools/meta_tool.cpp b/be/src/tools/meta_tool.cpp
index 103846b80e1..199214566fa 100644
--- a/be/src/tools/meta_tool.cpp
+++ b/be/src/tools/meta_tool.cpp
@@ -27,7 +27,6 @@
 #include <string>
 
 #include "common/status.h"
-#include "gutil/strings/numbers.h"
 #include "io/fs/file_reader.h"
 #include "io/fs/local_file_system.h"
 #include "json2pb/pb_to_json.h"
diff --git a/be/src/util/mysql_row_buffer.cpp b/be/src/util/mysql_row_buffer.cpp
index 497295b6ca2..9818af499e7 100644
--- a/be/src/util/mysql_row_buffer.cpp
+++ b/be/src/util/mysql_row_buffer.cpp
@@ -30,11 +30,11 @@
 
 #include "common/logging.h"
 #include "date_func.h"
-#include "gutil/strings/numbers.h"
 #include "olap/olap_common.h"
 #include "runtime/decimalv2_value.h"
 #include "runtime/large_int_value.h"
 #include "util/mysql_global.h"
+#include "util/to_string.h"
 #include "vec/runtime/ipv4_value.h"
 #include "vec/runtime/ipv6_value.h"
 #include "vec/runtime/vdatetime_value.h" // IWYU pragma: keep
@@ -180,10 +180,8 @@ static char* add_largeint(int128_t data, char* pos, bool dynamic_mode) {
 template <typename T>
 char* add_float(T data, char* pos, bool dynamic_mode) {
     int length = 0;
-    if constexpr (std::is_same_v<T, float>) {
-        length = FastFloatToBuffer(data, pos + !dynamic_mode);
-    } else if constexpr (std::is_same_v<T, double>) {
-        length = FastDoubleToBuffer(data, pos + !dynamic_mode);
+    if constexpr (std::is_same_v<T, float> || std::is_same_v<T, double>) {
+        length = fast_to_buffer(data, pos + !dynamic_mode);
     }
     if (!dynamic_mode) {
         int1store(pos++, length);
diff --git a/be/src/util/sort_heap.h b/be/src/util/sort_heap.h
deleted file mode 100644
index b6df8f395e2..00000000000
--- a/be/src/util/sort_heap.h
+++ /dev/null
@@ -1,121 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <algorithm>
-#include <cassert>
-#include <queue>
-#include <utility>
-
-#include "common/compiler_util.h" // IWYU pragma: keep
-
-namespace doris {
-
-template <typename T, typename Sequence, typename Compare>
-class SortingHeap {
-public:
-    SortingHeap(const Compare& comp) : _comp(comp) {}
-
-    bool is_valid() const { return !_queue.empty(); }
-
-    T top() { return _queue.front(); }
-
-    size_t size() { return _queue.size(); }
-
-    bool empty() { return _queue.empty(); }
-
-    T& next_child() { return _queue[_next_child_index()]; }
-
-    void replace_top(T new_top) {
-        *_queue.begin() = new_top;
-        update_top();
-    }
-
-    void remove_top() {
-        std::pop_heap(_queue.begin(), _queue.end(), _comp);
-        _queue.pop_back();
-        _next_idx = 0;
-    }
-
-    void push(T cursor) {
-        _queue.emplace_back(cursor);
-        std::push_heap(_queue.begin(), _queue.end(), _comp);
-        _next_idx = 0;
-    }
-
-    Sequence&& sorted_seq() {
-        std::sort_heap(_queue.begin(), _queue.end(), _comp);
-        return std::move(_queue);
-    }
-
-private:
-    Sequence _queue;
-    Compare _comp;
-
-    /// Cache comparison between first and second child if the order in queue has not been changed.
-    size_t _next_idx = 0;
-
-    size_t _next_child_index() {
-        if (_next_idx == 0) {
-            _next_idx = 1;
-            if (_queue.size() > 2 && _comp(_queue[1], _queue[2])) ++_next_idx;
-        }
-
-        return _next_idx;
-    }
-
-    void update_top() {
-        size_t size = _queue.size();
-        if (size < 2) return;
-
-        auto begin = _queue.begin();
-
-        size_t child_idx = _next_child_index();
-        auto child_it = begin + child_idx;
-
-        /// Check if we are in order.
-        if (_comp(*child_it, *begin)) return;
-
-        _next_idx = 0;
-
-        auto curr_it = begin;
-        auto top = *begin;
-        do {
-            /// We are not in heap-order, swap the parent with it's largest child.
-            *curr_it = *child_it;
-            curr_it = child_it;
-
-            // recompute the child based off of the updated parent
-            child_idx = 2 * child_idx + 1;
-
-            if (child_idx >= size) break;
-
-            child_it = begin + child_idx;
-
-            if ((child_idx + 1) < size && _comp(*child_it, *(child_it + 1))) {
-                /// Right child exists and is greater than left child.
-                ++child_it;
-                ++child_idx;
-            }
-
-            /// Check if we are in order.
-        } while (!(_comp(*child_it, top)));
-        *curr_it = top;
-    }
-};
-} // namespace doris
diff --git a/be/src/util/to_string.h b/be/src/util/to_string.h
index 203ea8a0ae4..6132f14c0b4 100644
--- a/be/src/util/to_string.h
+++ b/be/src/util/to_string.h
@@ -18,14 +18,20 @@
 
 #pragma once
 
+#include <absl/strings/ascii.h>
+#include <fmt/compile.h>
 #include <fmt/format.h>
+#include <glog/logging.h>
 
+#include <cfloat>
 #include <map>
 #include <set>
 #include <string>
+#include <type_traits>
 #include <vector>
 
 namespace doris {
+
 template <typename T>
 std::string to_string(const T& t) {
     return fmt::format("{}", t);
@@ -70,4 +76,72 @@ std::string to_string(const std::set<T>& s) {
     return "{" + to_string(s.begin(), s.end()) + "}";
 }
 
+// refer to: https://en.cppreference.com/w/cpp/types/numeric_limits/max_digits10.html
+template <typename T>
+inline int fast_to_buffer(T value, char* buffer) {
+    char* end = nullptr;
+    // output NaN and Infinity to be compatible with most of the implementations
+    if (std::isnan(value)) {
+        static constexpr char nan_str[] = "NaN";
+        static constexpr int nan_str_len = sizeof(nan_str) - 1;
+        memcpy(buffer, nan_str, nan_str_len);
+        end = buffer + nan_str_len;
+    } else if (std::isinf(value)) {
+        static constexpr char inf_str[] = "Infinity";
+        static constexpr int inf_str_len = sizeof(inf_str) - 1;
+        static constexpr char neg_inf_str[] = "-Infinity";
+        static constexpr int neg_inf_str_len = sizeof(neg_inf_str) - 1;
+        if (value > 0) {
+            memcpy(buffer, inf_str, inf_str_len);
+            end = buffer + inf_str_len;
+        } else {
+            memcpy(buffer, neg_inf_str, neg_inf_str_len);
+            end = buffer + neg_inf_str_len;
+        }
+    } else {
+        end = fmt::format_to(buffer, FMT_COMPILE("{}"), value);
+    }
+    *end = '\0';
+    return end - buffer;
+}
+
+template <typename T>
+int to_buffer(const T& value, int width, char* buffer) {
+    constexpr int DIG = (std::is_same_v<T, double> ? DBL_DIG : FLT_DIG);
+    int snprintf_result = snprintf(buffer, width, "%.*g", DIG, value);
+    // The snprintf should never overflow because the buffer is significantly
+    // larger than the precision we asked for.
+    DCHECK(snprintf_result > 0 && snprintf_result < width);
+
+    bool need_reformat = false;
+    if constexpr (std::is_same_v<T, double>) {
+        need_reformat = (strtod(buffer, nullptr) != value);
+    } else {
+        auto safe_strtof = [](const char* str, float* value) {
+            char* endptr;
+            *value = strtof(str, &endptr);
+            if (endptr != str) {
+                while (absl::ascii_isspace(*endptr)) {
+                    ++endptr;
+                }
+            }
+            // Ignore range errors from strtod/strtof.
+            // The values it returns on underflow and
+            // overflow are the right fallback in a
+            // robust setting.
+            return *str != '\0' && *endptr == '\0';
+        };
+
+        if (float parsed_value; !safe_strtof(buffer, &parsed_value) || parsed_value != value) {
+            need_reformat = true;
+        }
+    }
+
+    if (need_reformat) {
+        snprintf_result = snprintf(buffer, width, "%.*g", DIG + 2, value);
+        // Should never overflow; see above.
+        DCHECK(snprintf_result > 0 && snprintf_result < width);
+    }
+    return snprintf_result;
+}
 } // namespace doris
diff --git a/be/src/vec/data_types/data_type_number.cpp b/be/src/vec/data_types/data_type_number.cpp
index c4b1bc1213e..e7d295fce58 100644
--- a/be/src/vec/data_types/data_type_number.cpp
+++ b/be/src/vec/data_types/data_type_number.cpp
@@ -19,12 +19,8 @@
 
 #include <fmt/format.h>
 
-#include <cmath>
-#include <type_traits>
-
-#include "gutil/strings/numbers.h"
 #include "util/mysql_global.h"
-#include "vec/io/io_helper.h"
+#include "util/to_string.h"
 
 namespace doris::vectorized {
 
@@ -47,7 +43,7 @@ void DataTypeNumber<T>::push_number(
         const typename PrimitiveTypeTraits<T>::ColumnItemType& num) const {
     if constexpr (T == TYPE_FLOAT) {
         char buf[MAX_FLOAT_STR_LENGTH + 2];
-        int len = FloatToBuffer(num, MAX_FLOAT_STR_LENGTH + 2, buf);
+        int len = to_buffer(num, MAX_FLOAT_STR_LENGTH + 2, buf);
         chars.insert(buf, buf + len);
     } else if constexpr (T == TYPE_LARGEINT ||
                          std::numeric_limits<
diff --git a/be/src/vec/data_types/data_type_number_base.cpp b/be/src/vec/data_types/data_type_number_base.cpp
index 7543aa5df2e..d4c1c4558b8 100644
--- a/be/src/vec/data_types/data_type_number_base.cpp
+++ b/be/src/vec/data_types/data_type_number_base.cpp
@@ -31,11 +31,11 @@
 
 #include "agent/be_exec_version_manager.h"
 #include "common/cast_set.h"
-#include "gutil/strings/numbers.h"
 #include "runtime/large_int_value.h"
 #include "runtime/primitive_type.h"
 #include "util/mysql_global.h"
 #include "util/string_parser.hpp"
+#include "util/to_string.h"
 #include "vec/columns/column.h"
 #include "vec/columns/column_const.h"
 #include "vec/columns/column_vector.h"
@@ -63,10 +63,10 @@ void DataTypeNumberBase<T>::to_string(const IColumn& column, size_t row_num,
     } else if constexpr (std::is_same_v<typename PrimitiveTypeTraits<T>::ColumnItemType, float>) {
         // fmt::format_to maybe get inaccurate results at float type, so we use gutil implement.
         char buf[MAX_FLOAT_STR_LENGTH + 2];
-        int len = FloatToBuffer(assert_cast<const typename PrimitiveTypeTraits<T>::ColumnType&,
-                                            TypeCheckOnRelease::DISABLE>(*ptr)
-                                        .get_element(row_num),
-                                MAX_FLOAT_STR_LENGTH + 2, buf);
+        int len = to_buffer(assert_cast<const typename PrimitiveTypeTraits<T>::ColumnType&,
+                                        TypeCheckOnRelease::DISABLE>(*ptr)
+                                    .get_element(row_num),
+                            MAX_FLOAT_STR_LENGTH + 2, buf);
         ostr.write(buf, len);
     } else if constexpr (std::is_integral<typename PrimitiveTypeTraits<T>::ColumnItemType>::value ||
                          std::numeric_limits<
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index 10566eafe29..b3d3c9d85fd 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -21,10 +21,10 @@
 
 #include "common/exception.h"
 #include "common/status.h"
-#include "gutil/strings/numbers.h"
 #include "util/jsonb_document.h"
 #include "util/jsonb_writer.h"
 #include "util/mysql_global.h"
+#include "util/to_string.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/core/types.h"
 #include "vec/functions/cast/cast_to_boolean.h"
@@ -177,7 +177,7 @@ Status DataTypeNumberSerDe<T>::serialize_one_cell_to_json(const IColumn& column,
     } else if constexpr (T == TYPE_FLOAT) {
         // fmt::format_to maybe get inaccurate results at float type, so we use gutil implement.
         char buf[MAX_FLOAT_STR_LENGTH + 2];
-        int len = FloatToBuffer(data, MAX_FLOAT_STR_LENGTH + 2, buf);
+        int len = to_buffer(data, MAX_FLOAT_STR_LENGTH + 2, buf);
         bw.write(buf, len);
     } else if constexpr (is_int_or_bool(T) ||
                          std::numeric_limits<
diff --git a/be/src/vec/exec/format/column_type_convert.h b/be/src/vec/exec/format/column_type_convert.h
index b5a3f24cb69..b6253cf6a70 100644
--- a/be/src/vec/exec/format/column_type_convert.h
+++ b/be/src/vec/exec/format/column_type_convert.h
@@ -23,7 +23,7 @@
 #include <cstdint>
 #include <utility>
 
-#include "gutil/strings/numbers.h"
+#include "util/to_string.h"
 #include "vec/columns/column_string.h"
 #include "vec/common/arithmetic_overflow.h"
 #include "vec/core/types.h"
@@ -287,11 +287,7 @@ public:
                 }
                 char buf[128];
                 int strlen;
-                if constexpr (SrcPrimitiveType == TYPE_FLOAT) {
-                    strlen = FastFloatToBuffer(src_data[i], buf);
-                } else {
-                    strlen = FastDoubleToBuffer(src_data[i], buf);
-                }
+                strlen = fast_to_buffer(src_data[i], buf);
                 string_col.insert_data(buf, strlen);
             } else {
                 std::string value;
diff --git a/be/test/gutil/strings/numbers_test.cpp b/be/test/gutil/strings/numbers_test.cpp
index fcabb679358..6a25a2ad610 100644
--- a/be/test/gutil/strings/numbers_test.cpp
+++ b/be/test/gutil/strings/numbers_test.cpp
@@ -15,8 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "gutil/strings/numbers.h"
-
 #include <gtest/gtest-message.h>
 #include <gtest/gtest-test-part.h>
 
@@ -24,6 +22,7 @@
 
 #include "gtest/gtest_pred_impl.h"
 #include "util/mysql_global.h"
+#include "util/to_string.h"
 
 namespace doris {
 
@@ -33,50 +32,50 @@ TEST_F(NumbersTest, test_float_to_buffer) {
     char buffer1[100];
     char buffer2[100];
     float v1 = 0;
-    int len1 = FloatToBuffer(v1, MAX_FLOAT_STR_LENGTH, buffer1);
-    int len2 = FastFloatToBuffer(v1, buffer2);
+    int len1 = to_buffer(v1, MAX_FLOAT_STR_LENGTH, buffer1);
+    int len2 = fast_to_buffer(v1, buffer2);
     EXPECT_EQ(std::string("0"), std::string(buffer1, len1));
     EXPECT_EQ(std::string("0"), std::string(buffer2, len2));
 
     float v2 = 0.00;
-    len1 = FloatToBuffer(v2, MAX_FLOAT_STR_LENGTH, buffer1);
-    len2 = FastFloatToBuffer(v2, buffer2);
+    len1 = to_buffer(v2, MAX_FLOAT_STR_LENGTH, buffer1);
+    len2 = fast_to_buffer(v2, buffer2);
     EXPECT_EQ(std::string("0"), std::string(buffer1, len1));
     EXPECT_EQ(std::string("0"), std::string(buffer2, len2));
 
     float v3 = 20001230;
-    len1 = FloatToBuffer(v3, MAX_FLOAT_STR_LENGTH, buffer1);
-    len2 = FastFloatToBuffer(v3, buffer2);
+    len1 = to_buffer(v3, MAX_FLOAT_STR_LENGTH, buffer1);
+    len2 = fast_to_buffer(v3, buffer2);
     EXPECT_EQ(std::string("20001230"), std::string(buffer1, len1));
     EXPECT_EQ(std::string("20001230"), std::string(buffer2, len2));
 
     float v4 = static_cast<float>(200012303131);
-    len1 = FloatToBuffer(v4, MAX_FLOAT_STR_LENGTH, buffer1);
-    len2 = FastFloatToBuffer(v4, buffer2);
+    len1 = to_buffer(v4, MAX_FLOAT_STR_LENGTH, buffer1);
+    len2 = fast_to_buffer(v4, buffer2);
     EXPECT_EQ(std::string("2.000123e+11"), std::string(buffer1, len1));
     EXPECT_EQ(std::string("200012300000"), std::string(buffer2, len2));
 
     float v5 = -3167.3131;
-    len1 = FloatToBuffer(v5, MAX_FLOAT_STR_LENGTH, buffer1);
-    len2 = FastFloatToBuffer(v5, buffer2);
+    len1 = to_buffer(v5, MAX_FLOAT_STR_LENGTH, buffer1);
+    len2 = fast_to_buffer(v5, buffer2);
     EXPECT_EQ(std::string("-3167.313"), std::string(buffer1, len1));
     EXPECT_EQ(std::string("-3167.313"), std::string(buffer2, len2));
 
     float v6 = std::numeric_limits<float>::max();
-    len1 = FloatToBuffer(v6, MAX_FLOAT_STR_LENGTH, buffer1);
-    len2 = FastFloatToBuffer(v6, buffer2);
+    len1 = to_buffer(v6, MAX_FLOAT_STR_LENGTH, buffer1);
+    len2 = fast_to_buffer(v6, buffer2);
     EXPECT_EQ(std::string("3.4028235e+38"), std::string(buffer1, len1));
     EXPECT_EQ(std::string("3.4028235e+38"), std::string(buffer2, len2));
 
     float v7 = std::numeric_limits<float>::min();
-    len1 = FloatToBuffer(v7, MAX_FLOAT_STR_LENGTH, buffer1);
-    len2 = FastFloatToBuffer(v7, buffer2);
+    len1 = to_buffer(v7, MAX_FLOAT_STR_LENGTH, buffer1);
+    len2 = fast_to_buffer(v7, buffer2);
     EXPECT_EQ(std::string("1.1754944e-38"), std::string(buffer1, len1));
     EXPECT_EQ(std::string("1.1754944e-38"), std::string(buffer2, len2));
 
     float v8 = 0 - std::numeric_limits<float>::max();
-    len1 = FloatToBuffer(v8, MAX_FLOAT_STR_LENGTH, buffer1);
-    len2 = FastFloatToBuffer(v8, buffer2);
+    len1 = to_buffer(v8, MAX_FLOAT_STR_LENGTH, buffer1);
+    len2 = fast_to_buffer(v8, buffer2);
     EXPECT_EQ(std::string("-3.4028235e+38"), std::string(buffer1, len1));
     EXPECT_EQ(std::string("-3.4028235e+38"), std::string(buffer2, len2));
 }
@@ -85,50 +84,50 @@ TEST_F(NumbersTest, test_double_to_buffer) {
     char buffer1[100];
     char buffer2[100];
     double v1 = 0;
-    int len1 = DoubleToBuffer(v1, MAX_DOUBLE_STR_LENGTH, buffer1);
-    int len2 = FastDoubleToBuffer(v1, buffer2);
+    int len1 = to_buffer(v1, MAX_DOUBLE_STR_LENGTH, buffer1);
+    int len2 = fast_to_buffer(v1, buffer2);
     EXPECT_EQ(std::string("0"), std::string(buffer1, len1));
     EXPECT_EQ(std::string("0"), std::string(buffer2, len2));
 
     double v2 = 0.00;
-    len1 = DoubleToBuffer(v2, MAX_DOUBLE_STR_LENGTH, buffer1);
-    len2 = FastDoubleToBuffer(v2, buffer2);
+    len1 = to_buffer(v2, MAX_DOUBLE_STR_LENGTH, buffer1);
+    len2 = fast_to_buffer(v2, buffer2);
     EXPECT_EQ(std::string("0"), std::string(buffer1, len1));
     EXPECT_EQ(std::string("0"), std::string(buffer2, len2));
 
     double v3 = 20001230;
-    len1 = DoubleToBuffer(v3, MAX_DOUBLE_STR_LENGTH, buffer1);
-    len2 = FastDoubleToBuffer(v3, buffer2);
+    len1 = to_buffer(v3, MAX_DOUBLE_STR_LENGTH, buffer1);
+    len2 = fast_to_buffer(v3, buffer2);
     EXPECT_EQ(std::string("20001230"), std::string(buffer1, len1));
     EXPECT_EQ(std::string("20001230"), std::string(buffer2, len2));
 
     double v4 = 200012303131;
-    len1 = DoubleToBuffer(v4, MAX_DOUBLE_STR_LENGTH, buffer1);
-    len2 = FastDoubleToBuffer(v4, buffer2);
+    len1 = to_buffer(v4, MAX_DOUBLE_STR_LENGTH, buffer1);
+    len2 = fast_to_buffer(v4, buffer2);
     EXPECT_EQ(std::string("200012303131"), std::string(buffer1, len1));
     EXPECT_EQ(std::string("200012303131"), std::string(buffer2, len2));
 
     double v5 = -3167.3131;
-    len1 = DoubleToBuffer(v5, MAX_DOUBLE_STR_LENGTH, buffer1);
-    len2 = FastDoubleToBuffer(v5, buffer2);
+    len1 = to_buffer(v5, MAX_DOUBLE_STR_LENGTH, buffer1);
+    len2 = fast_to_buffer(v5, buffer2);
     EXPECT_EQ(std::string("-3167.3131"), std::string(buffer1, len1));
     EXPECT_EQ(std::string("-3167.3131"), std::string(buffer2, len2));
 
     double v6 = std::numeric_limits<double>::max();
-    len1 = DoubleToBuffer(v6, MAX_DOUBLE_STR_LENGTH, buffer1);
-    len2 = FastDoubleToBuffer(v6, buffer2);
+    len1 = to_buffer(v6, MAX_DOUBLE_STR_LENGTH, buffer1);
+    len2 = fast_to_buffer(v6, buffer2);
     EXPECT_EQ(std::string("1.7976931348623157e+308"), std::string(buffer1, len1));
     EXPECT_EQ(std::string("1.7976931348623157e+308"), std::string(buffer2, len2));
 
     double v7 = std::numeric_limits<double>::min();
-    len1 = DoubleToBuffer(v7, MAX_DOUBLE_STR_LENGTH, buffer1);
-    len2 = FastDoubleToBuffer(v7, buffer2);
+    len1 = to_buffer(v7, MAX_DOUBLE_STR_LENGTH, buffer1);
+    len2 = fast_to_buffer(v7, buffer2);
     EXPECT_EQ(std::string("2.2250738585072014e-308"), std::string(buffer1, len1));
     EXPECT_EQ(std::string("2.2250738585072014e-308"), std::string(buffer2, len2));
 
     double v8 = 0 - std::numeric_limits<double>::max();
-    len1 = DoubleToBuffer(v8, MAX_DOUBLE_STR_LENGTH, buffer1);
-    len2 = FastDoubleToBuffer(v8, buffer2);
+    len1 = to_buffer(v8, MAX_DOUBLE_STR_LENGTH, buffer1);
+    len2 = fast_to_buffer(v8, buffer2);
     EXPECT_EQ(std::string("-1.7976931348623157e+308"), std::string(buffer1, len1));
     EXPECT_EQ(std::string("-1.7976931348623157e+308"), std::string(buffer2, len2));
 }
@@ -158,7 +157,7 @@ TEST_F(NumbersTest, test_float_to_buffer2) {
     for (const auto& value : input_values) {
         std::string str;
         str.resize(64);
-        auto len = FastFloatToBuffer(value.first, str.data());
+        auto len = fast_to_buffer(value.first, str.data());
         str.resize(len);
         EXPECT_EQ(str, value.second);
     }
@@ -187,7 +186,7 @@ TEST_F(NumbersTest, test_double_to_buffer2) {
     for (const auto& value : input_values) {
         std::string str;
         str.resize(64);
-        auto len = FastDoubleToBuffer(value.first, str.data());
+        auto len = fast_to_buffer(value.first, str.data());
         str.resize(len);
         EXPECT_EQ(str, value.second);
     }
diff --git a/be/test/olap/delete_handler_test.cpp b/be/test/olap/delete_handler_test.cpp
index 0d45d28c284..6dadda8c763 100644
--- a/be/test/olap/delete_handler_test.cpp
+++ b/be/test/olap/delete_handler_test.cpp
@@ -35,7 +35,6 @@
 
 #include "common/config.h"
 #include "gtest/gtest_pred_impl.h"
-#include "gutil/strings/numbers.h"
 #include "io/fs/local_file_system.h"
 #include "json2pb/json_to_pb.h"
 #include "olap/olap_common.h"
diff --git a/be/test/olap/rowid_conversion_test.cpp b/be/test/olap/rowid_conversion_test.cpp
index 118f257a696..54261f365ea 100644
--- a/be/test/olap/rowid_conversion_test.cpp
+++ b/be/test/olap/rowid_conversion_test.cpp
@@ -36,7 +36,6 @@
 
 #include "common/status.h"
 #include "gtest/gtest_pred_impl.h"
-#include "gutil/strings/numbers.h"
 #include "io/fs/local_file_system.h"
 #include "io/io_common.h"
 #include "json2pb/json_to_pb.h"
diff --git a/be/test/olap/tablet_test.cpp b/be/test/olap/tablet_test.cpp
index b1eb148e8a3..ea7ddc24bbf 100644
--- a/be/test/olap/tablet_test.cpp
+++ b/be/test/olap/tablet_test.cpp
@@ -27,7 +27,6 @@
 #include <memory>
 
 #include "gtest/gtest_pred_impl.h"
-#include "gutil/strings/numbers.h"
 #include "http/action/pad_rowset_action.h"
 #include "http/http_request.h"
 #include "io/fs/local_file_system.h"
diff --git a/be/test/testutil/test_util.cpp b/be/test/testutil/test_util.cpp
index 95a9b0929c2..ec84d32966b 100644
--- a/be/test/testutil/test_util.cpp
+++ b/be/test/testutil/test_util.cpp
@@ -43,7 +43,6 @@
 
 #include "absl/strings/substitute.h"
 #include "gflags/gflags.h"
-#include "gutil/strings/numbers.h"
 #include "olap/olap_common.h"
 
 DEFINE_bool(gen_out, false, "generate expected check data for test");
diff --git a/be/test/util/sort_heap_test.cpp b/be/test/util/sort_heap_test.cpp
deleted file mode 100644
index e129cc4e0aa..00000000000
--- a/be/test/util/sort_heap_test.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "util/sort_heap.h"
-
-#include <gtest/gtest-message.h>
-#include <gtest/gtest-test-part.h>
-
-#include <algorithm>
-#include <queue>
-#include <random>
-#include <vector>
-
-#include "gtest/gtest_pred_impl.h"
-
-namespace doris {
-
-struct int_comparator {
-    bool operator()(int l, int r) { return l < r; }
-};
-
-class SortHeapTest : public testing::Test {
-public:
-    SortHeapTest() = default;
-    ~SortHeapTest() = default;
-
-private:
-    std::default_random_engine _re;
-    int_comparator cp;
-};
-
-TEST_F(SortHeapTest, IntBasicTest) {
-    std::priority_queue<int, std::vector<int>, int_comparator> pq(cp);
-    doris::SortingHeap<int, std::vector<int>, int_comparator> sh(cp);
-    // test default result
-    const int test_case_1 = 10;
-    for (size_t i = 0; i < test_case_1; ++i) {
-        int res = _re();
-        pq.push(res);
-        sh.push(res);
-    }
-    EXPECT_EQ(pq.size(), sh.size());
-    for (size_t i = 0; i < test_case_1; ++i) {
-        EXPECT_EQ(sh.top(), pq.top());
-        pq.pop();
-        sh.remove_top();
-    }
-}
-
-TEST_F(SortHeapTest, IntReplaceTest) {
-    std::priority_queue<int, std::vector<int>, int_comparator> pq(cp);
-    doris::SortingHeap<int, std::vector<int>, int_comparator> sh(cp);
-    // test replace
-    const int test_case_2 = 10;
-    for (size_t i = 0; i < test_case_2; ++i) {
-        int res = _re();
-        pq.push(res);
-        sh.push(res);
-    }
-
-    for (size_t i = 0; i < 2 * test_case_2; ++i) {
-        int res = _re();
-        EXPECT_EQ(sh.top(), pq.top());
-        if (res < sh.top()) {
-            sh.replace_top(res);
-            pq.pop();
-            pq.push(res);
-        }
-    }
-
-    EXPECT_EQ(sh.size(), pq.size());
-    int container_size = sh.size();
-    for (size_t i = 0; i < container_size; ++i) {
-        EXPECT_EQ(sh.top(), pq.top());
-        pq.pop();
-        sh.remove_top();
-    }
-}
-
-} // namespace doris
diff --git a/be/test/vec/function/cast/cast_test.h b/be/test/vec/function/cast/cast_test.h
index 9c8cc1f39d0..3992dc7e038 100644
--- a/be/test/vec/function/cast/cast_test.h
+++ b/be/test/vec/function/cast/cast_test.h
@@ -24,6 +24,7 @@
 
 #include "runtime/primitive_type.h"
 #include "testutil/column_helper.h"
+#include "util/to_string.h"
 #include "vec/core/column_with_type_and_name.h"
 #include "vec/core/types.h"
 #include "vec/function/function_test_util.h"
@@ -450,15 +451,10 @@ struct FunctionCastTest : public testing::Test {
                         (*ofs_const_expected_result)
                                 << fmt::format("-- !sql_{}_{}_{} --\n", table_index, i,
                                                enable_strict_cast ? "strict" : "non_strict");
-                        if constexpr (std::is_same_v<ToValueType, float>) {
+                        if constexpr (std::is_same_v<ToValueType, float> ||
+                                      std::is_same_v<ToValueType, double>) {
                             char buffer[64] = {0};
-                            FastFloatToBuffer(test_data_set[i].second, buffer);
-                            (*ofs_const_expected_result)
-                                    << fmt::format("{}\t{}\n\n", test_data_set[i].first, buffer);
-
-                        } else if constexpr (std::is_same_v<ToValueType, double>) {
-                            char buffer[64] = {0};
-                            FastDoubleToBuffer(test_data_set[i].second, buffer);
+                            fast_to_buffer(test_data_set[i].second, buffer);
                             (*ofs_const_expected_result)
                                     << fmt::format("{}\t{}\n\n", test_data_set[i].first, buffer);
                         } else {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris) branch master updated: [Chore](gutil) remove gutil/numbers util/sort_heap (#53803)

Reply via email to