Mryange commented on code in PR #47102:
URL: https://github.com/apache/doris/pull/47102#discussion_r2250602315


##########
be/src/vec/functions/function_string.cpp:
##########
@@ -67,6 +67,64 @@ struct StringASCII {
     }
 };
 
+struct NameParseDataSize {
+    static constexpr auto name = "parse_data_size";
+};
+
+static const std::map<std::string_view, Int128> UNITS = {
+        {"B", static_cast<Int128>(1)},        {"kB", static_cast<Int128>(1) << 10},
+        {"MB", static_cast<Int128>(1) << 20}, {"GB", static_cast<Int128>(1) << 30},
+        {"TB", static_cast<Int128>(1) << 40}, {"PB", static_cast<Int128>(1) << 50},
+        {"EB", static_cast<Int128>(1) << 60}, {"ZB", static_cast<Int128>(1) << 70},
+        {"YB", static_cast<Int128>(1) << 80}};
+
+struct ParseDataSize {
+    using ReturnType = DataTypeInt128;
+    static constexpr auto TYPE_INDEX = TypeIndex::String;
+    using Type = String;
+    using ReturnColumnType = ColumnVector<Int128>;
+
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         PaddedPODArray<Int128>& res) {
+        auto size = offsets.size();
+        res.resize(size);
+        for (int i = 0; i < size; ++i) {
+            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
+            int str_size = offsets[i] - offsets[i - 1];
+            res[i] = parse_data_size(std::string_view(raw_str, str_size));
+        }
+        return Status::OK();
+    }
+
+    static Int128 parse_data_size(const std::string_view& dataSize) {
+        int digit_length = 0;
+        for (char c : dataSize) {
+            if (isdigit(c) || c == '.') {
+                digit_length++;
+            } else {
+                break;
+            }
+        }
+
+        if (digit_length == 0) {
+            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
+                                   "Invalid Input argument \"{}\" of function parse_data_size",
+                                   dataSize);
+        }
+        // 123.45MB--->123.45 : MB
+        double value = std::stod(std::string(dataSize.substr(0, digit_length)));

Review Comment:
   stod 可能会抛异常的(比如非法的输入),要不catch一下?
   或者不要用std的解析,用Doris自己的解析
   ```C++
   struct CastToFloat {
       static inline bool from_string(const StringRef& from, ToCppT& to, CastParameters& params) {
           return try_read_float_text(to, from);
       }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to