This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 17c1c72f5913514d13fbfc724d1efdb2fa734f19 Author: zhangstar333 <[email protected]> AuthorDate: Tue Jul 4 20:35:35 2023 +0800 [enhancement](udf) add more info when downloading a jar package fails (#21440) When downloading a jar package, the checksum is sometimes reported as not equal, but the root cause is unknown. Now add more detail to the error message on failure. --- be/src/runtime/user_function_cache.cpp | 39 +++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/be/src/runtime/user_function_cache.cpp b/be/src/runtime/user_function_cache.cpp index f7ec0890a6..9edc4dbd46 100644 --- a/be/src/runtime/user_function_cache.cpp +++ b/be/src/runtime/user_function_cache.cpp @@ -25,6 +25,7 @@ #include <unistd.h> #include <atomic> +#include <cstdint> #include <memory> #include <ostream> #include <regex> @@ -55,6 +56,15 @@ struct UserFunctionCacheEntry { : function_id(fid_), checksum(checksum_), lib_file(lib_file_), type(type) {} ~UserFunctionCacheEntry(); + std::string debug_string() { + fmt::memory_buffer error_msg; + fmt::format_to(error_msg, + " the info of UserFunctionCacheEntry save in BE, function_id:{}, " + "checksum:{}, lib_file:{}, is_downloaded:{}. ", + function_id, checksum, lib_file, is_downloaded); + return fmt::to_string(error_msg); + } + int64_t function_id = 0; // used to check if this library is valid. std::string checksum; @@ -136,13 +146,17 @@ Status UserFunctionCache::_load_entry_from_lib(const std::string& dir, const std } else if (ends_with(file, ".jar")) { lib_type = LibType::JAR; } else { - return Status::InternalError("unknown library file format: " + file); + return Status::InternalError( + "unknown library file format. 
the file type is not end with xxx.jar or xxx.so : " + + file); } std::vector<std::string> split_parts = strings::Split(file, "."); if (split_parts.size() != 3 && split_parts.size() != 4) { return Status::InternalError( - "user function's name should be function_id.checksum[.file_name].file_type"); + "user function's name should be function_id.checksum[.file_name].file_type, now " + "the all split parts are by delimiter(.): " + + file); } int64_t function_id = std::stol(split_parts[0]); std::string checksum = split_parts[1]; @@ -150,7 +164,7 @@ Status UserFunctionCache::_load_entry_from_lib(const std::string& dir, const std if (it != _entry_map.end()) { LOG(WARNING) << "meet a same function id user function library, function_id=" << function_id << ", one_checksum=" << checksum - << ", other_checksum=" << it->second->checksum; + << ", other_checksum info: = " << it->second->debug_string(); return Status::InternalError("duplicate function id"); } // create a cache entry and put it into entry map @@ -213,7 +227,7 @@ Status UserFunctionCache::_get_cache_entry(int64_t fid, const std::string& url, } auto st = _load_cache_entry(url, entry); if (!st.ok()) { - LOG(WARNING) << "fail to load cache entry, fid=" << fid; + LOG(WARNING) << "fail to load cache entry, fid=" << fid << " " << file_name << " " << url; // if we load a cache entry failed, I think we should delete this entry cache // even if this cache was valid before. 
_destroy_cache_entry(entry); @@ -271,10 +285,13 @@ Status UserFunctionCache::_download_lib(const std::string& url, Md5Digest digest; HttpClient client; + int64_t file_size = 0; RETURN_IF_ERROR(client.init(real_url)); Status status; - auto download_cb = [&status, &tmp_file, &fp, &digest](const void* data, size_t length) { + auto download_cb = [&status, &tmp_file, &fp, &digest, &file_size](const void* data, + size_t length) { digest.update(data, length); + file_size = file_size + length; auto res = fwrite(data, length, 1, fp.get()); if (res != 1) { LOG(WARNING) << "fail to write data to file, file=" << tmp_file @@ -288,9 +305,15 @@ Status UserFunctionCache::_download_lib(const std::string& url, RETURN_IF_ERROR(status); digest.digest(); if (!iequal(digest.hex(), entry->checksum)) { - LOG(WARNING) << "UDF's checksum is not equal, one=" << digest.hex() - << ", other=" << entry->checksum; - return Status::InternalError("UDF's library checksum is not match"); + fmt::memory_buffer error_msg; + fmt::format_to( + error_msg, + " The checksum is not equal of {} ({}). The init info of first create entry is:" + "{} But download file check_sum is: {}, file_size is: {}.", + url, real_url, entry->debug_string(), digest.hex(), file_size); + std::string error(fmt::to_string(error_msg)); + LOG(WARNING) << error; + return Status::InternalError(error); } // close this file fp.reset(); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
