This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 585662b37af [improvement](http) Support to acquire md5 digest of the
file to download (#35807)
585662b37af is described below
commit 585662b37af9c77ee0caf6bdf4bec7c81d44366e
Author: walter <[email protected]>
AuthorDate: Tue Jun 4 10:05:00 2024 +0800
[improvement](http) Support to acquire md5 digest of the file to download
(#35807)
---
be/src/http/http_client.cpp | 46 ++++++++++++++++++-
be/src/http/http_client.h | 5 +-
be/src/http/utils.cpp | 37 +++++++++------
be/src/http/utils.h | 3 +-
be/test/http/http_client_test.cpp | 96 +++++++++++++++++++++++++++++++++++++++
5 files changed, 169 insertions(+), 18 deletions(-)
diff --git a/be/src/http/http_client.cpp b/be/src/http/http_client.cpp
index d7a6c9c9665..b8ef9834341 100644
--- a/be/src/http/http_client.cpp
+++ b/be/src/http/http_client.cpp
@@ -24,12 +24,36 @@
#include <ostream>
#include "common/config.h"
+#include "http/http_headers.h"
#include "http/http_status.h"
#include "util/stack_util.h"
namespace doris {
-HttpClient::HttpClient() {}
+static const char* header_error_msg(CURLHcode code) { // Maps a CURLHcode to a fixed, human-readable description string.
+ switch (code) {
+ case CURLHE_OK:
+ return "OK";
+ case CURLHE_BADINDEX:
+ return "header exists but not with this index ";
+ case CURLHE_MISSING:
+ return "no such header exists";
+ case CURLHE_NOHEADERS:
+ return "no headers at all exist (yet)";
+ case CURLHE_NOREQUEST:
+ return "no request with this number was used";
+ case CURLHE_OUT_OF_MEMORY:
+ return "out of memory while processing";
+ case CURLHE_BAD_ARGUMENT:
+ return "a function argument was not okay";
+ case CURLHE_NOT_BUILT_IN:
+ return "curl_easy_header() was disabled in the build";
+ default: // any code not listed above
+ return "unknown";
+ }
+}
+
+HttpClient::HttpClient() = default;
HttpClient::~HttpClient() {
if (_curl != nullptr) {
@@ -88,7 +112,7 @@ Status HttpClient::init(const std::string& url) {
}
curl_write_callback callback = [](char* buffer, size_t size, size_t nmemb,
void* param) {
- HttpClient* client = (HttpClient*)param;
+ auto* client = (HttpClient*)param;
return client->on_response_data(buffer, size * nmemb);
};
@@ -177,6 +201,24 @@ Status HttpClient::execute(const std::function<bool(const
void* data, size_t len
return Status::OK();
}
+Status HttpClient::get_content_md5(std::string* md5) const { // Copies the HttpHeaders::CONTENT_MD5 header value into *md5; *md5 is cleared when the header is absent.
+ struct curl_header* header_ptr; // only dereferenced below, after the lookup returned CURLHE_OK
+ auto code = curl_easy_header(_curl, HttpHeaders::CONTENT_MD5, 0,
CURLH_HEADER, 0, &header_ptr);
+ if (code == CURLHE_MISSING || code == CURLHE_NOHEADERS) {
+ // The header is absent: report an empty digest and succeed rather than fail.
+ md5->clear();
+ return Status::OK();
+ } else if (code != CURLHE_OK) { // every other lookup failure is logged with a stack trace and returned as an HttpError
+ auto msg = fmt::format("failed to get http header {}: {} ({})",
HttpHeaders::CONTENT_MD5,
+ header_error_msg(code), code);
+ LOG(WARNING) << msg << ", trace=" << get_stack_trace();
+ return Status::HttpError(std::move(msg));
+ }
+
+ *md5 = header_ptr->value; // copy the value into the caller's string
+ return Status::OK();
+}
+
Status HttpClient::download(const std::string& local_path) {
// set method to GET
set_method(GET);
diff --git a/be/src/http/http_client.h b/be/src/http/http_client.h
index d80f484ce80..e379895a73e 100644
--- a/be/src/http/http_client.h
+++ b/be/src/http/http_client.h
@@ -106,7 +106,7 @@ public:
if (cl < 0) {
return Status::InternalError(
fmt::format("failed to get content length, it should
be a positive value, "
- "actrual is : {}",
+ "actual is : {}",
cl));
}
*length = (uint64_t)cl;
@@ -115,6 +115,9 @@ public:
return Status::InternalError("failed to get content length. err code:
{}", code);
}
+ // Get the value of the header CONTENT-MD5. The output is empty if no such
header exists.
+ Status get_content_md5(std::string* md5) const;
+
long get_http_status() const {
long code;
curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &code);
diff --git a/be/src/http/utils.cpp b/be/src/http/utils.cpp
index 49f9d2c4993..b03017c12a7 100644
--- a/be/src/http/utils.cpp
+++ b/be/src/http/utils.cpp
@@ -22,8 +22,6 @@
#include <sys/stat.h>
#include <unistd.h>
-#include <algorithm>
-#include <memory>
#include <ostream>
#include <vector>
@@ -41,6 +39,7 @@
#include "io/fs/local_file_system.h"
#include "olap/wal/wal_manager.h"
#include "runtime/exec_env.h"
+#include "util/md5.h"
#include "util/path_util.h"
#include "util/url_coding.h"
@@ -56,7 +55,7 @@ std::string encode_basic_auth(const std::string& user, const
std::string& passwd
bool parse_basic_auth(const HttpRequest& req, std::string* user, std::string*
passwd) {
const char k_basic[] = "Basic ";
- auto& auth = req.header(HttpHeaders::AUTHORIZATION);
+ const auto& auth = req.header(HttpHeaders::AUTHORIZATION);
if (auth.compare(0, sizeof(k_basic) - 1, k_basic, sizeof(k_basic) - 1) !=
0) {
return false;
}
@@ -76,8 +75,8 @@ bool parse_basic_auth(const HttpRequest& req, std::string*
user, std::string* pa
}
bool parse_basic_auth(const HttpRequest& req, AuthInfo* auth) {
- auto& token = req.header("token");
- auto& auth_code = req.header(HTTP_AUTH_CODE);
+ const auto& token = req.header("token");
+ const auto& auth_code = req.header(HTTP_AUTH_CODE);
if (!token.empty()) {
auth->token = token;
} else if (!auth_code.empty()) {
@@ -111,25 +110,24 @@ std::string get_content_type(const std::string&
file_name) {
std::string file_ext = path_util::file_extension(file_name);
VLOG_TRACE << "file_name: " << file_name << "; file extension: [" <<
file_ext << "]";
if (file_ext == std::string(".html") || file_ext == std::string(".htm")) {
- return std::string("text/html; charset=utf-8");
+ return "text/html; charset=utf-8";
} else if (file_ext == std::string(".js")) {
- return std::string("application/javascript; charset=utf-8");
+ return "application/javascript; charset=utf-8";
} else if (file_ext == std::string(".css")) {
- return std::string("text/css; charset=utf-8");
+ return "text/css; charset=utf-8";
} else if (file_ext == std::string(".txt")) {
- return std::string("text/plain; charset=utf-8");
+ return "text/plain; charset=utf-8";
} else if (file_ext == std::string(".png")) {
- return std::string("image/png");
+ return "image/png";
} else if (file_ext == std::string(".ico")) {
- return std::string("image/x-icon");
+ return "image/x-icon";
} else {
return "text/plain; charset=utf-8";
}
- return "";
}
void do_file_response(const std::string& file_path, HttpRequest* req,
- bufferevent_rate_limit_group* rate_limit_group) {
+ bufferevent_rate_limit_group* rate_limit_group, bool
is_acquire_md5) {
if (file_path.find("..") != std::string::npos) {
LOG(WARNING) << "Not allowed to read relative path: " << file_path;
HttpChannel::send_error(req, HttpStatus::FORBIDDEN);
@@ -163,6 +161,17 @@ void do_file_response(const std::string& file_path,
HttpRequest* req,
req->add_output_header(HttpHeaders::CONTENT_TYPE,
get_content_type(file_path).c_str());
+ if (is_acquire_md5) {
+ Md5Digest md5;
+
+ void* buf = mmap(nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0);
+ md5.update(buf, file_size);
+ md5.digest();
+ munmap(buf, file_size);
+
+ req->add_output_header(HttpHeaders::CONTENT_MD5, md5.hex().c_str());
+ }
+
if (req->method() == HttpMethod::HEAD) {
close(fd);
req->add_output_header(HttpHeaders::CONTENT_LENGTH,
std::to_string(file_size).c_str());
@@ -194,7 +203,7 @@ void do_dir_response(const std::string& dir_path,
HttpRequest* req) {
}
bool load_size_smaller_than_wal_limit(int64_t content_length) {
- // 1. req->header(HttpHeaders::CONTENT_LENGTH) will return streamload
content length. If it is empty or equels to 0, it means this streamload
+ // 1. req->header(HttpHeaders::CONTENT_LENGTH) will return streamload
content length. If it is empty or equals to 0, it means this streamload
// is a chunked streamload and we are not sure its size.
// 2. if streamload content length is too large, like larger than 80% of
the WAL constrain.
//
diff --git a/be/src/http/utils.h b/be/src/http/utils.h
index 254d59cf13d..20be6c0fcd7 100644
--- a/be/src/http/utils.h
+++ b/be/src/http/utils.h
@@ -37,7 +37,8 @@ bool parse_basic_auth(const HttpRequest& req, std::string*
user, std::string* pa
bool parse_basic_auth(const HttpRequest& req, AuthInfo* auth);
void do_file_response(const std::string& dir_path, HttpRequest* req,
- bufferevent_rate_limit_group* rate_limit_group =
nullptr);
+ bufferevent_rate_limit_group* rate_limit_group = nullptr,
+ bool is_acquire_md5 = false);
void do_dir_response(const std::string& dir_path, HttpRequest* req);
diff --git a/be/test/http/http_client_test.cpp
b/be/test/http/http_client_test.cpp
index 729a709fb93..c157f1a13c0 100644
--- a/be/test/http/http_client_test.cpp
+++ b/be/test/http/http_client_test.cpp
@@ -17,8 +17,11 @@
#include "http/http_client.h"
+#include <fcntl.h>
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
#include <unistd.h>
#include <boost/algorithm/string/predicate.hpp>
@@ -30,6 +33,7 @@
#include "http/http_headers.h"
#include "http/http_request.h"
#include "http/utils.h"
+#include "util/md5.h"
namespace doris {
@@ -43,8 +47,15 @@ public:
return;
}
req->add_output_header(HttpHeaders::CONTENT_TYPE, "text/plain;
version=0.0.4");
+ bool is_acquire_md5 = !req->param("acquire_md5").empty();
if (req->method() == HttpMethod::HEAD) {
req->add_output_header(HttpHeaders::CONTENT_LENGTH,
std::to_string(5).c_str());
+ if (is_acquire_md5) {
+ Md5Digest md5;
+ md5.update("md5sum", 6);
+ md5.digest();
+ req->add_output_header(HttpHeaders::CONTENT_MD5,
md5.hex().c_str());
+ }
HttpChannel::send_reply(req);
} else {
std::string response = "test1";
@@ -80,6 +91,13 @@ public:
}
};
+class HttpDownloadFileHandler : public HttpHandler { // Test handler that answers by serving /proc/self/exe through do_file_response().
+public:
+ void handle(HttpRequest* req) override {
+ do_file_response("/proc/self/exe", req, nullptr, true); // nullptr = rate_limit_group, true = is_acquire_md5
+ }
+};
+
static EvHttpServer* s_server = nullptr;
static int real_port = 0;
static std::string hostname = "";
@@ -87,6 +105,7 @@ static std::string hostname = "";
static HttpClientTestSimpleGetHandler s_simple_get_handler;
static HttpClientTestSimplePostHandler s_simple_post_handler;
static HttpNotFoundHandler s_not_found_handler;
+static HttpDownloadFileHandler s_download_file_handler;
class HttpClientTest : public testing::Test {
public:
@@ -99,6 +118,7 @@ public:
s_server->register_handler(HEAD, "/simple_get", &s_simple_get_handler);
s_server->register_handler(POST, "/simple_post",
&s_simple_post_handler);
s_server->register_handler(GET, "/not_found", &s_not_found_handler);
+ s_server->register_handler(HEAD, "/download_file",
&s_download_file_handler);
static_cast<void>(s_server->start());
real_port = s_server->get_real_port();
EXPECT_NE(0, real_port);
@@ -203,4 +223,80 @@ TEST_F(HttpClientTest, not_found) {
EXPECT_FALSE(status.ok());
}
+TEST_F(HttpClientTest, header_content_md5) { // Checks get_content_md5() on HEAD /simple_get both without and with the acquire_md5 parameter.
+ std::string url = hostname + "/simple_get";
+
+ {
+ // Without md5: no acquire_md5 parameter is sent, so get_content_md5() must succeed with an empty string.
+ HttpClient client;
+ auto st = client.init(url);
+ EXPECT_TRUE(st.ok());
+ client.set_method(HEAD);
+ client.set_basic_auth("test1", "");
+ st = client.execute();
+ EXPECT_TRUE(st.ok());
+ uint64_t len = 0;
+ st = client.get_content_length(&len);
+ EXPECT_TRUE(st.ok());
+ EXPECT_EQ(5, len);
+ std::string md5;
+ st = client.get_content_md5(&md5);
+ EXPECT_TRUE(st.ok());
+ EXPECT_TRUE(md5.empty());
+ }
+
+ {
+ // With md5: acquire_md5 is set, so the handler attaches the MD5 hex digest of "md5sum" as CONTENT_MD5.
+ HttpClient client;
+ auto st = client.init(url + "?acquire_md5=true");
+ EXPECT_TRUE(st.ok());
+ client.set_method(HEAD);
+ client.set_basic_auth("test1", "");
+ st = client.execute();
+ EXPECT_TRUE(st.ok());
+ uint64_t len = 0;
+ st = client.get_content_length(&len);
+ EXPECT_TRUE(st.ok());
+ EXPECT_EQ(5, len);
+ std::string md5_value;
+ st = client.get_content_md5(&md5_value);
+ EXPECT_TRUE(st.ok());
+
+ Md5Digest md5; // recompute the digest the handler is expected to have sent
+ md5.update("md5sum", 6);
+ md5.digest();
+ EXPECT_EQ(md5_value, md5.hex());
+ }
+}
+
+TEST_F(HttpClientTest, download_file_md5) { // Checks that the CONTENT_MD5 returned for HEAD /download_file equals a locally computed MD5 of /proc/self/exe.
+ std::string url = hostname + "/download_file";
+ HttpClient client;
+ auto st = client.init(url);
+ EXPECT_TRUE(st.ok());
+ client.set_method(HEAD); // the /download_file handler is registered for HEAD only
+ client.set_basic_auth("test1", "");
+ st = client.execute();
+ EXPECT_TRUE(st.ok());
+
+ std::string md5_value;
+ st = client.get_content_md5(&md5_value);
+ EXPECT_TRUE(st.ok());
+
+ int fd = open("/proc/self/exe", O_RDONLY); // reopen the same file the handler serves to compute the reference digest
+ ASSERT_TRUE(fd >= 0);
+ struct stat stat;
+ ASSERT_TRUE(fstat(fd, &stat) >= 0); // NOTE(review): if this assertion fails the test returns before close(fd) is reached
+
+ int64_t file_size = stat.st_size;
+ Md5Digest md5;
+ void* buf = mmap(nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0); // NOTE(review): result is not compared against MAP_FAILED before use; confirm this is acceptable
+ md5.update(buf, file_size);
+ md5.digest();
+ munmap(buf, file_size);
+
+ EXPECT_EQ(md5_value, md5.hex());
+ close(fd);
+}
+
} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]