This is an automated email from the ASF dual-hosted git repository.
mgreber pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 87e53ae41 Implement path parameter handling in webserver
87e53ae41 is described below
commit 87e53ae41fa0e68cb5f9d4e65385dc513f864774
Author: gabriellalotz <[email protected]>
AuthorDate: Thu Oct 10 13:38:55 2024 +0200
Implement path parameter handling in webserver
Introduced path parameter parsing in the webserver to support dynamic
REST API endpoints. Parameters within URIs are now identified using
<parameter_name> notation. Extracted parameters are passed to the
WebRequest as path_params.
Example usage:
Registering a path like "/api/v1/tables/<table_id>" will map the
corresponding part of the URI to the table_id key in the path_params
of the WebRequest.
Query parameter handling won't change with path parameters.
Example for query and path parameters:
Given path "api/v1/tables/<table_id>", we can reach URIs like
"api/v1/tables/1234?var1=1". We can find these in the following way,
assuming we have a WebRequest struct.
req.parsed_args["var1"] = "1"
req.path_params["table_id"] = "1234"
Change-Id: Id2085c55d3b86985c87c5fd5a8b48568f8e6fa63
Reviewed-on: http://gerrit.cloudera.org:8080/21920
Tested-by: Kudu Jenkins
Reviewed-by: Zoltan Chovan <[email protected]>
Reviewed-by: Marton Greber <[email protected]>
---
src/kudu/server/webserver-test.cc | 96 ++++++++++++++++++++++++++++++++
src/kudu/server/webserver.cc | 102 +++++++++++++++++++++++++++++-----
src/kudu/server/webserver.h | 12 +++-
src/kudu/util/web_callback_registry.h | 3 +
4 files changed, 197 insertions(+), 16 deletions(-)
diff --git a/src/kudu/server/webserver-test.cc
b/src/kudu/server/webserver-test.cc
index 07688ab63..e5ca8aad2 100644
--- a/src/kudu/server/webserver-test.cc
+++ b/src/kudu/server/webserver-test.cc
@@ -28,6 +28,8 @@
#include <iosfwd>
#include <memory>
#include <string>
+#include <unordered_map>
+#include <utility>
#include <vector>
#include <gflags/gflags.h>
@@ -46,6 +48,7 @@
#include "kudu/server/default_path_handlers.h"
#include "kudu/server/webserver_options.h"
#include "kudu/util/curl_util.h"
+#include "kudu/util/easy_json.h"
#include "kudu/util/env.h"
#include "kudu/util/faststring.h"
#include "kudu/util/flag_tags.h"
@@ -55,6 +58,7 @@
#include "kudu/util/status.h"
#include "kudu/util/test_macros.h"
#include "kudu/util/test_util.h"
+#include "kudu/util/web_callback_registry.h"
#include "kudu/util/zlib.h"
using std::string;
@@ -658,6 +662,98 @@ TEST_F(WebserverTest, TestStaticFiles) {
ASSERT_EQ("Remote error: HTTP 403", s.ToString());
}
+namespace {
+
+// Handler that echoes back the path parameters and query parameters in
key-value pairs.
+void PathParamHandler(const Webserver::WebRequest& req,
Webserver::WebResponse* resp) {
+ EasyJson* output = &resp->output;
+
+ for (const auto& param : req.path_params) {
+ (*output)["path_params"][param.first] = param.second;
+ }
+
+ for (const auto& param : req.parsed_args) {
+ (*output)["query_params"][param.first] = param.second;
+ }
+}
+
+class PathParamWebserverTest : public WebserverTest {
+ protected:
+ void SetUp() override {
+ WebserverTest::SetUp();
+ server_->RegisterPathHandler("/api/tables/<table_id>/tablets/<tablet_id>",
+ "PathParamWebserverTest",
+ PathParamHandler,
+ StyleMode::UNSTYLED,
+ false);
+ server_->RegisterPathHandler("/api/tables/<table_id>",
+ "PathParamWebserverTest",
+ PathParamHandler,
+ StyleMode::UNSTYLED,
+ false);
+ server_->RegisterPathHandler("/columns/ <column_id>",
+ "PathParamWebserverTest",
+ PathParamHandler,
+ StyleMode::UNSTYLED,
+ false);
+ }
+};
+} // anonymous namespace
+
+TEST_F(PathParamWebserverTest, TestPathParameterAtEnd) {
+ ASSERT_OK(
+
curl_.FetchURL(Substitute("$0/api/tables/45dc8d192549427b8dca871dbbb20bb3",
url_), &buf_));
+ ASSERT_STR_CONTAINS(buf_.ToString(),
"\"table_id\":\"45dc8d192549427b8dca871dbbb20bb3\"");
+}
+
+TEST_F(PathParamWebserverTest, TestPathParameterInMiddleAndEnd) {
+ ASSERT_OK(curl_.FetchURL(
+ Substitute("$0/api/tables/45dc8d192549427b8dca871dbbb20bb3/tablets/1",
url_), &buf_));
+ ASSERT_STR_CONTAINS(buf_.ToString(),
"\"table_id\":\"45dc8d192549427b8dca871dbbb20bb3\"");
+ ASSERT_STR_CONTAINS(buf_.ToString(), "\"tablet_id\":\"1\"");
+}
+
+TEST_F(PathParamWebserverTest, TestInvalidPathParameter) {
+ Status s = curl_.FetchURL(Substitute("$0/api/tables//tablets/1", url_),
&buf_);
+ ASSERT_EQ("Remote error: HTTP 404", s.ToString());
+}
+
+// Test that the query string is correctly parsed and returned in the response,
+// even when the path contains a path parameter.
+TEST_F(PathParamWebserverTest, TestPathWithQueryString) {
+ ASSERT_OK(curl_.FetchURL(
+ Substitute("$0/api/tables/45dc8d192549427b8dca871dbbb20bb3?foo=bar",
url_), &buf_));
+ ASSERT_STR_CONTAINS(buf_.ToString(),
+
"\"path_params\":{\"table_id\":\"45dc8d192549427b8dca871dbbb20bb3\"}");
+ ASSERT_STR_CONTAINS(buf_.ToString(), "\"query_params\":{\"foo\":\"bar\"}");
+}
+
+TEST_F(PathParamWebserverTest, TestInvalidPathWithSpace) {
+ Status s = curl_.FetchURL(
+ Substitute("$0/api/tables/45dc8d192549427b8dca871dbbb20bb3 /tablets/1",
url_), &buf_);
+ ASSERT_EQ(
+ "Network error: curl error: URL using bad/illegal format or missing URL:
URL rejected: "
+ "Malformed input to a URL function",
+ s.ToString());
+}
+
+TEST_F(PathParamWebserverTest, TestRegisteredPathWithSpace) {
+ Status s =
curl_.FetchURL(Substitute("$0/columns/45dc8d192549427b8dca871dbbb20bb3", url_),
&buf_);
+ ASSERT_EQ("Remote error: HTTP 404", s.ToString());
+ Status s2 =
+ curl_.FetchURL(Substitute("$0/columns/
45dc8d192549427b8dca871dbbb20bb3", url_), &buf_);
+ ASSERT_EQ(
+ "Network error: curl error: URL using bad/illegal format or missing URL:
URL rejected: "
+ "Malformed input to a URL function",
+ s2.ToString());
+}
+
+TEST_F(PathParamWebserverTest, TestPathParamWithNonAsciiCharacter) {
+ Status s = curl_.FetchURL(
+
Substitute("$0/api/tables/45dc8d192549427b8dca871dbbb20bb3ΓΌ20/tablets/1",
url_), &buf_);
+ ASSERT_EQ("Remote error: HTTP 400", s.ToString());
+}
+
class DisabledDocRootWebserverTest : public WebserverTest {
protected:
bool enable_doc_root() const override { return false; }
diff --git a/src/kudu/server/webserver.cc b/src/kudu/server/webserver.cc
index a734259cb..ab5f3f149 100644
--- a/src/kudu/server/webserver.cc
+++ b/src/kudu/server/webserver.cc
@@ -592,40 +592,85 @@ sq_callback_result_t Webserver::BeginRequestCallback(
request_info->remote_user = strdup(authn_princ.c_str());
}
- PathHandler* handler;
+ PathHandler* handler = nullptr;
+ std::unordered_map<std::string, std::string> path_params;
{
+ bool has_non_ascii = ContainsNonAscii(request_info->uri);
+ if (has_non_ascii) {
+ resp.output << "Path contains non-ASCII characters";
+ resp.status_code = HttpStatusCode::BadRequest;
+ SendResponse(connection, &resp);
+ return SQ_HANDLED_OK;
+ }
shared_lock<RWMutex> l(lock_);
PathHandlerMap::const_iterator it = path_handlers_.find(request_info->uri);
+
if (it == path_handlers_.end()) {
- // Let Mongoose deal with this request; returning NULL will fall through
- // to the default handler which will serve files.
- if (!opts_.doc_root.empty() && opts_.enable_doc_root) {
- VLOG(2) << "HTTP File access: " << request_info->uri;
- // TODO(adar): if using SPNEGO, do we need to somehow send the
- // authentication response header here?
- return SQ_CONTINUE_HANDLING;
+ std::vector<std::string> uri_segments = SplitPath(request_info->uri);
+ if (uri_segments.empty()) {
+ resp.output << "Invalid path";
+ resp.status_code = HttpStatusCode::BadRequest;
+ SendResponse(connection, &resp);
+ return SQ_HANDLED_OK;
}
- resp.output << Substitute("No handler for URI $0\r\n\r\n",
request_info->uri);
- resp.status_code = HttpStatusCode::NotFound;
- SendResponse(connection, &resp);
- return SQ_HANDLED_OK;
+ for (const auto& path_handler : path_handlers_) {
+ std::vector<std::string> handler_segments =
SplitPath(path_handler.first);
+
+ if (handler_segments.size() != uri_segments.size()) {
+ continue;
+ }
+ bool match = false;
+ for (size_t i = 0; i < handler_segments.size(); ++i) {
+ if (handler_segments[i][0] == '<' &&
+ handler_segments[i][handler_segments[i].size() - 1] == '>' &&
+ handler_segments[i].size() >= 3) {
+ std::string param_name = handler_segments[i].substr(1,
handler_segments[i].size() - 2);
+ path_params[param_name] = uri_segments[i];
+ match = true;
+ } else if (handler_segments[i] != uri_segments[i]) {
+ match = false;
+ break;
+ }
+ }
+ if (match) {
+ handler = path_handler.second;
+ break;
+ }
+ }
+ if (!handler) {
+ // Let Mongoose deal with this request; returning NULL will fall
through
+ // to the default handler which will serve files.
+ if (!opts_.doc_root.empty() && opts_.enable_doc_root) {
+ VLOG(2) << "HTTP File access: " << request_info->uri;
+ // TODO(adar): if using SPNEGO, do we need to somehow send the
+ // authentication response header here?
+ return SQ_CONTINUE_HANDLING;
+ }
+ resp.output << Substitute("No handler for URI $0\r\n\r\n",
request_info->uri);
+ resp.status_code = HttpStatusCode::NotFound;
+ SendResponse(connection, &resp);
+ return SQ_HANDLED_OK;
+ }
+ } else {
+ handler = it->second;
}
- handler = it->second;
}
- return RunPathHandler(*handler, connection, request_info, &resp);
+ return RunPathHandler(*handler, connection, request_info, &resp,
path_params);
}
sq_callback_result_t Webserver::RunPathHandler(
const PathHandler& handler,
struct sq_connection* connection,
struct sq_request_info* request_info,
- PrerenderedWebResponse* resp) {
+ PrerenderedWebResponse* resp,
+ const std::unordered_map<std::string, std::string>& path_params) {
WebRequest req;
if (request_info->query_string != nullptr) {
req.query_string = request_info->query_string;
BuildArgumentMap(request_info->query_string, &req.parsed_args);
}
+ req.path_params = path_params;
for (int i = 0; i < request_info->num_headers; i++) {
const auto& h = request_info->http_headers[i];
string key = h.name;
@@ -688,6 +733,33 @@ sq_callback_result_t Webserver::RunPathHandler(
return SQ_HANDLED_OK;
}
+std::vector<std::string> Webserver::SplitPath(const std::string& path) {
+ std::vector<std::string> segments;
+ // Reserve space based on '/' count
+ segments.reserve(std::count(path.begin(), path.end(), '/') + 1);
+
+ size_t start = 0;
+ size_t end;
+ while ((end = path.find('/', start)) != std::string::npos) {
+ if (end != start) { // Avoid empty segments
+ segments.emplace_back(path.substr(start, end - start));
+ }
+ start = end + 1;
+ }
+ // Add the last segment if it exists
+ if (start < path.size()) {
+ segments.emplace_back(path.substr(start));
+ }
+
+ return segments;
+}
+
+bool Webserver::ContainsNonAscii(const std::string& path) {
+ return std::any_of(path.begin(), path.end(), [](char c) {
+ return c < 0 || c > 127;
+ });
+}
+
void Webserver::SendResponse(struct sq_connection* connection,
PrerenderedWebResponse* resp,
const WebRequest* req,
diff --git a/src/kudu/server/webserver.h b/src/kudu/server/webserver.h
index 3de8a8784..ec86af287 100644
--- a/src/kudu/server/webserver.h
+++ b/src/kudu/server/webserver.h
@@ -21,6 +21,7 @@
#include <iosfwd>
#include <map>
#include <string>
+#include <unordered_map>
#include <utility>
#include <vector>
@@ -177,7 +178,16 @@ class Webserver : public WebCallbackRegistry {
const PathHandler& handler,
struct sq_connection* connection,
struct sq_request_info* request_info,
- PrerenderedWebResponse* resp);
+ PrerenderedWebResponse* resp,
+ const std::unordered_map<std::string, std::string>& params);
+
+ // Splits a path into its components, e.g. "/foo/bar" -> ["foo", "bar"]
+ // Only ASCII characters are supported.
+ // If a non-ASCII character is provided, an empty vector is returned.
+ static std::vector<std::string> SplitPath(const std::string& path);
+
+ // Checks whether the given path has non-ASCII characters.
+ static bool ContainsNonAscii(const std::string& path);
// Callback to funnel mongoose logs through glog.
static int LogMessageCallbackStatic(const struct sq_connection* connection,
diff --git a/src/kudu/util/web_callback_registry.h
b/src/kudu/util/web_callback_registry.h
index 2c405de98..1d46bd8df 100644
--- a/src/kudu/util/web_callback_registry.h
+++ b/src/kudu/util/web_callback_registry.h
@@ -78,6 +78,9 @@ class WebCallbackRegistry {
// In the case of a POST, the posted data.
std::string post_data;
+
+ // Parameters extracted from the URL path.
+ ArgumentMap path_params;
};
// A response to an HTTP request whose body is rendered by template.