This is an automated email from the ASF dual-hosted git repository.

mgreber pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 87e53ae41 Implement path parameter handling in webserver
87e53ae41 is described below

commit 87e53ae41fa0e68cb5f9d4e65385dc513f864774
Author: gabriellalotz <[email protected]>
AuthorDate: Thu Oct 10 13:38:55 2024 +0200

    Implement path parameter handling in webserver
    
    Introduced path parameter parsing in the webserver to support dynamic
    REST API endpoints. Parameters within URIs are now identified using
    <parameter_name> notation. Extracted parameters are passed to the
    WebRequest as path_params.
    Example usage:
    Registering a path like "/api/v1/tables/<table_id>" will map the
    corresponding part of the URI to the table_id key in the path_params
    of the WebRequest.
    
    Query parameter handling won't change with path parameters.
    Example for query and path parameters:
    Given path "api/v1/tables/<table_id>", we can reach URIs like
    "api/v1/tables/1234?var1=1". We can find these in the following way,
    assuming we have a WebRequest struct.
    req.parsed_args["var1"] = "1"
    req.path_params["table_id"] = "1234"
    
    Change-Id: Id2085c55d3b86985c87c5fd5a8b48568f8e6fa63
    Reviewed-on: http://gerrit.cloudera.org:8080/21920
    Tested-by: Kudu Jenkins
    Reviewed-by: Zoltan Chovan <[email protected]>
    Reviewed-by: Marton Greber <[email protected]>
---
 src/kudu/server/webserver-test.cc     |  96 ++++++++++++++++++++++++++++++++
 src/kudu/server/webserver.cc          | 102 +++++++++++++++++++++++++++++-----
 src/kudu/server/webserver.h           |  12 +++-
 src/kudu/util/web_callback_registry.h |   3 +
 4 files changed, 197 insertions(+), 16 deletions(-)

diff --git a/src/kudu/server/webserver-test.cc 
b/src/kudu/server/webserver-test.cc
index 07688ab63..e5ca8aad2 100644
--- a/src/kudu/server/webserver-test.cc
+++ b/src/kudu/server/webserver-test.cc
@@ -28,6 +28,8 @@
 #include <iosfwd>
 #include <memory>
 #include <string>
+#include <unordered_map>
+#include <utility>
 #include <vector>
 
 #include <gflags/gflags.h>
@@ -46,6 +48,7 @@
 #include "kudu/server/default_path_handlers.h"
 #include "kudu/server/webserver_options.h"
 #include "kudu/util/curl_util.h"
+#include "kudu/util/easy_json.h"
 #include "kudu/util/env.h"
 #include "kudu/util/faststring.h"
 #include "kudu/util/flag_tags.h"
@@ -55,6 +58,7 @@
 #include "kudu/util/status.h"
 #include "kudu/util/test_macros.h"
 #include "kudu/util/test_util.h"
+#include "kudu/util/web_callback_registry.h"
 #include "kudu/util/zlib.h"
 
 using std::string;
@@ -658,6 +662,98 @@ TEST_F(WebserverTest, TestStaticFiles) {
   ASSERT_EQ("Remote error: HTTP 403", s.ToString());
 }
 
+namespace {
+
+// Handler that echoes back the path parameters and query parameters in 
key-value pairs.
+void PathParamHandler(const Webserver::WebRequest& req, 
Webserver::WebResponse* resp) {
+  EasyJson* output = &resp->output;
+
+  for (const auto& param : req.path_params) {
+    (*output)["path_params"][param.first] = param.second;
+  }
+
+  for (const auto& param : req.parsed_args) {
+    (*output)["query_params"][param.first] = param.second;
+  }
+}
+
+class PathParamWebserverTest : public WebserverTest {
+ protected:
+  void SetUp() override {
+    WebserverTest::SetUp();
+    server_->RegisterPathHandler("/api/tables/<table_id>/tablets/<tablet_id>",
+                                 "PathParamWebserverTest",
+                                 PathParamHandler,
+                                 StyleMode::UNSTYLED,
+                                 false);
+    server_->RegisterPathHandler("/api/tables/<table_id>",
+                                 "PathParamWebserverTest",
+                                 PathParamHandler,
+                                 StyleMode::UNSTYLED,
+                                 false);
+    server_->RegisterPathHandler("/columns/ <column_id>",
+                                 "PathParamWebserverTest",
+                                 PathParamHandler,
+                                 StyleMode::UNSTYLED,
+                                 false);
+  }
+};
+}  // anonymous namespace
+
+TEST_F(PathParamWebserverTest, TestPathParameterAtEnd) {
+  ASSERT_OK(
+      
curl_.FetchURL(Substitute("$0/api/tables/45dc8d192549427b8dca871dbbb20bb3", 
url_), &buf_));
+  ASSERT_STR_CONTAINS(buf_.ToString(), 
"\"table_id\":\"45dc8d192549427b8dca871dbbb20bb3\"");
+}
+
+TEST_F(PathParamWebserverTest, TestPathParameterInMiddleAndEnd) {
+  ASSERT_OK(curl_.FetchURL(
+      Substitute("$0/api/tables/45dc8d192549427b8dca871dbbb20bb3/tablets/1", 
url_), &buf_));
+  ASSERT_STR_CONTAINS(buf_.ToString(), 
"\"table_id\":\"45dc8d192549427b8dca871dbbb20bb3\"");
+  ASSERT_STR_CONTAINS(buf_.ToString(), "\"tablet_id\":\"1\"");
+}
+
+TEST_F(PathParamWebserverTest, TestInvalidPathParameter) {
+  Status s = curl_.FetchURL(Substitute("$0/api/tables//tablets/1", url_), 
&buf_);
+  ASSERT_EQ("Remote error: HTTP 404", s.ToString());
+}
+
+// Test that the query string is correctly parsed and returned in the response,
+// even when the path contains a path parameter.
+TEST_F(PathParamWebserverTest, TestPathWithQueryString) {
+  ASSERT_OK(curl_.FetchURL(
+      Substitute("$0/api/tables/45dc8d192549427b8dca871dbbb20bb3?foo=bar", 
url_), &buf_));
+  ASSERT_STR_CONTAINS(buf_.ToString(),
+                      
"\"path_params\":{\"table_id\":\"45dc8d192549427b8dca871dbbb20bb3\"}");
+  ASSERT_STR_CONTAINS(buf_.ToString(), "\"query_params\":{\"foo\":\"bar\"}");
+}
+
+TEST_F(PathParamWebserverTest, TestInvalidPathWithSpace) {
+  Status s = curl_.FetchURL(
+      Substitute("$0/api/tables/45dc8d192549427b8dca871dbbb20bb3 /tablets/1", 
url_), &buf_);
+  ASSERT_EQ(
+      "Network error: curl error: URL using bad/illegal format or missing URL: 
URL rejected: "
+      "Malformed input to a URL function",
+      s.ToString());
+}
+
+TEST_F(PathParamWebserverTest, TestRegisteredPathWithSpace) {
+  Status s = 
curl_.FetchURL(Substitute("$0/columns/45dc8d192549427b8dca871dbbb20bb3", url_), 
&buf_);
+  ASSERT_EQ("Remote error: HTTP 404", s.ToString());
+  Status s2 =
+      curl_.FetchURL(Substitute("$0/columns/ 
45dc8d192549427b8dca871dbbb20bb3", url_), &buf_);
+  ASSERT_EQ(
+      "Network error: curl error: URL using bad/illegal format or missing URL: 
URL rejected: "
+      "Malformed input to a URL function",
+      s2.ToString());
+}
+
+TEST_F(PathParamWebserverTest, TestPathParamWithNonAsciiCharacter) {
+  Status s = curl_.FetchURL(
+      
Substitute("$0/api/tables/45dc8d192549427b8dca871dbbb20bb3ΓΌ20/tablets/1", 
url_), &buf_);
+  ASSERT_EQ("Remote error: HTTP 400", s.ToString());
+}
+
 class DisabledDocRootWebserverTest : public WebserverTest {
  protected:
   bool enable_doc_root() const override { return false; }
diff --git a/src/kudu/server/webserver.cc b/src/kudu/server/webserver.cc
index a734259cb..ab5f3f149 100644
--- a/src/kudu/server/webserver.cc
+++ b/src/kudu/server/webserver.cc
@@ -592,40 +592,85 @@ sq_callback_result_t Webserver::BeginRequestCallback(
     request_info->remote_user = strdup(authn_princ.c_str());
   }
 
-  PathHandler* handler;
+  PathHandler* handler = nullptr;
+  std::unordered_map<std::string, std::string> path_params;
   {
+    bool has_non_ascii = ContainsNonAscii(request_info->uri);
+    if (has_non_ascii) {
+      resp.output << "Path contains non-ASCII characters";
+      resp.status_code = HttpStatusCode::BadRequest;
+      SendResponse(connection, &resp);
+      return SQ_HANDLED_OK;
+    }
     shared_lock<RWMutex> l(lock_);
     PathHandlerMap::const_iterator it = path_handlers_.find(request_info->uri);
+
     if (it == path_handlers_.end()) {
-      // Let Mongoose deal with this request; returning NULL will fall through
-      // to the default handler which will serve files.
-      if (!opts_.doc_root.empty() && opts_.enable_doc_root) {
-        VLOG(2) << "HTTP File access: " << request_info->uri;
-        // TODO(adar): if using SPNEGO, do we need to somehow send the
-        // authentication response header here?
-        return SQ_CONTINUE_HANDLING;
+      std::vector<std::string> uri_segments = SplitPath(request_info->uri);
+      if (uri_segments.empty()) {
+        resp.output << "Invalid path";
+        resp.status_code = HttpStatusCode::BadRequest;
+        SendResponse(connection, &resp);
+        return SQ_HANDLED_OK;
       }
-      resp.output << Substitute("No handler for URI $0\r\n\r\n", 
request_info->uri);
-      resp.status_code = HttpStatusCode::NotFound;
-      SendResponse(connection, &resp);
-      return SQ_HANDLED_OK;
+      for (const auto& path_handler : path_handlers_) {
+        std::vector<std::string> handler_segments = 
SplitPath(path_handler.first);
+
+        if (handler_segments.size() != uri_segments.size()) {
+          continue;
+        }
+        bool match = false;
+        for (size_t i = 0; i < handler_segments.size(); ++i) {
+          if (handler_segments[i][0] == '<' &&
+              handler_segments[i][handler_segments[i].size() - 1] == '>' &&
+              handler_segments[i].size() >= 3) {
+            std::string param_name = handler_segments[i].substr(1, 
handler_segments[i].size() - 2);
+            path_params[param_name] = uri_segments[i];
+            match = true;
+          } else if (handler_segments[i] != uri_segments[i]) {
+            match = false;
+            break;
+          }
+        }
+        if (match) {
+          handler = path_handler.second;
+          break;
+        }
+      }
+      if (!handler) {
+        // Let Mongoose deal with this request; returning NULL will fall 
through
+        // to the default handler which will serve files.
+        if (!opts_.doc_root.empty() && opts_.enable_doc_root) {
+          VLOG(2) << "HTTP File access: " << request_info->uri;
+          // TODO(adar): if using SPNEGO, do we need to somehow send the
+          // authentication response header here?
+          return SQ_CONTINUE_HANDLING;
+        }
+        resp.output << Substitute("No handler for URI $0\r\n\r\n", 
request_info->uri);
+        resp.status_code = HttpStatusCode::NotFound;
+        SendResponse(connection, &resp);
+        return SQ_HANDLED_OK;
+      }
+    } else {
+      handler = it->second;
     }
-    handler = it->second;
   }
 
-  return RunPathHandler(*handler, connection, request_info, &resp);
+  return RunPathHandler(*handler, connection, request_info, &resp, 
path_params);
 }
 
 sq_callback_result_t Webserver::RunPathHandler(
     const PathHandler& handler,
     struct sq_connection* connection,
     struct sq_request_info* request_info,
-    PrerenderedWebResponse* resp) {
+    PrerenderedWebResponse* resp,
+    const std::unordered_map<std::string, std::string>& path_params) {
   WebRequest req;
   if (request_info->query_string != nullptr) {
     req.query_string = request_info->query_string;
     BuildArgumentMap(request_info->query_string, &req.parsed_args);
   }
+  req.path_params = path_params;
   for (int i = 0; i < request_info->num_headers; i++) {
     const auto& h = request_info->http_headers[i];
     string key = h.name;
@@ -688,6 +733,33 @@ sq_callback_result_t Webserver::RunPathHandler(
   return SQ_HANDLED_OK;
 }
 
+std::vector<std::string> Webserver::SplitPath(const std::string& path) {
+  std::vector<std::string> segments;
+  // Reserve space based on '/' count
+  segments.reserve(std::count(path.begin(), path.end(), '/') + 1);
+
+  size_t start = 0;
+  size_t end;
+  while ((end = path.find('/', start)) != std::string::npos) {
+    if (end != start) {  // Avoid empty segments
+      segments.emplace_back(path.substr(start, end - start));
+    }
+    start = end + 1;
+  }
+  // Add the last segment if it exists
+  if (start < path.size()) {
+    segments.emplace_back(path.substr(start));
+  }
+
+  return segments;
+}
+
+bool Webserver::ContainsNonAscii(const std::string& path) {
+  return std::any_of(path.begin(), path.end(), [](char c) {
+    return c < 0 || c > 127;
+  });
+}
+
 void Webserver::SendResponse(struct sq_connection* connection,
                              PrerenderedWebResponse* resp,
                              const WebRequest* req,
diff --git a/src/kudu/server/webserver.h b/src/kudu/server/webserver.h
index 3de8a8784..ec86af287 100644
--- a/src/kudu/server/webserver.h
+++ b/src/kudu/server/webserver.h
@@ -21,6 +21,7 @@
 #include <iosfwd>
 #include <map>
 #include <string>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 
@@ -177,7 +178,16 @@ class Webserver : public WebCallbackRegistry {
       const PathHandler& handler,
       struct sq_connection* connection,
       struct sq_request_info* request_info,
-      PrerenderedWebResponse* resp);
+      PrerenderedWebResponse* resp,
+      const std::unordered_map<std::string, std::string>& params);
+
+  // Splits a path into its components, e.g. "/foo/bar" -> ["foo", "bar"]
+  // Only ASCII characters are supported.
+  // If a non-ASCII character is provided, an empty vector is returned.
+  static std::vector<std::string> SplitPath(const std::string& path);
+
+  // Checks whether the given path has non-ASCII characters.
+  static bool ContainsNonAscii(const std::string& path);
 
   // Callback to funnel mongoose logs through glog.
   static int LogMessageCallbackStatic(const struct sq_connection* connection,
diff --git a/src/kudu/util/web_callback_registry.h 
b/src/kudu/util/web_callback_registry.h
index 2c405de98..1d46bd8df 100644
--- a/src/kudu/util/web_callback_registry.h
+++ b/src/kudu/util/web_callback_registry.h
@@ -78,6 +78,9 @@ class WebCallbackRegistry {
 
     // In the case of a POST, the posted data.
     std::string post_data;
+
+    // Parameters extracted from the URL path.
+    ArgumentMap path_params;
   };
 
   // A response to an HTTP request whose body is rendered by template.

Reply via email to