Repository: kudu
Updated Branches:
  refs/heads/master 185781f6c -> 8f525829d


KUDU-2035 Enable HTTP compression for all webserver's paths

This patch enables HTTP compression when gzip compression is
accepted by the client.

Change-Id: I7c6db24b0fd2cbcca8a554460d310bd39ee5c071
Reviewed-on: http://gerrit.cloudera.org:8080/10332
Reviewed-by: Alexey Serbin <aser...@cloudera.com>
Tested-by: Will Berkeley <wdberke...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/8f525829
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/8f525829
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/8f525829

Branch: refs/heads/master
Commit: 8f525829dcfc49edea9fadbe7514db2ea64f2087
Parents: 185781f
Author: fwang29 <fw...@cloudera.com>
Authored: Thu May 17 13:33:50 2018 -0700
Committer: Will Berkeley <wdberke...@gmail.com>
Committed: Fri May 18 21:23:08 2018 +0000

----------------------------------------------------------------------
 .../integration-tests/linked_list-test-util.h   |  6 +-
 src/kudu/server/webserver-test.cc               | 62 +++++++++++++++++++-
 src/kudu/server/webserver.cc                    | 22 +++++++
 src/kudu/util/curl_util.cc                      | 21 ++++++-
 src/kudu/util/curl_util.h                       |  9 ++-
 5 files changed, 113 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/8f525829/src/kudu/integration-tests/linked_list-test-util.h
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/linked_list-test-util.h b/src/kudu/integration-tests/linked_list-test-util.h
index ba04ea6..252d640 100644
--- a/src/kudu/integration-tests/linked_list-test-util.h
+++ b/src/kudu/integration-tests/linked_list-test-util.h
@@ -374,8 +374,12 @@ class PeriodicWebUIChecker {
     while (is_running_.Load()) {
       // Poll all of the URLs.
       const MonoTime start = MonoTime::Now();
+      bool compression_enabled = true;
       for (const auto& url : urls_) {
-        Status s = curl.FetchURL(url, &dst);
+        // Switch compression back and forth.
+        Status s = compression_enabled ? curl.FetchURL(url, &dst, {"Accept-Encoding: gzip"})
+                                       : curl.FetchURL(url, &dst);
+        compression_enabled = !compression_enabled;
         if (s.ok()) {
           CHECK_GT(dst.length(), 0);
         }

http://git-wip-us.apache.org/repos/asf/kudu/blob/8f525829/src/kudu/server/webserver-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/server/webserver-test.cc b/src/kudu/server/webserver-test.cc
index 7c53a4b..aa4da52 100644
--- a/src/kudu/server/webserver-test.cc
+++ b/src/kudu/server/webserver-test.cc
@@ -15,6 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include "kudu/server/webserver.h"
+
+#include <iosfwd>
 #include <memory>
 #include <string>
 #include <vector>
@@ -32,7 +35,6 @@
 #include "kudu/security/test/test_certs.h"
 #include "kudu/security/test/test_pass.h"
 #include "kudu/server/default_path_handlers.h"
-#include "kudu/server/webserver.h"
 #include "kudu/server/webserver_options.h"
 #include "kudu/util/curl_util.h"
 #include "kudu/util/env.h"
@@ -44,6 +46,7 @@
 #include "kudu/util/status.h"
 #include "kudu/util/test_macros.h"
 #include "kudu/util/test_util.h"
+#include "kudu/util/zlib.h"
 
 using std::string;
 using std::vector;
@@ -149,6 +152,63 @@ TEST_F(WebserverTest, TestIndexPage) {
   ASSERT_STR_CONTAINS(buf_.ToString(), "memz");
 }
 
+TEST_F(WebserverTest, TestHttpCompression) {
+  string url = strings::Substitute("http://$0/", addr_.ToString());
+  std::ostringstream oss;
+  string decoded_str;
+
+  // Curl with gzip compression enabled.
+  ASSERT_OK(curl_.FetchURL(url, &buf_, {"Accept-Encoding: deflate, br, gzip"}));
+
+  // If compressed successfully, we should be able to uncompress.
+  ASSERT_OK(zlib::Uncompress(Slice(buf_.ToString()), &oss));
+  decoded_str = oss.str();
+
+  // Should have expected title.
+  ASSERT_STR_CONTAINS(decoded_str, "Kudu");
+
+  // Should have link to default path handlers (e.g memz)
+  ASSERT_STR_CONTAINS(decoded_str, "memz");
+
+  // Should have expected header when compressed with headers returned.
+  curl_.set_return_headers(true);
+  ASSERT_OK(curl_.FetchURL(url, &buf_,
+                          {"Accept-Encoding: deflate, megaturbogzip,  gzip , br"}));
+  ASSERT_STR_CONTAINS(buf_.ToString(), "Content-Encoding: gzip");
+
+
+  // Curl with compression disabled.
+  curl_.set_return_headers(true);
+  ASSERT_OK(curl_.FetchURL(url, &buf_));
+  // Check expected header.
+  ASSERT_STR_CONTAINS(buf_.ToString(), "Content-Type:");
+
+  // Check unexpected header.
+  ASSERT_STR_NOT_CONTAINS(buf_.ToString(), "Content-Encoding: gzip");
+
+  // Should have expected title.
+  ASSERT_STR_CONTAINS(buf_.ToString(), "Kudu");
+
+  // Should have link to default path handlers (e.g memz)
+  ASSERT_STR_CONTAINS(buf_.ToString(), "memz");
+
+
+  // Curl with compression enabled but not accepted by Kudu.
+  curl_.set_return_headers(true);
+  ASSERT_OK(curl_.FetchURL(url, &buf_, {"Accept-Encoding: megaturbogzip, deflate, xz"}));
+  // Check expected header.
+  ASSERT_STR_CONTAINS(buf_.ToString(), "HTTP/1.1 200 OK");
+
+  // Check unexpected header.
+  ASSERT_STR_NOT_CONTAINS(buf_.ToString(), "Content-Encoding: gzip");
+
+  // Should have expected title.
+  ASSERT_STR_CONTAINS(buf_.ToString(), "Kudu");
+
+  // Should have link to default path handlers (e.g memz)
+  ASSERT_STR_CONTAINS(buf_.ToString(), "memz");
+}
+
 TEST_F(SslWebserverTest, TestSSL) {
   // We use a self-signed cert, so we need to disable cert verification in curl.
   curl_.set_verify_peer(false);

http://git-wip-us.apache.org/repos/asf/kudu/blob/8f525829/src/kudu/server/webserver.cc
----------------------------------------------------------------------
diff --git a/src/kudu/server/webserver.cc b/src/kudu/server/webserver.cc
index 3016650..516cd89 100644
--- a/src/kudu/server/webserver.cc
+++ b/src/kudu/server/webserver.cc
@@ -45,6 +45,7 @@
 #include "kudu/gutil/strings/numbers.h"
 #include "kudu/gutil/strings/split.h"
 #include "kudu/gutil/strings/stringpiece.h"
+#include "kudu/gutil/strings/strip.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/security/openssl_util.h"
 #include "kudu/util/easy_json.h"
@@ -56,6 +57,7 @@
 #include "kudu/util/net/sockaddr.h"
 #include "kudu/util/url-coding.h"
 #include "kudu/util/version_info.h"
+#include "kudu/util/zlib.h"
 
 struct sockaddr_in;
 
@@ -477,10 +479,30 @@ int Webserver::RunPathHandler(const PathHandler& handler,
     full_content = content.str();
   }
 
+  // Check if the gzip compression is accepted by the caller. If so, compress the content.
+  const char* accept_encoding_str = sq_get_header(connection, "Accept-Encoding");
+  bool is_compressed = false;
+  vector<string> encodings = strings::Split(accept_encoding_str, ",");
+  for (string& encoding : encodings) {
+    StripWhiteSpace(&encoding);
+    if (encoding == "gzip") {
+      ostringstream oss;
+      Status s = zlib::Compress(Slice(full_content), &oss);
+      if (s.ok()) {
+        full_content = oss.str();
+        is_compressed = true;
+      } else {
+        LOG(WARNING) << "Could not compress output: " << s.ToString();
+      }
+      break;
+    }
+  }
+
   ostringstream headers_stream;
   headers_stream << Substitute("HTTP/1.1 $0\r\n", HttpStatusCodeToString(resp.status_code));
   headers_stream << Substitute("Content-Type: $0\r\n", use_style ? "text/html" : "text/plain");
   headers_stream << Substitute("Content-Length: $0\r\n", full_content.length());
+  if (is_compressed) headers_stream << "Content-Encoding: gzip\r\n";
   headers_stream << Substitute("X-Frame-Options: $0\r\n", FLAGS_webserver_x_frame_options);
   std::unordered_set<string> invalid_headers{"Content-Type", "Content-Length", "X-Frame-Options"};
   for (const auto& entry : resp.response_headers) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/8f525829/src/kudu/util/curl_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/curl_util.cc b/src/kudu/util/curl_util.cc
index d2aa75b..4eddb64 100644
--- a/src/kudu/util/curl_util.cc
+++ b/src/kudu/util/curl_util.cc
@@ -27,6 +27,7 @@
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/security/openssl_util.h"
 #include "kudu/util/faststring.h"
+#include "kudu/util/scoped_cleanup.h"
 
 namespace kudu {
 
@@ -63,8 +64,9 @@ EasyCurl::~EasyCurl() {
   curl_easy_cleanup(curl_);
 }
 
-Status EasyCurl::FetchURL(const std::string& url, faststring* buf) {
-  return DoRequest(url, nullptr, buf);
+Status EasyCurl::FetchURL(const std::string& url, faststring* dst,
+                          const std::vector<std::string>& headers) {
+  return DoRequest(url, nullptr, dst, headers);
 }
 
 Status EasyCurl::PostToURL(const std::string& url,
@@ -75,7 +77,8 @@ Status EasyCurl::PostToURL(const std::string& url,
 
 Status EasyCurl::DoRequest(const std::string& url,
                            const std::string* post_data,
-                           faststring* dst) {
+                           faststring* dst,
+                           const std::vector<std::string>& headers) {
   CHECK_NOTNULL(dst)->clear();
 
   if (!verify_peer_) {
@@ -84,6 +87,18 @@ Status EasyCurl::DoRequest(const std::string& url,
     RETURN_NOT_OK(TranslateError(curl_easy_setopt(
         curl_, CURLOPT_SSL_VERIFYPEER, 0)));
   }
+
+  // Add headers if specified.
+  struct curl_slist* curl_headers = nullptr;
+  auto clean_up_curl_slist = MakeScopedCleanup([&]() {
+    curl_slist_free_all(curl_headers);
+  });
+
+  for (const auto& header : headers) {
+    curl_headers = CHECK_NOTNULL(curl_slist_append(curl_headers, header.c_str()));
+  }
+  RETURN_NOT_OK(TranslateError(curl_easy_setopt(curl_, CURLOPT_HTTPHEADER, curl_headers)));
+
   RETURN_NOT_OK(TranslateError(curl_easy_setopt(curl_, CURLOPT_URL, url.c_str())));
   if (return_headers_) {
     RETURN_NOT_OK(TranslateError(curl_easy_setopt(curl_, CURLOPT_HEADER, 1)));

http://git-wip-us.apache.org/repos/asf/kudu/blob/8f525829/src/kudu/util/curl_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/curl_util.h b/src/kudu/util/curl_util.h
index 49ba2d4..cccd2db 100644
--- a/src/kudu/util/curl_util.h
+++ b/src/kudu/util/curl_util.h
@@ -18,6 +18,7 @@
 #define KUDU_UTIL_CURL_UTIL_H
 
 #include <string>
+#include <vector>
 
 #include "kudu/gutil/macros.h"
 #include "kudu/util/monotime.h"
@@ -40,8 +41,11 @@ class EasyCurl {
 
   // Fetch the given URL into the provided buffer.
   // Any existing data in the buffer is replaced.
+  // The optional param 'headers' holds additional headers.
+  // e.g. {"Accept-Encoding: gzip"}
   Status FetchURL(const std::string& url,
-                  faststring* dst);
+                  faststring* dst,
+                  const std::vector<std::string>& headers = {});
 
   // Issue an HTTP POST to the given URL with the given data.
   // Returns results in 'dst' as above.
@@ -68,7 +72,8 @@ class EasyCurl {
   // Otherwise, does a GET.
   Status DoRequest(const std::string& url,
                    const std::string* post_data,
-                   faststring* dst);
+                   faststring* dst,
+                   const std::vector<std::string>& headers = {});
   CURL* curl_;
 
   // Whether to verify the server certificate.

Reply via email to