This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0-beta in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0-beta by this push: new ee97899deb [bugfix](s3 fs) fix s3 uri parsing for http/https uri (#20656) ee97899deb is described below commit ee97899deb2dfe681f630c76df65dab76319df2b Author: Kang <kxiao.ti...@gmail.com> AuthorDate: Sun Jun 11 14:00:04 2023 +0800 [bugfix](s3 fs) fix s3 uri parsing for http/https uri (#20656) --- be/src/util/s3_uri.cpp | 32 +++++++++++++++++++++++++------- be/src/util/s3_uri.h | 3 +++ be/test/util/s3_uri_test.cpp | 29 +++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 7 deletions(-) diff --git a/be/src/util/s3_uri.cpp b/be/src/util/s3_uri.cpp index 08041fece8..c2e4a72098 100644 --- a/be/src/util/s3_uri.cpp +++ b/be/src/util/s3_uri.cpp @@ -25,6 +25,9 @@ namespace doris { +const std::string S3URI::_SCHEME_S3 = "s3"; +const std::string S3URI::_SCHEME_HTTP = "http"; +const std::string S3URI::_SCHEME_HTTPS = "https"; const std::string S3URI::_SCHEME_DELIM = "://"; const std::string S3URI::_PATH_DELIM = "/"; const std::string S3URI::_QUERY_DELIM = "?"; @@ -42,15 +45,30 @@ Status S3URI::parse() { std::vector<std::string> scheme_split = strings::Split(_location, _SCHEME_DELIM); std::string rest; if (scheme_split.size() == 2) { - // has scheme, eg: s3://bucket1/path/to/file.txt - rest = scheme_split[1]; - std::vector<std::string> authority_split = - strings::Split(rest, strings::delimiter::Limit(_PATH_DELIM, 1)); - if (authority_split.size() != 2) { + if (scheme_split[0] == _SCHEME_S3) { + // has scheme, eg: s3://bucket1/path/to/file.txt + rest = scheme_split[1]; + std::vector<std::string> authority_split = + strings::Split(rest, strings::delimiter::Limit(_PATH_DELIM, 1)); + if (authority_split.size() != 2) { + return Status::InvalidArgument("Invalid S3 URI: {}", _location); + } + _bucket = authority_split[0]; + _key = authority_split[1]; + } else if (scheme_split[0] == _SCHEME_HTTP || scheme_split[0] == _SCHEME_HTTPS) { + // has scheme, eg: http(s)://host/bucket1/path/to/file.txt + rest = scheme_split[1]; + std::vector<std::string> authority_split = + strings::Split(rest, strings::delimiter::Limit(_PATH_DELIM, 2)); + if (authority_split.size() != 3) { + return Status::InvalidArgument("Invalid S3 HTTP URI: {}", _location); + } + // authority_split[1] is host + _bucket = authority_split[1]; + _key = authority_split[2]; + } else { return Status::InvalidArgument("Invalid S3 URI: {}", _location); } - _bucket = authority_split[0]; - _key = authority_split[1]; } else if (scheme_split.size() == 1) { // no scheme, eg: path/to/file.txt _bucket = ""; // unknown diff --git a/be/src/util/s3_uri.h b/be/src/util/s3_uri.h index 76ffea5824..769053450b 100644 --- a/be/src/util/s3_uri.h +++ b/be/src/util/s3_uri.h @@ -41,6 +41,9 @@ public: std::string to_string() const; private: + static const std::string _SCHEME_S3; + static const std::string _SCHEME_HTTP; + static const std::string _SCHEME_HTTPS; static const std::string _SCHEME_DELIM; static const std::string _PATH_DELIM; static const std::string _QUERY_DELIM; diff --git a/be/test/util/s3_uri_test.cpp b/be/test/util/s3_uri_test.cpp index ab5192a8a0..93aa2c72eb 100644 --- a/be/test/util/s3_uri_test.cpp +++ b/be/test/util/s3_uri_test.cpp @@ -56,13 +56,42 @@ TEST_F(S3URITest, EncodedString) { EXPECT_EQ("path%20to%20file", uri1.get_key()); } +TEST_F(S3URITest, HttpURI) { + std::string p1 = "http://a.b.com/bucket/path/to/file"; + S3URI uri1(p1); + EXPECT_TRUE(uri1.parse()); + EXPECT_EQ("bucket", uri1.get_bucket()); + EXPECT_EQ("path/to/file", uri1.get_key()); + + std::string p2 = "https://a.b.com/bucket/path/to/file"; + S3URI uri2(p2); + EXPECT_TRUE(uri2.parse()); + EXPECT_EQ("bucket", uri2.get_bucket()); + EXPECT_EQ("path/to/file", uri2.get_key()); +} + +TEST_F(S3URITest, InvalidSchema) { + std::string p1 = "xxx://a.b.com/bucket/path/to/file"; + S3URI uri1(p1); + EXPECT_FALSE(uri1.parse()); +} + TEST_F(S3URITest, MissingKey) { std::string p1 = "https://bucket/"; S3URI uri1(p1); EXPECT_FALSE(uri1.parse()); + std::string p2 = "s3://bucket/"; S3URI uri2(p2); EXPECT_FALSE(uri2.parse()); + + std::string p3 = "http://a.b.com/bucket/"; + S3URI uri3(p3); + EXPECT_FALSE(uri3.parse()); + + std::string p4 = "http://a.b.com/"; + S3URI uri4(p4); + EXPECT_FALSE(uri4.parse()); } TEST_F(S3URITest, RelativePathing) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org