This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new ca23513 ARROW-4697: [C++] Add URI parsing facility
ca23513 is described below
commit ca2351363ba1724de17eda3dd8ef334d7231f4f8
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Mar 20 09:23:27 2019 +0100
ARROW-4697: [C++] Add URI parsing facility
Using the [uriparser](https://uriparser.github.io/) C library.
Author: Antoine Pitrou <[email protected]>
Author: Uwe L. Korn <[email protected]>
Closes #3779 from pitrou/ARROW-4697-cpp-uri-parsing and squashes the
following commits:
5ad5d1b6 <Uwe L. Korn> Set CMAKE_INSTALL_LIBDIR when building uriparser
ff41cef7 <Antoine Pitrou> Fix AppVeyor failure?
6348b896 <Antoine Pitrou> - Work around a CMake issue - Add docstrings -
Add more path tests
3f5c5899 <Antoine Pitrou> Explicitly bundle uriparser if conda was set as
default source
a1714902 <Antoine Pitrou> ARROW-4697: Add URI parsing facility
---
ci/appveyor-cpp-build.bat | 2 +-
cpp/CMakeLists.txt | 3 +
cpp/cmake_modules/ThirdpartyToolchain.cmake | 75 ++++++++++++
cpp/src/arrow/CMakeLists.txt | 1 +
cpp/src/arrow/symbols.map | 2 +
cpp/src/arrow/util/CMakeLists.txt | 1 +
cpp/src/arrow/util/uri-test.cc | 182 ++++++++++++++++++++++++++++
cpp/src/arrow/util/uri.cc | 147 ++++++++++++++++++++++
cpp/src/arrow/util/uri.h | 67 ++++++++++
cpp/thirdparty/versions.txt | 3 +
10 files changed, 482 insertions(+), 1 deletion(-)
diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat
index b735d0f..0320db9 100644
--- a/ci/appveyor-cpp-build.bat
+++ b/ci/appveyor-cpp-build.bat
@@ -96,7 +96,7 @@ if "%JOB%" == "Build_Debug" (
exit /B 0
)
-set CONDA_PACKAGES=--file=ci\conda_env_python.yml python=%PYTHON% numpy=1.14
thrift-cpp=0.11 boost-cpp
+set CONDA_PACKAGES=--file=ci\conda_env_python.yml python=%PYTHON% numpy=1.14
boost-cpp
if "%ARROW_BUILD_GANDIVA%" == "ON" (
@rem Install llvmdev in the toolchain if building gandiva.dll
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 3886743..d80fe70 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -757,6 +757,9 @@ set(ARROW_LINK_LIBS ${double-conversion_LIBRARIES})
set(ARROW_STATIC_LINK_LIBS ${double-conversion_LIBRARIES})
set(ARROW_STATIC_INSTALL_INTERFACE_LIBS ${double-conversion_LIBRARIES})
+list(APPEND ARROW_STATIC_LINK_LIBS uriparser::uriparser)
+list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS uriparser::uriparser)
+
if(ARROW_WITH_BROTLI)
# Order is important for static linking
list(APPEND ARROW_LINK_LIBS Brotli::brotlienc Brotli::brotlidec
Brotli::brotlicommon)
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 29b6173..bdb8a98 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -133,6 +133,8 @@ macro(build_dependency DEPENDENCY_NAME)
build_grpc()
elseif("${DEPENDENCY_NAME}" STREQUAL "BZip2")
build_bzip2()
+ elseif("${DEPENDENCY_NAME}" STREQUAL "uriparser")
+ build_uriparser()
else()
message(FATAL_ERROR "Unknown thirdparty dependency to build:
${DEPENDENCY_NAME}")
endif()
@@ -341,6 +343,13 @@ else()
)
endif()
+if(DEFINED ENV{ARROW_URIPARSER_URL})
+ set(URIPARSER_SOURCE_URL "$ENV{ARROW_URIPARSER_URL}")
+else()
+ set(URIPARSER_SOURCE_URL
+
"https://github.com/uriparser/uriparser/archive/${URIPARSER_VERSION}.tar.gz")
+endif()
+
if(DEFINED ENV{ARROW_ZLIB_URL})
set(ZLIB_SOURCE_URL "$ENV{ARROW_ZLIB_URL}")
else()
@@ -516,6 +525,72 @@ include_directories(SYSTEM
${double-conversion_INCLUDE_DIRS})
double_conversion_compability()
# ----------------------------------------------------------------------
+# uriparser library: build_uriparser() builds it from source as a static library and defines the imported target uriparser::uriparser
+
+macro(build_uriparser)
+ message(STATUS "Building uriparser from source")
+ set(URIPARSER_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/uriparser_ep-install")
+ set(
+ URIPARSER_STATIC_LIB
+
"${URIPARSER_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}uriparser${CMAKE_STATIC_LIBRARY_SUFFIX}"
+ )
+ set(URIPARSER_INCLUDE_DIRS "${URIPARSER_PREFIX}/include")
+
+ set(URIPARSER_CMAKE_ARGS
+ ${EP_COMMON_CMAKE_ARGS}
+ "-DURIPARSER_BUILD_DOCS=off"
+ "-DURIPARSER_BUILD_TESTS=off"
+ "-DURIPARSER_BUILD_TOOLS=off"
+ "-DURIPARSER_BUILD_WCHAR_T=off"
+ "-DBUILD_SHARED_LIBS=off"
+ "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
+ "-DCMAKE_INSTALL_LIBDIR=lib" # pin the libdir so the archive lands at the <prefix>/lib path hard-coded in URIPARSER_STATIC_LIB
+ "-DCMAKE_POSITION_INDEPENDENT_CODE=on"
+ "-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>")
+
+ if(MSVC AND ARROW_USE_STATIC_CRT)
+ if("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") # NOTE(review): exact upper-case match; assumes CMAKE_BUILD_TYPE was upper-cased earlier -- confirm
+ list(APPEND URIPARSER_CMAKE_ARGS "-DURIPARSER_MSVC_RUNTIME=/MTd")
+ else()
+ list(APPEND URIPARSER_CMAKE_ARGS "-DURIPARSER_MSVC_RUNTIME=/MT")
+ endif()
+ endif()
+
+ externalproject_add(uriparser_ep
+ URL
+ ${URIPARSER_SOURCE_URL}
+ CMAKE_ARGS
+ ${URIPARSER_CMAKE_ARGS}
+ BUILD_BYPRODUCTS
+ ${URIPARSER_STATIC_LIB}
+ INSTALL_DIR
+ ${URIPARSER_PREFIX}
+ ${EP_LOG_OPTIONS})
+
+ add_library(uriparser::uriparser STATIC IMPORTED)
+ # Work around https://gitlab.kitware.com/cmake/cmake/issues/15052: create the include dir now, since uriparser_ep only populates it at build time
+ file(MAKE_DIRECTORY ${URIPARSER_INCLUDE_DIRS})
+ set_target_properties(
+ uriparser::uriparser
+ PROPERTIES IMPORTED_LOCATION ${URIPARSER_STATIC_LIB}
INTERFACE_INCLUDE_DIRECTORIES
+ ${URIPARSER_INCLUDE_DIRS})
+
+ add_dependencies(toolchain uriparser_ep)
+ add_dependencies(uriparser::uriparser uriparser_ep) # order the external build before anything that depends on the imported target
+endmacro()
+
+# Unless the user overrides uriparser_SOURCE, build uriparser ourselves
+if("${uriparser_SOURCE}" STREQUAL "")
+ set(uriparser_SOURCE "BUNDLED")
+endif()
+
+resolve_dependency(uriparser)
+
+get_target_property(URIPARSER_INCLUDE_DIRS uriparser::uriparser
+ INTERFACE_INCLUDE_DIRECTORIES)
+include_directories(SYSTEM ${URIPARSER_INCLUDE_DIRS})
+
+# ----------------------------------------------------------------------
# Snappy
macro(build_snappy)
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 4ef60c9..83c2674 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -129,6 +129,7 @@ set(ARROW_SRCS
util/task-group.cc
util/thread-pool.cc
util/trie.cc
+ util/uri.cc
util/utf8.cc
vendored/datetime/tz.cpp)
diff --git a/cpp/src/arrow/symbols.map b/cpp/src/arrow/symbols.map
index 9ee0ff3..9b24ab4 100644
--- a/cpp/src/arrow/symbols.map
+++ b/cpp/src/arrow/symbols.map
@@ -66,6 +66,8 @@
ERR_getErrorString;
# jemalloc
je_arrow_*;
+ # uriparser
+ uri*;
# ORC destructors
_ZThn8_N3orc*;
# Protobuf symbols that aren't hidden by the C++ section below
diff --git a/cpp/src/arrow/util/CMakeLists.txt
b/cpp/src/arrow/util/CMakeLists.txt
index ba24f88..ca0b96e 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -41,6 +41,7 @@ add_arrow_test(stl-util-test)
add_arrow_test(task-group-test)
add_arrow_test(thread-pool-test)
add_arrow_test(trie-test)
+add_arrow_test(uri-test)
add_arrow_test(utf8-util-test)
add_arrow_benchmark(bit-util-benchmark)
diff --git a/cpp/src/arrow/util/uri-test.cc b/cpp/src/arrow/util/uri-test.cc
new file mode 100644
index 0000000..34a7d24
--- /dev/null
+++ b/cpp/src/arrow/util/uri-test.cc
@@ -0,0 +1,182 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/uri.h"
+
+namespace arrow {
+namespace internal {
+
+TEST(Uri, Empty) {
+ Uri uri;
+ ASSERT_EQ(uri.scheme(), "");
+}
+
+TEST(Uri, ParseSimple) {
+ Uri uri;
+ {
+ // An ephemeral string object shouldn't invalidate results
+ std::string s = "https://arrow.apache.org";
+ ASSERT_OK(uri.Parse(s));
+ s.replace(0, s.size(), s.size(), 'X'); // replace contents
+ }
+ ASSERT_EQ(uri.scheme(), "https");
+ ASSERT_EQ(uri.host(), "arrow.apache.org");
+ ASSERT_EQ(uri.port_text(), "");
+}
+
+TEST(Uri, ParsePath) {
+ // The various edge cases below (leading and trailing slashes) have been
+ // checked against several Python URI parsing modules: `uri`, `rfc3986`,
`rfc3987`
+
+ Uri uri;
+
+ // Relative path
+ ASSERT_OK(uri.Parse("unix:tmp/flight.sock"));
+ ASSERT_EQ(uri.scheme(), "unix");
+ ASSERT_FALSE(uri.has_host());
+ ASSERT_EQ(uri.host(), "");
+ ASSERT_EQ(uri.path(), "tmp/flight.sock");
+
+ // Absolute path
+ ASSERT_OK(uri.Parse("unix:/tmp/flight.sock"));
+ ASSERT_EQ(uri.scheme(), "unix");
+ ASSERT_FALSE(uri.has_host());
+ ASSERT_EQ(uri.host(), "");
+ ASSERT_EQ(uri.path(), "/tmp/flight.sock");
+
+ ASSERT_OK(uri.Parse("unix://localhost/tmp/flight.sock"));
+ ASSERT_EQ(uri.scheme(), "unix");
+ ASSERT_TRUE(uri.has_host());
+ ASSERT_EQ(uri.host(), "localhost");
+ ASSERT_EQ(uri.path(), "/tmp/flight.sock");
+
+ ASSERT_OK(uri.Parse("unix:///tmp/flight.sock"));
+ ASSERT_EQ(uri.scheme(), "unix");
+ ASSERT_TRUE(uri.has_host());
+ ASSERT_EQ(uri.host(), "");
+ ASSERT_EQ(uri.path(), "/tmp/flight.sock");
+
+ // Empty path
+ ASSERT_OK(uri.Parse("unix:"));
+ ASSERT_EQ(uri.scheme(), "unix");
+ ASSERT_FALSE(uri.has_host());
+ ASSERT_EQ(uri.host(), "");
+ ASSERT_EQ(uri.path(), "");
+
+ ASSERT_OK(uri.Parse("unix://localhost"));
+ ASSERT_EQ(uri.scheme(), "unix");
+ ASSERT_TRUE(uri.has_host());
+ ASSERT_EQ(uri.host(), "localhost");
+ ASSERT_EQ(uri.path(), "");
+
+ // With trailing slash
+ ASSERT_OK(uri.Parse("unix:/"));
+ ASSERT_EQ(uri.scheme(), "unix");
+ ASSERT_FALSE(uri.has_host());
+ ASSERT_EQ(uri.host(), "");
+ ASSERT_EQ(uri.path(), "/");
+
+ ASSERT_OK(uri.Parse("unix:tmp/"));
+ ASSERT_EQ(uri.scheme(), "unix");
+ ASSERT_FALSE(uri.has_host());
+ ASSERT_EQ(uri.host(), "");
+ ASSERT_EQ(uri.path(), "tmp/");
+
+ ASSERT_OK(uri.Parse("unix://localhost/"));
+ ASSERT_EQ(uri.scheme(), "unix");
+ ASSERT_TRUE(uri.has_host());
+ ASSERT_EQ(uri.host(), "localhost");
+ ASSERT_EQ(uri.path(), "/");
+
+ ASSERT_OK(uri.Parse("unix:/tmp/flight/"));
+ ASSERT_EQ(uri.scheme(), "unix");
+ ASSERT_FALSE(uri.has_host());
+ ASSERT_EQ(uri.host(), "");
+ ASSERT_EQ(uri.path(), "/tmp/flight/");
+
+ ASSERT_OK(uri.Parse("unix:///tmp/flight/"));
+ ASSERT_EQ(uri.scheme(), "unix");
+ ASSERT_TRUE(uri.has_host());
+ ASSERT_EQ(uri.host(), "");
+ ASSERT_EQ(uri.path(), "/tmp/flight/");
+}
+
+TEST(Uri, ParseHostPort) {
+ Uri uri;
+
+ ASSERT_OK(uri.Parse("http://localhost:80"));
+ ASSERT_EQ(uri.scheme(), "http");
+ ASSERT_EQ(uri.host(), "localhost");
+ ASSERT_EQ(uri.port_text(), "80");
+ ASSERT_EQ(uri.port(), 80);
+
+ ASSERT_OK(uri.Parse("http://1.2.3.4"));
+ ASSERT_EQ(uri.scheme(), "http");
+ ASSERT_EQ(uri.host(), "1.2.3.4");
+ ASSERT_EQ(uri.port_text(), "");
+ ASSERT_EQ(uri.port(), -1);
+
+ ASSERT_OK(uri.Parse("http://1.2.3.4:"));
+ ASSERT_EQ(uri.scheme(), "http");
+ ASSERT_EQ(uri.host(), "1.2.3.4");
+ ASSERT_EQ(uri.port_text(), "");
+ ASSERT_EQ(uri.port(), -1);
+
+ ASSERT_OK(uri.Parse("http://1.2.3.4:80"));
+ ASSERT_EQ(uri.scheme(), "http");
+ ASSERT_EQ(uri.host(), "1.2.3.4");
+ ASSERT_EQ(uri.port_text(), "80");
+ ASSERT_EQ(uri.port(), 80);
+
+ ASSERT_OK(uri.Parse("http://[::1]"));
+ ASSERT_EQ(uri.scheme(), "http");
+ ASSERT_EQ(uri.host(), "::1");
+ ASSERT_EQ(uri.port_text(), "");
+ ASSERT_EQ(uri.port(), -1);
+
+ ASSERT_OK(uri.Parse("http://[::1]:"));
+ ASSERT_EQ(uri.scheme(), "http");
+ ASSERT_EQ(uri.host(), "::1");
+ ASSERT_EQ(uri.port_text(), "");
+ ASSERT_EQ(uri.port(), -1);
+
+ ASSERT_OK(uri.Parse("http://[::1]:80"));
+ ASSERT_EQ(uri.scheme(), "http");
+ ASSERT_EQ(uri.host(), "::1");
+ ASSERT_EQ(uri.port_text(), "80");
+ ASSERT_EQ(uri.port(), 80);
+}
+
+TEST(Uri, ParseError) {
+ Uri uri;
+
+ ASSERT_RAISES(Invalid, uri.Parse("http://a:b:c:d"));
+ ASSERT_RAISES(Invalid, uri.Parse("http://localhost:z"));
+ ASSERT_RAISES(Invalid, uri.Parse("http://localhost:-1"));
+ ASSERT_RAISES(Invalid, uri.Parse("http://localhost:99999"));
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/cpp/src/arrow/util/uri.cc b/cpp/src/arrow/util/uri.cc
new file mode 100644
index 0000000..3a90612
--- /dev/null
+++ b/cpp/src/arrow/util/uri.cc
@@ -0,0 +1,147 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/uri.h"
+
+#include <cstring>
+#include <sstream>
+#include <vector>
+
+#include <uriparser/Uri.h>
+
+#include "arrow/util/parsing.h"
+#include "arrow/util/string_view.h"
+
+namespace arrow {
+namespace internal {
+
+namespace {
+
+util::string_view TextRangeToView(const UriTextRangeStructA& range) {
+ if (range.first == nullptr) {
+ return "";
+ } else {
+ return {range.first, static_cast<size_t>(range.afterLast - range.first)};
+ }
+}
+
+std::string TextRangeToString(const UriTextRangeStructA& range) {
+ return std::string(TextRangeToView(range));
+}
+
+// There can be a difference between an absent field and an empty field.
+// For example, in "unix:/tmp/foo", the host is absent, while in
+// "unix:///tmp/foo", the host is empty but present.
+// This function helps distinguish.
+bool IsTextRangeSet(const UriTextRangeStructA& range) { return range.first !=
nullptr; }
+
+} // namespace
+
+struct Uri::Impl {  // Private state behind Uri: the uriparser result plus the strings it points into
+ Impl() : port_(-1) { memset(&uri_, 0, sizeof(uri_)); }  // start with a zeroed uri_ and no port (-1)
+
+ ~Impl() { uriFreeUriMembersA(&uri_); }
+
+ void Reset() {  // free the previous parse result and return to the freshly-constructed state
+ uriFreeUriMembersA(&uri_);
+ memset(&uri_, 0, sizeof(uri_));
+ data_.clear();
+ port_ = -1;
+ }
+
+ const std::string& KeepString(const std::string& s) {  // store a copy of `s` and return a reference to that copy
+ data_.push_back(s);
+ return data_.back();
+ }
+
+ UriUriA uri_;
+ // Strings kept alive because uriparser stores pointers into them (filled by KeepString, emptied by Reset)
+ std::vector<std::string> data_;  // NOTE(review): appending a second string could reallocate and invalidate pointers into the first; presumably one string per parse -- confirm
+ int32_t port_;
+};
+
+Uri::Uri() : impl_(new Impl) {}
+
+Uri::~Uri() {}
+
+std::string Uri::scheme() const { return
TextRangeToString(impl_->uri_.scheme); }
+
+std::string Uri::host() const { return
TextRangeToString(impl_->uri_.hostText); }
+
+bool Uri::has_host() const { return IsTextRangeSet(impl_->uri_.hostText); }
+
+std::string Uri::port_text() const { return
TextRangeToString(impl_->uri_.portText); }
+
+int32_t Uri::port() const { return impl_->port_; }
+
+std::string Uri::path() const {
+ // Gather path segments
+ std::vector<util::string_view> segments;
+ auto path_seg = impl_->uri_.pathHead;
+ while (path_seg != nullptr) {
+ segments.push_back(TextRangeToView(path_seg->text));
+ path_seg = path_seg->next;
+ }
+
+ std::stringstream ss;
+ if (impl_->uri_.absolutePath == URI_TRUE) {
+ ss << "/";
+ } else if (has_host() && segments.size() > 0) {
+ // When there's a host (even empty), uriparser considers the path relative.
+ // Several URI parsers for Python all consider it absolute, though.
+ // For example, the path for "file:///tmp/foo" is "/tmp/foo", not
"tmp/foo".
+ // Similarly, the path for "file://localhost/" is "/".
+ // However, the path for "file://localhost" is "".
+ ss << "/";
+ }
+ bool first = true;
+ for (const auto seg : segments) {
+ if (!first) {
+ ss << "/";
+ }
+ first = false;
+ ss << seg;
+ }
+ return ss.str();
+}
+
+Status Uri::Parse(const std::string& uri_string) {
+ impl_->Reset();
+
+ const auto& s = impl_->KeepString(uri_string);
+ const char* error_pos;
+ if (uriParseSingleUriExA(&impl_->uri_, s.data(), s.data() + s.size(),
&error_pos) !=
+ URI_SUCCESS) {
+ return Status::Invalid("Cannot parse URI: '", uri_string, "'");
+ }
+ // Parse port number
+ auto port_text = TextRangeToView(impl_->uri_.portText);
+ if (port_text.size()) {
+ StringConverter<UInt16Type> port_converter;
+ uint16_t port_num;
+ if (!port_converter(port_text.data(), port_text.size(), &port_num)) {
+ return Status::Invalid("Invalid port number '", port_text, "' in URI '",
uri_string,
+ "'");
+ }
+ impl_->port_ = port_num;
+ }
+
+ return Status::OK();
+}
+
+} // namespace internal
+} // namespace arrow
diff --git a/cpp/src/arrow/util/uri.h b/cpp/src/arrow/util/uri.h
new file mode 100644
index 0000000..3d69495
--- /dev/null
+++ b/cpp/src/arrow/util/uri.h
@@ -0,0 +1,67 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "arrow/status.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace internal {
+
+/// \brief A parsed URI
+class ARROW_EXPORT Uri {
+ public:
+ Uri();
+ ~Uri();
+
+ // XXX Should we use util::string_view instead? These functions are
+ // not performance-critical.
+
+ /// The URI scheme, such as "http", or the empty string if the URI has no
+ /// explicit scheme.
+ std::string scheme() const;
+ /// Whether the URI has an explicit host name. This may return true if
+ /// the URI has an empty host (e.g. "file:///tmp/foo"), while it returns
+ /// false if the URI has no host component at all (e.g. "file:/tmp/foo").
+ bool has_host() const;
+ /// The URI host name, such as "localhost", "127.0.0.1" or "::1", or the
empty
+ /// string if the URI does not have a host component.
+ std::string host() const;
+ /// The URI port number, as a string such as "80", or the empty string if
the URI
+ /// does not have a port number component.
+ std::string port_text() const;
+ /// The URI port parsed as an integer, or -1 if the URI does not have a port
+ /// number component.
+ int32_t port() const;
+ /// The URI path component, e.g. "/tmp/foo" for "file:///tmp/foo".
+ std::string path() const;
+
+ /// Parse a URI string into this object, discarding any previous result; returns Invalid on malformed input.
+ Status Parse(const std::string& uri_string);
+
+ private:
+ struct Impl;
+ std::unique_ptr<Impl> impl_;
+};
+
+} // namespace internal
+} // namespace arrow
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index e7ad54c..737f062 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -45,6 +45,8 @@ RAPIDJSON_VERSION=2bbd33b33217ff4a73434ebf10cdac41e2ef5e34
RE2_VERSION=2018-10-01
SNAPPY_VERSION=1.1.3
THRIFT_VERSION=0.12.0
+# CMake support appeared after latest release (0.9.1)
+URIPARSER_VERSION=63384be4fb8197264c55ff53a135110ecd5bd8c4
ZLIB_VERSION=1.2.8
ZSTD_VERSION=v1.3.7
@@ -70,6 +72,7 @@ DEPENDENCIES=(
"ARROW_RE2_URL re2-${RE2_VERSION}.tar.gz
https://github.com/google/re2/archive/${RE2_VERSION}.tar.gz"
"ARROW_SNAPPY_URL snappy-${SNAPPY_VERSION}.tar.gz
https://github.com/google/snappy/releases/download/${SNAPPY_VERSION}/snappy-${SNAPPY_VERSION}.tar.gz"
"ARROW_THRIFT_URL thrift-${THRIFT_VERSION}.tar.gz
http://archive.apache.org/dist/thrift/${THRIFT_VERSION}/thrift-${THRIFT_VERSION}.tar.gz"
+ "ARROW_URIPARSER_URL uriparser-${URIPARSER_VERSION}.tar.gz
https://github.com/uriparser/uriparser/archive/${URIPARSER_VERSION}.tar.gz"
"ARROW_ZLIB_URL zlib-${ZLIB_VERSION}.tar.gz
http://zlib.net/fossils/zlib-${ZLIB_VERSION}.tar.gz"
"ARROW_ZSTD_URL zstd-${ZSTD_VERSION}.tar.gz
https://github.com/facebook/zstd/archive/${ZSTD_VERSION}.tar.gz"
)