This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 1489e3cfbf [Fix](file system) Make the constructor of `XxxFileSystem` a private method (#15889)
1489e3cfbf is described below
commit 1489e3cfbf4ad4abc8ca76fa15aeeabe881cb786
Author: Tiewei Fang <[email protected]>
AuthorDate: Fri Jan 13 15:32:16 2023 +0800
[Fix](file system) Make the constructor of `XxxFileSystem` a private method (#15889)

Since FileSystem inherits std::enable_shared_from_this, it is dangerous to create a raw pointer to a FileSystem.
To avoid this, make the constructor of each XxxFileSystem a private method and use the static method create(...) to get a new FileSystem object.
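A minimal sketch of the failure mode this prevents and of the private-constructor-plus-static-create() pattern; the class below is illustrative only, not code from this patch:

#include <memory>

// Illustrative stand-in for the XxxFileSystem classes touched by this patch.
class DemoFileSystem : public std::enable_shared_from_this<DemoFileSystem> {
public:
    // The factory is the only way to obtain an instance, so every live object
    // is owned by a shared_ptr before shared_from_this() can ever be called.
    static std::shared_ptr<DemoFileSystem> create() {
        // std::make_shared cannot reach a private constructor, hence new + shared_ptr,
        // which matches how the XxxFileSystem::create() methods below are written.
        return std::shared_ptr<DemoFileSystem>(new DemoFileSystem());
    }

    std::shared_ptr<DemoFileSystem> self() { return shared_from_this(); }

private:
    DemoFileSystem() = default;  // private: callers can no longer write `new DemoFileSystem()`
};

int main() {
    // DemoFileSystem on_stack;          // does not compile: constructor is private
    // auto* raw = new DemoFileSystem(); // does not compile either; if it did,
    // raw->self();                      // shared_from_this() would throw std::bad_weak_ptr
    //                                   // (undefined behaviour before C++17)
    auto fs = DemoFileSystem::create();                   // always owned by a shared_ptr
    std::shared_ptr<DemoFileSystem> again = fs->self();   // safe
    return 0;
}

The same reasoning explains why the create(...) methods in this patch wrap a plain new in std::shared_ptr rather than calling std::make_shared: make_shared cannot invoke a private constructor.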
---
be/src/io/file_factory.cpp | 8 ++--
be/src/io/fs/broker_file_system.cpp | 7 ++++
be/src/io/fs/broker_file_system.h | 8 +++-
be/src/io/fs/file_system.h | 6 +--
be/src/io/fs/hdfs_file_system.cpp | 5 +++
be/src/io/fs/hdfs_file_system.h | 6 ++-
be/src/io/fs/local_file_system.cpp | 7 +++-
be/src/io/fs/local_file_system.h | 5 ++-
be/src/io/fs/path.h | 6 ---
be/src/io/fs/s3_file_system.cpp | 5 +++
be/src/io/fs/s3_file_system.h | 5 ++-
be/src/olap/data_dir.cpp | 2 +-
be/src/olap/storage_policy_mgr.cpp | 2 +-
be/src/service/brpc_conflict.h | 48 ----------------------
be/test/io/cache/remote_file_cache_test.cpp | 2 +-
be/test/olap/remote_rowset_gc_test.cpp | 2 +-
be/test/olap/rowset/beta_rowset_test.cpp | 2 +-
be/test/olap/tablet_cooldown_test.cpp | 2 +-
be/test/vec/exec/parquet/parquet_reader_test.cpp | 2 +-
be/test/vec/exec/parquet/parquet_thrift_test.cpp | 8 ++--
.../sql-manual/sql-functions/table-functions/s3.md | 43 ++++++++++++++++++-
.../sql-manual/sql-functions/table-functions/s3.md | 45 +++++++++++++++++++-
22 files changed, 145 insertions(+), 81 deletions(-)
diff --git a/be/src/io/file_factory.cpp b/be/src/io/file_factory.cpp
index 4fbc6dc9f6..62696552a5 100644
--- a/be/src/io/file_factory.cpp
+++ b/be/src/io/file_factory.cpp
@@ -210,7 +210,7 @@ Status FileFactory::create_hdfs_reader(const THdfsParams& hdfs_params, const std
io::FileReaderSPtr* reader,
const io::FileReaderOptions& reader_options,
IOContext* io_ctx) {
- hdfs_file_system->reset(new io::HdfsFileSystem(hdfs_params, ""));
+ *hdfs_file_system = io::HdfsFileSystem::create(hdfs_params, "");
RETURN_IF_ERROR((std::static_pointer_cast<io::HdfsFileSystem>(*hdfs_file_system))->connect());
RETURN_IF_ERROR((*hdfs_file_system)->open_file(path, reader_options, reader, io_ctx));
return Status::OK();
@@ -235,7 +235,7 @@ Status FileFactory::create_s3_reader(const std::map<std::string, std::string>& p
}
S3Conf s3_conf;
RETURN_IF_ERROR(ClientFactory::convert_properties_to_s3_conf(prop, s3_uri, &s3_conf));
- s3_file_system->reset(new io::S3FileSystem(s3_conf, ""));
+ *s3_file_system = io::S3FileSystem::create(s3_conf, "");
RETURN_IF_ERROR((std::static_pointer_cast<io::S3FileSystem>(*s3_file_system))->connect());
RETURN_IF_ERROR((*s3_file_system)->open_file(s3_uri.get_key(), reader_options, reader, io_ctx));
return Status::OK();
@@ -248,8 +248,8 @@ Status FileFactory::create_broker_reader(const TNetworkAddress& broker_addr,
io::FileReaderSPtr* reader,
const io::FileReaderOptions& reader_options,
IOContext* io_ctx) {
- broker_file_system->reset(
- new io::BrokerFileSystem(broker_addr, prop, file_description.file_size));
+ *broker_file_system =
+ io::BrokerFileSystem::create(broker_addr, prop, file_description.file_size);
RETURN_IF_ERROR(
(std::static_pointer_cast<io::BrokerFileSystem>(*broker_file_system))->connect());
RETURN_IF_ERROR((*broker_file_system)
diff --git a/be/src/io/fs/broker_file_system.cpp b/be/src/io/fs/broker_file_system.cpp
index 4407f3d686..875eb0829b 100644
--- a/be/src/io/fs/broker_file_system.cpp
+++ b/be/src/io/fs/broker_file_system.cpp
@@ -56,6 +56,13 @@ inline const std::string& client_id(const TNetworkAddress& addr) {
}
#endif
+std::shared_ptr<BrokerFileSystem> BrokerFileSystem::create(
+ const TNetworkAddress& broker_addr, const std::map<std::string, std::string>& broker_prop,
+ size_t file_size) {
+ return std::shared_ptr<BrokerFileSystem>(
+ new BrokerFileSystem(broker_addr, broker_prop, file_size));
+}
+
BrokerFileSystem::BrokerFileSystem(const TNetworkAddress& broker_addr,
const std::map<std::string, std::string>& broker_prop,
size_t file_size)
diff --git a/be/src/io/fs/broker_file_system.h b/be/src/io/fs/broker_file_system.h
index ec091ec577..bf55d49a53 100644
--- a/be/src/io/fs/broker_file_system.h
+++ b/be/src/io/fs/broker_file_system.h
@@ -24,8 +24,9 @@ namespace doris {
namespace io {
class BrokerFileSystem final : public RemoteFileSystem {
public:
- BrokerFileSystem(const TNetworkAddress& broker_addr,
- const std::map<std::string, std::string>& broker_prop, size_t file_size);
+ static std::shared_ptr<BrokerFileSystem> create(
+ const TNetworkAddress& broker_addr,
+ const std::map<std::string, std::string>& broker_prop, size_t file_size);
~BrokerFileSystem() override = default;
@@ -66,6 +67,9 @@ public:
Status get_client(std::shared_ptr<BrokerServiceConnection>* client) const;
private:
+ BrokerFileSystem(const TNetworkAddress& broker_addr,
+ const std::map<std::string, std::string>& broker_prop, size_t file_size);
+
const TNetworkAddress& _broker_addr;
const std::map<std::string, std::string>& _broker_prop;
size_t _file_size;
diff --git a/be/src/io/fs/file_system.h b/be/src/io/fs/file_system.h
index 4598ff32d2..f1fd331038 100644
--- a/be/src/io/fs/file_system.h
+++ b/be/src/io/fs/file_system.h
@@ -44,9 +44,6 @@ enum class FileSystemType : uint8_t {
class FileSystem : public std::enable_shared_from_this<FileSystem> {
public:
- FileSystem(Path&& root_path, ResourceId&& resource_id, FileSystemType type)
- : _root_path(std::move(root_path)), _resource_id(std::move(resource_id)), _type(type) {}
-
virtual ~FileSystem() = default;
DISALLOW_COPY_AND_ASSIGN(FileSystem);
@@ -81,6 +78,9 @@ public:
const FileSystemType type() const { return _type; }
protected:
+ FileSystem(Path&& root_path, ResourceId&& resource_id, FileSystemType type)
+ : _root_path(std::move(root_path)), _resource_id(std::move(resource_id)), _type(type) {}
+
Path _root_path;
ResourceId _resource_id;
FileSystemType _type;
diff --git a/be/src/io/fs/hdfs_file_system.cpp b/be/src/io/fs/hdfs_file_system.cpp
index 0cbfc32f2a..d623a8b245 100644
--- a/be/src/io/fs/hdfs_file_system.cpp
+++ b/be/src/io/fs/hdfs_file_system.cpp
@@ -61,6 +61,11 @@ private:
void _clean_oldest();
};
+std::shared_ptr<HdfsFileSystem> HdfsFileSystem::create(const THdfsParams& hdfs_params,
+ const std::string& path) {
+ return std::shared_ptr<HdfsFileSystem>(new HdfsFileSystem(hdfs_params, path));
+}
+
HdfsFileSystem::HdfsFileSystem(const THdfsParams& hdfs_params, const std::string& path)
: RemoteFileSystem(path, "", FileSystemType::HDFS),
_hdfs_params(hdfs_params),
diff --git a/be/src/io/fs/hdfs_file_system.h b/be/src/io/fs/hdfs_file_system.h
index 49f0cb6a25..9e5edf6752 100644
--- a/be/src/io/fs/hdfs_file_system.h
+++ b/be/src/io/fs/hdfs_file_system.h
@@ -81,7 +81,9 @@ private:
class HdfsFileSystem final : public RemoteFileSystem {
public:
- HdfsFileSystem(const THdfsParams& hdfs_params, const std::string& path);
+ static std::shared_ptr<HdfsFileSystem> create(const THdfsParams& hdfs_params,
+ const std::string& path);
+
~HdfsFileSystem() override;
Status create_file(const Path& path, FileWriterPtr* writer) override;
@@ -119,6 +121,8 @@ public:
HdfsFileSystemHandle* get_handle();
private:
+ HdfsFileSystem(const THdfsParams& hdfs_params, const std::string& path);
+
Path _covert_path(const Path& path) const;
const THdfsParams& _hdfs_params;
std::string _namenode;
diff --git a/be/src/io/fs/local_file_system.cpp b/be/src/io/fs/local_file_system.cpp
index ea5efe17c4..9cbe0c0814 100644
--- a/be/src/io/fs/local_file_system.cpp
+++ b/be/src/io/fs/local_file_system.cpp
@@ -24,6 +24,11 @@
namespace doris {
namespace io {
+std::shared_ptr<LocalFileSystem> LocalFileSystem::create(Path path, ResourceId resource_id) {
+ return std::shared_ptr<LocalFileSystem>(
+ new LocalFileSystem(std::move(path), std::move(resource_id)));
+}
+
LocalFileSystem::LocalFileSystem(Path root_path, ResourceId resource_id)
: FileSystem(std::move(root_path), std::move(resource_id), FileSystemType::LOCAL) {}
@@ -144,7 +149,7 @@ Status LocalFileSystem::list(const Path& path, std::vector<Path>* files) {
return Status::OK();
}
-static FileSystemSPtr local_fs = std::make_shared<io::LocalFileSystem>("");
+static FileSystemSPtr local_fs = io::LocalFileSystem::create("");
const FileSystemSPtr& global_local_filesystem() {
return local_fs;
diff --git a/be/src/io/fs/local_file_system.h b/be/src/io/fs/local_file_system.h
index d4b8e2e044..bc9faa1d98 100644
--- a/be/src/io/fs/local_file_system.h
+++ b/be/src/io/fs/local_file_system.h
@@ -25,7 +25,8 @@ namespace io {
class LocalFileSystem final : public FileSystem {
public:
- LocalFileSystem(Path root_path, ResourceId resource_id = ResourceId());
+ static std::shared_ptr<LocalFileSystem> create(Path path, ResourceId resource_id = "");
+
~LocalFileSystem() override;
Status create_file(const Path& path, FileWriterPtr* writer) override;
@@ -52,6 +53,8 @@ public:
Status list(const Path& path, std::vector<Path>* files) override;
private:
+ LocalFileSystem(Path root_path, ResourceId resource_id = ResourceId());
+
Path absolute_path(const Path& path) const;
};
diff --git a/be/src/io/fs/path.h b/be/src/io/fs/path.h
index 695d51063a..9832ea6322 100644
--- a/be/src/io/fs/path.h
+++ b/be/src/io/fs/path.h
@@ -28,11 +28,5 @@ inline Path operator/(Path&& lhs, const Path& rhs) {
return std::move(lhs /= rhs);
}
-struct PathHasher {
- std::size_t operator()(const doris::io::Path& k) const {
- return std::hash<std::string>()(k.filename().native());
- }
-};
-
} // namespace io
} // namespace doris
diff --git a/be/src/io/fs/s3_file_system.cpp b/be/src/io/fs/s3_file_system.cpp
index 5796e68f4e..7f872586c6 100644
--- a/be/src/io/fs/s3_file_system.cpp
+++ b/be/src/io/fs/s3_file_system.cpp
@@ -49,6 +49,11 @@ namespace io {
}
#endif
+std::shared_ptr<S3FileSystem> S3FileSystem::create(S3Conf s3_conf, ResourceId resource_id) {
+ return std::shared_ptr<S3FileSystem>(
+ new S3FileSystem(std::move(s3_conf), std::move(resource_id)));
+}
+
S3FileSystem::S3FileSystem(S3Conf s3_conf, ResourceId resource_id)
: RemoteFileSystem(
fmt::format("{}/{}/{}", s3_conf.endpoint, s3_conf.bucket, s3_conf.prefix),
diff --git a/be/src/io/fs/s3_file_system.h b/be/src/io/fs/s3_file_system.h
index 015b75908c..93472bebd2 100644
--- a/be/src/io/fs/s3_file_system.h
+++ b/be/src/io/fs/s3_file_system.h
@@ -35,7 +35,8 @@ namespace io {
// This class is thread-safe.(Except `set_xxx` method)
class S3FileSystem final : public RemoteFileSystem {
public:
- S3FileSystem(S3Conf s3_conf, ResourceId resource_id);
+ static std::shared_ptr<S3FileSystem> create(S3Conf s3_conf, ResourceId resource_id);
+
~S3FileSystem() override;
Status create_file(const Path& path, FileWriterPtr* writer) override;
@@ -78,6 +79,8 @@ public:
std::string get_key(const Path& path) const;
private:
+ S3FileSystem(S3Conf s3_conf, ResourceId resource_id);
+
S3Conf _s3_conf;
// FIXME(cyx): We can use std::atomic<std::shared_ptr> since c++20.
diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp
index 0b70977721..e29fca2276 100644
--- a/be/src/olap/data_dir.cpp
+++ b/be/src/olap/data_dir.cpp
@@ -73,7 +73,7 @@ DataDir::DataDir(const std::string& path, int64_t capacity_bytes,
TStorageMedium::type storage_medium, TabletManager* tablet_manager,
TxnManager* txn_manager)
: _path(path),
- _fs(std::make_shared<io::LocalFileSystem>(path)),
+ _fs(io::LocalFileSystem::create(path)),
_capacity_bytes(capacity_bytes),
_available_bytes(0),
_disk_capacity_bytes(0),
diff --git a/be/src/olap/storage_policy_mgr.cpp b/be/src/olap/storage_policy_mgr.cpp
index ce9241c7f8..58da3ff03e 100644
--- a/be/src/olap/storage_policy_mgr.cpp
+++ b/be/src/olap/storage_policy_mgr.cpp
@@ -67,7 +67,7 @@ void StoragePolicyMgr::periodic_put(const std::string& name, const StoragePolicy
s3_conf.connect_timeout_ms = policy->s3_conn_timeout_ms;
s3_conf.bucket = policy->bucket;
s3_conf.prefix = policy->root_path;
- s3_fs = std::make_shared<io::S3FileSystem>(std::move(s3_conf), name);
+ s3_fs = io::S3FileSystem::create(std::move(s3_conf), name);
io::FileSystemMap::instance()->insert(name, s3_fs);
_policy_map.emplace(name, policy);
} else if (it->second->md5_sum != policy->md5_sum) {
diff --git a/be/src/service/brpc_conflict.h b/be/src/service/brpc_conflict.h
deleted file mode 100644
index 35ef1b815c..0000000000
--- a/be/src/service/brpc_conflict.h
+++ /dev/null
@@ -1,48 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-// This file is used to fixed macro conflict between butil and gutil
-// and this file must put the first include in source file
-
-#include "gutil/macros.h"
-// Macros in the guti/macros.h, use butil's define
-#ifdef DISALLOW_IMPLICIT_CONSTRUCTORS
-#undef DISALLOW_IMPLICIT_CONSTRUCTORS
-#endif
-
-#ifdef arraysize
-#undef arraysize
-#endif
-
-#ifdef ARRAY_SIZE
-#undef ARRAY_SIZE
-#endif
-
-#undef OVERRIDE
-#undef FINAL
-
-// use be/src/gutil/integral_types.h override butil/basictypes.h
-#include "gutil/integral_types.h"
-#ifdef BASE_INTEGRAL_TYPES_H_
-#define BUTIL_BASICTYPES_H_
-#endif
-
-#ifdef DEBUG_MODE
-#undef DEBUG_MODE
-#endif
diff --git a/be/test/io/cache/remote_file_cache_test.cpp b/be/test/io/cache/remote_file_cache_test.cpp
index 596ff88725..176e306a8a 100644
--- a/be/test/io/cache/remote_file_cache_test.cpp
+++ b/be/test/io/cache/remote_file_cache_test.cpp
@@ -163,7 +163,7 @@ protected:
// just use to create s3 filesystem, otherwise won't use cache
S3Conf s3_conf;
std::shared_ptr<io::S3FileSystem> fs =
- std::make_shared<io::S3FileSystem>(std::move(s3_conf), resource_id);
+ io::S3FileSystem::create(std::move(s3_conf), resource_id);
rowset.rowset_meta()->set_resource_id(resource_id);
rowset.rowset_meta()->set_num_segments(1);
rowset.rowset_meta()->set_fs(fs);
diff --git a/be/test/olap/remote_rowset_gc_test.cpp b/be/test/olap/remote_rowset_gc_test.cpp
index f24a1a1086..e4d6eea11d 100644
--- a/be/test/olap/remote_rowset_gc_test.cpp
+++ b/be/test/olap/remote_rowset_gc_test.cpp
@@ -52,7 +52,7 @@ public:
s3_conf.region = config::test_s3_region;
s3_conf.bucket = config::test_s3_bucket;
s3_conf.prefix = "remote_rowset_gc_test";
- auto s3_fs = std::make_shared<io::S3FileSystem>(std::move(s3_conf), kResourceId);
+ auto s3_fs = io::S3FileSystem::create(std::move(s3_conf), kResourceId);
ASSERT_TRUE(s3_fs->connect().ok());
io::FileSystemMap::instance()->insert(kResourceId, s3_fs);
diff --git a/be/test/olap/rowset/beta_rowset_test.cpp b/be/test/olap/rowset/beta_rowset_test.cpp
index e696995d25..8766faab6c 100644
--- a/be/test/olap/rowset/beta_rowset_test.cpp
+++ b/be/test/olap/rowset/beta_rowset_test.cpp
@@ -232,7 +232,7 @@ TEST_F(BetaRowsetTest, ReadTest) {
s3_conf.prefix = "prefix";
io::ResourceId resource_id = "test_resourse_id";
std::shared_ptr<io::S3FileSystem> fs =
- std::make_shared<io::S3FileSystem>(std::move(s3_conf), resource_id);
+ io::S3FileSystem::create(std::move(s3_conf), resource_id);
Aws::SDKOptions aws_options = Aws::SDKOptions {};
Aws::InitAPI(aws_options);
// failed to head object
diff --git a/be/test/olap/tablet_cooldown_test.cpp b/be/test/olap/tablet_cooldown_test.cpp
index 15521a6f12..d243f32a4e 100644
--- a/be/test/olap/tablet_cooldown_test.cpp
+++ b/be/test/olap/tablet_cooldown_test.cpp
@@ -51,7 +51,7 @@ public:
s3_conf.region = config::test_s3_region;
s3_conf.bucket = config::test_s3_bucket;
s3_conf.prefix = "tablet_cooldown_test";
- auto s3_fs = std::make_shared<io::S3FileSystem>(std::move(s3_conf), kResourceId);
+ auto s3_fs = io::S3FileSystem::create(std::move(s3_conf), kResourceId);
ASSERT_TRUE(s3_fs->connect().ok());
io::FileSystemMap::instance()->insert(kResourceId, s3_fs);
diff --git a/be/test/vec/exec/parquet/parquet_reader_test.cpp b/be/test/vec/exec/parquet/parquet_reader_test.cpp
index 3046e48dc5..6379513e3b 100644
--- a/be/test/vec/exec/parquet/parquet_reader_test.cpp
+++ b/be/test/vec/exec/parquet/parquet_reader_test.cpp
@@ -89,7 +89,7 @@ TEST_F(ParquetReaderTest, normal) {
DescriptorTbl::create(&obj_pool, t_desc_table, &desc_tbl);
auto slot_descs = desc_tbl->get_tuple_descriptor(0)->slots();
- io::FileSystemSPtr local_fs = std::make_shared<io::LocalFileSystem>("");
+ io::FileSystemSPtr local_fs = io::LocalFileSystem::create("");
io::FileReaderSPtr reader;
local_fs->open_file("./be/test/exec/test_data/parquet_scanner/type-decoder.parquet", &reader,
nullptr);
diff --git a/be/test/vec/exec/parquet/parquet_thrift_test.cpp b/be/test/vec/exec/parquet/parquet_thrift_test.cpp
index 9bc9eb97c0..f41b2da5d9 100644
--- a/be/test/vec/exec/parquet/parquet_thrift_test.cpp
+++ b/be/test/vec/exec/parquet/parquet_thrift_test.cpp
@@ -47,7 +47,7 @@ public:
};
TEST_F(ParquetThriftReaderTest, normal) {
- io::FileSystemSPtr local_fs = std::make_shared<io::LocalFileSystem>("");
+ io::FileSystemSPtr local_fs = io::LocalFileSystem::create("");
io::FileReaderSPtr reader;
auto st = local_fs->open_file("./be/test/exec/test_data/parquet_scanner/localfile.parquet",
&reader, nullptr);
@@ -79,7 +79,7 @@ TEST_F(ParquetThriftReaderTest, complex_nested_file) {
// `friend` map<string,string>,
// `mark` struct<math:int,english:int>)
- io::FileSystemSPtr local_fs = std::make_shared<io::LocalFileSystem>("");
+ io::FileSystemSPtr local_fs = io::LocalFileSystem::create("");
io::FileReaderSPtr reader;
auto st = local_fs->open_file("./be/test/exec/test_data/parquet_scanner/hive-complex.parquet",
&reader, nullptr);
@@ -283,7 +283,7 @@ static void read_parquet_data_and_check(const std::string& parquet_file,
* `list_string` array<string>) // 14
*/
- io::FileSystemSPtr local_fs = std::make_shared<io::LocalFileSystem>("");
+ io::FileSystemSPtr local_fs = io::LocalFileSystem::create("");
io::FileReaderSPtr reader;
auto st = local_fs->open_file(parquet_file, &reader, nullptr);
EXPECT_TRUE(st.ok());
@@ -405,7 +405,7 @@ TEST_F(ParquetThriftReaderTest, group_reader) {
lazy_read_ctx.all_read_columns.emplace_back(slot->col_name());
read_columns.emplace_back(ParquetReadColumn(7, slot->col_name()));
}
- io::FileSystemSPtr local_fs = std::make_shared<io::LocalFileSystem>("");
+ io::FileSystemSPtr local_fs = io::LocalFileSystem::create("");
io::FileReaderSPtr file_reader;
auto st = local_fs->open_file("./be/test/exec/test_data/parquet_scanner/type-decoder.parquet",
&file_reader, nullptr);
diff --git a/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md b/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md
index 452fa13e90..46c917a069 100644
--- a/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md
+++ b/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md
@@ -57,6 +57,12 @@ Related parameters for accessing S3:
- `secret_key`: (required)
- `use_path_style`: (optional) default `false` . The S3 SDK uses the virtual-hosted style by default. However, some object storage systems may not be enabled or support virtual-hosted style access. At this time, we can add the `use_path_style` parameter to force the use of path style access method.
+> Note: The URI currently supports three schemas: http://, https:// and s3://.
+> 1. If you use http:// or https://, the 'use_path_style' parameter decides whether S3 is accessed in 'path style'.
+> 2. If you use s3://, S3 is always accessed in 'virtual-hosted style' and the 'use_path_style' parameter is ignored.
+>
+> For detailed use cases, refer to the Best Practice section at the bottom.
+
file format parameter:
- `format`: (required) Currently support `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`
@@ -99,8 +105,43 @@ MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student1.c
s3, table-valued-function, tvf
### Best Practice
+Since the S3 table-valued-function does not know the table schema in advance, it will read the file first to parse out the table schema.
+
+**Usage of different uri schemas**
+Example of http:// and https://:
+
+```sql
+// Note how the bucket is written in the URI and how the 'use_path_style' parameter is set; the same applies to http://.
+// Because "use_path_style"="true", S3 will be accessed in 'path style'.
+select * from s3(
+ "URI" = "https://endpoint/bucket/file/student.csv",
+ "ACCESS_KEY"= "ak",
+ "SECRET_KEY" = "sk",
+ "FORMAT" = "csv",
+ "use_path_style"="true");
+
+// Note how the bucket is written in the URI and how the 'use_path_style' parameter is set; the same applies to http://.
+// Because "use_path_style"="false", S3 will be accessed in 'virtual-hosted style'.
+select * from s3(
+ "URI" = "https://bucket.endpoint/file/student.csv",
+ "ACCESS_KEY"= "ak",
+ "SECRET_KEY" = "sk",
+ "FORMAT" = "csv",
+ "use_path_style"="false");
+```
+
+Example of s3://:
+
+```sql
+// Note how the bucket is written in the URI; there is no need to set 'use_path_style'.
+// S3 will be accessed in 'virtual-hosted style'.
+select * from s3(
+ "URI" = "s3://bucket.endpoint/file/student.csv",
+ "ACCESS_KEY"= "ak",
+ "SECRET_KEY" = "sk",
+ "FORMAT" = "csv");
+```
-Since the S3 table-valued-function does not know the table schema in advance, it will read the file first to parse out the table schema. Specifically, for different file formats:
**csv format**
`csv` format: Read the file on S3 and process it as a csv file, read the first line in the file to parse out the table schema. The number of columns in the first line of the file `n` will be used as the number of columns in the table schema, and the column names of the table schema will be automatically named `c1, c2, ..., cn`, and the column type is set to `String` , for example:
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md
index a27b8c56df..a77b6007ff 100644
--- a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md
@@ -58,7 +58,13 @@ S3 tvf中的每一个参数都是一个 `"key"="value"` 对。
- `uri`: (必填) 访问S3的uri,S3表函数会根据 `use_path_style` 参数来决定是否使用 path style 访问方式,默认为 virtual-hosted style 方式
- `access_key`: (必填)
- `secret_key`: (必填)
-- `use_path_style`:(选填) 默认为`false` 。S3 SDK 默认使用 virtual-hosted style 方式。但某些对象存储系统可能没开启或没支持virtual-hosted style 方式的访问,此时我们可以添加 use_path_style 参数来强制使用 path style 方式。
+- `use_path_style`:(选填) 默认为`false` 。S3 SDK 默认使用 virtual-hosted style 方式。但某些对象存储系统可能没开启或没支持virtual-hosted style 方式的访问,此时我们可以添加 use_path_style 参数来强制使用 path style 方式。比如 `minio`默认情况下只允许`path style`访问方式,所以在访问minio时要加上`use_path_style=true`。
+
+> 注意:uri目前支持三种schema:http://, https:// 和 s3://
+> 1. 如果使用http://或https://, 则会根据 'use_path_style' 参数来决定是否使用'path style'方式访问s3
+> 2. 如果使用s3://, 则都使用 'virtual-hosted style' 方式访问s3, 'use_path_style'参数无效。
+>
+> 详细使用案例可以参考最下方 Best Practice。
文件格式参数:
- `format`:(必填) 目前支持 `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`
@@ -102,9 +108,44 @@ MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student1.c
### Best Practice
-由于S3 table-valued-function事先并不知道table schema,所以会先读一遍文件来解析出table schema,具体到不同的文件格式来说:
+**不同url schema的写法**
+http:// 、https:// 使用示例:
+```sql
+// 注意URI bucket写法以及use_path_style参数设置,http同理。
+// 由于设置了"use_path_style"="true", 所以将采用path style方式访问s3。
+select * from s3(
+ "URI" = "https://endpoint/bucket/file/student.csv",
+ "ACCESS_KEY"= "ak",
+ "SECRET_KEY" = "sk",
+ "FORMAT" = "csv",
+ "use_path_style"="true");
+
+// 注意URI bucket写法以及use_path_style参数设置,http同理。
+// 由于设置了"use_path_style"="false", 所以将采用virtual-hosted style方式访问s3。
+select * from s3(
+ "URI" = "https://bucket.endpoint/file/student.csv",
+ "ACCESS_KEY"= "ak",
+ "SECRET_KEY" = "sk",
+ "FORMAT" = "csv",
+ "use_path_style"="false");
+```
+
+s3:// 使用示例:
+
+```sql
+// 注意URI bucket写法, 无需设置use_path_style参数。
+// 将采用virtual-hosted style方式访问s3。
+select * from s3(
+ "URI" = "s3://bucket.endpoint/file/student.csv",
+ "ACCESS_KEY"= "ak",
+ "SECRET_KEY" = "sk",
+ "FORMAT" = "csv");
+```
+
**csv format**
+由于S3 table-valued-function事先并不知道table schema,所以会先读一遍文件来解析出table schema。
+
`csv` 格式: S3 table-valued-function 读取S3上的文件并当作csv文件来处理,读取文件中的第一行用于解析table schema。文件第一行的列个数`n`将作为table schema的列个数,table schema的列名则自动取名为`c1, c2, ..., cn` ,列类型都设置为 `String`, 举例:
student1.csv文件内容为:
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]