This is an automated email from the ASF dual-hosted git repository.
liaoxin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 8eff3d1a908 [fix](cloud) Fix packed file write path bypassing
encryption (#60629)
8eff3d1a908 is described below
commit 8eff3d1a9089164e130e1f846453940a6f92493e
Author: Xin Liao <[email protected]>
AuthorDate: Fri Feb 27 11:45:40 2026 +0800
[fix](cloud) Fix packed file write path bypassing encryption (#60629)
On the write path, PackedFileSystem was applied after the encryption layer,
so data written through PackedFileSystem bypassed encryption. On the read
path, the encryption layer would then attempt to decrypt this unencrypted
data, leading to data corruption.
Fix this by swapping the order: apply PackedFileSystem first, then wrap it
with encryption on top, ensuring that all data written through the packed
path is properly encrypted.
---
be/src/cloud/cloud_rowset_writer.cpp | 24 ++++++++++++++++++------
be/src/io/fs/file_writer.h | 4 ++++
be/src/io/fs/packed_file_writer.h | 3 +++
be/src/olap/rowset/rowset_meta.cpp | 8 +++++---
be/src/olap/rowset/rowset_writer_context.h | 12 ++++++------
5 files changed, 36 insertions(+), 15 deletions(-)
diff --git a/be/src/cloud/cloud_rowset_writer.cpp
b/be/src/cloud/cloud_rowset_writer.cpp
index 0c1b79392e9..c5b58049ae4 100644
--- a/be/src/cloud/cloud_rowset_writer.cpp
+++ b/be/src/cloud/cloud_rowset_writer.cpp
@@ -17,8 +17,10 @@
#include "cloud/cloud_rowset_writer.h"
+#include "common/logging.h"
#include "common/status.h"
#include "io/cache/block_file_cache_factory.h"
+#include "io/fs/packed_file_manager.h"
#include "io/fs/packed_file_writer.h"
#include "olap/rowset/rowset_factory.h"
@@ -80,6 +82,9 @@ Status CloudRowsetWriter::init(const RowsetWriterContext&
rowset_writer_context)
}
Status CloudRowsetWriter::_build_rowset_meta(RowsetMeta* rowset_meta, bool
check_segment_num) {
+ VLOG_NOTICE << "start to build rowset meta. tablet_id=" <<
rowset_meta->tablet_id()
+ << ", rowset_id=" << rowset_meta->rowset_id()
+ << ", check_segment_num=" << check_segment_num;
// Call base class implementation
RETURN_IF_ERROR(BaseBetaRowsetWriter::_build_rowset_meta(rowset_meta,
check_segment_num));
@@ -157,6 +162,8 @@ Status CloudRowsetWriter::build(RowsetSharedPtr& rowset) {
}
Status CloudRowsetWriter::_collect_all_packed_slice_locations(RowsetMeta*
rowset_meta) {
+ VLOG_NOTICE << "start to collect packed slice locations for rowset meta.
tablet_id="
+ << rowset_meta->tablet_id() << ", rowset_id=" <<
rowset_meta->rowset_id();
if (!_context.packed_file_active) {
return Status::OK();
}
@@ -189,17 +196,22 @@ Status
CloudRowsetWriter::_collect_all_packed_slice_locations(RowsetMeta* rowset
Status CloudRowsetWriter::_collect_packed_slice_location(io::FileWriter*
file_writer,
const std::string&
file_path,
RowsetMeta*
rowset_meta) {
- // At this point, we only call this when
RowsetWriterContext::merge_file_active is true,
- // and all writers should be MergeFileWriter. So we can safely cast
without extra checks.
- auto* packed_writer = static_cast<io::PackedFileWriter*>(file_writer);
-
- if (packed_writer->state() != io::FileWriter::State::CLOSED) {
+ VLOG_NOTICE << "collect packed slice location for file: " << file_path;
+ // Check if file writer is closed
+ if (file_writer->state() != io::FileWriter::State::CLOSED) {
// Writer is still open; index will be collected after it is closed.
return Status::OK();
}
+ // Check if file is actually in packed file (not direct write for large
files)
+ if (!file_writer->is_in_packed_file()) {
+ return Status::OK();
+ }
+
+ // Get packed slice location directly from PackedFileManager
io::PackedSliceLocation index;
- RETURN_IF_ERROR(packed_writer->get_packed_slice_location(&index));
+ RETURN_IF_ERROR(
+
io::PackedFileManager::instance()->get_packed_slice_location(file_path,
&index));
if (index.packed_file_path.empty()) {
return Status::OK(); // File not in packed file, skip
}
diff --git a/be/src/io/fs/file_writer.h b/be/src/io/fs/file_writer.h
index a22fc28c26e..3712162576b 100644
--- a/be/src/io/fs/file_writer.h
+++ b/be/src/io/fs/file_writer.h
@@ -82,6 +82,10 @@ public:
virtual State state() const = 0;
+ // Returns true if this file's data was written to a packed file.
+ // Used to determine whether to collect packed slice location from
PackedFileManager.
+ virtual bool is_in_packed_file() const { return false; }
+
FileCacheAllocatorBuilder* cache_builder() const {
return _cache_builder == nullptr ? nullptr : _cache_builder.get();
}
diff --git a/be/src/io/fs/packed_file_writer.h
b/be/src/io/fs/packed_file_writer.h
index 9499b0912a4..eaae0a6ed74 100644
--- a/be/src/io/fs/packed_file_writer.h
+++ b/be/src/io/fs/packed_file_writer.h
@@ -57,6 +57,9 @@ public:
// Returns empty index if file is not in merge file
Status get_packed_slice_location(PackedSliceLocation* location) const;
+ // Returns true if this file's data was written to a packed file (not
direct write)
+ bool is_in_packed_file() const override { return !_is_direct_write; }
+
private:
// Async close: submit data without waiting
Status _close_async();
diff --git a/be/src/olap/rowset/rowset_meta.cpp
b/be/src/olap/rowset/rowset_meta.cpp
index 6e33d237b93..f9c07077a12 100644
--- a/be/src/olap/rowset/rowset_meta.cpp
+++ b/be/src/olap/rowset/rowset_meta.cpp
@@ -134,9 +134,8 @@ io::FileSystemSPtr RowsetMeta::fs() {
return nullptr;
}
- auto wrapped = io::make_file_system(fs, algorithm.value());
-
- // Apply packed file system if enabled and index_map is not empty
+ // Apply packed file system first if enabled and index_map is not empty
+ io::FileSystemSPtr wrapped = fs;
if (_rowset_meta_pb.packed_slice_locations_size() > 0) {
std::unordered_map<std::string, io::PackedSliceLocation> index_map;
for (const auto& [path, index_pb] :
_rowset_meta_pb.packed_slice_locations()) {
@@ -159,6 +158,9 @@ io::FileSystemSPtr RowsetMeta::fs() {
wrapped = std::make_shared<io::PackedFileSystem>(wrapped,
index_map, append_info);
}
}
+
+ // Then apply encryption on top
+ wrapped = io::make_file_system(wrapped, algorithm.value());
return wrapped;
#else
return fs;
diff --git a/be/src/olap/rowset/rowset_writer_context.h
b/be/src/olap/rowset/rowset_writer_context.h
index a2984933d98..3ce93a271f8 100644
--- a/be/src/olap/rowset/rowset_writer_context.h
+++ b/be/src/olap/rowset/rowset_writer_context.h
@@ -194,12 +194,7 @@ struct RowsetWriterContext {
#endif
}
- // Apply encryption if needed
- if (algorithm.has_value()) {
- fs = io::make_file_system(fs, algorithm.value());
- }
-
- // Apply packed file system for write path if enabled
+ // Apply packed file system first for write path if enabled
// Create empty index_map for write path
// Index information will be populated after write completes
bool has_v1_inverted_index = tablet_schema != nullptr &&
@@ -229,6 +224,11 @@ struct RowsetWriterContext {
fs = std::make_shared<io::PackedFileSystem>(fs, append_info);
}
+ // Then apply encryption on top
+ if (algorithm.has_value()) {
+ fs = io::make_file_system(fs, algorithm.value());
+ }
+
// Cache the result to ensure consistency across multiple calls
_cached_fs = fs;
return fs;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]