This is an automated email from the ASF dual-hosted git repository.

liaoxin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 8eff3d1a908 [fix](cloud) Fix packed file write path bypassing encryption (#60629)
8eff3d1a908 is described below

commit 8eff3d1a9089164e130e1f846453940a6f92493e
Author: Xin Liao <[email protected]>
AuthorDate: Fri Feb 27 11:45:40 2026 +0800

    [fix](cloud) Fix packed file write path bypassing encryption (#60629)
    
    On the write path, PackedFileSystem was applied on top of the encryption
    layer, making it the outermost wrapper, so data written through
    PackedFileSystem bypassed encryption. On the read path, the encryption
    layer would then attempt to decrypt this unencrypted data, leading to
    data corruption.
    
    Fix by swapping the order: apply PackedFileSystem first, then wrap with
    encryption on top, ensuring all data written through the packed path is
    properly encrypted.
---
 be/src/cloud/cloud_rowset_writer.cpp       | 24 ++++++++++++++++++------
 be/src/io/fs/file_writer.h                 |  4 ++++
 be/src/io/fs/packed_file_writer.h          |  3 +++
 be/src/olap/rowset/rowset_meta.cpp         |  8 +++++---
 be/src/olap/rowset/rowset_writer_context.h | 12 ++++++------
 5 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/be/src/cloud/cloud_rowset_writer.cpp 
b/be/src/cloud/cloud_rowset_writer.cpp
index 0c1b79392e9..c5b58049ae4 100644
--- a/be/src/cloud/cloud_rowset_writer.cpp
+++ b/be/src/cloud/cloud_rowset_writer.cpp
@@ -17,8 +17,10 @@
 
 #include "cloud/cloud_rowset_writer.h"
 
+#include "common/logging.h"
 #include "common/status.h"
 #include "io/cache/block_file_cache_factory.h"
+#include "io/fs/packed_file_manager.h"
 #include "io/fs/packed_file_writer.h"
 #include "olap/rowset/rowset_factory.h"
 
@@ -80,6 +82,9 @@ Status CloudRowsetWriter::init(const RowsetWriterContext& 
rowset_writer_context)
 }
 
 Status CloudRowsetWriter::_build_rowset_meta(RowsetMeta* rowset_meta, bool 
check_segment_num) {
+    VLOG_NOTICE << "start to build rowset meta. tablet_id=" << 
rowset_meta->tablet_id()
+                << ", rowset_id=" << rowset_meta->rowset_id()
+                << ", check_segment_num=" << check_segment_num;
     // Call base class implementation
     RETURN_IF_ERROR(BaseBetaRowsetWriter::_build_rowset_meta(rowset_meta, 
check_segment_num));
 
@@ -157,6 +162,8 @@ Status CloudRowsetWriter::build(RowsetSharedPtr& rowset) {
 }
 
 Status CloudRowsetWriter::_collect_all_packed_slice_locations(RowsetMeta* 
rowset_meta) {
+    VLOG_NOTICE << "start to collect packed slice locations for rowset meta. 
tablet_id="
+                << rowset_meta->tablet_id() << ", rowset_id=" << 
rowset_meta->rowset_id();
     if (!_context.packed_file_active) {
         return Status::OK();
     }
@@ -189,17 +196,22 @@ Status 
CloudRowsetWriter::_collect_all_packed_slice_locations(RowsetMeta* rowset
 Status CloudRowsetWriter::_collect_packed_slice_location(io::FileWriter* 
file_writer,
                                                          const std::string& 
file_path,
                                                          RowsetMeta* 
rowset_meta) {
-    // At this point, we only call this when 
RowsetWriterContext::merge_file_active is true,
-    // and all writers should be MergeFileWriter. So we can safely cast 
without extra checks.
-    auto* packed_writer = static_cast<io::PackedFileWriter*>(file_writer);
-
-    if (packed_writer->state() != io::FileWriter::State::CLOSED) {
+    VLOG_NOTICE << "collect packed slice location for file: " << file_path;
+    // Check if file writer is closed
+    if (file_writer->state() != io::FileWriter::State::CLOSED) {
         // Writer is still open; index will be collected after it is closed.
         return Status::OK();
     }
 
+    // Check if file is actually in packed file (not direct write for large 
files)
+    if (!file_writer->is_in_packed_file()) {
+        return Status::OK();
+    }
+
+    // Get packed slice location directly from PackedFileManager
     io::PackedSliceLocation index;
-    RETURN_IF_ERROR(packed_writer->get_packed_slice_location(&index));
+    RETURN_IF_ERROR(
+            
io::PackedFileManager::instance()->get_packed_slice_location(file_path, 
&index));
     if (index.packed_file_path.empty()) {
         return Status::OK(); // File not in packed file, skip
     }
diff --git a/be/src/io/fs/file_writer.h b/be/src/io/fs/file_writer.h
index a22fc28c26e..3712162576b 100644
--- a/be/src/io/fs/file_writer.h
+++ b/be/src/io/fs/file_writer.h
@@ -82,6 +82,10 @@ public:
 
     virtual State state() const = 0;
 
+    // Returns true if this file's data was written to a packed file.
+    // Used to determine whether to collect packed slice location from 
PackedFileManager.
+    virtual bool is_in_packed_file() const { return false; }
+
     FileCacheAllocatorBuilder* cache_builder() const {
         return _cache_builder == nullptr ? nullptr : _cache_builder.get();
     }
diff --git a/be/src/io/fs/packed_file_writer.h 
b/be/src/io/fs/packed_file_writer.h
index 9499b0912a4..eaae0a6ed74 100644
--- a/be/src/io/fs/packed_file_writer.h
+++ b/be/src/io/fs/packed_file_writer.h
@@ -57,6 +57,9 @@ public:
     // Returns empty index if file is not in merge file
     Status get_packed_slice_location(PackedSliceLocation* location) const;
 
+    // Returns true if this file's data was written to a packed file (not 
direct write)
+    bool is_in_packed_file() const override { return !_is_direct_write; }
+
 private:
     // Async close: submit data without waiting
     Status _close_async();
diff --git a/be/src/olap/rowset/rowset_meta.cpp 
b/be/src/olap/rowset/rowset_meta.cpp
index 6e33d237b93..f9c07077a12 100644
--- a/be/src/olap/rowset/rowset_meta.cpp
+++ b/be/src/olap/rowset/rowset_meta.cpp
@@ -134,9 +134,8 @@ io::FileSystemSPtr RowsetMeta::fs() {
         return nullptr;
     }
 
-    auto wrapped = io::make_file_system(fs, algorithm.value());
-
-    // Apply packed file system if enabled and index_map is not empty
+    // Apply packed file system first if enabled and index_map is not empty
+    io::FileSystemSPtr wrapped = fs;
     if (_rowset_meta_pb.packed_slice_locations_size() > 0) {
         std::unordered_map<std::string, io::PackedSliceLocation> index_map;
         for (const auto& [path, index_pb] : 
_rowset_meta_pb.packed_slice_locations()) {
@@ -159,6 +158,9 @@ io::FileSystemSPtr RowsetMeta::fs() {
             wrapped = std::make_shared<io::PackedFileSystem>(wrapped, 
index_map, append_info);
         }
     }
+
+    // Then apply encryption on top
+    wrapped = io::make_file_system(wrapped, algorithm.value());
     return wrapped;
 #else
     return fs;
diff --git a/be/src/olap/rowset/rowset_writer_context.h 
b/be/src/olap/rowset/rowset_writer_context.h
index a2984933d98..3ce93a271f8 100644
--- a/be/src/olap/rowset/rowset_writer_context.h
+++ b/be/src/olap/rowset/rowset_writer_context.h
@@ -194,12 +194,7 @@ struct RowsetWriterContext {
 #endif
         }
 
-        // Apply encryption if needed
-        if (algorithm.has_value()) {
-            fs = io::make_file_system(fs, algorithm.value());
-        }
-
-        // Apply packed file system for write path if enabled
+        // Apply packed file system first for write path if enabled
         // Create empty index_map for write path
         // Index information will be populated after write completes
         bool has_v1_inverted_index = tablet_schema != nullptr &&
@@ -229,6 +224,11 @@ struct RowsetWriterContext {
             fs = std::make_shared<io::PackedFileSystem>(fs, append_info);
         }
 
+        // Then apply encryption on top
+        if (algorithm.has_value()) {
+            fs = io::make_file_system(fs, algorithm.value());
+        }
+
         // Cache the result to ensure consistency across multiple calls
         _cached_fs = fs;
         return fs;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to