This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 3ab3b9d3b90 [fix](parquet) check end of file when reading page (#41816)
3ab3b9d3b90 is described below

commit 3ab3b9d3b9073fa1872e28fb38f5dae3c816321e
Author: Socrates <[email protected]>
AuthorDate: Tue Oct 15 17:31:05 2024 +0800

    [fix](parquet) check end of file when reading page (#41816)
    
    ## Proposed changes
    fix parquet case: nation.dict-malformed.parquet
---
 be/src/io/fs/buffered_reader.cpp                           |  8 ++++++--
 .../external_table_p0/tvf/test_hdfs_parquet_group0.groovy  | 14 ++++++++------
 .../external_table_p0/tvf/test_hdfs_parquet_group5.groovy  |  8 --------
 3 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/be/src/io/fs/buffered_reader.cpp b/be/src/io/fs/buffered_reader.cpp
index 43445ed42ef..62c0a9c7a0c 100644
--- a/be/src/io/fs/buffered_reader.cpp
+++ b/be/src/io/fs/buffered_reader.cpp
@@ -778,8 +778,12 @@ BufferedFileStreamReader::BufferedFileStreamReader(io::FileReaderSPtr file, uint
 
 Status BufferedFileStreamReader::read_bytes(const uint8_t** buf, uint64_t offset,
                                             const size_t bytes_to_read, const IOContext* io_ctx) {
-    if (offset < _file_start_offset || offset >= _file_end_offset) {
-        return Status::IOError("Out-of-bounds Access");
+    if (offset < _file_start_offset || offset >= _file_end_offset ||
+        offset + bytes_to_read > _file_end_offset) {
+        return Status::IOError(
+                "Out-of-bounds Access: offset={}, bytes_to_read={}, file_start={}, "
+                "file_end={}",
+                offset, bytes_to_read, _file_start_offset, _file_end_offset);
     }
     int64_t end_offset = offset + bytes_to_read;
     if (_buf_start_offset <= offset && _buf_end_offset >= end_offset) {
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
index 2af8eef6cb5..65d6732e272 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
@@ -169,12 +169,14 @@ suite("test_hdfs_parquet_group0","external,hive,tvf,external_docker") {
                         "format" = "parquet") limit 10; """
 
 
-            // uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group0/nation.dict-malformed.parquet"
-            // order_qt_test_20 """ select * from HDFS(
-            //             "uri" = "${uri}",
-            //             "hadoop.username" = "${hdfsUserName}",
-            //             "format" = "parquet") limit 10; """
-            // [E-3113]string column length is too large: total_length=3990808712454497748, element_number=25, you can set batch_size a number smaller than 25 to avoid this error
+            uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group0/nation.dict-malformed.parquet"
+            test {
+                sql """ select * from HDFS(
+                        "uri" = "${uri}",
+                        "hadoop.username" = "${hdfsUserName}",
+                        "format" = "parquet") limit 10; """
+                exception "[IO_ERROR]Out-of-bounds Access"
+            }
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group0/lz4_raw_compressed_larger.parquet"
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
index 324b6aaf209..a8723a433f0 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
@@ -183,14 +183,6 @@ suite("test_hdfs_parquet_group5","external,hive,tvf,external_docker") {
                         "format" = "parquet") limit 10; """
 
 
-            // uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group5/nation.dict-malformed.parquet"
-            // order_qt_test_22 """ select * from HDFS(
-            //             "uri" = "${uri}",
-            //             "hadoop.username" = "${hdfsUserName}",
-            //             "format" = "parquet") limit 10; """
-            // [E-3113]string column length is too large: total_length=7909446880690438330, element_number=25, you can set batch_size a number smaller than 25 to avoid this error
-
-
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group5/v0.7.1.column-metadata-handling.parquet"
             order_qt_test_23 """ select * from HDFS(
                         "uri" = "${uri}",


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to