This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 3ab3b9d3b90 [fix](parquet) check end of file when reading page (#41816)
3ab3b9d3b90 is described below
commit 3ab3b9d3b9073fa1872e28fb38f5dae3c816321e
Author: Socrates <[email protected]>
AuthorDate: Tue Oct 15 17:31:05 2024 +0800
[fix](parquet) check end of file when reading page (#41816)
## Proposed changes
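Fix the Parquet case nation.dict-malformed.parquet: when reading a page, check that the requested range (offset + bytes_to_read) does not extend past the end of the file, and return an "Out-of-bounds Access" I/O error if it does.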
---
be/src/io/fs/buffered_reader.cpp | 8 ++++++--
.../external_table_p0/tvf/test_hdfs_parquet_group0.groovy | 14 ++++++++------
.../external_table_p0/tvf/test_hdfs_parquet_group5.groovy | 8 --------
3 files changed, 14 insertions(+), 16 deletions(-)
diff --git a/be/src/io/fs/buffered_reader.cpp b/be/src/io/fs/buffered_reader.cpp
index 43445ed42ef..62c0a9c7a0c 100644
--- a/be/src/io/fs/buffered_reader.cpp
+++ b/be/src/io/fs/buffered_reader.cpp
@@ -778,8 +778,12 @@
BufferedFileStreamReader::BufferedFileStreamReader(io::FileReaderSPtr file, uint
Status BufferedFileStreamReader::read_bytes(const uint8_t** buf, uint64_t offset,
const size_t bytes_to_read, const IOContext* io_ctx) {
- if (offset < _file_start_offset || offset >= _file_end_offset) {
- return Status::IOError("Out-of-bounds Access");
+ if (offset < _file_start_offset || offset >= _file_end_offset ||
+ offset + bytes_to_read > _file_end_offset) {
+ return Status::IOError(
+ "Out-of-bounds Access: offset={}, bytes_to_read={}, file_start={}, "
+ "file_end={}",
+ offset, bytes_to_read, _file_start_offset, _file_end_offset);
}
int64_t end_offset = offset + bytes_to_read;
if (_buf_start_offset <= offset && _buf_end_offset >= end_offset) {
diff --git
a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
index 2af8eef6cb5..65d6732e272 100644
---
a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
+++
b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
@@ -169,12 +169,14 @@
suite("test_hdfs_parquet_group0","external,hive,tvf,external_docker") {
"format" = "parquet") limit 10; """
- // uri = "${defaultFS}" +
"/user/doris/tvf_data/test_hdfs_parquet/group0/nation.dict-malformed.parquet"
- // order_qt_test_20 """ select * from HDFS(
- // "uri" = "${uri}",
- // "hadoop.username" = "${hdfsUserName}",
- // "format" = "parquet") limit 10; """
- // [E-3113]string column length is too large: total_length=3990808712454497748, element_number=25, you can set batch_size a number smaller than 25 to avoid this error
+ uri = "${defaultFS}" +
"/user/doris/tvf_data/test_hdfs_parquet/group0/nation.dict-malformed.parquet"
+ test {
+ sql """ select * from HDFS(
+ "uri" = "${uri}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "parquet") limit 10; """
+ exception "[IO_ERROR]Out-of-bounds Access"
+ }
uri = "${defaultFS}" +
"/user/doris/tvf_data/test_hdfs_parquet/group0/lz4_raw_compressed_larger.parquet"
diff --git
a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
index 324b6aaf209..a8723a433f0 100644
---
a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
+++
b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
@@ -183,14 +183,6 @@
suite("test_hdfs_parquet_group5","external,hive,tvf,external_docker") {
"format" = "parquet") limit 10; """
- // uri = "${defaultFS}" +
"/user/doris/tvf_data/test_hdfs_parquet/group5/nation.dict-malformed.parquet"
- // order_qt_test_22 """ select * from HDFS(
- // "uri" = "${uri}",
- // "hadoop.username" = "${hdfsUserName}",
- // "format" = "parquet") limit 10; """
- // [E-3113]string column length is too large: total_length=7909446880690438330, element_number=25, you can set batch_size a number smaller than 25 to avoid this error
-
-
uri = "${defaultFS}" +
"/user/doris/tvf_data/test_hdfs_parquet/group5/v0.7.1.column-metadata-handling.parquet"
order_qt_test_23 """ select * from HDFS(
"uri" = "${uri}",
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]