This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 119f8d4ace6 branch-3.0: [fix](tvf) support compressed json file for tvf and refactor code (#51983) (#53165)
119f8d4ace6 is described below
commit 119f8d4ace6ffa066a68e4a15d504d6901fdbf6d
Author: Socrates <[email protected]>
AuthorDate: Mon Jul 14 14:22:29 2025 +0800
branch-3.0: [fix](tvf) support compressed json file for tvf and refactor code (#51983) (#53165)
bp: #51983
---
be/src/vec/exec/format/json/new_json_reader.cpp | 4 +++-
.../json_format_test/simple_object_json.json.gz | Bin 0 -> 211 bytes
.../data/external_table_p0/tvf/test_hdfs_tvf.out | Bin 40945 -> 41176 bytes
.../external_table_p0/tvf/test_hdfs_tvf.groovy | 10 ++++++++++
4 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp
index c8969c6d4c3..fb1b04e867c 100644
--- a/be/src/vec/exec/format/json/new_json_reader.cpp
+++ b/be/src/vec/exec/format/json/new_json_reader.cpp
@@ -258,7 +258,9 @@ Status
NewJsonReader::get_columns(std::unordered_map<std::string, TypeDescriptor
Status NewJsonReader::get_parsed_schema(std::vector<std::string>* col_names,
std::vector<TypeDescriptor>*
col_types) {
RETURN_IF_ERROR(_get_range_params());
-
+ // create decompressor.
+ // _decompressor may be nullptr if this is not a compressed file
+ RETURN_IF_ERROR(Decompressor::create_decompressor(_file_compress_type,
&_decompressor));
RETURN_IF_ERROR(_open_file_reader(true));
if (_read_json_by_line) {
RETURN_IF_ERROR(_open_line_reader());
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json_format_test/simple_object_json.json.gz b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json_format_test/simple_object_json.json.gz
new file mode 100644
index 00000000000..8a6db90241f
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json_format_test/simple_object_json.json.gz differ
diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out
index a8f5dcf5396..04ec58cdbae 100644
Binary files a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out and b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out differ
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy
index 74cb1e320aa..8bc8194843d 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy
@@ -143,6 +143,16 @@ suite("test_hdfs_tvf","external,hive,tvf,external_docker")
{
"strip_outer_array" = "false",
"read_json_by_line" = "true") order by id; """
+ uri = "${defaultFS}" +
"/user/doris/preinstalled_data/json_format_test/simple_object_json.json.gz"
+ format = "json"
+ qt_json_compressed """ select * from HDFS(
+ "uri" = "${uri}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}",
+ "compress_type" = "GZ",
+ "strip_outer_array" = "false",
+ "read_json_by_line" = "true") order by id; """
+
uri = "${defaultFS}" +
"/user/doris/preinstalled_data/json_format_test/simple_object_json.json"
format = "json"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]