This is an automated email from the ASF dual-hosted git repository.
morningman pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
from d3165483522 [Enhancement] (nereids) implement cancelDecommissionBackend in nereids (#50671)
add 3de6d4830f8 [improve](hive) Refactor csv reader (#50379)
No new revisions were added by this update.
Summary of changes:
be/src/http/action/stream_load.cpp | 4 -
be/src/service/internal_service.cpp | 17 +-
be/src/util/load_util.cpp | 64 +-
be/src/util/thrift_rpc_helper.cpp | 8 +
.../vec/data_types/serde/data_type_string_serde.h | 18 +-
be/src/vec/exec/format/csv/csv_reader.cpp | 503 ++---
be/src/vec/exec/format/csv/csv_reader.h | 102 +-
.../file_reader/new_plain_text_line_reader.cpp | 29 +-
.../file_reader/new_plain_text_line_reader.h | 15 +-
be/src/vec/exec/format/text/text_reader.cpp | 132 ++
be/src/vec/exec/format/text/text_reader.h | 69 +
be/src/vec/exec/scan/file_scanner.cpp | 14 +-
be/test/util/load_util_test.cpp | 2 +-
be/test/vec/data_types/data_type_string_test.cpp | 18 +-
.../new_plain_text_line_reader_test.cpp | 4 +-
.../scripts/create_preinstalled_scripts/run76.hql | 26 +-
.../open_csv_complex_type.csv | 5 +
.../open_csv_table_null_format.csv | 3 +
.../preinstalled_data/csv_tvf_data/14512.csv | 2 +
.../preinstalled_data/csv_tvf_data/14512_og.csv | 4 +
.../preinstalled_data/csv_tvf_data/16857.csv | 211 ++
.../preinstalled_data/csv_tvf_data/all_quotes.csv | 7 +
.../csv_tvf_data/bad_csv_file_2047.csv | 2054 ++++++++++++++++++++
.../preinstalled_data/csv_tvf_data/bad_escape.csv | 3 +
.../preinstalled_data/csv_tvf_data/big_escape.csv | 30 +
.../preinstalled_data/csv_tvf_data/blank_line.csv | 12 +
.../preinstalled_data/csv_tvf_data/bool.csv | 3 +
.../csv_tvf_data/csv_quoted_newline_odd.csv | 11 +
.../preinstalled_data/csv_tvf_data/decimal.csv | 2 +
.../decimal_separators/decimal_separators.csv | 6 +
.../decimal_separators/decimal_separators_csv.csv | 2 +
.../decimal_separators/invalid_char.csv | 2 +
.../decimal_separators/mixed_format_fail.csv | 4 +
.../preinstalled_data/csv_tvf_data/dirty_line.csv | 4 +
.../csv_tvf_data/double_quoted_header.csv | 3 +
.../csv_tvf_data/empty_first_line.csv | 4 +
.../csv_tvf_data/empty_space_start_value.csv | 5 +
.../preinstalled_data/csv_tvf_data/error.csv | 4 +
.../preinstalled_data/csv_tvf_data/escape.csv | 1 +
.../csv_tvf_data/extra_delimiters.csv | 5 +
.../csv_tvf_data/header_left_space.csv | 4 +
.../csv_tvf_data/header_normalize.csv | 4 +
.../preinstalled_data/csv_tvf_data/header_only.csv | 1 +
.../csv_tvf_data/headers/all_varchar.csv | 2 +
.../csv_tvf_data/headers/borked_type.csv | 3 +
.../csv_tvf_data/headers/empty_1.csv | 3 +
.../csv_tvf_data/headers/empty_2.csv | 3 +
.../csv_tvf_data/headers/empty_3.csv | 3 +
.../csv_tvf_data/headers/empty_4.csv | 3 +
.../csv_tvf_data/headers/escaped_quote.csv | 3 +
.../csv_tvf_data/headers/integer.csv | 1 +
.../csv_tvf_data/headers/single_line.csv | 1 +
.../csv_tvf_data/headers/undetected_type.csv | 2 +
.../csv_tvf_data/headers/unescaped_quote.csv | 3 +
.../csv_tvf_data/hits_problematic.csv | 1 +
.../csv_tvf_data/multidelimiter/aa_delim.csv | 19 +
.../multidelimiter/aa_delim_quoted.csv | 19 +
.../multidelimiter/aa_delim_quoted_2.csv | 19 +
.../csv_tvf_data/multidelimiter/aa_delim_small.csv | 3 +
.../csv_tvf_data/multidelimiter/aaa_delim.csv | 19 +
.../csv_tvf_data/multidelimiter/aaaa_delim.csv | 19 +
.../csv_tvf_data/multidelimiter/aaaa_delim_rn.csv | 19 +
.../csv_tvf_data/multidelimiter/aaab_delim.csv | 2 +
.../csv_tvf_data/multidelimiter/aab_delim.csv | 2 +
.../csv_tvf_data/multidelimiter/ab_delim.csv | 19 +
.../csv_tvf_data/multidelimiter/abac.csv | 1 +
.../multidelimiter/abac_incomplete_quote.csv | 1 +
.../csv_tvf_data/multidelimiter/abac_mix.csv | 1 +
.../multidelimiter/abac_newline_in_quote.csv | 2 +
.../multidelimiter/carriage_feed_newline.csv | 1 +
.../multidelimiter/complex_unterminated_quote.csv | 1 +
.../multidelimiter/escape_non_quote_escape.csv | 1 +
.../escape_non_quote_escape_complex.csv | 1 +
.../multidelimiter/file_ends_in_quoted_value.csv | 1 +
.../incomplete_multibyte_delimiter.csv | 1 +
.../csv_tvf_data/multidelimiter/many_bytes.csv | 8 +
.../multidelimiter/simple_unterminated_quote.csv | 1 +
.../multidelimiter/trailing_delimiter.csv | 1 +
.../multidelimiter/trailing_delimiter_complex.csv | 1 +
.../multidelimiter/unquote_without_delimiter.csv | 1 +
.../multidelimiter/unterminated_escape.csv | 1 +
.../multidelimiter/unterminated_escape_complex.csv | 1 +
.../multidelimiter/unterminated_quote_escape.csv | 1 +
.../unterminated_quote_escape_complex.csv | 1 +
.../unterminated_quote_multi_line.csv | 1 +
.../unterminated_quote_with_escape.csv | 1 +
.../unterminated_quote_with_escape_complex.csv | 1 +
.../multidelimiter/windows_newline.csv | 2 +
.../csv_tvf_data/single_quote.csv | 2 +
.../preinstalled_data/csv_tvf_data/struct.csv | 2 +
.../csv_tvf_data/unescaped_quotes/end_quote.csv | 2 +
.../csv_tvf_data/unescaped_quotes/end_quote_2.csv | 3 +
.../csv_tvf_data/unescaped_quotes/end_quote_3.csv | 3 +
.../unescaped_quotes/end_quote_mixed.csv | 3 +
.../unescaped_quotes/some_escaped_some_not.csv | 4 +
.../unescaped_quotes/unescaped_quote.csv | 6 +
.../unescaped_quotes/unescaped_quote_new_line.csv | 12 +
.../unescaped_quote_new_line_rn.csv | 12 +
.../org/apache/doris/analysis/DataDescription.java | 2 +
.../java/org/apache/doris/common/util/Util.java | 6 +-
.../apache/doris/datasource/FileQueryScanNode.java | 8 +-
.../apache/doris/datasource/LoadScanProvider.java | 4 -
.../datasource/hive/HiveMetaStoreClientHelper.java | 2 +
.../doris/datasource/hive/source/HiveScanNode.java | 9 +-
.../fileformat/CsvFileFormatProperties.java | 18 +-
.../property/fileformat/FileFormatProperties.java | 4 +-
...operties.java => TextFileFormatProperties.java} | 88 +-
.../doris/nereids/load/NereidsDataDescription.java | 2 +
.../nereids/load/NereidsLoadPlanInfoCollector.java | 5 -
.../ExternalFileTableValuedFunction.java | 7 +-
.../fileformat/TextFileFormatPropertiesTest.java | 143 ++
gensrc/thrift/PlanNodes.thrift | 5 +-
.../external_table_p0/hive/test_open_csv_serde.out | Bin 1607 -> 2363 bytes
.../external_table_p0/tvf/test_hdfs_tvf_csv.out | Bin 0 -> 95776 bytes
.../data/load_p0/stream_load/test_hive_text.out | Bin 0 -> 1062 bytes
.../data/load_p0/stream_load/test_hive_text.text | 3 +
.../data/load_p0/stream_load/test_hive_text2.text | 3 +
.../test_hive_text_with_custom_delimiter.text | 3 +
.../stream_load/test_hive_text_with_header.text | 5 +
.../hive/test_open_csv_serde.groovy | 3 +-
.../external_table_p0/hive/test_utf8_check.groovy | 6 -
.../external_table_p0/tvf/test_hdfs_tvf_csv.groovy | 611 ++++++
.../load_p0/stream_load/test_hive_text.groovy | 152 ++
123 files changed, 4129 insertions(+), 630 deletions(-)
create mode 100644 be/src/vec/exec/format/text/text_reader.cpp
create mode 100644 be/src/vec/exec/format/text/text_reader.h
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv/open_csv_complex_type/open_csv_complex_type.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv/open_csv_table_null_format/open_csv_table_null_format.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/14512.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/14512_og.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/16857.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/all_quotes.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/bad_csv_file_2047.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/bad_escape.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/big_escape.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/blank_line.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/bool.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/csv_quoted_newline_odd.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/decimal.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/decimal_separators/decimal_separators.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/decimal_separators/decimal_separators_csv.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/decimal_separators/invalid_char.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/decimal_separators/mixed_format_fail.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/dirty_line.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/double_quoted_header.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/empty_first_line.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/empty_space_start_value.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/error.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/escape.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/extra_delimiters.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/header_left_space.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/header_normalize.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/header_only.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/headers/all_varchar.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/headers/borked_type.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/headers/empty_1.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/headers/empty_2.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/headers/empty_3.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/headers/empty_4.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/headers/escaped_quote.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/headers/integer.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/headers/single_line.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/headers/undetected_type.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/headers/unescaped_quote.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/hits_problematic.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/aa_delim.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/aa_delim_quoted.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/aa_delim_quoted_2.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/aa_delim_small.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/aaa_delim.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/aaaa_delim.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/aaaa_delim_rn.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/aaab_delim.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/aab_delim.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/ab_delim.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/abac.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/abac_incomplete_quote.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/abac_mix.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/abac_newline_in_quote.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/carriage_feed_newline.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/complex_unterminated_quote.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/escape_non_quote_escape.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/escape_non_quote_escape_complex.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/file_ends_in_quoted_value.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/incomplete_multibyte_delimiter.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/many_bytes.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/simple_unterminated_quote.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/trailing_delimiter.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/trailing_delimiter_complex.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/unquote_without_delimiter.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/unterminated_escape.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/unterminated_escape_complex.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/unterminated_quote_escape.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/unterminated_quote_escape_complex.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/unterminated_quote_multi_line.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/unterminated_quote_with_escape.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/unterminated_quote_with_escape_complex.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/multidelimiter/windows_newline.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/single_quote.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/struct.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/unescaped_quotes/end_quote.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/unescaped_quotes/end_quote_2.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/unescaped_quotes/end_quote_3.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/unescaped_quotes/end_quote_mixed.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/unescaped_quotes/some_escaped_some_not.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/unescaped_quotes/unescaped_quote.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/unescaped_quotes/unescaped_quote_new_line.csv
create mode 100644
docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/csv_tvf_data/unescaped_quotes/unescaped_quote_new_line_rn.csv
copy fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/{CsvFileFormatProperties.java => TextFileFormatProperties.java} (59%)
create mode 100644
fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/TextFileFormatPropertiesTest.java
create mode 100644
regression-test/data/external_table_p0/tvf/test_hdfs_tvf_csv.out
create mode 100644 regression-test/data/load_p0/stream_load/test_hive_text.out
create mode 100644 regression-test/data/load_p0/stream_load/test_hive_text.text
create mode 100644
regression-test/data/load_p0/stream_load/test_hive_text2.text
create mode 100644
regression-test/data/load_p0/stream_load/test_hive_text_with_custom_delimiter.text
create mode 100644
regression-test/data/load_p0/stream_load/test_hive_text_with_header.text
create mode 100644
regression-test/suites/external_table_p0/tvf/test_hdfs_tvf_csv.groovy
create mode 100644
regression-test/suites/load_p0/stream_load/test_hive_text.groovy
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org