This is an automated email from the ASF dual-hosted git repository.
lijibing pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 6554a5e956c [fix](statistics)Fix column cached stats size bug. (#37545) (#37666)
6554a5e956c is described below
commit 6554a5e956ce46c703ec0e78425c4e6ce86749c9
Author: Jibing-Li <[email protected]>
AuthorDate: Thu Jul 11 18:52:51 2024 +0800
[fix](statistics)Fix column cached stats size bug. (#37545) (#37666)
backport: https://github.com/apache/doris/pull/37545
---
.../org/apache/doris/statistics/ColStatsData.java | 2 +-
.../suites/statistics/analyze_stats.groovy | 61 ++++++++++++++++++++--
2 files changed, 57 insertions(+), 6 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
index 6bbafdbe5b5..7cf75462fee 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
@@ -142,7 +142,7 @@ public class ColStatsData {
columnStatisticBuilder.setNdv(ndv);
columnStatisticBuilder.setNumNulls(nullCount);
columnStatisticBuilder.setDataSize(dataSizeInBytes);
- columnStatisticBuilder.setAvgSizeByte(count == 0 ? 0 : dataSizeInBytes / count);
+ columnStatisticBuilder.setAvgSizeByte(count == 0 ? 0 : ((double) dataSizeInBytes) / count);
if (statsId == null) {
return ColumnStatistic.UNKNOWN;
}
diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy
index b6c416b0ffa..31f66f68112 100644
--- a/regression-test/suites/statistics/analyze_stats.groovy
+++ b/regression-test/suites/statistics/analyze_stats.groovy
@@ -180,7 +180,7 @@ suite("test_analyze") {
"""
def contains_expected_table = { r ->
- for (int i = 0; i < r.size; i++) {
+ for (int i = 0; i < r.size(); i++) {
if (r[i][3] == "${tbl}") {
return true
}
@@ -189,7 +189,7 @@ suite("test_analyze") {
}
def stats_job_removed = { r, id ->
- for (int i = 0; i < r.size; i++) {
+ for (int i = 0; i < r.size(); i++) {
if (r[i][0] == id) {
return false
}
@@ -249,7 +249,7 @@ suite("test_analyze") {
"""
def expected_result = { r->
- for(int i = 0; i < r.size; i++) {
+ for(int i = 0; i < r.size(); i++) {
if ((int) Double.parseDouble(r[i][2]) == 6) {
return true
} else {
@@ -1202,7 +1202,7 @@ PARTITION `p599` VALUES IN (599)
"""
def tbl_name_as_expetected = { r,name ->
- for (int i = 0; i < r.size; i++) {
+ for (int i = 0; i < r.size(); i++) {
if (r[i][3] != name) {
return false
}
@@ -1220,7 +1220,7 @@ PARTITION `p599` VALUES IN (599)
assert show_result.size() > 0
def all_finished = { r ->
- for (int i = 0; i < r.size; i++) {
+ for (int i = 0; i < r.size(); i++) {
if (r[i][9] != "FINISHED") {
return false
}
@@ -2775,6 +2775,57 @@ PARTITION `p599` VALUES IN (599)
result_sample = sql """show analyze task status ${jobId}"""
assertEquals(2, result_sample.size())
+ // Test inject stats avg_size.
+ sql """CREATE TABLE `date_dim` (
+ `d_date_sk` BIGINT NOT NULL,
+ `d_date_id` CHAR(16) NOT NULL,
+ `d_date` DATE NULL,
+ `d_month_seq` INT NULL,
+ `d_week_seq` INT NULL,
+ `d_quarter_seq` INT NULL,
+ `d_year` INT NULL,
+ `d_dow` INT NULL,
+ `d_moy` INT NULL,
+ `d_dom` INT NULL,
+ `d_qoy` INT NULL,
+ `d_fy_year` INT NULL,
+ `d_fy_quarter_seq` INT NULL,
+ `d_fy_week_seq` INT NULL,
+ `d_day_name` CHAR(9) NULL,
+ `d_quarter_name` CHAR(6) NULL,
+ `d_holiday` CHAR(1) NULL,
+ `d_weekend` CHAR(1) NULL,
+ `d_following_holiday` CHAR(1) NULL,
+ `d_first_dom` INT NULL,
+ `d_last_dom` INT NULL,
+ `d_same_day_ly` INT NULL,
+ `d_same_day_lq` INT NULL,
+ `d_current_day` CHAR(1) NULL,
+ `d_current_week` CHAR(1) NULL,
+ `d_current_month` CHAR(1) NULL,
+ `d_current_quarter` CHAR(1) NULL,
+ `d_current_year` CHAR(1) NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`d_date_sk`)
+ DISTRIBUTED BY HASH(`d_date_sk`) BUCKETS 12
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1")
+ """
+
+ sql """
+ alter table date_dim modify column d_day_name set stats ('row_count'='73049', 'ndv'='7', 'num_nulls'='0', 'min_value'='Friday', 'max_value'='Wednesday', 'data_size'='521779')
+ """
+
+ alter_result = sql """show column cached stats date_dim"""
+ assertEquals("d_day_name", alter_result[0][0])
+ assertEquals("date_dim", alter_result[0][1])
+ assertEquals("73049.0", alter_result[0][2])
+ assertEquals("7.0", alter_result[0][3])
+ assertEquals("0.0", alter_result[0][4])
+ assertEquals("521779.0", alter_result[0][5])
+ assertEquals("7.142863009760572", alter_result[0][6])
+
+
sql """DROP DATABASE IF EXISTS trigger"""
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]