This is an automated email from the ASF dual-hosted git repository.
lijibing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 294acab3f91 [improvement](statistics)Improve statistics cache loading
logic. (#38829)
294acab3f91 is described below
commit 294acab3f9136002553e182f520a61dea772df7a
Author: Jibing-Li <[email protected]>
AuthorDate: Tue Aug 6 10:22:32 2024 +0800
[improvement](statistics)Improve statistics cache loading logic. (#38829)
Improve statistics cache loading logic. When the loading operation throws an
exception, do not cache UNKNOWN, so that the load can be retried next time.
---
.../doris/datasource/hive/HMSExternalTable.java | 12 +-
.../java/org/apache/doris/qe/ShowExecutor.java | 9 +-
.../apache/doris/statistics/AnalysisManager.java | 8 +-
.../apache/doris/statistics/ColumnStatistic.java | 140 +++++++++------------
.../statistics/ColumnStatisticsCacheLoader.java | 34 ++---
.../doris/statistics/PartitionColumnStatistic.java | 121 ++++++++----------
.../PartitionColumnStatisticCacheLoader.java | 28 ++---
.../doris/statistics/StatisticsRepository.java | 13 +-
.../doris/statistics/util/StatisticsUtil.java | 6 +-
.../org/apache/doris/statistics/CacheTest.java | 44 +++++++
10 files changed, 205 insertions(+), 210 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index 067f2231a4b..5692f61df0c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -610,21 +610,13 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
continue;
}
ColumnStatisticsData data = tableStat.getStatsData();
- try {
- setStatData(column, data, columnStatisticBuilder, count);
- } catch (AnalysisException e) {
- if (LOG.isDebugEnabled()) {
- LOG.debug(e);
- }
- return Optional.empty();
- }
+ setStatData(column, data, columnStatisticBuilder, count);
}
return Optional.of(columnStatisticBuilder.build());
}
- private void setStatData(Column col, ColumnStatisticsData data,
ColumnStatisticBuilder builder, long count)
- throws AnalysisException {
+ private void setStatData(Column col, ColumnStatisticsData data,
ColumnStatisticBuilder builder, long count) {
long ndv = 0;
long nulls = 0;
double colSize = 0;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
index 7340f2affcc..a3d1ca313ae 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
@@ -2695,7 +2695,14 @@ public class ShowExecutor {
if (indexName == null) {
continue;
}
- columnStatistics.add(Pair.of(Pair.of(indexName, row.get(5)),
ColumnStatistic.fromResultRow(row)));
+ try {
+ columnStatistics.add(Pair.of(Pair.of(indexName, row.get(5)),
ColumnStatistic.fromResultRow(row)));
+ } catch (Exception e) {
+ LOG.warn("Failed to deserialize column statistics. reason:
[{}]. Row [{}]", e.getMessage(), row);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(e);
+ }
+ }
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 0ebbfd79811..0f9e833d496 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -834,8 +834,12 @@ public class AnalysisManager implements Writable {
// count, ndv, null_count, min, max, data_size, update_time]
StatisticsCache cache = Env.getCurrentEnv().getStatisticsCache();
for (ResultRow row : resultRows) {
- cache.updatePartitionColStatsCache(catalogId, dbId, tableId,
indexId, row.get(4), colName,
- PartitionColumnStatistic.fromResultRow(row));
+ try {
+ cache.updatePartitionColStatsCache(catalogId, dbId, tableId,
indexId, row.get(4), colName,
+ PartitionColumnStatistic.fromResultRow(row));
+ } catch (Exception e) {
+ cache.invalidatePartitionColumnStatsCache(catalogId, dbId,
tableId, indexId, row.get(4), colName);
+ }
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
index 639d5ef8346..9713d2d30e1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
@@ -108,100 +108,82 @@ public class ColumnStatistic {
}
public static ColumnStatistic fromResultRow(List<ResultRow> resultRows) {
- ColumnStatistic columnStatistic = null;
- try {
- for (ResultRow resultRow : resultRows) {
- String partId = resultRow.get(6);
- if (partId == null) {
- columnStatistic = fromResultRow(resultRow);
- } else {
- LOG.warn("Column statistics table shouldn't contain
partition stats. [{}]", resultRow);
- }
- }
- } catch (Throwable t) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Failed to deserialize column stats", t);
+ ColumnStatistic columnStatistic = ColumnStatistic.UNKNOWN;
+ for (ResultRow resultRow : resultRows) {
+ String partId = resultRow.get(6);
+ if (partId == null) {
+ columnStatistic = fromResultRow(resultRow);
+ } else {
+ LOG.warn("Column statistics table shouldn't contain partition
stats. [{}]", resultRow);
}
- return ColumnStatistic.UNKNOWN;
- }
- if (columnStatistic == null) {
- return ColumnStatistic.UNKNOWN;
}
return columnStatistic;
}
// TODO: use thrift
public static ColumnStatistic fromResultRow(ResultRow row) {
- try {
- ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder();
- double count = Double.parseDouble(row.get(7));
- columnStatisticBuilder.setCount(count);
- double ndv = Double.parseDouble(row.getWithDefault(8, "0"));
- columnStatisticBuilder.setNdv(ndv);
- String nullCount = row.getWithDefault(9, "0");
- columnStatisticBuilder.setNumNulls(Double.parseDouble(nullCount));
- columnStatisticBuilder.setDataSize(Double
- .parseDouble(row.getWithDefault(12, "0")));
-
columnStatisticBuilder.setAvgSizeByte(columnStatisticBuilder.getCount() == 0
- ? 0 : columnStatisticBuilder.getDataSize()
- / columnStatisticBuilder.getCount());
- long catalogId = Long.parseLong(row.get(1));
- long idxId = Long.parseLong(row.get(4));
- long dbID = Long.parseLong(row.get(2));
- long tblId = Long.parseLong(row.get(3));
- String colName = row.get(5);
- Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId,
idxId, colName);
- if (col == null) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Failed to deserialize column statistics, ctlId:
{} dbId: {}"
- + "tblId: {} column: {} not exists",
- catalogId, dbID, tblId, colName);
- }
- return ColumnStatistic.UNKNOWN;
+ ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder();
+ double count = Double.parseDouble(row.get(7));
+ columnStatisticBuilder.setCount(count);
+ double ndv = Double.parseDouble(row.getWithDefault(8, "0"));
+ columnStatisticBuilder.setNdv(ndv);
+ String nullCount = row.getWithDefault(9, "0");
+ columnStatisticBuilder.setNumNulls(Double.parseDouble(nullCount));
+ columnStatisticBuilder.setDataSize(Double
+ .parseDouble(row.getWithDefault(12, "0")));
+
columnStatisticBuilder.setAvgSizeByte(columnStatisticBuilder.getCount() == 0
+ ? 0 : columnStatisticBuilder.getDataSize()
+ / columnStatisticBuilder.getCount());
+ long catalogId = Long.parseLong(row.get(1));
+ long idxId = Long.parseLong(row.get(4));
+ long dbID = Long.parseLong(row.get(2));
+ long tblId = Long.parseLong(row.get(3));
+ String colName = row.get(5);
+ Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId,
colName);
+ if (col == null) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Failed to deserialize column statistics, ctlId: {}
dbId: {}"
+ + "tblId: {} column: {} not exists",
+ catalogId, dbID, tblId, colName);
}
- String min = row.get(10);
- String max = row.get(11);
- if (min != null && !min.equalsIgnoreCase("NULL")) {
- // Internal catalog get the min/max value using a separate SQL,
- // and the value is already encoded by base64. Need to handle
internal and external catalog separately.
- if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID &&
min.equalsIgnoreCase("NULL")) {
+ return ColumnStatistic.UNKNOWN;
+ }
+ String min = row.get(10);
+ String max = row.get(11);
+ if (min != null && !min.equalsIgnoreCase("NULL")) {
+ // Internal catalog get the min/max value using a separate SQL,
+ // and the value is already encoded by base64. Need to handle
internal and external catalog separately.
+ if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID &&
min.equalsIgnoreCase("NULL")) {
+ columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
+ } else {
+ try {
+
columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(),
min));
+
columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(),
min));
+ } catch (AnalysisException e) {
+ LOG.warn("Failed to deserialize column {} min value {}.",
col, min, e);
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
- } else {
- try {
-
columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(),
min));
-
columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(),
min));
- } catch (AnalysisException e) {
- LOG.warn("Failed to deserialize column {} min value
{}.", col, min, e);
-
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
- }
}
- } else {
- columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
}
- if (max != null && !max.equalsIgnoreCase("NULL")) {
- if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID &&
max.equalsIgnoreCase("NULL")) {
+ } else {
+ columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
+ }
+ if (max != null && !max.equalsIgnoreCase("NULL")) {
+ if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID &&
max.equalsIgnoreCase("NULL")) {
+ columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
+ } else {
+ try {
+
columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(),
max));
+
columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(),
max));
+ } catch (AnalysisException e) {
+ LOG.warn("Failed to deserialize column {} max value {}.",
col, max, e);
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
- } else {
- try {
-
columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(),
max));
-
columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(),
max));
- } catch (AnalysisException e) {
- LOG.warn("Failed to deserialize column {} max value
{}.", col, max, e);
-
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
- }
}
- } else {
- columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
- }
- columnStatisticBuilder.setUpdatedTime(row.get(13));
- return columnStatisticBuilder.build();
- } catch (Exception e) {
- LOG.warn("Failed to deserialize column statistics. reason: [{}].
Row [{}]", e.getMessage(), row);
- if (LOG.isDebugEnabled()) {
- LOG.debug(e);
}
- return ColumnStatistic.UNKNOWN;
+ } else {
+ columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
}
+ columnStatisticBuilder.setUpdatedTime(row.get(13));
+ return columnStatisticBuilder.build();
}
public static boolean isAlmostUnique(double ndv, double rowCount) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
index fba1a4d7b72..ec98ee5af15 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
@@ -18,7 +18,6 @@
package org.apache.doris.statistics;
import org.apache.doris.catalog.TableIf;
-import org.apache.doris.qe.InternalQueryExecutionException;
import org.apache.doris.statistics.util.StatisticsUtil;
import org.apache.logging.log4j.LogManager;
@@ -33,21 +32,14 @@ public class ColumnStatisticsCacheLoader extends
BasicAsyncCacheLoader<Statistic
@Override
protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key) {
- Optional<ColumnStatistic> columnStatistic = Optional.empty();
+ Optional<ColumnStatistic> columnStatistic;
try {
// Load from statistics table.
columnStatistic = loadFromStatsTable(key);
if (!columnStatistic.isPresent()) {
// Load from data source metadata
- try {
- TableIf table = StatisticsUtil.findTable(key.catalogId,
key.dbId, key.tableId);
- columnStatistic = table.getColumnStatistic(key.colName);
- } catch (Exception e) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Exception to get column statistics by
metadata. [Catalog:{}, DB:{}, Table:{}]",
- key.catalogId, key.dbId, key.tableId, e);
- }
- }
+ TableIf table = StatisticsUtil.findTable(key.catalogId,
key.dbId, key.tableId);
+ columnStatistic = table.getColumnStatistic(key.colName);
}
} catch (Throwable t) {
LOG.warn("Failed to load stats for column [Catalog:{}, DB:{},
Table:{}, Column:{}], Reason: {}",
@@ -55,6 +47,7 @@ public class ColumnStatisticsCacheLoader extends
BasicAsyncCacheLoader<Statistic
if (LOG.isDebugEnabled()) {
LOG.debug(t);
}
+ return null;
}
if (columnStatistic.isPresent()) {
// For non-empty table, return UNKNOWN if we can't collect ndv
value.
@@ -68,22 +61,9 @@ public class ColumnStatisticsCacheLoader extends
BasicAsyncCacheLoader<Statistic
}
private Optional<ColumnStatistic> loadFromStatsTable(StatisticsCacheKey
key) {
- List<ResultRow> columnResults;
- try {
- columnResults = StatisticsRepository.loadColStats(
- key.catalogId, key.dbId, key.tableId, key.idxId,
key.colName);
- } catch (InternalQueryExecutionException e) {
- LOG.info("Failed to load stats for table {} column {}. Reason:{}",
- key.tableId, key.colName, e.getMessage());
- return Optional.empty();
- }
- ColumnStatistic columnStatistics;
- try {
- columnStatistics =
StatisticsUtil.deserializeToColumnStatistics(columnResults);
- } catch (Exception e) {
- LOG.warn("Exception to deserialize column statistics", e);
- return Optional.empty();
- }
+ List<ResultRow> columnResults
+ = StatisticsRepository.loadColStats(key.catalogId, key.dbId,
key.tableId, key.idxId, key.colName);
+ ColumnStatistic columnStatistics =
StatisticsUtil.deserializeToColumnStatistics(columnResults);
if (columnStatistics == null) {
return Optional.empty();
} else {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatistic.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatistic.java
index d755392e79f..eebe910d8b0 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatistic.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatistic.java
@@ -29,6 +29,7 @@ import org.apache.logging.log4j.Logger;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
+import java.io.IOException;
import java.util.Base64;
import java.util.List;
import java.util.StringJoiner;
@@ -76,7 +77,7 @@ public class PartitionColumnStatistic {
this.updatedTime = updatedTime;
}
- public static PartitionColumnStatistic fromResultRow(List<ResultRow>
resultRows) {
+ public static PartitionColumnStatistic fromResultRow(List<ResultRow>
resultRows) throws IOException {
if (resultRows == null || resultRows.isEmpty()) {
return PartitionColumnStatistic.UNKNOWN;
}
@@ -90,80 +91,68 @@ public class PartitionColumnStatistic {
stringJoiner.toString());
return PartitionColumnStatistic.UNKNOWN;
}
- try {
- return fromResultRow(resultRows.get(0));
- } catch (Throwable t) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Failed to deserialize column stats", t);
- }
- return PartitionColumnStatistic.UNKNOWN;
- }
+ return fromResultRow(resultRows.get(0));
}
- public static PartitionColumnStatistic fromResultRow(ResultRow row) {
+ public static PartitionColumnStatistic fromResultRow(ResultRow row) throws
IOException {
// row : [catalog_id, db_id, tbl_id, idx_id, part_name, col_id,
// count, ndv, null_count, min, max, data_size, update_time]
- try {
- long catalogId = Long.parseLong(row.get(0));
- long dbID = Long.parseLong(row.get(1));
- long tblId = Long.parseLong(row.get(2));
- long idxId = Long.parseLong(row.get(3));
- String colName = row.get(5);
- Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId,
idxId, colName);
- if (col == null) {
- LOG.info("Failed to deserialize column statistics, ctlId: {}
dbId: {}, "
- + "tblId: {} column: {} not exists", catalogId, dbID,
tblId, colName);
- return PartitionColumnStatistic.UNKNOWN;
- }
+ long catalogId = Long.parseLong(row.get(0));
+ long dbID = Long.parseLong(row.get(1));
+ long tblId = Long.parseLong(row.get(2));
+ long idxId = Long.parseLong(row.get(3));
+ String colName = row.get(5);
+ Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId,
colName);
+ if (col == null) {
+ LOG.info("Failed to deserialize column statistics, ctlId: {} dbId:
{}, "
+ + "tblId: {} column: {} not exists", catalogId, dbID,
tblId, colName);
+ return PartitionColumnStatistic.UNKNOWN;
+ }
- PartitionColumnStatisticBuilder partitionStatisticBuilder = new
PartitionColumnStatisticBuilder();
- double count = Double.parseDouble(row.get(6));
- partitionStatisticBuilder.setCount(count);
- String ndv = row.get(7);
- Base64.Decoder decoder = Base64.getDecoder();
- DataInputStream dis = new DataInputStream(new
ByteArrayInputStream(decoder.decode(ndv)));
- Hll hll = new Hll();
- if (!hll.deserialize(dis)) {
- LOG.warn("Failed to deserialize ndv. [{}]", row);
- return PartitionColumnStatistic.UNKNOWN;
- }
- partitionStatisticBuilder.setNdv(Hll128.fromHll(hll));
- String nullCount = row.getWithDefault(8, "0");
-
partitionStatisticBuilder.setNumNulls(Double.parseDouble(nullCount));
- partitionStatisticBuilder.setDataSize(Double
- .parseDouble(row.getWithDefault(11, "0")));
-
partitionStatisticBuilder.setAvgSizeByte(partitionStatisticBuilder.getCount()
== 0
- ? 0 : partitionStatisticBuilder.getDataSize()
- / partitionStatisticBuilder.getCount());
- String min = row.get(9);
- String max = row.get(10);
- if (min != null && !"NULL".equalsIgnoreCase(min)) {
- try {
-
partitionStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(),
min));
-
partitionStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(),
min));
- } catch (AnalysisException e) {
- LOG.warn("Failed to deserialize column {} min value {}.",
col, min, e);
-
partitionStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
- }
- } else {
+ PartitionColumnStatisticBuilder partitionStatisticBuilder = new
PartitionColumnStatisticBuilder();
+ double count = Double.parseDouble(row.get(6));
+ partitionStatisticBuilder.setCount(count);
+ String ndv = row.get(7);
+ Base64.Decoder decoder = Base64.getDecoder();
+ DataInputStream dis = new DataInputStream(new
ByteArrayInputStream(decoder.decode(ndv)));
+ Hll hll = new Hll();
+ if (!hll.deserialize(dis)) {
+ LOG.warn("Failed to deserialize ndv. [{}]", row);
+ return PartitionColumnStatistic.UNKNOWN;
+ }
+ partitionStatisticBuilder.setNdv(Hll128.fromHll(hll));
+ String nullCount = row.getWithDefault(8, "0");
+ partitionStatisticBuilder.setNumNulls(Double.parseDouble(nullCount));
+ partitionStatisticBuilder.setDataSize(Double
+ .parseDouble(row.getWithDefault(11, "0")));
+
partitionStatisticBuilder.setAvgSizeByte(partitionStatisticBuilder.getCount()
== 0
+ ? 0 : partitionStatisticBuilder.getDataSize()
+ / partitionStatisticBuilder.getCount());
+ String min = row.get(9);
+ String max = row.get(10);
+ if (min != null && !"NULL".equalsIgnoreCase(min)) {
+ try {
+
partitionStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(),
min));
+
partitionStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(),
min));
+ } catch (AnalysisException e) {
+ LOG.warn("Failed to deserialize column {} min value {}.", col,
min, e);
partitionStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
}
- if (max != null && !"NULL".equalsIgnoreCase(max)) {
- try {
-
partitionStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(),
max));
-
partitionStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(),
max));
- } catch (AnalysisException e) {
- LOG.warn("Failed to deserialize column {} max value {}.",
col, max, e);
-
partitionStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
- }
- } else {
+ } else {
+ partitionStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
+ }
+ if (max != null && !"NULL".equalsIgnoreCase(max)) {
+ try {
+
partitionStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(),
max));
+
partitionStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(),
max));
+ } catch (AnalysisException e) {
+ LOG.warn("Failed to deserialize column {} max value {}.", col,
max, e);
partitionStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
}
- partitionStatisticBuilder.setUpdatedTime(row.get(12));
- return partitionStatisticBuilder.build();
- } catch (Exception e) {
- LOG.warn("Failed to deserialize column statistics. Row [{}]", row,
e);
- return PartitionColumnStatistic.UNKNOWN;
+ } else {
+ partitionStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
}
+ partitionStatisticBuilder.setUpdatedTime(row.get(12));
+ return partitionStatisticBuilder.build();
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticCacheLoader.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticCacheLoader.java
index c365f6b1a74..7154ad2e819 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticCacheLoader.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticCacheLoader.java
@@ -17,12 +17,12 @@
package org.apache.doris.statistics;
-import org.apache.doris.qe.InternalQueryExecutionException;
import org.apache.doris.statistics.util.StatisticsUtil;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
+import java.io.IOException;
import java.util.List;
import java.util.Optional;
@@ -33,7 +33,7 @@ public class PartitionColumnStatisticCacheLoader extends
@Override
protected Optional<PartitionColumnStatistic>
doLoad(PartitionColumnStatisticCacheKey key) {
- Optional<PartitionColumnStatistic> partitionStatistic =
Optional.empty();
+ Optional<PartitionColumnStatistic> partitionStatistic;
try {
partitionStatistic = loadFromPartitionStatsTable(key);
} catch (Throwable t) {
@@ -42,6 +42,7 @@ public class PartitionColumnStatisticCacheLoader extends
if (LOG.isDebugEnabled()) {
LOG.debug(t);
}
+ return null;
}
if (partitionStatistic.isPresent()) {
// For non-empty table, return UNKNOWN if we can't collect ndv
value.
@@ -54,24 +55,11 @@ public class PartitionColumnStatisticCacheLoader extends
return partitionStatistic;
}
- private Optional<PartitionColumnStatistic>
loadFromPartitionStatsTable(PartitionColumnStatisticCacheKey key) {
- List<ResultRow> partitionResults;
- try {
- String partName = "'" + StatisticsUtil.escapeSQL(key.partId) + "'";
- partitionResults = StatisticsRepository.loadPartitionColumnStats(
+ private Optional<PartitionColumnStatistic>
loadFromPartitionStatsTable(PartitionColumnStatisticCacheKey key)
+ throws IOException {
+ String partName = "'" + StatisticsUtil.escapeSQL(key.partId) + "'";
+ List<ResultRow> partitionResults =
StatisticsRepository.loadPartitionColumnStats(
key.catalogId, key.dbId, key.tableId, key.idxId, partName,
key.colName);
- } catch (InternalQueryExecutionException e) {
- LOG.info("Failed to load stats for table {} column {}. Reason:{}",
- key.tableId, key.colName, e.getMessage());
- return Optional.empty();
- }
- PartitionColumnStatistic partitionStatistic;
- try {
- partitionStatistic =
StatisticsUtil.deserializeToPartitionStatistics(partitionResults);
- } catch (Exception e) {
- LOG.warn("Exception to deserialize partition statistics", e);
- return Optional.empty();
- }
- return Optional.ofNullable(partitionStatistic);
+ return
Optional.ofNullable(StatisticsUtil.deserializeToPartitionStatistics(partitionResults));
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index 6202d40f1d9..76fb22a60e4 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -126,11 +126,20 @@ public class StatisticsRepository {
public static ColumnStatistic queryColumnStatisticsByName(
long ctlId, long dbId, long tableId, long indexId, String colName)
{
+ ColumnStatistic columnStatistic = ColumnStatistic.UNKNOWN;
ResultRow resultRow = queryColumnStatisticById(ctlId, dbId, tableId,
indexId, colName);
if (resultRow == null) {
- return ColumnStatistic.UNKNOWN;
+ return columnStatistic;
}
- return ColumnStatistic.fromResultRow(resultRow);
+ try {
+ columnStatistic = ColumnStatistic.fromResultRow(resultRow);
+ } catch (Exception e) {
+ LOG.warn("Failed to deserialize column statistics. reason: [{}].
Row [{}]", e.getMessage(), resultRow);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(e);
+ }
+ }
+ return columnStatistic;
}
public static List<ResultRow> queryColumnStatisticsByPartitions(TableIf
table, Set<String> columnNames,
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index 7937040d323..0f5c81b1cf0 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -180,8 +180,7 @@ public class StatisticsUtil {
}
}
- public static ColumnStatistic
deserializeToColumnStatistics(List<ResultRow> resultBatches)
- throws Exception {
+ public static ColumnStatistic
deserializeToColumnStatistics(List<ResultRow> resultBatches) {
if (CollectionUtils.isEmpty(resultBatches)) {
return null;
}
@@ -192,7 +191,8 @@ public class StatisticsUtil {
return
resultBatches.stream().map(Histogram::fromResultRow).collect(Collectors.toList());
}
- public static PartitionColumnStatistic
deserializeToPartitionStatistics(List<ResultRow> resultBatches) {
+ public static PartitionColumnStatistic
deserializeToPartitionStatistics(List<ResultRow> resultBatches)
+ throws IOException {
if (CollectionUtils.isEmpty(resultBatches)) {
return null;
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
index 729291d5323..9c0b7fff33c 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
@@ -388,4 +388,48 @@ public class CacheTest extends TestWithFeService {
Thread.sleep(100);
Assertions.assertEquals(1,
columnStatisticsCache.synchronous().asMap().size());
}
+
+ @Test
+ public void testLoadWithException() throws Exception {
+ new MockUp<ColumnStatisticsCacheLoader>() {
+ @Mock
+ protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key)
{
+ return null;
+ }
+ };
+ StatisticsCache statisticsCache = new StatisticsCache();
+ ColumnStatistic columnStatistic =
statisticsCache.getColumnStatistics(1, 1, 1, -1, "col");
+ Thread.sleep(3000);
+ Assertions.assertTrue(columnStatistic.isUnKnown);
+
+ new MockUp<ColumnStatisticsCacheLoader>() {
+ @Mock
+ protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key)
{
+ return Optional.of(new ColumnStatistic(1, 2,
+ null, 3, 4, 5, 6, 7,
+ null, null, false,
+ new Date().toString()));
+ }
+ };
+ columnStatistic = statisticsCache.getColumnStatistics(1, 1, 1, -1,
"col");
+ for (int i = 0; i < 60; i++) {
+ columnStatistic = statisticsCache.getColumnStatistics(1, 1, 1, -1,
"col");
+ if (columnStatistic != ColumnStatistic.UNKNOWN) {
+ break;
+ }
+ System.out.println("Not ready yet.");
+ Thread.sleep(1000);
+ }
+ if (columnStatistic != ColumnStatistic.UNKNOWN) {
+ Assertions.assertEquals(1, columnStatistic.count);
+ Assertions.assertEquals(2, columnStatistic.ndv);
+ Assertions.assertEquals(3, columnStatistic.avgSizeByte);
+ Assertions.assertEquals(4, columnStatistic.numNulls);
+ Assertions.assertEquals(5, columnStatistic.dataSize);
+ Assertions.assertEquals(6, columnStatistic.minValue);
+ Assertions.assertEquals(7, columnStatistic.maxValue);
+ } else {
+ Assertions.fail("Column stats is still unknown");
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]