This is an automated email from the ASF dual-hosted git repository.
lijibing pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 01090cf61f7 [improvement](statistics)Improve statistics cache loading
logic. (#38829) (#39410)
01090cf61f7 is described below
commit 01090cf61f728702436baeab1ee1158aa5ac3e9b
Author: Jibing-Li <[email protected]>
AuthorDate: Thu Aug 15 17:01:24 2024 +0800
[improvement](statistics)Improve statistics cache loading logic. (#38829)
(#39410)
backport: https://github.com/apache/doris/pull/38829
---
.../doris/datasource/hive/HMSExternalTable.java | 12 +-
.../java/org/apache/doris/qe/ShowExecutor.java | 9 +-
.../apache/doris/statistics/ColumnStatistic.java | 138 +++++++++------------
.../statistics/ColumnStatisticsCacheLoader.java | 33 +----
.../doris/statistics/StatisticsRepository.java | 13 +-
.../doris/statistics/util/StatisticsUtil.java | 3 +-
.../org/apache/doris/statistics/CacheTest.java | 44 +++++++
7 files changed, 132 insertions(+), 120 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index 8b0fee92ad8..401509049ea 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -610,21 +610,13 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
continue;
}
ColumnStatisticsData data = tableStat.getStatsData();
- try {
- setStatData(column, data, columnStatisticBuilder, count);
- } catch (AnalysisException e) {
- if (LOG.isDebugEnabled()) {
- LOG.debug(e);
- }
- return Optional.empty();
- }
+ setStatData(column, data, columnStatisticBuilder, count);
}
return Optional.of(columnStatisticBuilder.build());
}
- private void setStatData(Column col, ColumnStatisticsData data,
ColumnStatisticBuilder builder, long count)
- throws AnalysisException {
+ private void setStatData(Column col, ColumnStatisticsData data,
ColumnStatisticBuilder builder, long count) {
long ndv = 0;
long nulls = 0;
double colSize = 0;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
index e693f0aa352..5529f3a6a9f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
@@ -2547,7 +2547,14 @@ public class ShowExecutor {
if (indexName == null) {
continue;
}
- columnStatistics.add(Pair.of(Pair.of(indexName, row.get(5)),
ColumnStatistic.fromResultRow(row)));
+ try {
+ columnStatistics.add(Pair.of(Pair.of(indexName, row.get(5)),
ColumnStatistic.fromResultRow(row)));
+ } catch (Exception e) {
+ LOG.warn("Failed to deserialize column statistics. reason:
[{}]. Row [{}]", e.getMessage(), row);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(e);
+ }
+ }
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
index c932bceac16..bae830e9ed8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
@@ -112,99 +112,81 @@ public class ColumnStatistic {
public static ColumnStatistic fromResultRow(List<ResultRow> resultRows) {
Map<String, ColumnStatistic> partitionIdToColStats = new HashMap<>();
ColumnStatistic columnStatistic = null;
- try {
- for (ResultRow resultRow : resultRows) {
- String partId = resultRow.get(6);
- if (partId == null) {
- columnStatistic = fromResultRow(resultRow);
- } else {
- partitionIdToColStats.put(partId,
fromResultRow(resultRow));
- }
- }
- } catch (Throwable t) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Failed to deserialize column stats", t);
+ for (ResultRow resultRow : resultRows) {
+ String partId = resultRow.get(6);
+ if (partId == null) {
+ columnStatistic = fromResultRow(resultRow);
+ } else {
+ partitionIdToColStats.put(partId, fromResultRow(resultRow));
}
- return ColumnStatistic.UNKNOWN;
- }
- if (columnStatistic == null) {
- return ColumnStatistic.UNKNOWN;
}
return columnStatistic;
}
// TODO: use thrift
public static ColumnStatistic fromResultRow(ResultRow row) {
- try {
- ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder();
- double count = Double.parseDouble(row.get(7));
- columnStatisticBuilder.setCount(count);
- double ndv = Double.parseDouble(row.getWithDefault(8, "0"));
- columnStatisticBuilder.setNdv(ndv);
- String nullCount = row.getWithDefault(9, "0");
- columnStatisticBuilder.setNumNulls(Double.parseDouble(nullCount));
- columnStatisticBuilder.setDataSize(Double
- .parseDouble(row.getWithDefault(12, "0")));
-
columnStatisticBuilder.setAvgSizeByte(columnStatisticBuilder.getCount() == 0
- ? 0 : columnStatisticBuilder.getDataSize()
- / columnStatisticBuilder.getCount());
- long catalogId = Long.parseLong(row.get(1));
- long idxId = Long.parseLong(row.get(4));
- long dbID = Long.parseLong(row.get(2));
- long tblId = Long.parseLong(row.get(3));
- String colName = row.get(5);
- Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId,
idxId, colName);
- if (col == null) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Failed to deserialize column statistics, ctlId:
{} dbId: {}"
- + "tblId: {} column: {} not exists",
- catalogId, dbID, tblId, colName);
- }
- return ColumnStatistic.UNKNOWN;
+ ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder();
+ double count = Double.parseDouble(row.get(7));
+ columnStatisticBuilder.setCount(count);
+ double ndv = Double.parseDouble(row.getWithDefault(8, "0"));
+ columnStatisticBuilder.setNdv(ndv);
+ String nullCount = row.getWithDefault(9, "0");
+ columnStatisticBuilder.setNumNulls(Double.parseDouble(nullCount));
+ columnStatisticBuilder.setDataSize(Double
+ .parseDouble(row.getWithDefault(12, "0")));
+
columnStatisticBuilder.setAvgSizeByte(columnStatisticBuilder.getCount() == 0
+ ? 0 : columnStatisticBuilder.getDataSize()
+ / columnStatisticBuilder.getCount());
+ long catalogId = Long.parseLong(row.get(1));
+ long idxId = Long.parseLong(row.get(4));
+ long dbID = Long.parseLong(row.get(2));
+ long tblId = Long.parseLong(row.get(3));
+ String colName = row.get(5);
+ Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId,
colName);
+ if (col == null) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Failed to deserialize column statistics, ctlId: {}
dbId: {}"
+ + "tblId: {} column: {} not exists",
+ catalogId, dbID, tblId, colName);
}
- String min = row.get(10);
- String max = row.get(11);
- if (min != null && !min.equalsIgnoreCase("NULL")) {
- // Internal catalog get the min/max value using a separate SQL,
- // and the value is already encoded by base64. Need to handle
internal and external catalog separately.
- if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID &&
min.equalsIgnoreCase("NULL")) {
+ return ColumnStatistic.UNKNOWN;
+ }
+ String min = row.get(10);
+ String max = row.get(11);
+ if (min != null && !min.equalsIgnoreCase("NULL")) {
+ // Internal catalog get the min/max value using a separate SQL,
+ // and the value is already encoded by base64. Need to handle
internal and external catalog separately.
+ if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID &&
min.equalsIgnoreCase("NULL")) {
+ columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
+ } else {
+ try {
+
columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(),
min));
+
columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(),
min));
+ } catch (AnalysisException e) {
+ LOG.warn("Failed to deserialize column {} min value {}.",
col, min, e);
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
- } else {
- try {
-
columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(),
min));
-
columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(),
min));
- } catch (AnalysisException e) {
- LOG.warn("Failed to deserialize column {} min value
{}.", col, min, e);
-
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
- }
}
- } else {
- columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
}
- if (max != null && !max.equalsIgnoreCase("NULL")) {
- if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID &&
max.equalsIgnoreCase("NULL")) {
+ } else {
+ columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
+ }
+ if (max != null && !max.equalsIgnoreCase("NULL")) {
+ if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID &&
max.equalsIgnoreCase("NULL")) {
+ columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
+ } else {
+ try {
+
columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(),
max));
+
columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(),
max));
+ } catch (AnalysisException e) {
+ LOG.warn("Failed to deserialize column {} max value {}.",
col, max, e);
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
- } else {
- try {
-
columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(),
max));
-
columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(),
max));
- } catch (AnalysisException e) {
- LOG.warn("Failed to deserialize column {} max value
{}.", col, max, e);
-
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
- }
}
- } else {
- columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
- }
- columnStatisticBuilder.setUpdatedTime(row.get(13));
- return columnStatisticBuilder.build();
- } catch (Exception e) {
- LOG.warn("Failed to deserialize column statistics. reason: [{}].
Row [{}]", e.getMessage(), row);
- if (LOG.isDebugEnabled()) {
- LOG.debug(e);
}
- return ColumnStatistic.UNKNOWN;
+ } else {
+ columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
}
+ columnStatisticBuilder.setUpdatedTime(row.get(13));
+ return columnStatisticBuilder.build();
}
public static boolean isAlmostUnique(double ndv, double rowCount) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
index 056ed7bcee5..cca5e347177 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
@@ -18,7 +18,6 @@
package org.apache.doris.statistics;
import org.apache.doris.catalog.TableIf;
-import org.apache.doris.qe.InternalQueryExecutionException;
import org.apache.doris.statistics.util.StatisticsUtil;
import org.apache.logging.log4j.LogManager;
@@ -33,22 +32,14 @@ public class ColumnStatisticsCacheLoader extends
BasicAsyncCacheLoader<Statistic
@Override
protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key) {
- Optional<ColumnStatistic> columnStatistic = Optional.empty();
+ Optional<ColumnStatistic> columnStatistic;
try {
// Load from statistics table.
columnStatistic = loadFromStatsTable(key);
if (!columnStatistic.isPresent()) {
// Load from data source metadata
- try {
- TableIf table = StatisticsUtil.findTable(key.catalogId,
key.dbId, key.tableId);
- columnStatistic = table.getColumnStatistic(key.colName);
- } catch (Exception e) {
- if (LOG.isDebugEnabled()) {
- LOG.debug(String.format("Exception to get column
statistics by metadata."
- + "[Catalog:{}, DB:{}, Table:{}]",
- key.catalogId, key.dbId, key.tableId), e);
- }
- }
+ TableIf table = StatisticsUtil.findTable(key.catalogId,
key.dbId, key.tableId);
+ columnStatistic = table.getColumnStatistic(key.colName);
}
} catch (Throwable t) {
LOG.warn("Failed to load stats for column [Catalog:{}, DB:{},
Table:{}, Column:{}], Reason: {}",
@@ -56,6 +47,7 @@ public class ColumnStatisticsCacheLoader extends
BasicAsyncCacheLoader<Statistic
if (LOG.isDebugEnabled()) {
LOG.debug(t);
}
+ return null;
}
if (columnStatistic.isPresent()) {
// For non-empty table, return UNKNOWN if we can't collect ndv
value.
@@ -69,22 +61,9 @@ public class ColumnStatisticsCacheLoader extends
BasicAsyncCacheLoader<Statistic
}
private Optional<ColumnStatistic> loadFromStatsTable(StatisticsCacheKey
key) {
- List<ResultRow> columnResults = null;
- try {
- columnResults = StatisticsRepository.loadColStats(
+ List<ResultRow> columnResults = StatisticsRepository.loadColStats(
key.catalogId, key.dbId, key.tableId, key.idxId,
key.colName);
- } catch (InternalQueryExecutionException e) {
- LOG.info("Failed to load stats for table {} column {}. Reason:{}",
- key.tableId, key.colName, e.getMessage());
- return Optional.empty();
- }
- ColumnStatistic columnStatistics;
- try {
- columnStatistics =
StatisticsUtil.deserializeToColumnStatistics(columnResults);
- } catch (Exception e) {
- LOG.warn("Exception to deserialize column statistics", e);
- return Optional.empty();
- }
+ ColumnStatistic columnStatistics =
StatisticsUtil.deserializeToColumnStatistics(columnResults);
if (columnStatistics == null) {
return Optional.empty();
} else {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index 4d2aede413b..9eaf80ea893 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -111,11 +111,20 @@ public class StatisticsRepository {
public static ColumnStatistic queryColumnStatisticsByName(
long ctlId, long dbId, long tableId, long indexId, String colName)
{
+ ColumnStatistic columnStatistic = ColumnStatistic.UNKNOWN;
ResultRow resultRow = queryColumnStatisticById(ctlId, dbId, tableId,
indexId, colName);
if (resultRow == null) {
- return ColumnStatistic.UNKNOWN;
+ return columnStatistic;
}
- return ColumnStatistic.fromResultRow(resultRow);
+ try {
+ columnStatistic = ColumnStatistic.fromResultRow(resultRow);
+ } catch (Exception e) {
+ LOG.warn("Failed to deserialize column statistics. reason: [{}].
Row [{}]", e.getMessage(), resultRow);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(e);
+ }
+ }
+ return columnStatistic;
}
public static List<ColumnStatistic>
queryColumnStatisticsByPartitions(TableName tableName, String colName,
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index 2be007b1419..c2c7de48a9c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -163,8 +163,7 @@ public class StatisticsUtil {
}
}
- public static ColumnStatistic
deserializeToColumnStatistics(List<ResultRow> resultBatches)
- throws Exception {
+ public static ColumnStatistic
deserializeToColumnStatistics(List<ResultRow> resultBatches) {
if (CollectionUtils.isEmpty(resultBatches)) {
return null;
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
index 729291d5323..9c0b7fff33c 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
@@ -388,4 +388,48 @@ public class CacheTest extends TestWithFeService {
Thread.sleep(100);
Assertions.assertEquals(1,
columnStatisticsCache.synchronous().asMap().size());
}
+
+ @Test
+ public void testLoadWithException() throws Exception {
+ new MockUp<ColumnStatisticsCacheLoader>() {
+ @Mock
+ protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key)
{
+ return null;
+ }
+ };
+ StatisticsCache statisticsCache = new StatisticsCache();
+ ColumnStatistic columnStatistic =
statisticsCache.getColumnStatistics(1, 1, 1, -1, "col");
+ Thread.sleep(3000);
+ Assertions.assertTrue(columnStatistic.isUnKnown);
+
+ new MockUp<ColumnStatisticsCacheLoader>() {
+ @Mock
+ protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key)
{
+ return Optional.of(new ColumnStatistic(1, 2,
+ null, 3, 4, 5, 6, 7,
+ null, null, false,
+ new Date().toString()));
+ }
+ };
+ columnStatistic = statisticsCache.getColumnStatistics(1, 1, 1, -1,
"col");
+ for (int i = 0; i < 60; i++) {
+ columnStatistic = statisticsCache.getColumnStatistics(1, 1, 1, -1,
"col");
+ if (columnStatistic != ColumnStatistic.UNKNOWN) {
+ break;
+ }
+ System.out.println("Not ready yet.");
+ Thread.sleep(1000);
+ }
+ if (columnStatistic != ColumnStatistic.UNKNOWN) {
+ Assertions.assertEquals(1, columnStatistic.count);
+ Assertions.assertEquals(2, columnStatistic.ndv);
+ Assertions.assertEquals(3, columnStatistic.avgSizeByte);
+ Assertions.assertEquals(4, columnStatistic.numNulls);
+ Assertions.assertEquals(5, columnStatistic.dataSize);
+ Assertions.assertEquals(6, columnStatistic.minValue);
+ Assertions.assertEquals(7, columnStatistic.maxValue);
+ } else {
+ Assertions.fail("Column stats is still unknown");
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]