This is an automated email from the ASF dual-hosted git repository.

lijibing pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new ffae9ffd86b [improvement](statistics)Improve statistics cache loading 
logic. (#38829) (#39409)
ffae9ffd86b is described below

commit ffae9ffd86bb6e834d4990f5f5a4e661d4ca9fd7
Author: Jibing-Li <[email protected]>
AuthorDate: Thu Aug 15 16:26:00 2024 +0800

    [improvement](statistics)Improve statistics cache loading logic. (#38829) 
(#39409)
    
    backport: https://github.com/apache/doris/pull/38829
---
 .../doris/catalog/external/HMSExternalTable.java   |  11 +-
 .../java/org/apache/doris/qe/ShowExecutor.java     |   9 +-
 .../apache/doris/statistics/ColumnStatistic.java   | 134 +++++++++------------
 .../statistics/ColumnStatisticsCacheLoader.java    |  39 ++----
 .../doris/statistics/StatisticsRepository.java     |  13 +-
 .../doris/statistics/util/StatisticsUtil.java      |   3 +-
 .../org/apache/doris/statistics/CacheTest.java     |  45 +++++++
 7 files changed, 137 insertions(+), 117 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
index e8a45c66cfb..4632433ee49 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
@@ -24,7 +24,6 @@ import org.apache.doris.catalog.HudiUtils;
 import org.apache.doris.catalog.PrimitiveType;
 import org.apache.doris.catalog.ScalarType;
 import org.apache.doris.catalog.Type;
-import org.apache.doris.common.AnalysisException;
 import org.apache.doris.datasource.HMSExternalCatalog;
 import org.apache.doris.datasource.hive.HiveMetaStoreCache;
 import org.apache.doris.datasource.hive.PooledHiveMetaStoreClient;
@@ -563,19 +562,13 @@ public class HMSExternalTable extends ExternalTable {
                 continue;
             }
             ColumnStatisticsData data = tableStat.getStatsData();
-            try {
-                setStatData(column, data, columnStatisticBuilder, count);
-            } catch (AnalysisException e) {
-                LOG.debug(e);
-                return Optional.empty();
-            }
+            setStatData(column, data, columnStatisticBuilder, count);
         }
 
         return Optional.of(columnStatisticBuilder.build());
     }
 
-    private void setStatData(Column col, ColumnStatisticsData data, 
ColumnStatisticBuilder builder, long count)
-            throws AnalysisException {
+    private void setStatData(Column col, ColumnStatisticsData data, 
ColumnStatisticBuilder builder, long count) {
         long ndv = 0;
         long nulls = 0;
         double colSize = 0;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
index e176d5c784e..8e1c8a84732 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
@@ -2523,7 +2523,14 @@ public class ShowExecutor {
                     continue;
                 }
             }
-            columnStatistics.add(Pair.of(Pair.of(row.get(5), indexName), 
ColumnStatistic.fromResultRow(row)));
+            try {
+                columnStatistics.add(Pair.of(Pair.of(indexName, row.get(5)), 
ColumnStatistic.fromResultRow(row)));
+            } catch (Exception e) {
+                LOG.warn("Failed to deserialize column statistics. reason: 
[{}]. Row [{}]", e.getMessage(), row);
+                if (LOG.isDebugEnabled()) {
+                    LOG.debug(e);
+                }
+            }
         }
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
index 672cb91d2ca..3cb1750af47 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
@@ -112,95 +112,79 @@ public class ColumnStatistic {
     public static ColumnStatistic fromResultRow(List<ResultRow> resultRows) {
         Map<String, ColumnStatistic> partitionIdToColStats = new HashMap<>();
         ColumnStatistic columnStatistic = null;
-        try {
-            for (ResultRow resultRow : resultRows) {
-                String partId = resultRow.get(6);
-                if (partId == null) {
-                    columnStatistic = fromResultRow(resultRow);
-                } else {
-                    partitionIdToColStats.put(partId, 
fromResultRow(resultRow));
-                }
+        for (ResultRow resultRow : resultRows) {
+            String partId = resultRow.get(6);
+            if (partId == null) {
+                columnStatistic = fromResultRow(resultRow);
+            } else {
+                partitionIdToColStats.put(partId, fromResultRow(resultRow));
             }
-        } catch (Throwable t) {
-            LOG.debug("Failed to deserialize column stats", t);
-            return ColumnStatistic.UNKNOWN;
-        }
-        if (columnStatistic == null) {
-            return ColumnStatistic.UNKNOWN;
         }
         return columnStatistic;
     }
 
     // TODO: use thrift
     public static ColumnStatistic fromResultRow(ResultRow row) {
-        try {
-            ColumnStatisticBuilder columnStatisticBuilder = new 
ColumnStatisticBuilder();
-            double count = Double.parseDouble(row.get(7));
-            columnStatisticBuilder.setCount(count);
-            double ndv = Double.parseDouble(row.getWithDefault(8, "0"));
-            columnStatisticBuilder.setNdv(ndv);
-            String nullCount = row.getWithDefault(9, "0");
-            columnStatisticBuilder.setNumNulls(Double.parseDouble(nullCount));
-            columnStatisticBuilder.setDataSize(Double
-                    .parseDouble(row.getWithDefault(12, "0")));
-            
columnStatisticBuilder.setAvgSizeByte(columnStatisticBuilder.getCount() == 0
-                    ? 0 : columnStatisticBuilder.getDataSize()
-                    / columnStatisticBuilder.getCount());
-            long catalogId = Long.parseLong(row.get(1));
-            long idxId = Long.parseLong(row.get(4));
-            long dbID = Long.parseLong(row.get(2));
-            long tblId = Long.parseLong(row.get(3));
-            String colName = row.get(5);
-            Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, 
idxId, colName);
-            if (col == null) {
-                LOG.debug("Failed to deserialize column statistics, ctlId: {} 
dbId: {}"
-                                + "tblId: {} column: {} not exists",
-                        catalogId, dbID, tblId, colName);
-                return ColumnStatistic.UNKNOWN;
-            }
-            String min = row.get(10);
-            String max = row.get(11);
-            if (min != null && !min.equalsIgnoreCase("NULL")) {
-                // Internal catalog get the min/max value using a separate SQL,
-                // and the value is already encoded by base64. Need to handle 
internal and external catalog separately.
-                if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && 
min.equalsIgnoreCase("NULL")) {
+        ColumnStatisticBuilder columnStatisticBuilder = new 
ColumnStatisticBuilder();
+        double count = Double.parseDouble(row.get(7));
+        columnStatisticBuilder.setCount(count);
+        double ndv = Double.parseDouble(row.getWithDefault(8, "0"));
+        columnStatisticBuilder.setNdv(ndv);
+        String nullCount = row.getWithDefault(9, "0");
+        columnStatisticBuilder.setNumNulls(Double.parseDouble(nullCount));
+        columnStatisticBuilder.setDataSize(Double
+                .parseDouble(row.getWithDefault(12, "0")));
+        
columnStatisticBuilder.setAvgSizeByte(columnStatisticBuilder.getCount() == 0
+                ? 0 : columnStatisticBuilder.getDataSize()
+                / columnStatisticBuilder.getCount());
+        long catalogId = Long.parseLong(row.get(1));
+        long idxId = Long.parseLong(row.get(4));
+        long dbID = Long.parseLong(row.get(2));
+        long tblId = Long.parseLong(row.get(3));
+        String colName = row.get(5);
+        Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, 
colName);
+        if (col == null) {
+            LOG.debug("Failed to deserialize column statistics, ctlId: {} 
dbId: {}"
+                            + "tblId: {} column: {} not exists",
+                    catalogId, dbID, tblId, colName);
+            return ColumnStatistic.UNKNOWN;
+        }
+        String min = row.get(10);
+        String max = row.get(11);
+        if (min != null && !min.equalsIgnoreCase("NULL")) {
+            // Internal catalog get the min/max value using a separate SQL,
+            // and the value is already encoded by base64. Need to handle 
internal and external catalog separately.
+            if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && 
min.equalsIgnoreCase("NULL")) {
+                columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
+            } else {
+                try {
+                    
columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(),
 min));
+                    
columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), 
min));
+                } catch (AnalysisException e) {
+                    LOG.warn("Failed to deserialize column {} min value {}.", 
col, min, e);
                     
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
-                } else {
-                    try {
-                        
columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(),
 min));
-                        
columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), 
min));
-                    } catch (AnalysisException e) {
-                        LOG.warn("Failed to deserialize column {} min value 
{}.", col, min, e);
-                        
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
-                    }
                 }
-            } else {
-                columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
             }
-            if (max != null && !max.equalsIgnoreCase("NULL")) {
-                if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && 
max.equalsIgnoreCase("NULL")) {
+        } else {
+            columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
+        }
+        if (max != null && !max.equalsIgnoreCase("NULL")) {
+            if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && 
max.equalsIgnoreCase("NULL")) {
+                columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
+            } else {
+                try {
+                    
columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(),
 max));
+                    
columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), 
max));
+                } catch (AnalysisException e) {
+                    LOG.warn("Failed to deserialize column {} max value {}.", 
col, max, e);
                     
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
-                } else {
-                    try {
-                        
columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(),
 max));
-                        
columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), 
max));
-                    } catch (AnalysisException e) {
-                        LOG.warn("Failed to deserialize column {} max value 
{}.", col, max, e);
-                        
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
-                    }
                 }
-            } else {
-                columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
-            }
-            columnStatisticBuilder.setUpdatedTime(row.get(13));
-            return columnStatisticBuilder.build();
-        } catch (Exception e) {
-            LOG.warn("Failed to deserialize column statistics. reason: [{}]. 
Row [{}]", e.getMessage(), row);
-            if (LOG.isDebugEnabled()) {
-                LOG.debug(e);
             }
-            return ColumnStatistic.UNKNOWN;
+        } else {
+            columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
         }
+        columnStatisticBuilder.setUpdatedTime(row.get(13));
+        return columnStatisticBuilder.build();
     }
 
     public static boolean isAlmostUnique(double ndv, double rowCount) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
index 91006cc4953..42d5d542359 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
@@ -18,7 +18,6 @@
 package org.apache.doris.statistics;
 
 import org.apache.doris.catalog.TableIf;
-import org.apache.doris.qe.InternalQueryExecutionException;
 import org.apache.doris.statistics.util.StatisticsUtil;
 
 import org.apache.logging.log4j.LogManager;
@@ -33,27 +32,24 @@ public class ColumnStatisticsCacheLoader extends 
BasicAsyncCacheLoader<Statistic
 
     @Override
     protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key) {
-        Optional<ColumnStatistic> columnStatistic = Optional.empty();
+        Optional<ColumnStatistic> columnStatistic;
         try {
             // Load from statistics table.
             columnStatistic = loadFromStatsTable(key);
             if (!columnStatistic.isPresent()) {
                 // Load from data source metadata
-                try {
-                    TableIf table = StatisticsUtil.findTable(key.catalogId, 
key.dbId, key.tableId);
-                    columnStatistic = table.getColumnStatistic(key.colName);
-                } catch (Exception e) {
-                    if (LOG.isDebugEnabled()) {
-                        LOG.debug(String.format("Exception to get column 
statistics by metadata."
-                                + "[Catalog:{}, DB:{}, Table:{}]",
-                                key.catalogId, key.dbId, key.tableId), e);
-                    }
-                }
+                TableIf table = StatisticsUtil.findTable(key.catalogId, 
key.dbId, key.tableId);
+                columnStatistic = table.getColumnStatistic(key.colName);
             }
         } catch (Throwable t) {
             LOG.warn("Failed to load stats for column [Catalog:{}, DB:{}, 
Table:{}, Column:{}], Reason: {}",
                     key.catalogId, key.dbId, key.tableId, key.colName, 
t.getMessage());
-            LOG.debug(t);
+            if (LOG.isDebugEnabled()) {
+                LOG.debug(t);
+            }
+            // Return null so next time try to get cache with the same key,
+            // it will trigger load function again without cache an empty 
value.
+            return null;
         }
         if (columnStatistic.isPresent()) {
             // For non-empty table, return UNKNOWN if we can't collect ndv 
value.
@@ -67,21 +63,8 @@ public class ColumnStatisticsCacheLoader extends 
BasicAsyncCacheLoader<Statistic
     }
 
     private Optional<ColumnStatistic> loadFromStatsTable(StatisticsCacheKey 
key) {
-        List<ResultRow> columnResults = null;
-        try {
-            columnResults = StatisticsRepository.loadColStats(key.tableId, 
key.idxId, key.colName);
-        } catch (InternalQueryExecutionException e) {
-            LOG.info("Failed to load stats for table {} column {}. Reason:{}",
-                    key.tableId, key.colName, e.getMessage());
-            return Optional.empty();
-        }
-        ColumnStatistic columnStatistics;
-        try {
-            columnStatistics = 
StatisticsUtil.deserializeToColumnStatistics(columnResults);
-        } catch (Exception e) {
-            LOG.warn("Exception to deserialize column statistics", e);
-            return Optional.empty();
-        }
+        List<ResultRow> columnResults = 
StatisticsRepository.loadColStats(key.tableId, key.idxId, key.colName);
+        ColumnStatistic columnStatistics = 
StatisticsUtil.deserializeToColumnStatistics(columnResults);
         if (columnStatistics == null) {
             return Optional.empty();
         } else {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index 0e0ab5e621b..87fdf1225bf 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -111,11 +111,20 @@ public class StatisticsRepository {
             + " AND part_id IS NULL";
 
     public static ColumnStatistic queryColumnStatisticsByName(long tableId, 
long indexId, String colName) {
+        ColumnStatistic columnStatistic = ColumnStatistic.UNKNOWN;
         ResultRow resultRow = queryColumnStatisticById(tableId, indexId, 
colName);
         if (resultRow == null) {
-            return ColumnStatistic.UNKNOWN;
+            return columnStatistic;
         }
-        return ColumnStatistic.fromResultRow(resultRow);
+        try {
+            columnStatistic = ColumnStatistic.fromResultRow(resultRow);
+        } catch (Exception e) {
+            LOG.warn("Failed to deserialize column statistics. reason: [{}]. 
Row [{}]", e.getMessage(), resultRow);
+            if (LOG.isDebugEnabled()) {
+                LOG.debug(e);
+            }
+        }
+        return columnStatistic;
     }
 
     public static List<ColumnStatistic> 
queryColumnStatisticsByPartitions(TableName tableName, String colName,
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index 2164016a47e..e0eef39a217 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -167,8 +167,7 @@ public class StatisticsUtil {
         }
     }
 
-    public static ColumnStatistic 
deserializeToColumnStatistics(List<ResultRow> resultBatches)
-            throws Exception {
+    public static ColumnStatistic 
deserializeToColumnStatistics(List<ResultRow> resultBatches) {
         if (CollectionUtils.isEmpty(resultBatches)) {
             return null;
         }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
index bba6cd0c590..e49feb8a4f4 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
@@ -59,6 +59,7 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.Date;
 import java.util.List;
+import java.util.Optional;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.Executor;
 import java.util.concurrent.ThreadPoolExecutor;
@@ -411,4 +412,48 @@ public class CacheTest extends TestWithFeService {
         Thread.sleep(100);
         Assertions.assertEquals(1, 
columnStatisticsCache.synchronous().asMap().size());
     }
+
+    @Test
+    public void testLoadWithException() throws Exception {
+        new MockUp<ColumnStatisticsCacheLoader>() {
+            @Mock
+            protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key) 
{
+                return null;
+            }
+        };
+        StatisticsCache statisticsCache = new StatisticsCache();
+        ColumnStatistic columnStatistic = 
statisticsCache.getColumnStatistics(1, 1, 1, -1, "col");
+        Thread.sleep(3000);
+        Assertions.assertTrue(columnStatistic.isUnKnown);
+
+        new MockUp<ColumnStatisticsCacheLoader>() {
+            @Mock
+            protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key) 
{
+                return Optional.of(new ColumnStatistic(1, 2,
+                    null, 3, 4, 5, 6, 7,
+                    null, null, false,
+                        new Date().toString()));
+            }
+        };
+        columnStatistic = statisticsCache.getColumnStatistics(1, 1, 1, -1, 
"col");
+        for (int i = 0; i < 60; i++) {
+            columnStatistic = statisticsCache.getColumnStatistics(1, 1, 1, -1, 
"col");
+            if (columnStatistic != ColumnStatistic.UNKNOWN) {
+                break;
+            }
+            System.out.println("Not ready yet.");
+            Thread.sleep(1000);
+        }
+        if (columnStatistic != ColumnStatistic.UNKNOWN) {
+            Assertions.assertEquals(1, columnStatistic.count);
+            Assertions.assertEquals(2, columnStatistic.ndv);
+            Assertions.assertEquals(3, columnStatistic.avgSizeByte);
+            Assertions.assertEquals(4, columnStatistic.numNulls);
+            Assertions.assertEquals(5, columnStatistic.dataSize);
+            Assertions.assertEquals(6, columnStatistic.minValue);
+            Assertions.assertEquals(7, columnStatistic.maxValue);
+        } else {
+            Assertions.fail("Column stats is still unknown");
+        }
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to