This is an automated email from the ASF dual-hosted git repository.

lijibing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 601d8016822 [improvement](statistics)External table getRowCount return 
-1 when row count is not available or row count is 0. (#43009)
601d8016822 is described below

commit 601d8016822602f5c39d75e8e2fec4e7b41b609e
Author: Jibing-Li <[email protected]>
AuthorDate: Fri Nov 1 14:26:42 2024 +0800

    [improvement](statistics)External table getRowCount return -1 when row 
count is not available or row count is 0. (#43009)
    
    External table getRowCount return -1 when row count is not available or
    row count is 0. So the behavior of external table could match with
    internal olap table.
---
 .../java/org/apache/doris/catalog/OlapTable.java   |  8 ++----
 .../main/java/org/apache/doris/catalog/Table.java  |  2 +-
 .../java/org/apache/doris/catalog/TableIf.java     |  2 ++
 .../doris/datasource/ExternalRowCountCache.java    |  6 ++--
 .../org/apache/doris/datasource/ExternalTable.java |  4 +--
 .../doris/datasource/hive/HMSExternalTable.java    | 18 ++++++------
 .../datasource/iceberg/IcebergExternalTable.java   |  3 +-
 .../doris/datasource/iceberg/IcebergUtils.java     |  5 ++--
 .../datasource/paimon/PaimonExternalTable.java     |  8 ++++--
 .../doris/statistics/StatisticsAutoCollector.java  |  2 +-
 .../doris/statistics/util/StatisticsUtil.java      |  8 +++---
 .../datasource/ExternalRowCountCacheTest.java      | 32 ++++++++++++++++++++--
 12 files changed, 66 insertions(+), 32 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index 999e0c43995..b0d27ac7b5c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -139,8 +139,6 @@ public class OlapTable extends Table implements 
MTMVRelatedTableIf, GsonPostProc
         WAITING_STABLE
     }
 
-    public static long ROW_COUNT_BEFORE_REPORT = -1;
-
     @SerializedName(value = "tst", alternate = {"state"})
     private volatile OlapTableState state;
 
@@ -1618,12 +1616,12 @@ public class OlapTable extends Table implements 
MTMVRelatedTableIf, GsonPostProc
             if (index == null) {
                 LOG.warn("Index {} not exist in partition {}, table {}, {}",
                         indexId, entry.getValue().getName(), id, name);
-                return ROW_COUNT_BEFORE_REPORT;
+                return UNKNOWN_ROW_COUNT;
             }
             if (strict && !index.getRowCountReported()) {
-                return ROW_COUNT_BEFORE_REPORT;
+                return UNKNOWN_ROW_COUNT;
             }
-            rowCount += index.getRowCount() == -1 ? 0 : index.getRowCount();
+            rowCount += index.getRowCount() == UNKNOWN_ROW_COUNT ? 0 : 
index.getRowCount();
         }
         return rowCount;
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
index d98bba5edac..d85d98a8ea5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
@@ -623,7 +623,7 @@ public abstract class Table extends MetaObject implements 
Writable, TableIf, Gso
 
     @Override
     public long fetchRowCount() {
-        return 0;
+        return UNKNOWN_ROW_COUNT;
     }
 
     @Override
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java
index 3a688a7b59d..8f6e924f44a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java
@@ -55,6 +55,8 @@ import java.util.stream.Collectors;
 public interface TableIf {
     Logger LOG = LogManager.getLogger(TableIf.class);
 
+    long UNKNOWN_ROW_COUNT = -1;
+
     default void readLock() {
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java
index 075091e682d..fc955c4964a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java
@@ -102,7 +102,7 @@ public class ExternalRowCountCache {
     }
 
     /**
-     * Get cached row count for the given table. Return 0 if cached not loaded 
or table not exists.
+     * Get cached row count for the given table. Return -1 if cached not 
loaded or table not exists.
      * Cached will be loaded async.
      * @return Cached row count or -1 if not exist
      */
@@ -111,13 +111,13 @@ public class ExternalRowCountCache {
         try {
             CompletableFuture<Optional<Long>> f = rowCountCache.get(key);
             if (f.isDone()) {
-                return f.get().orElse(-1L);
+                return f.get().orElse(TableIf.UNKNOWN_ROW_COUNT);
             }
             LOG.info("Row count for table {}.{}.{} is still processing.", 
catalogId, dbId, tableId);
         } catch (Exception e) {
             LOG.warn("Unexpected exception while returning row count", e);
         }
-        return -1;
+        return TableIf.UNKNOWN_ROW_COUNT;
     }
 
     /**
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
index 5c57c13b4b8..1eadb46fe82 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
@@ -195,7 +195,7 @@ public class ExternalTable implements TableIf, Writable, 
GsonPostProcessable {
             makeSureInitialized();
         } catch (Exception e) {
             LOG.warn("Failed to initialize table {}.{}.{}", catalog.getName(), 
dbName, name, e);
-            return -1;
+            return TableIf.UNKNOWN_ROW_COUNT;
         }
         // All external table should get external row count from cache.
         return 
Env.getCurrentEnv().getExtMetaCacheMgr().getRowCountCache().getCachedRowCount(catalog.getId(),
 dbId, id);
@@ -221,7 +221,7 @@ public class ExternalTable implements TableIf, Writable, 
GsonPostProcessable {
      * This is called by ExternalRowCountCache to load row count cache.
      */
     public long fetchRowCount() {
-        return -1;
+        return UNKNOWN_ROW_COUNT;
     }
 
     @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index f72421da8a1..b48b47acf13 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -337,7 +337,7 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
     }
 
     private long getRowCountFromExternalSource() {
-        long rowCount = -1;
+        long rowCount = UNKNOWN_ROW_COUNT;
         switch (dlaType) {
             case HIVE:
                 rowCount = StatisticsUtil.getHiveRowCount(this);
@@ -350,7 +350,7 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
                     LOG.debug("getRowCount for dlaType {} is not supported.", 
dlaType);
                 }
         }
-        return rowCount;
+        return rowCount > 0 ? rowCount : UNKNOWN_ROW_COUNT;
     }
 
     @Override
@@ -524,7 +524,7 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
         // Get row count from hive metastore property.
         long rowCount = getRowCountFromExternalSource();
         // Only hive table supports estimate row count by listing file.
-        if (rowCount == -1 && dlaType.equals(DLAType.HIVE)) {
+        if (rowCount == UNKNOWN_ROW_COUNT && dlaType.equals(DLAType.HIVE)) {
             LOG.info("Will estimate row count for table {} from file list.", 
name);
             rowCount = getRowCountFromFileList();
         }
@@ -834,11 +834,11 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
      */
     private long getRowCountFromFileList() {
         if (!GlobalVariable.enable_get_row_count_from_file_list) {
-            return -1;
+            return UNKNOWN_ROW_COUNT;
         }
         if (isView()) {
-            LOG.info("Table {} is view, return 0.", name);
-            return 0;
+            LOG.info("Table {} is view, return -1.", name);
+            return UNKNOWN_ROW_COUNT;
         }
         HiveMetaStoreCache.HivePartitionValues partitionValues = 
getAllPartitionValues();
 
@@ -865,8 +865,8 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
             estimatedRowSize += column.getDataType().getSlotSize();
         }
         if (estimatedRowSize == 0) {
-            LOG.warn("Table {} estimated size is 0, return 0.", name);
-            return 0;
+            LOG.warn("Table {} estimated size is 0, return -1.", name);
+            return UNKNOWN_ROW_COUNT;
         }
 
         int totalPartitionSize = partitionValues == null ? 1 : 
partitionValues.getIdToPartitionItem().size();
@@ -878,7 +878,7 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
         long rows = totalSize / estimatedRowSize;
         LOG.info("Table {} rows {}, total size is {}, estimatedRowSize is {}",
                 name, rows, totalSize, estimatedRowSize);
-        return rows;
+        return rows > 0 ? rows : UNKNOWN_ROW_COUNT;
     }
 
     // Get all partition values from cache.
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalTable.java
index d4361a47797..feded88ea32 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalTable.java
@@ -83,7 +83,8 @@ public class IcebergExternalTable extends ExternalTable {
     @Override
     public long fetchRowCount() {
         makeSureInitialized();
-        return IcebergUtils.getIcebergRowCount(getCatalog(), getDbName(), 
getName());
+        long rowCount = IcebergUtils.getIcebergRowCount(getCatalog(), 
getDbName(), getName());
+        return rowCount > 0 ? rowCount : UNKNOWN_ROW_COUNT;
     }
 
     public Table getIcebergTable() {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
index 7ae600756f1..ba6d628e492 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
@@ -41,6 +41,7 @@ import org.apache.doris.catalog.MapType;
 import org.apache.doris.catalog.ScalarType;
 import org.apache.doris.catalog.StructField;
 import org.apache.doris.catalog.StructType;
+import org.apache.doris.catalog.TableIf;
 import org.apache.doris.catalog.Type;
 import org.apache.doris.common.UserException;
 import org.apache.doris.common.info.SimpleTableInfo;
@@ -604,9 +605,9 @@ public class IcebergUtils {
                 .getIcebergTable(catalog, dbName, tbName);
         Snapshot snapshot = icebergTable.currentSnapshot();
         if (snapshot == null) {
-            LOG.info("Iceberg table {}.{}.{} is empty, return row count 0.", 
catalog.getName(), dbName, tbName);
+            LOG.info("Iceberg table {}.{}.{} is empty, return -1.", 
catalog.getName(), dbName, tbName);
             // empty table
-            return 0;
+            return TableIf.UNKNOWN_ROW_COUNT;
         }
         Map<String, String> summary = snapshot.summary();
         long rows = Long.parseLong(summary.get(TOTAL_RECORDS)) - 
Long.parseLong(summary.get(TOTAL_POSITION_DELETES));
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java
index 4b364ef45ca..c9eaf1b7df3 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java
@@ -193,12 +193,16 @@ public class PaimonExternalTable extends ExternalTable {
         Table paimonTable = schemaCacheValue.map(value -> 
((PaimonSchemaCacheValue) value).getPaimonTable())
                 .orElse(null);
         if (paimonTable == null) {
-            return -1;
+            LOG.info("Paimon table {} is null.", name);
+            return UNKNOWN_ROW_COUNT;
         }
         List<Split> splits = 
paimonTable.newReadBuilder().newScan().plan().splits();
         for (Split split : splits) {
             rowCount += split.rowCount();
         }
-        return rowCount;
+        if (rowCount == 0) {
+            LOG.info("Paimon table {} row count is 0, return -1", name);
+        }
+        return rowCount > 0 ? rowCount : UNKNOWN_ROW_COUNT;
     }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
index 9ba52169605..28c2bd95c96 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
@@ -208,7 +208,7 @@ public class StatisticsAutoCollector extends MasterDaemon {
         }
         if (table instanceof OlapTable && 
analysisMethod.equals(AnalysisMethod.SAMPLE)) {
             OlapTable ot = (OlapTable) table;
-            if (ot.getRowCountForIndex(ot.getBaseIndexId(), true) == 
OlapTable.ROW_COUNT_BEFORE_REPORT) {
+            if (ot.getRowCountForIndex(ot.getBaseIndexId(), true) == 
TableIf.UNKNOWN_ROW_COUNT) {
                 LOG.info("Table {} row count is not fully reported, skip auto 
analyzing this time.", ot.getName());
                 return null;
             }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index b0fc3b9c1cf..a9c1612eb48 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -619,19 +619,19 @@ public class StatisticsUtil {
     public static long getHiveRowCount(HMSExternalTable table) {
         Map<String, String> parameters = 
table.getRemoteTable().getParameters();
         if (parameters == null) {
-            return -1;
+            return TableIf.UNKNOWN_ROW_COUNT;
         }
         // Table parameters contains row count, simply get and return it.
         if (parameters.containsKey(NUM_ROWS)) {
             long rows = Long.parseLong(parameters.get(NUM_ROWS));
             // Sometimes, the NUM_ROWS in hms is 0 but actually is not. Need 
to check TOTAL_SIZE if NUM_ROWS is 0.
-            if (rows != 0) {
+            if (rows > 0) {
                 LOG.info("Get row count {} for hive table {} in table 
parameters.", rows, table.getName());
                 return rows;
             }
         }
         if (!parameters.containsKey(TOTAL_SIZE)) {
-            return -1;
+            return TableIf.UNKNOWN_ROW_COUNT;
         }
         // Table parameters doesn't contain row count but contain total size. 
Estimate row count : totalSize/rowSize
         long totalSize = Long.parseLong(parameters.get(TOTAL_SIZE));
@@ -641,7 +641,7 @@ public class StatisticsUtil {
         }
         if (estimatedRowSize == 0) {
             LOG.warn("Hive table {} estimated row size is invalid {}", 
table.getName(), estimatedRowSize);
-            return -1;
+            return TableIf.UNKNOWN_ROW_COUNT;
         }
         long rows = totalSize / estimatedRowSize;
         LOG.info("Get row count {} for hive table {} by total size {} and row 
size {}",
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/datasource/ExternalRowCountCacheTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/datasource/ExternalRowCountCacheTest.java
index 10b6b01527c..81605f93dcd 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/datasource/ExternalRowCountCacheTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/datasource/ExternalRowCountCacheTest.java
@@ -17,6 +17,7 @@
 
 package org.apache.doris.datasource;
 
+import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.ThreadPoolManager;
 
 import mockit.Mock;
@@ -44,7 +45,7 @@ public class ExternalRowCountCacheTest {
         };
         ExternalRowCountCache cache = new ExternalRowCountCache(executor);
         long cachedRowCount = cache.getCachedRowCount(1, 1, 1);
-        Assertions.assertEquals(-1, cachedRowCount);
+        Assertions.assertEquals(TableIf.UNKNOWN_ROW_COUNT, cachedRowCount);
         for (int i = 0; i < 60; i++) {
             if (counter.get() == 1) {
                 break;
@@ -63,12 +64,39 @@ public class ExternalRowCountCacheTest {
         cache.getCachedRowCount(1, 1, 1);
         for (int i = 0; i < 60; i++) {
             cachedRowCount = cache.getCachedRowCount(1, 1, 1);
-            if (cachedRowCount != -1) {
+            if (cachedRowCount != TableIf.UNKNOWN_ROW_COUNT) {
                 Assertions.assertEquals(100, cachedRowCount);
                 break;
             }
             Thread.sleep(1000);
         }
+        cachedRowCount = cache.getCachedRowCount(1, 1, 1);
+        Assertions.assertEquals(100, cachedRowCount);
         Assertions.assertEquals(2, counter.get());
+
+        new MockUp<ExternalRowCountCache.RowCountCacheLoader>() {
+            @Mock
+            protected Optional<Long> doLoad(ExternalRowCountCache.RowCountKey 
rowCountKey) {
+                counter.incrementAndGet();
+                try {
+                    Thread.sleep(1000000);
+                } catch (InterruptedException e) {
+                    e.printStackTrace();
+                }
+                return Optional.of(100L);
+            }
+        };
+        cachedRowCount = cache.getCachedRowCount(2, 2, 2);
+        Assertions.assertEquals(TableIf.UNKNOWN_ROW_COUNT, cachedRowCount);
+        Thread.sleep(1000);
+        cachedRowCount = cache.getCachedRowCount(2, 2, 2);
+        Assertions.assertEquals(TableIf.UNKNOWN_ROW_COUNT, cachedRowCount);
+        for (int i = 0; i < 60; i++) {
+            if (counter.get() == 3) {
+                break;
+            }
+            Thread.sleep(1000);
+        }
+        Assertions.assertEquals(3, counter.get());
     }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to