This is an automated email from the ASF dual-hosted git repository.
lijibing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new dd03546959f [improvement](statistics)Add log for estimating hive table
row count logic. (#42921)
dd03546959f is described below
commit dd03546959f0d3bb36887a92dc71229940a7417f
Author: Jibing-Li <[email protected]>
AuthorDate: Thu Oct 31 10:36:11 2024 +0800
[improvement](statistics)Add log for estimating hive table row count logic.
(#42921)
Add logging to the Hive table row count estimation logic to help
investigate user issues.
---
.../doris/datasource/ExternalRowCountCache.java | 1 +
.../doris/datasource/hive/HMSExternalTable.java | 28 ++++++++++++++++++----
.../doris/datasource/iceberg/IcebergUtils.java | 5 +++-
.../doris/statistics/util/StatisticsUtil.java | 7 +++++-
4 files changed, 35 insertions(+), 6 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java
index 4602c594571..075091e682d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java
@@ -113,6 +113,7 @@ public class ExternalRowCountCache {
if (f.isDone()) {
return f.get().orElse(-1L);
}
+ LOG.info("Row count for table {}.{}.{} is still processing.",
catalogId, dbId, tableId);
} catch (Exception e) {
LOG.warn("Unexpected exception while returning row count", e);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index 516448bdfbb..f72421da8a1 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -525,7 +525,7 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
long rowCount = getRowCountFromExternalSource();
// Only hive table supports estimate row count by listing file.
if (rowCount == -1 && dlaType.equals(DLAType.HIVE)) {
- LOG.debug("Will estimate row count from file list.");
+ LOG.info("Will estimate row count for table {} from file list.",
name);
rowCount = getRowCountFromFileList();
}
return rowCount;
@@ -837,14 +837,16 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
return -1;
}
if (isView()) {
+ LOG.info("Table {} is view, return 0.", name);
return 0;
}
HiveMetaStoreCache.HivePartitionValues partitionValues =
getAllPartitionValues();
// Get files for all partitions.
int samplePartitionSize = Config.hive_stats_partition_sample_size;
- List<HiveMetaStoreCache.FileCacheValue> filesByPartitions =
getFilesForPartitions(partitionValues,
- samplePartitionSize);
+ List<HiveMetaStoreCache.FileCacheValue> filesByPartitions =
+ getFilesForPartitions(partitionValues, samplePartitionSize);
+ LOG.info("Number of files selected for hive table {} is {}", name,
filesByPartitions.size());
long totalSize = 0;
// Calculate the total file size.
for (HiveMetaStoreCache.FileCacheValue files : filesByPartitions) {
@@ -863,14 +865,20 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
estimatedRowSize += column.getDataType().getSlotSize();
}
if (estimatedRowSize == 0) {
+ LOG.warn("Table {} estimated size is 0, return 0.", name);
return 0;
}
int totalPartitionSize = partitionValues == null ? 1 :
partitionValues.getIdToPartitionItem().size();
if (samplePartitionSize != 0 && samplePartitionSize <
totalPartitionSize) {
+ LOG.info("Table {} sampled {} of {} partitions, sampled size is
{}",
+ name, samplePartitionSize, totalPartitionSize, totalSize);
totalSize = totalSize * totalPartitionSize / samplePartitionSize;
}
- return totalSize / estimatedRowSize;
+ long rows = totalSize / estimatedRowSize;
+ LOG.info("Table {} rows {}, total size is {}, estimatedRowSize is {}",
+ name, rows, totalSize, estimatedRowSize);
+ return rows;
}
// Get all partition values from cache.
@@ -888,6 +896,12 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
// no need to worry that this call will invalid or refresh the
cache.
// because it has enough space to keep partition info of all
tables in cache.
partitionValues = cache.getPartitionValues(dbName, name,
partitionColumnTypes);
+ if (partitionValues == null ||
partitionValues.getPartitionNameToIdMap() == null) {
+ LOG.warn("Partition values for hive table {} is null", name);
+ } else {
+ LOG.info("Partition values size for hive table {} is {}",
+ name,
partitionValues.getPartitionNameToIdMap().size());
+ }
}
return partitionValues;
}
@@ -923,6 +937,7 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
// get partitions without cache, so that it will not invalid the
cache when executing
// non query request such as `show table status`
hivePartitions = cache.getAllPartitionsWithoutCache(dbName, name,
partitionValuesList);
+ LOG.info("Partition list size for hive partition table {} is {}",
name, hivePartitions.size());
} else {
hivePartitions.add(new HivePartition(dbName, name, true,
getRemoteTable().getSd().getInputFormat(),
@@ -930,6 +945,11 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
}
// Get files for all partitions.
String bindBrokerName = catalog.bindBrokerName();
+ if (LOG.isDebugEnabled()) {
+ for (HivePartition partition : hivePartitions) {
+ LOG.debug("Chosen partition for table {}. [{}]", name,
partition.toString());
+ }
+ }
return cache.getFilesByPartitionsWithoutCache(hivePartitions,
bindBrokerName);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
index 893ee7bc93b..7ae600756f1 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
@@ -604,11 +604,14 @@ public class IcebergUtils {
.getIcebergTable(catalog, dbName, tbName);
Snapshot snapshot = icebergTable.currentSnapshot();
if (snapshot == null) {
+ LOG.info("Iceberg table {}.{}.{} is empty, return row count 0.",
catalog.getName(), dbName, tbName);
// empty table
return 0;
}
Map<String, String> summary = snapshot.summary();
- return Long.parseLong(summary.get(TOTAL_RECORDS)) -
Long.parseLong(summary.get(TOTAL_POSITION_DELETES));
+ long rows = Long.parseLong(summary.get(TOTAL_RECORDS)) -
Long.parseLong(summary.get(TOTAL_POSITION_DELETES));
+ LOG.info("Iceberg table {}.{}.{} row count in summary is {}",
catalog.getName(), dbName, tbName, rows);
+ return rows;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index d51281eb0e6..b0fc3b9c1cf 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -626,6 +626,7 @@ public class StatisticsUtil {
long rows = Long.parseLong(parameters.get(NUM_ROWS));
// Sometimes, the NUM_ROWS in hms is 0 but actually is not. Need
to check TOTAL_SIZE if NUM_ROWS is 0.
if (rows != 0) {
+ LOG.info("Get row count {} for hive table {} in table
parameters.", rows, table.getName());
return rows;
}
}
@@ -639,9 +640,13 @@ public class StatisticsUtil {
estimatedRowSize += column.getDataType().getSlotSize();
}
if (estimatedRowSize == 0) {
+ LOG.warn("Hive table {} estimated row size is invalid {}",
table.getName(), estimatedRowSize);
return -1;
}
- return totalSize / estimatedRowSize;
+ long rows = totalSize / estimatedRowSize;
+ LOG.info("Get row count {} for hive table {} by total size {} and row
size {}",
+ rows, table.getName(), totalSize, estimatedRowSize);
+ return rows;
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]