This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 445f72b3951 [ut](stats) Added tests for HMS analysis tasks (#28583)
445f72b3951 is described below
commit 445f72b39514569a0d3dae4bee1539832046f3d6
Author: Nitin-Kashyap <[email protected]>
AuthorDate: Fri Dec 29 23:09:50 2023 +0700
[ut](stats) Added tests for HMS analysis tasks (#28583)
---
.../apache/doris/statistics/HMSAnalysisTask.java | 2 +-
.../doris/statistics/HMSAnalysisTaskTest.java | 151 +++++++++++++++++++++
2 files changed, 152 insertions(+), 1 deletion(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
index efd99d1eca9..5053fc62a2d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
@@ -88,7 +88,7 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
/**
* Get column statistics and insert the result to __internal_schema.column_statistics
*/
- private void getTableColumnStats() throws Exception {
+ protected void getTableColumnStats() throws Exception {
if (!info.usingSqlForPartitionColumn && isPartitionColumn()) {
try {
getPartitionColumnStats();
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
index 12a1a9c046b..fb0a3b3c2ca 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
@@ -19,12 +19,17 @@ package org.apache.doris.statistics;
import org.apache.doris.analysis.TableSample;
import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.external.HMSExternalTable;
import org.apache.doris.common.Pair;
+import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.statistics.util.StatisticsUtil;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
+import mockit.Expectations;
import mockit.Mock;
import mockit.MockUp;
import mockit.Mocked;
@@ -32,7 +37,9 @@ import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
+import java.util.Set;
public class HMSAnalysisTaskTest {
@@ -160,4 +167,148 @@ public class HMSAnalysisTaskTest {
Assertions.assertEquals(0, info2.second);
}
+ @Test
+ public void testGetSampleInfoPercent(@Mocked HMSExternalTable tableIf)
+ throws Exception {
+ new MockUp<HMSExternalTable>() {
+ @Mock
+ public List<Long> getChunkSizes() {
+ return Arrays.asList(1024L, 2048L);
+ }
+ };
+ HMSAnalysisTask task = new HMSAnalysisTask();
+ task.setTable(tableIf);
+ AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder();
+ analysisInfoBuilder.setJobType(AnalysisInfo.JobType.MANUAL);
+ analysisInfoBuilder.setAnalysisMethod(AnalysisInfo.AnalysisMethod.SAMPLE);
+ analysisInfoBuilder.setSamplePercent(10);
+ task.info = analysisInfoBuilder.build();
+
+ task.tableSample = new TableSample(true, 10L);
+ Pair<Double, Long> info = task.getSampleInfo();
+ Assertions.assertEquals(1.5, info.first);
+ Assertions.assertEquals(2048, info.second);
+ }
+
+ @Test
+ public void testOrdinaryStats(@Mocked CatalogIf catalogIf, @Mocked DatabaseIf databaseIf, @Mocked HMSExternalTable tableIf)
+ throws Exception {
+
+ new Expectations() {
+ {
+ tableIf.getId();
+ result = 30001;
+ tableIf.getName();
+ result = "test";
+ catalogIf.getId();
+ result = 10001;
+ catalogIf.getName();
+ result = "hms";
+ databaseIf.getId();
+ result = 20001;
+ databaseIf.getFullName();
+ result = "default";
+ }
+ };
+
+ new MockUp<HMSExternalTable>() {
+ @Mock
+ public Set<String> getPartitionNames() {
+ return ImmutableSet.of("date=20230101/hour=12");
+ }
+ };
+
+ new MockUp<HMSAnalysisTask>() {
+ @Mock
+ public void runQuery(String sql) {
+ Assertions.assertEquals("SELECT CONCAT(30001, '-', -1, '-', 'hour') AS `id`,"
+ + " 10001 AS `catalog_id`,"
+ + " 20001 AS `db_id`,"
+ + " 30001 AS `tbl_id`,"
+ + " -1 AS `idx_id`,"
+ + " 'hour' AS `col_id`,"
+ + " NULL AS `part_id`,"
+ + " COUNT(1) AS `row_count`,"
+ + " NDV(`hour`) AS `ndv`,"
+ + " COUNT(1) - COUNT(`hour`) AS `null_count`,"
+ + " SUBSTRING(CAST(MIN(`hour`) AS STRING), 1, 1024) AS `min`,"
+ + " SUBSTRING(CAST(MAX(`hour`) AS STRING), 1, 1024) AS `max`,"
+ + " COUNT(1) * 4 AS `data_size`,"
+ + " NOW() AS `update_time`"
+ + " FROM `hms`.`default`.`test`", sql);
+ }
+ };
+
+ HMSAnalysisTask task = new HMSAnalysisTask();
+ task.col = new Column("hour", PrimitiveType.INT);
+ task.tbl = tableIf;
+ task.catalog = catalogIf;
+ task.db = databaseIf;
+ task.setTable(tableIf);
+
+ AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder();
+ analysisInfoBuilder.setColName("hour");
+ analysisInfoBuilder.setJobType(AnalysisInfo.JobType.MANUAL);
+ analysisInfoBuilder.setUsingSqlForPartitionColumn(true);
+ task.info = analysisInfoBuilder.build();
+
+ task.getTableColumnStats();
+ }
+
+
+ @Test
+ public void testPartitionHMSStats(@Mocked CatalogIf catalogIf, @Mocked DatabaseIf databaseIf, @Mocked HMSExternalTable tableIf)
+ throws Exception {
+
+ new Expectations() {
+ {
+ tableIf.getId();
+ result = 30001;
+ catalogIf.getId();
+ result = 10001;
+ catalogIf.getName();
+ result = "hms";
+ databaseIf.getId();
+ result = 20001;
+ }
+ };
+
+ new MockUp<HMSExternalTable>() {
+ @Mock
+ public Set<String> getPartitionNames() {
+ return ImmutableSet.of("date=20230101/hour=12");
+ }
+
+ @Mock
+ public List<Column> getPartitionColumns() {
+ return ImmutableList.of(new Column("hour", PrimitiveType.INT));
+ }
+ };
+
+ new MockUp<HMSAnalysisTask>() {
+ @Mock
+ public void runQuery(String sql) {
+ Assertions.assertEquals(" SELECT CONCAT(30001, '-', -1, '-', 'hour') AS `id`, "
+ + "10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, "
+ + "'hour' AS `col_id`, NULL AS `part_id`, 0 AS `row_count`, 1 AS `ndv`, "
+ + "0 AS `null_count`, SUBSTRING(CAST('12' AS STRING), 1, 1024) AS `min`, "
+ + "SUBSTRING(CAST('12' AS STRING), 1, 1024) AS `max`, 0 AS `data_size`, NOW() ", sql);
+ }
+ };
+
+ HMSAnalysisTask task = new HMSAnalysisTask();
+ task.col = new Column("hour", PrimitiveType.INT);
+ task.tbl = tableIf;
+ task.catalog = catalogIf;
+ task.db = databaseIf;
+ task.setTable(tableIf);
+
+ AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder();
+ analysisInfoBuilder.setColName("hour");
+ analysisInfoBuilder.setJobType(AnalysisInfo.JobType.MANUAL);
+ analysisInfoBuilder.setUsingSqlForPartitionColumn(false);
+ task.info = analysisInfoBuilder.build();
+
+ task.getTableColumnStats();
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]