This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 08d3052ddda branch-4.0: [cherry-pick](branch40) cherry-pick from
master 59564 and 59473 (#59729)
08d3052ddda is described below
commit 08d3052ddda695321f23d5a05d87678c3e2e1145
Author: zhangstar333 <[email protected]>
AuthorDate: Mon Jan 12 09:31:52 2026 +0800
branch-4.0: [cherry-pick](branch40) cherry-pick from master 59564 and 59473
(#59729)
### What problem does this PR solve?
cherry-pick from master https://github.com/apache/doris/pull/59564 and
https://github.com/apache/doris/pull/59473
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---
.../create_preinstalled_scripts/iceberg/run22.sql | 13 ++++++-
.../doris/datasource/iceberg/IcebergUtils.java | 13 ++-----
.../apache/doris/datasource/paimon/PaimonUtil.java | 9 ++---
.../apache/doris/statistics/AnalysisManager.java | 26 ++++++++++++--
.../doris/statistics/StatisticsAutoCollector.java | 9 ++---
.../doris/statistics/util/StatisticsUtil.java | 40 ++++++++++++++++++----
.../statistics/StatisticsAutoCollectorTest.java | 2 +-
.../iceberg/test_iceberg_varbinary.out | 4 +++
.../iceberg/test_iceberg_varbinary.groovy | 4 +++
9 files changed, 87 insertions(+), 33 deletions(-)
diff --git
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run22.sql
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run22.sql
index 4c0d5883089..b92bfcf7fc6 100644
---
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run22.sql
+++
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run22.sql
@@ -88,4 +88,15 @@ USING iceberg
TBLPROPERTIES(
'write.format.default' = 'parquet',
'format-version' = '1'
-);
\ No newline at end of file
+);
+
+CREATE TABLE binary_partitioned_table (
+ id BIGINT,
+ name STRING,
+ partition_bin BINARY
+)
+USING iceberg
+PARTITIONED BY (partition_bin);
+
+insert into binary_partitioned_table values
+(1, 'a', X"0FF102FDFEFF");
\ No newline at end of file
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
index a7dd141357c..c381239bc0c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
@@ -117,7 +117,6 @@ import org.apache.logging.log4j.Logger;
import java.io.IOException;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
-import java.nio.charset.StandardCharsets;
import java.time.DateTimeException;
import java.time.Instant;
import java.time.LocalDate;
@@ -686,16 +685,8 @@ public class IcebergUtils {
return null;
}
return value.toString();
- case FIXED:
- case BINARY:
- if (value == null) {
- return null;
- }
- // Fixed and binary types are stored as ByteBuffer
- ByteBuffer buffer = (ByteBuffer) value;
- byte[] res = new byte[buffer.limit()];
- buffer.get(res);
- return new String(res, StandardCharsets.UTF_8);
+ // BINARY and FIXED are intentionally not supported here: returning the bytes
+ // as a UTF-8 string may corrupt the data.
case DATE:
if (value == null) {
return null;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java
index 38ba98af038..730ba2ff810 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java
@@ -79,7 +79,6 @@ import org.apache.paimon.utils.RowDataToObjectArrayConverter;
import java.io.FileNotFoundException;
import java.io.IOException;
-import java.nio.charset.StandardCharsets;
import java.time.DateTimeException;
import java.time.LocalDate;
import java.time.LocalTime;
@@ -456,12 +455,8 @@ public class PaimonUtil {
return null;
}
return value.toString();
- case BINARY:
- case VARBINARY:
- if (value == null) {
- return null;
- }
- return new String((byte[]) value, StandardCharsets.UTF_8);
+ // BINARY and VARBINARY are intentionally not supported here: returning the bytes
+ // as a UTF-8 string may corrupt the data.
case DATE:
if (value == null) {
return null;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 08c25db6212..26bc81b985c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -595,8 +595,7 @@ public class AnalysisManager implements Writable {
for (Entry<TableName, Set<Pair<String, String>>> entry :
jobMap.entrySet()) {
TableName table = entry.getKey();
if (tblName == null
- || tblName.getCtl() == null && tblName.getDb() == null
&& tblName.getTbl() == null
- || tblName.equals(table)) {
+ || matchesFilter(tblName, table)) {
result.add(new AutoAnalysisPendingJob(table.getCtl(),
table.getDb(), table.getTbl(), entry.getValue(),
priority));
}
@@ -605,6 +604,29 @@ public class AnalysisManager implements Writable {
return result;
}
+ private boolean matchesFilter(TableName filter, TableName target) {
+ if (StringUtils.isEmpty(filter.getCtl())
+ && StringUtils.isEmpty(filter.getDb())
+ && StringUtils.isEmpty(filter.getTbl())) {
+ return true;
+ }
+
+ if (!StringUtils.isEmpty(filter.getCtl())
+ && !filter.getCtl().equals(target.getCtl())) {
+ return false;
+ }
+ if (!StringUtils.isEmpty(filter.getDb())
+ && !filter.getDb().equals(target.getDb())) {
+ return false;
+ }
+ if (!StringUtils.isEmpty(filter.getTbl())
+ && !filter.getTbl().equals(target.getTbl())) {
+ return false;
+ }
+
+ return true;
+ }
+
public List<AnalysisInfo> findAnalysisJobs(String state, String ctl,
String db,
String table, long jobId, boolean isAuto) {
TableIf tbl = null;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
index 8cd96bc4957..2ce0f05229b 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
@@ -26,7 +26,7 @@ import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.Pair;
import org.apache.doris.common.util.MasterDaemon;
-import org.apache.doris.datasource.hive.HMSExternalTable;
+import org.apache.doris.datasource.iceberg.IcebergExternalTable;
import org.apache.doris.persist.TableStatsDeletionLog;
import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod;
import org.apache.doris.statistics.AnalysisInfo.JobType;
@@ -146,6 +146,9 @@ public class StatisticsAutoCollector extends MasterDaemon {
if (StatisticsUtil.enablePartitionAnalyze() &&
table.isPartitionedTable()) {
analysisMethod = AnalysisMethod.FULL;
}
+ if (table instanceof IcebergExternalTable) { // Iceberg tables only support full analyze for now
+ analysisMethod = AnalysisMethod.FULL;
+ }
boolean isSampleAnalyze = analysisMethod.equals(AnalysisMethod.SAMPLE);
OlapTable olapTable = table instanceof OlapTable ? (OlapTable) table :
null;
AnalysisManager manager = Env.getServingEnv().getAnalysisManager();
@@ -227,9 +230,7 @@ public class StatisticsAutoCollector extends MasterDaemon {
if (tableIf == null) {
return false;
}
- return tableIf instanceof OlapTable
- || tableIf instanceof HMSExternalTable
- && ((HMSExternalTable)
tableIf).getDlaType().equals(HMSExternalTable.DLAType.HIVE);
+ return StatisticsUtil.supportAutoAnalyze(tableIf);
}
protected AnalysisInfo createAnalyzeJobForTbl(TableIf table,
Set<Pair<String, String>> jobColumns,
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index 9e5fbfa4d4d..e28191ed075 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -56,6 +56,7 @@ import org.apache.doris.datasource.ExternalTable;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.datasource.hive.HMSExternalTable;
import org.apache.doris.datasource.hive.HMSExternalTable.DLAType;
+import org.apache.doris.datasource.iceberg.IcebergExternalTable;
import org.apache.doris.nereids.trees.expressions.literal.DateTimeLiteral;
import org.apache.doris.nereids.trees.expressions.literal.IPv4Literal;
import org.apache.doris.nereids.trees.expressions.literal.IPv6Literal;
@@ -1148,20 +1149,45 @@ public class StatisticsUtil {
// 3. Check partition
return needAnalyzePartition(olapTable, tableStatsStatus,
columnStatsMeta);
} else {
- // Now, we only support Hive external table auto analyze.
- if (!(table instanceof HMSExternalTable)) {
+ if (!StatisticsUtil.supportAutoAnalyze(table)) {
return false;
}
- HMSExternalTable hmsTable = (HMSExternalTable) table;
- if (!hmsTable.getDlaType().equals(DLAType.HIVE)) {
- return false;
- }
- // External is hard to calculate change rate, use time interval to
control analyze frequency.
+ // The change rate of an external table is hard to calculate, so use a time
+ // interval to control the analyze frequency.
return System.currentTimeMillis()
- tableStatsStatus.lastAnalyzeTime >
StatisticsUtil.getExternalTableAutoAnalyzeIntervalInMillis();
}
}
+ /**
+ * Check if the table supports auto analyze feature.
+ * @param table The table to check
+ * @return true if the table supports auto analyze, false otherwise
+ */
+ public static boolean supportAutoAnalyze(TableIf table) {
+ if (table == null) {
+ return false;
+ }
+
+ // Support OLAP table
+ if (table instanceof OlapTable) {
+ return true;
+ }
+
+ // Support Iceberg table
+ if (table instanceof IcebergExternalTable) {
+ return true;
+ }
+
+ // Support HMS table (only HIVE and ICEBERG types)
+ if (table instanceof HMSExternalTable) {
+ HMSExternalTable hmsTable = (HMSExternalTable) table;
+ DLAType dlaType = hmsTable.getDlaType();
+ return dlaType.equals(DLAType.HIVE) ||
dlaType.equals(DLAType.ICEBERG);
+ }
+ return false;
+ }
+
public static boolean needAnalyzePartition(OlapTable table, TableStatsMeta
tableStatsStatus,
ColStatsMeta columnStatsMeta) {
if (!StatisticsUtil.enablePartitionAnalyze() ||
!table.isPartitionedTable()) {
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
index 625fd904809..9e7e3a627ab 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
@@ -145,7 +145,7 @@ public class StatisticsAutoCollectorTest {
HMSExternalCatalog hmsCatalog = new HMSExternalCatalog(0, "jdbc_ctl",
null, Maps.newHashMap(), "");
ExternalTable icebergExternalTable = new HMSExternalTable(1,
"hmsTable", "hmsDb", hmsCatalog,
hmsExternalDatabase);
-
Assertions.assertFalse(collector.supportAutoAnalyze(icebergExternalTable));
+
Assertions.assertTrue(collector.supportAutoAnalyze(icebergExternalTable));
new MockUp<HMSExternalTable>() {
@Mock
diff --git
a/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out
b/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out
index 5845c71a460..3d224775383 100644
--- a/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out
+++ b/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out
@@ -88,3 +88,7 @@
-- !select22 --
3 3
+
+-- !select23 --
+1 a 0x0FF102FDFEFF
+
diff --git
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy
index 9db7b5fb934..62d3d367f80 100644
---
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy
+++
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy
@@ -157,4 +157,8 @@ suite("test_iceberg_varbinary",
"p0,external,doris,external_docker,external_dock
qt_select22 """
select multi_distinct_count(col2),multi_distinct_count(col1) from
test_ice_uuid_parquet;
"""
+
+ qt_select23 """
+ select * from binary_partitioned_table where
from_hex(partition_bin)="0FF102FDFEFF";
+ """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]