This is an automated email from the ASF dual-hosted git repository.
yuqi4733 pushed a commit to branch branch-1.0
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/branch-1.0 by this push:
new ad3372cd26 [#8218] fix(doris): Fix BUCKETS AUTO parsing for RANDOM
distribution (#8447)
ad3372cd26 is described below
commit ad3372cd261a0e459b96bd0c31b1b68ec597523c
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Sep 5 11:03:42 2025 +0800
[#8218] fix(doris): Fix BUCKETS AUTO parsing for RANDOM distribution (#8447)
<!--
1. Title: [#<issue>] <type>(<scope>): <subject>
Examples:
- "[#123] feat(operator): support xxx"
- "[#233] fix: check null before access result in xxx"
- "[MINOR] refactor: fix typo in variable name"
- "[MINOR] docs: fix typo in README"
- "[#255] test: fix flaky test NameOfTheTest"
Reference: https://www.conventionalcommits.org/en/v1.0.0/
2. If the PR is unfinished, please mark this PR as draft.
-->
### What changes were proposed in this pull request?
Fixed the `DorisUtils.extractBucketNum()` method to properly handle
`BUCKETS AUTO` parsing for `DISTRIBUTED BY RANDOM` statements. The
method was incorrectly calling `matcher.find(5)`, which is not a check on
capture group 5: `Matcher.find(int)` takes a character index, resets the
matcher, and searches again from that index. This caused the bucket value
extraction to fail, resulting in a `NumberFormatException` when parsing
null values.
**Changes:**
- Changed `if (matcher.find(5))` to `if (matcher.group(5) != null)` in
`DorisUtils.extractBucketNum()`
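- Added test case for `DISTRIBUTED BY RANDOM BUCKETS AUTO` in
`TestDorisUtils.testDistributedInfoPattern()`
- Added a `CODE_BUCKETS_AUTO_NOT_SUPPORTED` error code to
`DorisExceptionConverter` so that SQL syntax errors mentioning `AUTO` are
reported with a hint about the required Doris version, with matching
assertions in `TestDorisExceptionConverter`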
### Why are the changes needed?
This fixes a critical bug where Doris table operations fail when
encountering `DISTRIBUTED BY RANDOM BUCKETS AUTO` syntax. The root cause
was that `matcher.find(5)` advances the matcher position after the
initial match, and since there are no subsequent matches, it returns
`false` and never reaches the bucket value extraction logic. This leaves
`bucketValue` as null, causing `Integer.valueOf(bucketValue)` to throw
`NumberFormatException: Cannot parse null string`.
The bug affects users trying to load tables that use auto bucket
distribution with the random strategy, which is valid Doris SQL syntax.
Fix: #8218
### Does this PR introduce _any_ user-facing change?
No user-facing API changes. This is a bug fix that enables proper
parsing of existing Doris SQL syntax that was previously failing. Users
can now successfully work with tables using `DISTRIBUTED BY RANDOM
BUCKETS AUTO` without encountering parsing errors.
### How was this patch tested?
1. **Existing tests**: Verified all existing Doris utility tests
continue to pass, ensuring no regression
2. **New test case**: Added
`TestDorisUtils.testDistributedInfoPattern()` test case specifically for
`DISTRIBUTED BY RANDOM BUCKETS AUTO` to prevent future regressions
3. **Manual verification**: Created and ran a standalone test to
reproduce the original bug and verify the fix resolves the
`NumberFormatException`
4. **Code formatting**: Applied Spotless formatting to ensure code style
compliance
**Test command:**
```
./gradlew :catalogs:catalog-jdbc-doris:test --tests="*TestDorisUtils*"
```
All tests pass successfully with the fix applied.
Co-authored-by: Mathieu Baurin <[email protected]>
---
.../doris/converter/DorisExceptionConverter.java | 21 +++++++++++++++++++++
.../gravitino/catalog/doris/utils/DorisUtils.java | 2 +-
.../converter/TestDorisExceptionConverter.java | 11 +++++++++++
.../catalog/doris/utils/TestDorisUtils.java | 6 ++++++
4 files changed, 39 insertions(+), 1 deletion(-)
diff --git
a/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/converter/DorisExceptionConverter.java
b/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/converter/DorisExceptionConverter.java
index ccdd4c7da2..bbd4e5099e 100644
---
a/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/converter/DorisExceptionConverter.java
+++
b/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/converter/DorisExceptionConverter.java
@@ -49,6 +49,7 @@ public class DorisExceptionConverter extends
JdbcExceptionConverter {
static final int CODE_OTHER = 1105;
static final int CODE_DELETE_NON_EXISTING_PARTITION = 1507;
static final int CODE_PARTITION_ALREADY_EXISTS = 1517;
+ static final int CODE_BUCKETS_AUTO_NOT_SUPPORTED = 1064;
private static final String DATABASE_ALREADY_EXISTS_PATTERN_STRING =
".*?detailMessage = Can't create database '.*?'; database exists";
@@ -83,6 +84,12 @@ public class DorisExceptionConverter extends
JdbcExceptionConverter {
private static final Pattern PARTITION_ALREADY_EXISTS_PARTITION =
Pattern.compile(PARTITION_ALREADY_EXISTS_STRING);
+ private static final String BUCKETS_AUTO_NOT_SUPPORTED_STRING =
+ ".*?syntax error.*AUTO.*|.*?You have an error in your SQL
syntax.*AUTO.*|.*?errCode = 2, detailMessage = Syntax error.*AUTO.*";
+
+ private static final Pattern BUCKETS_AUTO_NOT_SUPPORTED_PATTERN =
+ Pattern.compile(BUCKETS_AUTO_NOT_SUPPORTED_STRING,
Pattern.CASE_INSENSITIVE);
+
@SuppressWarnings("FormatStringAnnotation")
@Override
public GravitinoRuntimeException toGravitinoException(SQLException se) {
@@ -109,6 +116,15 @@ public class DorisExceptionConverter extends
JdbcExceptionConverter {
return new NoSuchPartitionException(se, se.getMessage());
case CODE_PARTITION_ALREADY_EXISTS:
return new PartitionAlreadyExistsException(se, se.getMessage());
+ case CODE_BUCKETS_AUTO_NOT_SUPPORTED:
+ String bucketsAutoMessage =
+ String.format(
+ "BUCKETS AUTO is not supported in this version of Apache Doris
(requires Doris 1.2.2+). "
+ + "BUCKETS AUTO was introduced in Doris 1.2.2. "
+ + "Please either upgrade to Doris 1.2.2+ or specify a
specific bucket number instead of AUTO. "
+ + "Original error: %s",
+ se.getMessage());
+ return new GravitinoRuntimeException(se, bucketsAutoMessage);
default:
if (se.getMessage() != null && se.getMessage().contains("Access
denied")) {
return new ConnectionFailedException(se, se.getMessage());
@@ -145,6 +161,11 @@ public class DorisExceptionConverter extends
JdbcExceptionConverter {
if (PARTITION_ALREADY_EXISTS_PARTITION.matcher(message).matches()) {
return CODE_PARTITION_ALREADY_EXISTS;
}
+
+ if (BUCKETS_AUTO_NOT_SUPPORTED_PATTERN.matcher(message).matches()) {
+ return CODE_BUCKETS_AUTO_NOT_SUPPORTED;
+ }
+
return CODE_OTHER;
}
}
diff --git
a/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/utils/DorisUtils.java
b/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/utils/DorisUtils.java
index b5a4541b57..6d93924416 100644
---
a/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/utils/DorisUtils.java
+++
b/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/utils/DorisUtils.java
@@ -221,7 +221,7 @@ public final class DorisUtils {
private static int extractBucketNum(Matcher matcher) {
int bucketNum = 1;
- if (matcher.find(5)) {
+ if (matcher.group(5) != null) {
String bucketValue = matcher.group(5);
// Use -1 to indicate auto bucket.
bucketNum =
diff --git
a/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/converter/TestDorisExceptionConverter.java
b/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/converter/TestDorisExceptionConverter.java
index 4ac3ad7bb0..b365eb2d73 100644
---
a/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/converter/TestDorisExceptionConverter.java
+++
b/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/converter/TestDorisExceptionConverter.java
@@ -46,5 +46,16 @@ public class TestDorisExceptionConverter {
Assertions.assertEquals(
DorisExceptionConverter.CODE_NO_SUCH_TABLE,
DorisExceptionConverter.getErrorCodeFromMessage(msg));
+
+ msg = "errCode = 2, detailMessage = Syntax error in line 3: unexpected
token: AUTO";
+ Assertions.assertEquals(
+ DorisExceptionConverter.CODE_BUCKETS_AUTO_NOT_SUPPORTED,
+ DorisExceptionConverter.getErrorCodeFromMessage(msg));
+
+ msg =
+ "You have an error in your SQL syntax; check the manual that
corresponds to your MySQL server version for the right syntax to use near
'AUTO' at line 1";
+ Assertions.assertEquals(
+ DorisExceptionConverter.CODE_BUCKETS_AUTO_NOT_SUPPORTED,
+ DorisExceptionConverter.getErrorCodeFromMessage(msg));
}
}
diff --git
a/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/utils/TestDorisUtils.java
b/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/utils/TestDorisUtils.java
index a73f04ed6e..43c61c29f0 100644
---
a/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/utils/TestDorisUtils.java
+++
b/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/utils/TestDorisUtils.java
@@ -184,5 +184,11 @@ public class TestDorisUtils {
"CREATE TABLE `testTable` (\n`col1` date NOT NULL\n) ENGINE=OLAP\n
PARTITION BY RANGE(`col1`)\n()\n DISTRIBUTED BY HASH(`col1`) BUCKETS AUTO";
Distribution distribution2 =
DorisUtils.extractDistributionInfoFromSql(createTableSqlWithAuto);
assertEquals(distribution2.number(), -1);
+
+ String createTableSqlWithRandomAuto =
+ "CREATE TABLE `testTable` (\n`col1` date NOT NULL\n) ENGINE=OLAP\n
PARTITION BY RANGE(`col1`)\n()\n DISTRIBUTED BY RANDOM BUCKETS AUTO";
+ Distribution distribution3 =
+
DorisUtils.extractDistributionInfoFromSql(createTableSqlWithRandomAuto);
+ assertEquals(distribution3.number(), -1);
}
}