This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new e2e806a5e7 [improve](clickhouse jdbc) support clickhouse array type
(#17993)
e2e806a5e7 is described below
commit e2e806a5e7fe6304597ed82a97584cf9215066df
Author: yongkang.zhong <[email protected]>
AuthorDate: Wed Mar 22 19:42:32 2023 +0800
[improve](clickhouse jdbc) support clickhouse array type (#17993)
In this PR, I match the array type of ClickHouse to the array type of
Doris's jdbc external.
---
be/src/vec/exec/vjdbc_connector.cpp | 3 +-
docs/en/docs/lakehouse/external-table/jdbc.md | 46 ++++++++++++----------
docs/en/docs/lakehouse/multi-catalog/jdbc.md | 37 ++++++++---------
docs/zh-CN/docs/lakehouse/external-table/jdbc.md | 44 +++++++++++----------
docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md | 37 ++++++++---------
.../org/apache/doris/external/jdbc/JdbcClient.java | 6 +++
.../java/org/apache/doris/udf/JdbcExecutor.java | 38 +++++++++++++++++-
7 files changed, 131 insertions(+), 80 deletions(-)
diff --git a/be/src/vec/exec/vjdbc_connector.cpp
b/be/src/vec/exec/vjdbc_connector.cpp
index e9caf93647..226904588d 100644
--- a/be/src/vec/exec/vjdbc_connector.cpp
+++ b/be/src/vec/exec/vjdbc_connector.cpp
@@ -313,7 +313,8 @@ Status JdbcConnector::_check_type(SlotDescriptor*
slot_desc, const std::string&
break;
}
case TYPE_ARRAY: {
- if (type_str != "java.sql.Array" && type_str != "java.lang.String") {
+ if (type_str != "java.sql.Array" && type_str != "java.lang.String" &&
+ type_str != "java.lang.Object") {
return Status::InternalError(error_msg);
}
if (!slot_desc->type().children[0].children.empty()) {
diff --git a/docs/en/docs/lakehouse/external-table/jdbc.md
b/docs/en/docs/lakehouse/external-table/jdbc.md
index 5cfb8c1b9b..9d5d478e37 100644
--- a/docs/en/docs/lakehouse/external-table/jdbc.md
+++ b/docs/en/docs/lakehouse/external-table/jdbc.md
@@ -177,7 +177,8 @@ Test information on more versions will be provided in the
future.
| ClickHouse Version | ClickHouse JDBC Driver Version |
| ------------------ | ------------------------------------- |
-| 22 | clickhouse-jdbc-0.3.2-patch11-all.jar |
+| 22 | clickhouse-jdbc-0.3.2-patch11-all.jar |
+| 22 | clickhouse-jdbc-0.4.1-all.jar |
#### 6.Sap_HanaTest
@@ -275,29 +276,32 @@ The followings list how data types in different databases
are mapped in Doris.
### ClickHouse
-| ClickHouse | Doris |
-| :--------: | :------: |
-| BOOLEAN | BOOLEAN |
-| CHAR | CHAR |
-| VARCHAR | VARCHAR |
-| STRING | STRING |
-| DATE | DATE |
-| Float32 | FLOAT |
-| Float64 | DOUBLE |
-| Int8 | TINYINT |
-| Int16 | SMALLINT |
-| Int32 | INT |
-| Int64 | BIGINT |
-| Int128 | LARGEINT |
-| DATETIME | DATETIME |
-| DECIMAL | DECIMAL |
-
-Note:
-
+| ClickHouse | Doris
|
+|:-------------------------------------------------------:|:------------------------:|
+| Boolean | BOOLEAN
|
+| String | STRING
|
+| Date/Date32 | DATE/DATEV2
|
+| DateTime/DateTime64 |
DATETIME/DATETIMEV2 |
+| Float32 | FLOAT
|
+| Float64 | DOUBLE
|
+| Int8 | TINYINT
|
+| Int16/UInt8 | SMALLINT
|
+| Int32/UInt16 | INT
|
+| Int64/Uint32 | BIGINT
|
+| Int128/UInt64 | LARGEINT
|
+| Int256/UInt128/UInt256 | STRING
|
+| Decimal |
DECIMAL/DECIMALV3/STRING |
+| Enum/IPv4/IPv6/UUID | STRING
|
+| <version since="dev" type="inline"> Array(T) </version> | ARRAY\<T\>
|
+
+
+**Note:**
+
+- <version since="dev" type="inline"> For Array types in ClickHouse, use
Doris's Array type to match them. For basic types in an Array, see Basic type
matching rules. Nested arrays are not supported. </version>
- Some data types in ClickHouse, such as UUID, IPv4, IPv6, and Enum8, will be
mapped to Varchar/String in Doris. IPv4 and IPv6 will be displayed with an `/`
as a prefix. You can use the `split_part` function to remove the `/` .
- The Point Geo type in ClickHouse cannot be mapped in Doris by far.
-### SAP_HANA
+### SAP HANA
| SAP_HANA | Doris |
|:------------:|:-------------------:|
diff --git a/docs/en/docs/lakehouse/multi-catalog/jdbc.md
b/docs/en/docs/lakehouse/multi-catalog/jdbc.md
index c96db6df6e..758986dcac 100644
--- a/docs/en/docs/lakehouse/multi-catalog/jdbc.md
+++ b/docs/en/docs/lakehouse/multi-catalog/jdbc.md
@@ -329,23 +329,24 @@ The transaction mechanism ensures the atomicity of data
writing to JDBC External
### Clickhouse
-| ClickHouse Type | Doris Type | Comment
|
-| ---------------------- | ----------- |
------------------------------------------------------------ |
-| Bool | BOOLEAN |
|
-| String | STRING |
|
-| Date/Date32 | DATE |
|
-| DateTime/DateTime64 | DATETIME | Data beyond the maximum precision of
DateTime in Doris will be truncated. |
-| Float32 | FLOAT |
|
-| Float64 | DOUBLE |
|
-| Int8 | TINYINT |
|
-| Int16/UInt8 | SMALLINT | Doris does not support UNSIGNED data
types so UInt8 will be mapped to SMALLINT. |
-| Int32/UInt16 | INT | Doris does not support UNSIGNED data
types so UInt16 will be mapped to INT. |
-| Int64/Uint32 | BIGINT | Doris does not support UNSIGNED data
types so UInt32 will be mapped to BIGINT. |
-| Int128/UInt64 | LARGEINT | Doris does not support UNSIGNED data
types so UInt64 will be mapped to LARGEINT. |
-| Int256/UInt128/UInt256 | STRING | Doris does not support data types of
such orders of magnitude so these will be mapped to STRING. |
-| DECIMAL | DECIMAL | Data beyond the maximum decimal
precision in Doris will be truncated. |
-| Enum/IPv4/IPv6/UUID | STRING | Data of IPv4 and IPv6 type will be
displayed with an extra `/` as a prefix. To remove the `/`, you can use the
`split_part`function. |
-| Other | UNSUPPORTED |
|
+| ClickHouse Type | Doris Type
| Comment
|
+|---------------------------------------------------------|--------------------------|--------------------------------------------------------------------------------------------------------------------------------------|
+| Bool | BOOLEAN
|
|
+| String | STRING
|
|
+| Date/Date32 | DATEV2
| JDBC CATLOG uses Datev2 type default when connecting DORIS
|
+| DateTime/DateTime64 | DATETIMEV2
| JDBC CATLOG uses DateTimev2 type default when connecting DORIS
|
+| Float32 | FLOAT
|
|
+| Float64 | DOUBLE
|
|
+| Int8 | TINYINT
|
|
+| Int16/UInt8 | SMALLINT
| Doris does not support UNSIGNED data types so UInt8 will be mapped to
SMALLINT. |
+| Int32/UInt16 | INT
| Doris does not support UNSIGNED data types so UInt16 will be mapped to
INT. |
+| Int64/Uint32 | BIGINT
| Doris does not support UNSIGNED data types so UInt32 will be mapped to
BIGINT. |
+| Int128/UInt64 | LARGEINT
| Doris does not support UNSIGNED data types so UInt64 will be mapped to
LARGEINT. |
+| Int256/UInt128/UInt256 | STRING
| Doris does not support data types of such orders of magnitude so these
will be mapped to STRING. |
+| DECIMAL |
DECIMAL/DECIMALV3/STRING | The Data type is based on the DECIMAL field's
(precision, scale) and the `enable_decimal_conversion` configuration.
|
+| Enum/IPv4/IPv6/UUID | STRING
| Data of IPv4 and IPv6 type will be displayed with an extra `/` as a
prefix. To remove the `/`, you can use the `split_part`function. |
+| <version since="dev" type="inline"> Array(T) </version> | ARRAY\<T\>
| Array internal basic type adaptation logic refers to the preceding
types. Nested types are not supported |
+| Other | UNSUPPORTED
|
|
### Doris
@@ -370,7 +371,7 @@ The transaction mechanism ensures the atomicity of data
writing to JDBC External
| TEXT | STRING | |
|Other| UNSUPPORTED |
-### SAP_HANA
+### SAP HANA
| SAP_HANA | Doris | Comment
|
|--------------|--------------------------|--------------------------------------------------------------------------------------------------------------------|
diff --git a/docs/zh-CN/docs/lakehouse/external-table/jdbc.md
b/docs/zh-CN/docs/lakehouse/external-table/jdbc.md
index 0773d26ea7..4b04a1abec 100644
--- a/docs/zh-CN/docs/lakehouse/external-table/jdbc.md
+++ b/docs/zh-CN/docs/lakehouse/external-table/jdbc.md
@@ -169,9 +169,10 @@ PROPERTIES (
目前只测试了这一个版本其他版本测试后补充
#### 5.ClickHouse测试
-| ClickHouse版本 | ClickHouse JDBC驱动版本 |
-|----------| ------------------- |
-| 22 | clickhouse-jdbc-0.3.2-patch11-all.jar |
+| ClickHouse版本 | ClickHouse JDBC驱动版本 |
+|--------------|---------------------------------------|
+| 22 | clickhouse-jdbc-0.3.2-patch11-all.jar |
+| 22 | clickhouse-jdbc-0.4.1-all.jar |
#### 6.Sap_Hana测试
@@ -269,28 +270,31 @@ PROPERTIES (
### ClickHouse
-| ClickHouse | Doris |
-|:----------:|:--------:|
-| BOOLEAN | BOOLEAN |
-| CHAR | CHAR |
-| VARCHAR | VARCHAR |
-| STRING | STRING |
-| DATE | DATE |
-| Float32 | FLOAT |
-| Float64 | DOUBLE |
-| Int8 | TINYINT |
-| Int16 | SMALLINT |
-| Int32 | INT |
-| Int64 | BIGINT |
-| Int128 | LARGEINT |
-| DATETIME | DATETIME |
-| DECIMAL | DECIMAL |
+| ClickHouse | Doris
|
+|:--------------------------------------------------------:|:------------------------:|
+| Boolean | BOOLEAN
|
+| String | STRING
|
+| Date/Date32 | DATE/DATEV2
|
+| DateTime/DateTime64 |
DATETIME/DATETIMEV2 |
+| Float32 | FLOAT
|
+| Float64 | DOUBLE
|
+| Int8 | TINYINT
|
+| Int16/UInt8 | SMALLINT
|
+| Int32/UInt16 | INT
|
+| Int64/Uint32 | BIGINT
|
+| Int128/UInt64 | LARGEINT
|
+| Int256/UInt128/UInt256 | STRING
|
+| Decimal |
DECIMAL/DECIMALV3/STRING |
+| Enum/IPv4/IPv6/UUID | STRING
|
+| <version since="dev" type="inline"> Array(T) </version> | ARRAY\<T\>
|
**注意:**
+
+- <version since="dev" type="inline">
对于ClickHouse里的Array类型,可用Doris的Array类型来匹配,Array内的基础类型匹配参考基础类型匹配规则即可,不支持嵌套Array
</version>
-
对于ClickHouse里的一些特殊类型,如UUID,IPv4,IPv6,Enum8可以用Doris的Varchar/String类型来匹配,但是在显示上IPv4,IPv6会额外在数据最前面显示一个`/`,需要自己用`split_part`函数处理
- 对于ClickHouse的Geo类型Point,无法进行匹配
-### SAP_HANA
+### SAP HANA
| SAP_HANA | Doris |
|:------------:|:-------------------:|
diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md
b/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md
index 0494443355..a0d0b7d437 100644
--- a/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md
+++ b/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md
@@ -328,23 +328,24 @@ set enable_odbc_transcation = true;
### Clickhouse
-| ClickHouse Type | Doris Type | Comment
|
-|------------------------|------------|-----------------------------------------------------|
-| Bool | BOOLEAN |
|
-| String | STRING |
|
-| Date/Date32 | DATE |
|
-| DateTime/DateTime64 | DATETIME | 对于超过了Doris最大的DateTime精度的数据,将截断处理
|
-| Float32 | FLOAT |
|
-| Float64 | DOUBLE |
|
-| Int8 | TINYINT |
|
-| Int16/UInt8 | SMALLINT | Doris没有UNSIGNED数据类型,所以扩大一个数量级
|
-| Int32/UInt16 | INT | Doris没有UNSIGNED数据类型,所以扩大一个数量级
|
-| Int64/Uint32 | BIGINT | Doris没有UNSIGNED数据类型,所以扩大一个数量级
|
-| Int128/UInt64 | LARGEINT | Doris没有UNSIGNED数据类型,所以扩大一个数量级
|
-| Int256/UInt128/UInt256 | STRING | Doris没有这个数量级的数据类型,采用STRING处理
|
-| DECIMAL | DECIMAL | 对于超过了Doris最大的Decimal精度的数据,将映射为STRING
|
-| Enum/IPv4/IPv6/UUID | STRING |
在显示上IPv4,IPv6会额外在数据最前面显示一个`/`,需要自己用`split_part`函数处理 |
-|Other| UNSUPPORTED |
+| ClickHouse Type | Doris Type
| Comment |
+|---------------------------------------------------------|--------------------------|---------------------------------------------------|
+| Bool | BOOLEAN
| |
+| String | STRING
| |
+| Date/Date32 | DATEV2
| Jdbc Catlog连接Doris时默认使用DATEV2类型 |
+| DateTime/DateTime64 | DATETIMEV2
| Jdbc Catlog连接Doris时默认使用DATETIMEV2类型 |
+| Float32 | FLOAT
| |
+| Float64 | DOUBLE
| |
+| Int8 | TINYINT
| |
+| Int16/UInt8 | SMALLINT
| Doris没有UNSIGNED数据类型,所以扩大一个数量级 |
+| Int32/UInt16 | INT
| Doris没有UNSIGNED数据类型,所以扩大一个数量级 |
+| Int64/Uint32 | BIGINT
| Doris没有UNSIGNED数据类型,所以扩大一个数量级 |
+| Int128/UInt64 | LARGEINT
| Doris没有UNSIGNED数据类型,所以扩大一个数量级 |
+| Int256/UInt128/UInt256 | STRING
| Doris没有这个数量级的数据类型,采用STRING处理 |
+| DECIMAL |
DECIMAL/DECIMALV3/STRING | 将根据Doris DECIMAL字段的(precision,
scale)和`enable_decimal_conversion`开关选择用何种类型|
+| Enum/IPv4/IPv6/UUID | STRING
| 在显示上IPv4,IPv6会额外在数据最前面显示一个`/`,需要自己用`split_part`函数处理 |
+| <version since="dev" type="inline"> Array(T) </version> | ARRAY\<T\>
| Array内部类型适配逻辑参考上述类型,不支持嵌套类型 |
+| Other | UNSUPPORTED
| |
### Doris
@@ -369,7 +370,7 @@ set enable_odbc_transcation = true;
| TEXT | STRING | |
|Other| UNSUPPORTED |
-### SAP_HANA
+### SAP HANA
| SAP_HANA | Doris | Comment
|
|--------------|--------------------------|---------------------------------------------------------------------------------------|
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java
b/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java
index 1310a89f03..32ef14f9ce 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java
@@ -17,6 +17,7 @@
package org.apache.doris.external.jdbc;
+import org.apache.doris.catalog.ArrayType;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.JdbcResource;
import org.apache.doris.catalog.PrimitiveType;
@@ -578,6 +579,11 @@ public class JdbcClient {
return ScalarType.createStringType();
} else if (ckType.startsWith("DateTime")) {
return ScalarType.createDatetimeV2Type(0);
+ } else if (ckType.startsWith("Array")) {
+ String cktype = ckType.substring(6, ckType.length() - 1);
+ fieldSchema.setDataTypeName(cktype);
+ Type type = clickhouseTypeToDoris(fieldSchema);
+ return ArrayType.create(type, true);
}
switch (ckType) {
case "Bool":
diff --git a/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java
b/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java
index df30b8e1b5..9e9ea2cb8a 100644
--- a/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java
+++ b/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java
@@ -32,6 +32,8 @@ import java.lang.reflect.Array;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.RoundingMode;
+import java.net.Inet4Address;
+import java.net.Inet6Address;
import java.net.MalformedURLException;
import java.nio.charset.StandardCharsets;
import java.sql.Connection;
@@ -45,7 +47,11 @@ import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import java.util.function.Function;
public class JdbcExecutor {
private static final Logger LOG = Logger.getLogger(JdbcExecutor.class);
@@ -197,9 +203,37 @@ public class JdbcExecutor {
Object[] columnData = (Object[]) obj;
if (columnData[idx] instanceof String) {
return (String) columnData[idx];
- } else {
+ } else if (columnData[idx] instanceof java.sql.Array) {
return (java.sql.Array) columnData[idx];
- }
+ } else {
+ //For the ClickHouse array type, we need the concatenated string
after toString
+ return convertClickHouseArray(columnData[idx]);
+ }
+ }
+
+ private static final Map<Class<?>, Function<Object, String>>
CK_ARRAY_CONVERTERS = new HashMap<>();
+
+ static {
+ CK_ARRAY_CONVERTERS.put(String[].class, res ->
Arrays.toString((String[]) res));
+ CK_ARRAY_CONVERTERS.put(byte[].class, res -> Arrays.toString((byte[])
res));
+ CK_ARRAY_CONVERTERS.put(Byte[].class, res -> Arrays.toString((Byte[])
res));
+ CK_ARRAY_CONVERTERS.put(LocalDate[].class, res ->
Arrays.toString((LocalDate[]) res));
+ CK_ARRAY_CONVERTERS.put(LocalDateTime[].class, res ->
Arrays.toString((LocalDateTime[]) res));
+ CK_ARRAY_CONVERTERS.put(float[].class, res ->
Arrays.toString((float[]) res));
+ CK_ARRAY_CONVERTERS.put(double[].class, res ->
Arrays.toString((double[]) res));
+ CK_ARRAY_CONVERTERS.put(short[].class, res ->
Arrays.toString((short[]) res));
+ CK_ARRAY_CONVERTERS.put(int[].class, res -> Arrays.toString((int[])
res));
+ CK_ARRAY_CONVERTERS.put(long[].class, res -> Arrays.toString((long[])
res));
+ CK_ARRAY_CONVERTERS.put(BigInteger[].class, res ->
Arrays.toString((BigInteger[]) res));
+ CK_ARRAY_CONVERTERS.put(BigDecimal[].class, res ->
Arrays.toString((BigDecimal[]) res));
+ CK_ARRAY_CONVERTERS.put(Inet4Address[].class, res ->
Arrays.toString((Inet4Address[]) res));
+ CK_ARRAY_CONVERTERS.put(Inet6Address[].class, res ->
Arrays.toString((Inet6Address[]) res));
+ CK_ARRAY_CONVERTERS.put(UUID[].class, res -> Arrays.toString((UUID[])
res));
+ }
+
+ public static Object convertClickHouseArray(Object obj) {
+ Function<Object, String> converter =
CK_ARRAY_CONVERTERS.get(obj.getClass());
+ return converter != null ? converter.apply(obj) : obj;
}
private void init(String driverUrl, String sql, int batchSize, String
driverClass, String jdbcUrl, String jdbcUser,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]