This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-website.git
The following commit(s) were added to refs/heads/master by this push:
new 619665c454 [fix](trino-connector) fix wrong properties for trino connector (#732)
619665c454 is described below
commit 619665c4540fe9e36222d2faac986c42d317ad97
Author: Mingyu Chen <[email protected]>
AuthorDate: Mon Jun 10 17:50:33 2024 +0800
[fix](trino-connector) fix wrong properties for trino connector (#732)
---
.../trino-connector-developer-guide.md | 49 ++++++++++------------
docs/lakehouse/datalake-analytics/tpcds.md | 6 +--
docs/lakehouse/datalake-analytics/tpch.md | 6 +--
.../trino-connector-developer-guide.md | 49 ++++++++++------------
.../current/lakehouse/datalake-analytics/tpcds.md | 8 ++--
.../current/lakehouse/datalake-analytics/tpch.md | 10 ++---
6 files changed, 59 insertions(+), 69 deletions(-)
diff --git a/community/how-to-contribute/trino-connector-developer-guide.md b/community/how-to-contribute/trino-connector-developer-guide.md
index 22569b7eb6..9bed03e93b 100644
--- a/community/how-to-contribute/trino-connector-developer-guide.md
+++ b/community/how-to-contribute/trino-connector-developer-guide.md
@@ -83,12 +83,12 @@ After completing the previous two steps, we can use the Trino-Connector Catalog
```sql
create catalog kafka_tpch properties (
- "type"="trino-connector",
- -- The following four properties are derived from trino and are consistent with the properties in etc/catalog/kafka.properties of trino
- "connector.name"="kafka",
- "kafka.table-names"="tpch.customer,tpch.orders,tpch.lineitem,tpch.part,tpch.partsupp,tpch.supplier,tpch.nation,tpch.region",
- "kafka.nodes"="localhost:9092",
- "kafka.table-description-dir" = "/mnt/datadisk1/fangtiewei"
+ "type"="trino-connector",
+ -- The following four properties are derived from trino and are consistent with the properties in etc/catalog/kafka.properties of trino, but each must be prefixed with "trino."
+ "trino.connector.name"="kafka",
+ "trino.kafka.table-names"="tpch.customer,tpch.orders,tpch.lineitem,tpch.part,tpch.partsupp,tpch.supplier,tpch.nation,tpch.region",
+ "trino.kafka.nodes"="localhost:9092",
+ "trino.kafka.table-description-dir" = "/mnt/datadisk1/fangtiewei"
);
```
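As a quick sanity check of the renamed properties, a minimal sketch, assuming the catalog above was created successfully and that Doris maps Trino schemas to databases (the `tpch.customer` name comes from the `trino.kafka.table-names` list above):
```sql
-- Switch to the new catalog and read a few rows through it.
-- kafka_tpch.tpch.customer follows Doris' catalog.database.table naming.
SWITCH kafka_tpch;
SHOW DATABASES;
SELECT * FROM kafka_tpch.tpch.customer LIMIT 10;
```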
@@ -109,27 +109,24 @@ The following are the Doris Trino-Connector catalog configuration of several com
```sql
create catalog emr_hive properties (
"type"="trino-connector",
-
- "connector.name"="hive",
- "hive.metastore.uri"="thrift://ip:port",
- "hive.config.resources"="/path/to/core-site.xml,/path/to/hdfs-site.xml"
+ "trino.connector.name"="hive",
+ "trino.hive.metastore.uri"="thrift://ip:port",
+ "trino.hive.config.resources"="/path/to/core-site.xml,/path/to/hdfs-site.xml"
);
```
> Note:
> - You should add Hadoop's user name to the JVM parameters: -DHADOOP_USER_NAME=ftw, which can be configured at the end of the JAVA_OPTS_FOR_JDK_17 parameter in the fe.conf / be.conf file, such as JAVA_OPTS_FOR_JDK_17="...-DHADOOP_USER_NAME=ftw"
-
2. MySQL
```sql
create catalog trino_mysql properties (
"type"="trino-connector",
-
- "connector.name"="mysql",
- "connection-url" = "jdbc:mysql://ip:port",
- "connection-user" = "user",
- "connection-password" = "password"
+ "trino.connector.name"="mysql",
+ "trino.connection-url" = "jdbc:mysql://ip:port",
+ "trino.connection-user" = "user",
+ "trino.connection-password" = "password"
);
```
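The same pattern applies to the JDBC properties: only the "trino." prefix changes, while the values stay exactly as Trino expects them. A hedged usage sketch, with `your_db.your_table` as placeholders for real MySQL objects:
```sql
-- Browse the MySQL databases mapped through the trino_mysql catalog,
-- then query one table (the object names below are placeholders).
SWITCH trino_mysql;
SHOW DATABASES;
SELECT COUNT(*) FROM trino_mysql.your_db.your_table;
```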
@@ -141,24 +138,22 @@ The following are the Doris Trino-Connector catalog configuration of several com
```sql
create catalog kafka properties (
"type"="trino-connector",
-
- "connector.name"="kafka",
- "kafka.nodes"="localhost:9092",
- "kafka.table-description-supplier"="CONFLUENT",
- "kafka.confluent-schema-registry-url"="http://localhost:8081",
- "kafka.hide-internal-columns" = "false"
+ "trino.connector.name"="kafka",
+ "trino.kafka.nodes"="localhost:9092",
+ "trino.kafka.table-description-supplier"="CONFLUENT",
+ "trino.kafka.confluent-schema-registry-url"="http://localhost:8081",
+ "trino.kafka.hide-internal-columns" = "false"
);
```
-
4. BigQuery
```sql
create catalog bigquery_catalog properties (
"type"="trino-connector",
-
- "connector.name"="bigquery",
- "bigquery.project-id"="steam-circlet-388406",
- "bigquery.credentials-file"="/path/to/application_default_credentials.json"
+ "trino.connector.name"="bigquery",
+ "trino.bigquery.project-id"="steam-circlet-388406",
+ "trino.bigquery.credentials-file"="/path/to/application_default_credentials.json"
);
```
+
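For completeness, a hedged example of reading through the BigQuery catalog above; `your_dataset` and `your_table` are placeholders, not names from the original docs:
```sql
-- Hypothetical query against the bigquery_catalog defined above.
SELECT * FROM bigquery_catalog.your_dataset.your_table LIMIT 10;
```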
diff --git a/docs/lakehouse/datalake-analytics/tpcds.md b/docs/lakehouse/datalake-analytics/tpcds.md
index 4b3144d9bb..a705c79dfe 100644
--- a/docs/lakehouse/datalake-analytics/tpcds.md
+++ b/docs/lakehouse/datalake-analytics/tpcds.md
@@ -66,12 +66,12 @@ After deployment, it is recommended to restart the FE and BE nodes to ensure the
```sql
CREATE CATALOG `tpcds` PROPERTIES (
"type" = "trino-connector",
- "connector.name" = "tpcds",
- "tpcds.split-count" = "32"
+ "trino.connector.name" = "tpcds",
+ "trino.tpcds.split-count" = "32"
);
```
-The `tpcds.split-count` property sets the level of concurrency. It is recommended to set it to twice the number of cores per BE node to achieve optimal concurrency and improve data generation efficiency.
+The `trino.tpcds.split-count` property sets the level of concurrency. It is recommended to set it to twice the number of cores per BE node to achieve optimal concurrency and improve data generation efficiency.
## Using the TPCDS Catalog
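For example, on BE nodes with 16 cores each, `trino.tpcds.split-count` would be set to 32. A minimal usage sketch, assuming the scale-factor schemas (`sf1`, `sf100`, ...) that the Trino tpcds connector normally exposes:
```sql
-- Generate data on the fly and materialize it into a Doris internal
-- table; my_db.call_center is a hypothetical, pre-created target table.
INSERT INTO my_db.call_center SELECT * FROM tpcds.sf100.call_center;
```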
diff --git a/docs/lakehouse/datalake-analytics/tpch.md b/docs/lakehouse/datalake-analytics/tpch.md
index 252193779f..52fb20f20f 100644
--- a/docs/lakehouse/datalake-analytics/tpch.md
+++ b/docs/lakehouse/datalake-analytics/tpch.md
@@ -66,9 +66,9 @@ After deployment, it is recommended to restart the FE and BE nodes to ensure the
```sql
CREATE CATALOG `tpch` PROPERTIES (
"type" = "trino-connector",
- "connector.name" = "tpch",
- "tpch.column-naming" = "STANDARD",
- "tpch.splits-per-node" = "32"
+ "trino.connector.name" = "tpch",
+ "trino.tpch.column-naming" = "STANDARD",
+ "trino.tpch.splits-per-node" = "32"
);
```
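`trino.tpch.column-naming = "STANDARD"` makes the generated columns use the TPC-H spec's prefixed names (e.g. `c_custkey` rather than `custkey`), which can be verified with a hedged probe like the following, assuming the connector's usual `sf1` schema:
```sql
-- Check the STANDARD column naming on a small generated table.
SELECT c_custkey, c_name FROM tpch.sf1.customer LIMIT 5;
```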
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs-community/current/how-to-contribute/trino-connector-developer-guide.md b/i18n/zh-CN/docusaurus-plugin-content-docs-community/current/how-to-contribute/trino-connector-developer-guide.md
index bb3d5d631a..e543578055 100644
--- a/i18n/zh-CN/docusaurus-plugin-content-docs-community/current/how-to-contribute/trino-connector-developer-guide.md
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs-community/current/how-to-contribute/trino-connector-developer-guide.md
@@ -82,12 +82,12 @@ Trino does not provide officially compiled Connector plugins, so we need to build them ourselves
```sql
create catalog kafka_tpch properties (
- "type"="trino-connector",
- -- The following four properties are derived from trino and are consistent with the properties in etc/catalog/kafka.properties of trino.
- "connector.name"="kafka",
- "kafka.table-names"="tpch.customer,tpch.orders,tpch.lineitem,tpch.part,tpch.partsupp,tpch.supplier,tpch.nation,tpch.region",
- "kafka.nodes"="localhost:9092",
- "kafka.table-description-dir" = "/mnt/datadisk1/fangtiewei"
+ "type"="trino-connector",
+ -- The following four properties are derived from trino and are consistent with the properties in etc/catalog/kafka.properties of trino, but each must be prefixed with "trino."
+ "trino.connector.name"="kafka",
+ "trino.kafka.table-names"="tpch.customer,tpch.orders,tpch.lineitem,tpch.part,tpch.partsupp,tpch.supplier,tpch.nation,tpch.region",
+ "trino.kafka.nodes"="localhost:9092",
+ "trino.kafka.table-description-dir" = "/mnt/datadisk1/fangtiewei"
);
```
@@ -108,27 +108,24 @@ Trino does not provide officially compiled Connector plugins, so we need to build them ourselves
```sql
create catalog emr_hive properties (
"type"="trino-connector",
-
- "connector.name"="hive",
- "hive.metastore.uri"="thrift://ip:port",
- "hive.config.resources"="/path/to/core-site.xml,/path/to/hdfs-site.xml"
+ "trino.connector.name"="hive",
+ "trino.hive.metastore.uri"="thrift://ip:port",
+ "trino.hive.config.resources"="/path/to/core-site.xml,/path/to/hdfs-site.xml"
);
```
> Notes when using the Hive plugin:
> - You need to add the Hadoop user to the JVM parameters: -DHADOOP_USER_NAME=ftw, which can be configured at the end of the JAVA_OPTS_FOR_JDK_17 parameter in the fe.conf / be.conf file, such as JAVA_OPTS_FOR_JDK_17="...-DHADOOP_USER_NAME=ftw"
-
2. MySQL
```sql
create catalog trino_mysql properties (
"type"="trino-connector",
-
- "connector.name"="mysql",
- "connection-url" = "jdbc:mysql://ip:port",
- "connection-user" = "user",
- "connection-password" = "password"
+ "trino.connector.name"="mysql",
+ "trino.connection-url" = "jdbc:mysql://ip:port",
+ "trino.connection-user" = "user",
+ "trino.connection-password" = "password"
);
```
@@ -140,24 +137,22 @@ Trino does not provide officially compiled Connector plugins, so we need to build them ourselves
```sql
create catalog kafka properties (
"type"="trino-connector",
-
- "connector.name"="kafka",
- "kafka.nodes"="localhost:9092",
- "kafka.table-description-supplier"="CONFLUENT",
- "kafka.confluent-schema-registry-url"="http://localhost:8081",
- "kafka.hide-internal-columns" = "false"
+ "trino.connector.name"="kafka",
+ "trino.kafka.nodes"="localhost:9092",
+ "trino.kafka.table-description-supplier"="CONFLUENT",
+ "trino.kafka.confluent-schema-registry-url"="http://localhost:8081",
+ "trino.kafka.hide-internal-columns" = "false"
);
```
-
4. BigQuery
```sql
create catalog bigquery_catalog properties (
"type"="trino-connector",
-
- "connector.name"="bigquery",
- "bigquery.project-id"="steam-circlet-388406",
- "bigquery.credentials-file"="/path/to/application_default_credentials.json"
+ "trino.connector.name"="bigquery",
+ "trino.bigquery.project-id"="steam-circlet-388406",
+ "trino.bigquery.credentials-file"="/path/to/application_default_credentials.json"
);
```
+
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/datalake-analytics/tpcds.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/datalake-analytics/tpcds.md
index 69a0853ef5..2e88c532cc 100644
--- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/datalake-analytics/tpcds.md
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/datalake-analytics/tpcds.md
@@ -66,8 +66,8 @@ mvn clean install -DskipTests
```sql
CREATE CATALOG `tpcds` PROPERTIES (
"type" = "trino-connector",
- "connector.name" = "tpcds",
- "tpcds.split-count" = "32"
+ "trino.connector.name" = "tpcds",
+ "trino.tpcds.split-count" = "32"
);
```
@@ -135,7 +135,7 @@ mysql> SHOW TABLES;
These tables can be queried directly with SELECT statements.
-:::tips
+:::tip
The data in these pre-built datasets is not actually stored; it is generated on the fly at query time, so the datasets are not suitable for direct benchmark testing. They are intended for writing the dataset into other target tables via `INSERT INTO SELECT` (such as Doris internal tables, Hive, Iceberg, or any other destination Doris can write to) and then running performance tests against those target tables.
:::
@@ -173,7 +173,7 @@ CREATE TABLE hive.tpcds100.web_sales PROPERTIES("file_format" = "pa
CREATE TABLE hive.tpcds100.web_site PROPERTIES("file_format" = "parquet") AS SELECT * FROM tpcds.sf100.web_site ;
```
-:::tips
+:::tip
On a Doris cluster with three 16-core BE nodes, creating a TPCDS 1000 Hive dataset takes roughly 3 to 4 hours.
:::
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/datalake-analytics/tpch.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/datalake-analytics/tpch.md
index 8dc31766fc..d3e0749a79 100644
--- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/datalake-analytics/tpch.md
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/datalake-analytics/tpch.md
@@ -66,9 +66,9 @@ mvn clean install -DskipTests
```sql
CREATE CATALOG `tpch` PROPERTIES (
"type" = "trino-connector",
- "connector.name" = "tpch",
- "tpch.column-naming" = "STANDARD",
- "tpch.splits-per-node" = "32"
+ "trino.connector.name" = "tpch",
+ "trino.tpch.column-naming" = "STANDARD",
+ "trino.tpch.splits-per-node" = "32"
);
```
@@ -121,7 +121,7 @@ mysql> SHOW TABLES;
These tables can be queried directly with SELECT statements.
-:::tips
+:::tip
The data in these pre-built datasets is not actually stored; it is generated on the fly at query time, so the datasets are not suitable for direct benchmark testing. They are intended for writing the dataset into other target tables via `INSERT INTO SELECT` (such as Doris internal tables, Hive, Iceberg, or any other destination Doris can write to) and then running performance tests against those target tables.
:::
@@ -142,7 +142,7 @@ CREATE TABLE hive.tpch100.region PROPERTIES("file_format" = "parquet") AS SELE
CREATE TABLE hive.tpch100.supplier PROPERTIES("file_format" = "parquet") AS SELECT * FROM tpch.sf100.supplier ;
```
-:::tips
+:::tip
On a Doris cluster with three 16-core BE nodes, creating a TPCH 1000 Hive dataset takes roughly 25 minutes, and TPCH 10000 takes roughly 4 to 5 hours.
:::
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]