This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-website.git
The following commit(s) were added to refs/heads/master by this push:
new 619665c454 [fix](trino-connector) fix wrong properties for trino connector (#732)
619665c454 is described below
commit 619665c4540fe9e36222d2faac986c42d317ad97
Author: Mingyu Chen <[email protected]>
AuthorDate: Mon Jun 10 17:50:33 2024 +0800
[fix](trino-connector) fix wrong properties for trino connector (#732)
---
.../trino-connector-developer-guide.md | 49 ++++++++++------------
docs/lakehouse/datalake-analytics/tpcds.md | 6 +--
docs/lakehouse/datalake-analytics/tpch.md | 6 +--
.../trino-connector-developer-guide.md | 49 ++++++++++------------
.../current/lakehouse/datalake-analytics/tpcds.md | 8 ++--
.../current/lakehouse/datalake-analytics/tpch.md | 10 ++---
6 files changed, 59 insertions(+), 69 deletions(-)
diff --git a/community/how-to-contribute/trino-connector-developer-guide.md b/community/how-to-contribute/trino-connector-developer-guide.md
index 22569b7eb6..9bed03e93b 100644
--- a/community/how-to-contribute/trino-connector-developer-guide.md
+++ b/community/how-to-contribute/trino-connector-developer-guide.md
@@ -83,12 +83,12 @@ After completing the previous two steps, we can use the Trino-Connector Catalog
```sql
create catalog kafka_tpch properties (
- "type"="trino-connector",
- -- The following four properties are derived from trino and are consistent with the properties in etc/catalog/kafka.properties of trino
- "connector.name"="kafka",
- "kafka.table-names"="tpch.customer,tpch.orders,tpch.lineitem,tpch.part,tpch.partsupp,tpch.supplier,tpch.nation,tpch.region",
- "kafka.nodes"="localhost:9092",
- "kafka.table-description-dir" = "/mnt/datadisk1/fangtiewei"
+ "type"="trino-connector",
+ -- The following four properties are derived from trino and are consistent with the properties in etc/catalog/kafka.properties of trino, but each must be prefixed with "trino."
+ "trino.connector.name"="kafka",
+ "trino.kafka.table-names"="tpch.customer,tpch.orders,tpch.lineitem,tpch.part,tpch.partsupp,tpch.supplier,tpch.nation,tpch.region",
+ "trino.kafka.nodes"="localhost:9092",
+ "trino.kafka.table-description-dir" = "/mnt/datadisk1/fangtiewei"
);
```
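As a quick sanity check of the renamed properties, a minimal sketch, assuming the catalog above was created successfully and that Doris maps Trino schemas to databases (the `tpch.customer` name comes from the `trino.kafka.table-names` list above):
```sql
-- Switch to the new catalog and read a few rows through it.
-- kafka_tpch.tpch.customer follows Doris' catalog.database.table naming.
SWITCH kafka_tpch;
SHOW DATABASES;
SELECT * FROM kafka_tpch.tpch.customer LIMIT 10;
```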
@@ -109,27 +109,24 @@ The following are the Doris Trino-Connector catalog configuration of several com
```sql
create catalog emr_hive properties (
"type"="trino-connector",
-
- "connector.name"="hive",
- "hive.metastore.uri"="thrift://ip:port",
- "hive.config.resources"="/path/to/core-site.xml,/path/to/hdfs-site.xml"
+ "trino.connector.name"="hive",
+ "trino.hive.metastore.uri"="thrift://ip:port",
+ "trino.hive.config.resources"="/path/to/core-site.xml,/path/to/hdfs-site.xml"
);
```
> Note:
> - You should add Hadoop's user name to the JVM parameters: -DHADOOP_USER_NAME=ftw, which can be configured at the end of the JAVA_OPTS_FOR_JDK_17 parameter in the fe.conf / be.conf file, such as JAVA_OPTS_FOR_JDK_17="...-DHADOOP_USER_NAME=ftw"
-
2. MySQL
```sql
create catalog trino_mysql properties (
"type"="trino-connector",
-
- "connector.name"="mysql",
- "connection-url" = "jdbc:mysql://ip:port",
- "connection-user" = "user",
- "connection-password" = "password"
+ "trino.connector.name"="mysql",
+ "trino.connection-url" = "jdbc:mysql://ip:port",
+ "trino.connection-user" = "user",
+ "trino.connection-password" = "password"
);
```
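The same pattern applies to the JDBC properties: only the "trino." prefix changes, while the values stay exactly as Trino expects them. A hedged usage sketch, with `your_db.your_table` as placeholders for real MySQL objects:
```sql
-- Browse the MySQL databases mapped through the trino_mysql catalog,
-- then query one table (the object names below are placeholders).
SWITCH trino_mysql;
SHOW DATABASES;
SELECT COUNT(*) FROM trino_mysql.your_db.your_table;
```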
@@ -141,24 +138,22 @@ The following are the Doris Trino-Connector catalog configuration of several com
```sql
create catalog kafka properties (
"type"="trino-connector",
-
- "connector.name"="kafka",
- "kafka.nodes"="localhost:9092",
- "kafka.table-description-supplier"="CONFLUENT",
- "kafka.confluent-schema-registry-url"="http://localhost:8081",
- "kafka.hide-internal-columns" = "false"
+ "trino.connector.name"="kafka",
+ "trino.kafka.nodes"="localhost:9092",
+ "trino.kafka.table-description-supplier"="CONFLUENT",
+ "trino.kafka.confluent-schema-registry-url"="http://localhost:8081",
+ "trino.kafka.hide-internal-columns" = "false"
);
```
-
4. BigQuery
```sql
create catalog bigquery_catalog properties (
"type"="trino-connector",
-
- "connector.name"="bigquery",
- "bigquery.project-id"="steam-circlet-388406",
- "bigquery.credentials-file"="/path/to/application_default_credentials.json"
+ "trino.connector.name"="bigquery",
+ "trino.bigquery.project-id"="steam-circlet-388406",
+ "trino.bigquery.credentials-file"="/path/to/application_default_credentials.json"
);
```
+
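For completeness, a hedged example of reading through the BigQuery catalog above; `your_dataset` and `your_table` are placeholders, not names from the original docs:
```sql
-- Hypothetical query against the bigquery_catalog defined above.
SELECT * FROM bigquery_catalog.your_dataset.your_table LIMIT 10;
```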
diff --git a/docs/lakehouse/datalake-analytics/tpcds.md b/docs/lakehouse/datalake-analytics/tpcds.md
index 4b3144d9bb..a705c79dfe 100644
--- a/docs/lakehouse/datalake-analytics/tpcds.md
+++ b/docs/lakehouse/datalake-analytics/tpcds.md
@@ -66,12 +66,12 @@ After deployment, it is recommended to restart the FE and BE nodes to ensure the
```sql
CREATE CATALOG `tpcds` PROPERTIES (
"type" = "trino-connector",
- "connector.name" = "tpcds",
- "tpcds.split-count" = "32"
+ "trino.connector.name" = "tpcds",
+ "trino.tpcds.split-count" = "32"
);
```
-The `tpcds.split-count` property sets the level of concurrency. It is recommended to set it to twice the number of cores per BE node to achieve optimal concurrency and improve data generation efficiency.
+The `trino.tpcds.split-count` property sets the level of concurrency. It is recommended to set it to twice the number of cores per BE node to achieve optimal concurrency and improve data generation efficiency.
## Using the TPCDS Catalog
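For example, on BE nodes with 16 cores each, `trino.tpcds.split-count` would be set to 32. A minimal usage sketch, assuming the scale-factor schemas (`sf1`, `sf100`, ...) that the Trino tpcds connector normally exposes:
```sql
-- Generate data on the fly and materialize it into a Doris internal
-- table; my_db.call_center is a hypothetical, pre-created target table.
INSERT INTO my_db.call_center SELECT * FROM tpcds.sf100.call_center;
```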
diff --git a/docs/lakehouse/datalake-analytics/tpch.md b/docs/lakehouse/datalake-analytics/tpch.md
index 252193779f..52fb20f20f 100644
--- a/docs/lakehouse/datalake-analytics/tpch.md
+++ b/docs/lakehouse/datalake-analytics/tpch.md
@@ -66,9 +66,9 @@ After deployment, it is recommended to restart the FE and BE nodes to ensure the
```sql
CREATE CATALOG `tpch` PROPERTIES (
"type" = "trino-connector",
- "connector.name" = "tpch",
- "tpch.column-naming" = "STANDARD",
- "tpch.splits-per-node" = "32"
+ "trino.connector.name" = "tpch",
+ "trino.tpch.column-naming" = "STANDARD",
+ "trino.tpch.splits-per-node" = "32"
);
```
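`trino.tpch.column-naming = "STANDARD"` makes the generated columns use the TPC-H spec's prefixed names (e.g. `c_custkey` rather than `custkey`), which can be verified with a hedged probe like the following, assuming the connector's usual `sf1` schema:
```sql
-- Check the STANDARD column naming on a small generated table.
SELECT c_custkey, c_name FROM tpch.sf1.customer LIMIT 5;
```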
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs-community/current/how-to-contribute/trino-connector-developer-guide.md b/i18n/zh-CN/docusaurus-plugin-content-docs-community/current/how-to-contribute/trino-connector-developer-guide.md
index bb3d5d631a..e543578055 100644
--- a/i18n/zh-CN/docusaurus-plugin-content-docs-community/current/how-to-contribute/trino-connector-developer-guide.md
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs-community/current/how-to-contribute/trino-connector-developer-guide.md
@@ -82,12 +82,12 @@ Trino does not provide officially compiled Connector plugins, so we need to build them ourselves
```sql
create catalog kafka_tpch properties (
- "type"="trino-connector",
- -- The following four properties are derived from trino and are consistent with the properties in etc/catalog/kafka.properties of trino.
- "connector.name"="kafka",
- "kafka.table-names"="tpch.customer,tpch.orders,tpch.lineitem,tpch.part,tpch.partsupp,tpch.supplier,tpch.nation,tpch.region",
- "kafka.nodes"="localhost:9092",
- "kafka.table-description-dir" = "/mnt/datadisk1/fangtiewei"
+ "type"="trino-connector",
+ -- The following four properties are derived from trino and are consistent with the properties in etc/catalog/kafka.properties of trino, but each must be prefixed with "trino."
+ "trino.connector.name"="kafka",
+ "trino.kafka.table-names"="tpch.customer,tpch.orders,tpch.lineitem,tpch.part,tpch.partsupp,tpch.supplier,tpch.nation,tpch.region",
+ "trino.kafka.nodes"="localhost:9092",
+ "trino.kafka.table-description-dir" = "/mnt/datadisk1/fangtiewei"
);
```
@@ -108,27 +108,24 @@ Trino does not provide officially compiled Connector plugins, so we need to build them ourselves
```sql
create catalog emr_hive properties (
"type"="trino-connector",
-
- "connector.name"="hive",
- "hive.metastore.uri"="thrift://ip:port",
- "hive.config.resources"="/path/to/core-site.xml,/path/to/hdfs-site.xml"
+ "trino.connector.name"="hive",
+ "trino.hive.metastore.uri"="thrift://ip:port",
+ "trino.hive.config.resources"="/path/to/core-site.xml,/path/to/hdfs-site.xml"
);
```
> Notes when using the Hive plugin:
> - You need to add the Hadoop user to the JVM parameters: -DHADOOP_USER_NAME=ftw, which can be configured at the end of the JAVA_OPTS_FOR_JDK_17 parameter in the fe.conf / be.conf file, such as JAVA_OPTS_FOR_JDK_17="...-DHADOOP_USER_NAME=ftw"
-
2. MySQL
```sql
create catalog trino_mysql properties (
"type"="trino-connector",
-
- "connector.name"="mysql",
- "connection-url" = "jdbc:mysql://ip:port",
- "connection-user" = "user",
- "connection-password" = "password"
+ "trino.connector.name"="mysql",
+ "trino.connection-url" = "jdbc:mysql://ip:port",
+ "trino.connection-user" = "user",
+ "trino.connection-password" = "password"
);
```
@@ -140,24 +137,22 @@ Trino does not provide officially compiled Connector plugins, so we need to build them ourselves
```sql
create catalog kafka properties (
"type"="trino-connector",
-
- "connector.name"="kafka",
- "kafka.nodes"="localhost:9092",
- "kafka.table-description-supplier"="CONFLUENT",
- "kafka.confluent-schema-registry-url"="http://localhost:8081",
- "kafka.hide-internal-columns" = "false"
+ "trino.connector.name"="kafka",
+ "trino.kafka.nodes"="localhost:9092",
+ "trino.kafka.table-description-supplier"="CONFLUENT",
+ "trino.kafka.confluent-schema-registry-url"="http://localhost:8081",
+ "trino.kafka.hide-internal-columns" = "false"
);
```
-
4. BigQuery
```sql
create catalog bigquery_catalog properties (
"type"="trino-connector",
-
- "connector.name"="bigquery",
- "bigquery.project-id"="steam-circlet-388406",
- "bigquery.credentials-file"="/path/to/application_default_credentials.json"
+ "trino.connector.name"="bigquery",
+ "trino.bigquery.project-id"="steam-circlet-388406",
+ "trino.bigquery.credentials-file"="/path/to/application_default_credentials.json"
);
```
+
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/datalake-analytics/tpcds.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/datalake-analytics/tpcds.md
index 69a0853ef5..2e88c532cc 100644
--- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/datalake-analytics/tpcds.md
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/datalake-analytics/tpcds.md
@@ -66,8 +66,8 @@ mvn clean install -DskipTests
```sql
CREATE CATALOG `tpcds` PROPERTIES (
"type" = "trino-connector",
- "connector.name" = "tpcds",
- "tpcds.split-count" = "32"
+ "trino.connector.name" = "tpcds",
+ "trino.tpcds.split-count" = "32"
);
```
@@ -135,7 +135,7 @@ mysql> SHOW TABLES;
These tables can be queried directly with SELECT statements.
-:::tips
+:::tip
The data in these pre-built datasets is not actually stored; it is generated on the fly at query time, so the datasets are not suitable for direct benchmark testing. They are intended for writing the dataset into other target tables via `INSERT INTO SELECT` (such as Doris internal tables, Hive, Iceberg, or any other destination Doris can write to) and then running performance tests against those target tables.
:::
@@ -173,7 +173,7 @@ CREATE TABLE hive.tpcds100.web_sales PROPERTIES("file_format" = "pa
CREATE TABLE hive.tpcds100.web_site PROPERTIES("file_format" = "parquet") AS SELECT * FROM tpcds.sf100.web_site ;
```
-:::tips
+:::tip
On a Doris cluster with three 16-core BE nodes, creating a TPCDS 1000 Hive dataset takes roughly 3 to 4 hours.
:::
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/datalake-analytics/tpch.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/datalake-analytics/tpch.md
index 8dc31766fc..d3e0749a79 100644
--- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/datalake-analytics/tpch.md
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/datalake-analytics/tpch.md
@@ -66,9 +66,9 @@ mvn clean install -DskipTests
```sql
CREATE CATALOG `tpch` PROPERTIES (
"type" = "trino-connector",
- "connector.name" = "tpch",
- "tpch.column-naming" = "STANDARD",
- "tpch.splits-per-node" = "32"
+ "trino.connector.name" = "tpch",
+ "trino.tpch.column-naming" = "STANDARD",
+ "trino.tpch.splits-per-node" = "32"
);
```
@@ -121,7 +121,7 @@ mysql> SHOW TABLES;
These tables can be queried directly with SELECT statements.
-:::tips
+:::tip
The data in these pre-built datasets is not actually stored; it is generated on the fly at query time, so the datasets are not suitable for direct benchmark testing. They are intended for writing the dataset into other target tables via `INSERT INTO SELECT` (such as Doris internal tables, Hive, Iceberg, or any other destination Doris can write to) and then running performance tests against those target tables.
:::
@@ -142,7 +142,7 @@ CREATE TABLE hive.tpch100.region PROPERTIES("file_format" = "parquet") AS SELE
CREATE TABLE hive.tpch100.supplier PROPERTIES("file_format" = "parquet") AS SELECT * FROM tpch.sf100.supplier ;
```
-:::tips
+:::tip
On a Doris cluster with three 16-core BE nodes, creating a TPCH 1000 Hive dataset takes roughly 25 minutes, and TPCH 10000 takes roughly 4 to 5 hours.
:::
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]