This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new b00016d984 [docs] Add missing document for `migrate` related
procedures (#5073)
b00016d984 is described below
commit b00016d984cd52b45658b0e7ed17aa3c9b9a39b0
Author: Yubin Li <[email protected]>
AuthorDate: Mon Feb 17 19:51:29 2025 +0800
[docs] Add missing document for `migrate` related procedures (#5073)
---
docs/content/flink/procedures.md | 73 ++++++++++++++++++++++
docs/content/spark/procedures.md | 6 +-
.../spark/procedure/MigrateDatabaseProcedure.java | 2 +-
3 files changed, 77 insertions(+), 4 deletions(-)
diff --git a/docs/content/flink/procedures.md b/docs/content/flink/procedures.md
index 67a4290fb9..db364ff0e3 100644
--- a/docs/content/flink/procedures.md
+++ b/docs/content/flink/procedures.md
@@ -505,6 +505,79 @@ All available procedures are listed below.
CALL sys.purge_files(`table` => 'default.T')
</td>
</tr>
+ <tr>
+ <td>migrate_database</td>
+ <td>
+ -- for Flink 1.18<br/>
+ -- migrate all hive tables in database to paimon tables.<br/>
+ CALL [catalog].sys.migrate_database('connector', 'dbIdentifier',
'options'[, <parallelism>])<br/><br/>
+ -- for Flink 1.19 and later<br/>
+ -- migrate all hive tables in database to paimon tables.<br/>
+ CALL [catalog].sys.migrate_database(connector => 'connector',
source_database => 'dbIdentifier', options => 'options'[, <parallelism =>
parallelism>])<br/><br/>
+ </td>
+ <td>
+            To migrate all hive tables in the database to paimon tables. Arguments:
+ <li>connector: the origin database's type to be migrated, such as
hive. Cannot be empty.</li>
+ <li>source_database: name of the origin database to be migrated.
Cannot be empty.</li>
+ <li>options: the table options of the paimon table to migrate.</li>
+ <li>parallelism: the parallelism for migrate process, default is
core numbers of machine.</li>
+ </td>
+ <td>
+ -- for Flink 1.18<br/>
+            CALL sys.migrate_database('hive', 'db01', 'file.format=parquet', 6)<br/><br/>
+ -- for Flink 1.19 and later<br/>
+ CALL sys.migrate_database(connector => 'hive', source_database =>
'db01', options => 'file.format=parquet', parallelism => 6)
+ </td>
+ </tr>
+ <tr>
+ <td>migrate_table</td>
+ <td>
+ -- for Flink 1.18<br/>
+ -- migrate hive table to a paimon table.<br/>
+ CALL [catalog].sys.migrate_table('connector', 'tableIdentifier',
'options'[, <parallelism>])<br/><br/>
+ -- for Flink 1.19 and later<br/>
+ -- migrate hive table to a paimon table.<br/>
+ CALL [catalog].sys.migrate_table(connector => 'connector',
source_table => 'tableIdentifier', options => 'options'[, <parallelism =>
parallelism>])<br/><br/>
+ </td>
+ <td>
+            To migrate a hive table to a paimon table. Arguments:
+ <li>connector: the origin table's type to be migrated, such as
hive. Cannot be empty.</li>
+ <li>source_table: name of the origin table to be migrated. Cannot
be empty.</li>
+ <li>options: the table options of the paimon table to migrate.</li>
+ <li>parallelism: the parallelism for migrate process, default is
core numbers of machine.</li>
+ </td>
+ <td>
+ -- for Flink 1.18<br/>
+            CALL sys.migrate_table('hive', 'db01.t1', 'file.format=parquet', 6)<br/><br/>
+ -- for Flink 1.19 and later<br/>
+ CALL sys.migrate_table(connector => 'hive', source_table =>
'db01.t1', options => 'file.format=parquet', parallelism => 6)
+ </td>
+ </tr>
+ <tr>
+ <td>migrate_file</td>
+ <td>
+ -- for Flink 1.18<br/>
+ -- migrate files from hive table to a paimon table.<br/>
+            CALL [catalog].sys.migrate_file('connector', 'srcTableIdentifier',
'destTableIdentifier'[, <delete_origin>, <parallelism>])<br/><br/>
+ -- for Flink 1.19 and later<br/>
+            -- migrate files from hive table to a paimon table.<br/>
+ CALL [catalog].sys.migrate_file(connector => 'connector',
source_table => 'srcTableIdentifier', target_table => 'destTableIdentifier'[,
<delete_origin => bool>, <parallelism => parallelism>])<br/><br/>
+ </td>
+ <td>
+            To migrate files from a hive table to a paimon table. Arguments:
+ <li>connector: the origin table's type to be migrated, such as
hive. Cannot be empty.</li>
+ <li>source_table: name of the origin table to migrate. Cannot be
empty.</li>
+            <li>target_table: name of the target paimon table to migrate the
files to. Cannot be empty.</li>
+            <li>delete_origin: if target_table is set, delete_origin decides
whether to delete the origin table metadata from HMS after the migration.
Default is true.</li>
+ <li>parallelism: the parallelism for migrate process, default is
core numbers of machine.</li>
+ </td>
+ <td>
+ -- for Flink 1.18<br/>
+            CALL sys.migrate_file('hive', 'default.T', 'default.T2', true, 6)<br/><br/>
+ -- for Flink 1.19 and later<br/>
+ CALL sys.migrate_file(connector => 'hive', source_table =>
'default.T', target_table => 'default.T2', delete_origin => true, parallelism
=> 6)
+ </td>
+ </tr>
<tr>
<td>expire_snapshots</td>
<td>
diff --git a/docs/content/spark/procedures.md b/docs/content/spark/procedures.md
index 805778b980..0dd009005b 100644
--- a/docs/content/spark/procedures.md
+++ b/docs/content/spark/procedures.md
@@ -207,8 +207,8 @@ This section introduce all available spark procedures about
paimon.
<tr>
<td>migrate_database</td>
<td>
- Migrate hive table to a paimon table. Arguments:
- <li>source_type: the origin table's type to be migrated, such as
hive. Cannot be empty.</li>
+ Migrate all hive tables in database to paimon tables. Arguments:
+ <li>source_type: the origin database's type to be migrated, such
as hive. Cannot be empty.</li>
<li>database: name of the origin database to be migrated. Cannot
be empty.</li>
<li>options: the table options of the paimon table to migrate.</li>
<li>options_map: Options map for adding key-value options which is
a map.</li>
@@ -240,7 +240,7 @@ This section introduce all available spark procedures about
paimon.
<li>delete_origin: If had set target_table, can set delete_origin
to decide whether delete the origin table metadata from hms after migrate.
Default is true</li>
<li>parallelism: the parallelism for migrate process, default is
core numbers of machine.</li>
</td>
- <td>CALL sys.migrate_file(source_type => 'hive', table => 'default.T',
delete_origin => true, parallelism => 6)</td>
+ <td>CALL sys.migrate_file(connector => 'hive', source_table =>
'default.hivetable', target_table => 'default.paimontable', delete_origin =>
true, parallelism => 6)</td>
</tr>
<tr>
<td>remove_orphan_files</td>
diff --git
a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/procedure/MigrateDatabaseProcedure.java
b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/procedure/MigrateDatabaseProcedure.java
index a9509fdf36..0989c9d790 100644
---
a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/procedure/MigrateDatabaseProcedure.java
+++
b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/procedure/MigrateDatabaseProcedure.java
@@ -112,7 +112,7 @@ public class MigrateDatabaseProcedure extends BaseProcedure
{
successCount++;
} catch (Exception e) {
errorCount++;
- LOG.error("Call migrate_database error:" + e.getMessage());
+ LOG.error("Call migrate_database error:{}", e.getMessage());
}
}
String retStr =