This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 210f869bd [hive] Recommend creating table without location in hive (#1081)
210f869bd is described below
commit 210f869bd940c0669e685ade7da9984e0ddc8c39
Author: Jingsong Lee <[email protected]>
AuthorDate: Sat May 6 15:23:57 2023 +0800
[hive] Recommend creating table without location in hive (#1081)
---
docs/content/engines/hive.md | 121 ++++++++-------------
docs/content/how-to/creating-tables.md | 33 +++---
.../org/apache/paimon/catalog/AbstractCatalog.java | 25 +++--
.../java/org/apache/paimon/hive/HiveSchema.java | 35 +++---
.../org/apache/paimon/hive/PaimonMetaHook.java | 19 +++-
.../org/apache/paimon/hive/CreateTableITCase.java | 4 +-
.../apache/paimon/hive/HiveCatalogITCaseBase.java | 8 +-
7 files changed, 118 insertions(+), 127 deletions(-)
diff --git a/docs/content/engines/hive.md b/docs/content/engines/hive.md
index 1234f0460..44606cb50 100644
--- a/docs/content/engines/hive.md
+++ b/docs/content/engines/hive.md
@@ -80,7 +80,7 @@ There are several ways to add this jar to Hive.
NOTE: If you are using HDFS, make sure that the environment variable
`HADOOP_HOME` or `HADOOP_CONF_DIR` is set.
-## Quick Start with Paimon Hive Catalog
+## Flink SQL: with Paimon Hive Catalog
By using paimon Hive catalog, you can create, drop, select and insert into
paimon tables from Flink. These operations directly affect the corresponding
Hive metastore. Tables created in this way can also be accessed directly from
Hive.
@@ -132,53 +132,23 @@ SELECT * FROM test_table;
*/
```
-Select the paimon table created in hive from flink. [creating a table with Hive SQL]({{< ref "engines/hive#quick-start-with-hive-table" >}}).
+## Hive SQL: access Paimon Tables already in Hive metastore
-```sql
--- Flink SQL CLI
--- Define paimon Hive catalog
-
-CREATE CATALOG my_hive WITH (
- 'type' = 'paimon',
- 'metastore' = 'hive',
- 'uri' = 'thrift://<hive-metastore-host-name>:<port>',
- 'warehouse' = '/path/to/table/store/warehouse'
-);
+Run the following Hive SQL in Hive CLI to access the created table.
--- Use paimon Hive catalog
-
-USE CATALOG my_hive;
-
--- Read a table in paimon Hive catalog (use "default" database by default)
--- Read records from hive_test_table
+```sql
+-- Assume that paimon-hive-connector-<hive-version>-{{< version >}}.jar is already in auxlib directory.
+-- List tables in Hive
+-- (you might need to switch to "default" database if you're not there by default)
-SELECT * FROM hive_test_table;
+SHOW TABLES;
/*
-+---+--------+
-| a | b |
-+---+--------+
-| 1 | Paimon |
-+---+--------+
+OK
+test_table
*/
-```
-
-## Quick Start with Hive Table
-
-* To access existing paimon table, you can also register them as external and internal tables in Hive. Run the following Hive SQL in Hive CLI.
-
-```sql
--- Assume that paimon-hive-connector-{{< version >}}.jar is already in auxlib directory.
--- Let's use the test_table created in the above section.
--- To create an external table, you don't need to specify any column or table properties.
--- Pointing the location to the path of table is enough.
-
-CREATE EXTERNAL TABLE external_test_table
-STORED BY 'org.apache.paimon.hive.PaimonStorageHandler'
-LOCATION '/path/to/table/store/warehouse/default.db/test_table';
-
--- Read records from external_test_table
+-- Read records from test_table
SELECT a, b FROM test_table ORDER BY a;
@@ -200,60 +170,61 @@ OK
2 Store
3 Paimon
*/
--- Insert records into test table from other table
-
-INSERT INTO test_table SELECT a, b FROM test_table;
-
-SELECT a, b FROM test_table ORDER BY a;
-
-/*
-OK
-1 Table
-1 Table
-2 Store
-2 Store
-3 Paimon
-3 Paimon
-*/
-
```
-* To create paimon tables that do not exist, you can create them as external and internal tables in Hive. Run the following Hive SQL in Hive CLI.
+## Hive SQL: create new Paimon Tables
+
+You can create new paimon tables in Hive. Run the following Hive SQL in Hive CLI.
```sql
-- Assume that paimon-hive-connector-{{< version >}}.jar is already in auxlib directory.
--- Let's create a new external table that doesn't exist in paimon.
--- Need to specify the location to the path of table.
+-- Let's create a new paimon table.
+
+SET hive.metastore.warehouse.dir=warehouse_path;
-CREATE EXTERNAL TABLE hive_test_table(
+CREATE TABLE hive_test_table(
a INT COMMENT 'The a field',
b STRING COMMENT 'The b field'
)
STORED BY 'org.apache.paimon.hive.PaimonStorageHandler'
-LOCATION '/path/to/table/store/warehouse/default.db/hive_test_table';
+```
--- Insert records into hive_test_table
+## Hive SQL: access Paimon Tables by External Table
-INSERT INTO hive_test_table VALUES (1, 'Paimon');
+To access existing paimon table, you can also register them as external tables in Hive. Run the following Hive SQL in Hive CLI.
--- Read records from hive_test_table
+```sql
+-- Assume that paimon-hive-connector-{{< version >}}.jar is already in auxlib directory.
+-- Let's use the test_table created in the above section.
+-- To create an external table, you don't need to specify any column or table properties.
+-- Pointing the location to the path of table is enough.
-SELECT a, b FROM hive_test_table;
+CREATE EXTERNAL TABLE external_test_table
+STORED BY 'org.apache.paimon.hive.PaimonStorageHandler'
+LOCATION '/path/to/table/store/warehouse/default.db/test_table';
+
+-- Read records from external_test_table
+
+SELECT a, b FROM external_test_table ORDER BY a;
/*
OK
-1 Paimon
+1 Table
+2 Store
*/
--- Let's create a new internal table that doesn't exist in paimon.
--- Need to specify the location to the path of table.
+-- Insert records into test table
-CREATE TABLE hive_internal_test_table(
- a INT COMMENT 'The a field',
- b STRING COMMENT 'The b field'
-)
-STORED BY 'org.apache.paimon.hive.PaimonStorageHandler'
-LOCATION '/path/to/table/store/warehouse/default.db/hive_internal_test_table';
+INSERT INTO external_test_table VALUES (3, 'Paimon');
+
+SELECT a, b FROM external_test_table ORDER BY a;
+
+/*
+OK
+1 Table
+2 Store
+3 Paimon
+*/
```
diff --git a/docs/content/how-to/creating-tables.md b/docs/content/how-to/creating-tables.md
index a3391537b..95a5a6f7e 100644
--- a/docs/content/how-to/creating-tables.md
+++ b/docs/content/how-to/creating-tables.md
@@ -68,13 +68,17 @@ CREATE TABLE MyTable (
{{< tab "Hive" >}}
```sql
+SET hive.metastore.warehouse.dir=warehouse_path;
+
CREATE TABLE MyTable (
user_id BIGINT,
item_id BIGINT,
behavior STRING,
dt STRING,
hh STRING
-) TBLPROPERTIES (
+)
+STORED BY 'org.apache.paimon.hive.PaimonStorageHandler'
+TBLPROPERTIES (
'primary-key' = 'dt,hh,user_id'
);
```
@@ -127,13 +131,17 @@ CREATE TABLE MyTable (
{{< tab "Hive" >}}
```sql
+SET hive.metastore.warehouse.dir=warehouse_path;
+
CREATE TABLE MyTable (
user_id BIGINT,
item_id BIGINT,
behavior STRING,
dt STRING,
hh STRING
-) TBLPROPERTIES (
+)
+STORED BY 'org.apache.paimon.hive.PaimonStorageHandler'
+TBLPROPERTIES (
'primary-key' = 'dt,hh,user_id',
'partition'='dt,hh'
);
@@ -351,7 +359,9 @@ CREATE TABLE MyTable (
behavior STRING,
dt STRING,
hh STRING
-) TBLPROPERTIES (
+)
+STORED BY 'org.apache.paimon.hive.PaimonStorageHandler'
+TBLPROPERTIES (
'primary-key' = 'dt,hh,user_id',
'partition'='dt,hh',
'bucket' = '2',
@@ -415,23 +425,14 @@ val dataset =
spark.read.format("paimon").load("hdfs://path/to/table")
{{< tab "Hive" >}}
-* To access existing paimon table, you can also register them as external and internal tables in Hive. The following SQL creates an external table named `my_table`, where the base path of table files is `hdfs://path/to/table`. As schemas are stored in table files, users do not need to write column definitions.
+To access existing paimon table, you can also register them as external tables in Hive. The following SQL creates an
+external table named `my_table`, where the base path of table files is `hdfs://path/to/table`. As schemas are stored
+in table files, users do not need to write column definitions.
```sql
CREATE EXTERNAL TABLE my_table
STORED BY 'org.apache.paimon.hive.PaimonStorageHandler'
-LOCATION 'hdfs://path/to/table';
-```
-
-* To create paimon tables that do not exist, you can create them as external and internal tables in Hive. The following SQL creates an external table named `my_table`, where the base path of table files is `hdfs://path/to/table`.
-
-```sql
-CREATE EXTERNAL TABLE my_table(
- a INT COMMENT 'The a field',
- b STRING COMMENT 'The b field'
-)
-STORED BY 'org.apache.paimon.hive.PaimonStorageHandler'
-LOCATION 'hdfs://path/to/table';
+LOCATION 'hdfs:///path/to/table';
```
{{< /tab >}}
diff --git
a/paimon-core/src/main/java/org/apache/paimon/catalog/AbstractCatalog.java
b/paimon-core/src/main/java/org/apache/paimon/catalog/AbstractCatalog.java
index 9e3d25b43..fb81cb0b6 100644
--- a/paimon-core/src/main/java/org/apache/paimon/catalog/AbstractCatalog.java
+++ b/paimon-core/src/main/java/org/apache/paimon/catalog/AbstractCatalog.java
@@ -85,7 +85,7 @@ public abstract class AbstractCatalog implements Catalog {
@VisibleForTesting
public Path databasePath(String database) {
- return new Path(warehouse(), database + DB_SUFFIX);
+ return databasePath(warehouse(), database);
}
protected abstract String warehouse();
@@ -95,13 +95,7 @@ public abstract class AbstractCatalog implements Catalog {
@VisibleForTesting
public Path getDataTableLocation(Identifier identifier) {
- if (identifier.getObjectName().contains(SYSTEM_TABLE_SPLITTER)) {
- throw new IllegalArgumentException(
- String.format(
- "Table name[%s] cannot contain '%s' separator",
- identifier.getObjectName(),
SYSTEM_TABLE_SPLITTER));
- }
- return new Path(databasePath(identifier.getDatabaseName()),
identifier.getObjectName());
+ return dataTableLocation(warehouse(), identifier);
}
private boolean isSystemTable(Identifier identifier) {
@@ -130,4 +124,19 @@ public abstract class AbstractCatalog implements Catalog {
}
return splits;
}
+
+ public static Path dataTableLocation(String warehouse, Identifier
identifier) {
+ if (identifier.getObjectName().contains(SYSTEM_TABLE_SPLITTER)) {
+ throw new IllegalArgumentException(
+ String.format(
+ "Table name[%s] cannot contain '%s' separator",
+ identifier.getObjectName(),
SYSTEM_TABLE_SPLITTER));
+ }
+ return new Path(
+ databasePath(warehouse, identifier.getDatabaseName()),
identifier.getObjectName());
+ }
+
+ public static Path databasePath(String warehouse, String database) {
+ return new Path(warehouse, database + DB_SUFFIX);
+ }
}
diff --git
a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/HiveSchema.java
b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/HiveSchema.java
index dba45389a..063a33916 100644
---
a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/HiveSchema.java
+++
b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/HiveSchema.java
@@ -90,24 +90,7 @@ public class HiveSchema {
/** Extract {@link HiveSchema} from Hive serde properties. */
public static HiveSchema extract(@Nullable Configuration configuration,
Properties properties) {
String location =
properties.getProperty(hive_metastoreConstants.META_TABLE_LOCATION);
- if (location == null) {
- String tableName =
properties.getProperty(hive_metastoreConstants.META_TABLE_NAME);
- throw new UnsupportedOperationException(
- "Location property is missing for table "
- + tableName
- + ". Currently Paimon only supports hive table
location property must be set.");
- }
- Path path = new Path(location);
- Options options = PaimonJobConf.extractCatalogConfig(configuration);
- options.set(CoreOptions.PATH, location);
- CatalogContext context = CatalogContext.create(options, configuration);
- Optional<TableSchema> tableSchema;
- try {
- tableSchema = new SchemaManager(FileIO.get(path, context),
path).latest();
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
-
+ Optional<TableSchema> tableSchema = getExistsSchema(configuration,
location);
String columnProperty =
properties.getProperty(serdeConstants.LIST_COLUMNS);
// Create hive external table with empty ddl
if (StringUtils.isEmpty(columnProperty)) {
@@ -153,6 +136,22 @@ public class HiveSchema {
return new HiveSchema(builder.build());
}
+ private static Optional<TableSchema> getExistsSchema(
+ @Nullable Configuration configuration, @Nullable String location) {
+ if (location == null) {
+ return Optional.empty();
+ }
+ Path path = new Path(location);
+ Options options = PaimonJobConf.extractCatalogConfig(configuration);
+ options.set(CoreOptions.PATH, location);
+ CatalogContext context = CatalogContext.create(options, configuration);
+ try {
+ return new SchemaManager(FileIO.get(path, context), path).latest();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
private static void checkSchemaMatched(
List<String> names, List<TypeInfo> typeInfos, TableSchema
tableSchema) {
List<String> ddlNames = new ArrayList<>(names);
diff --git
a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonMetaHook.java
b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonMetaHook.java
index 10bbc4483..31a675a6c 100644
---
a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonMetaHook.java
+++
b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonMetaHook.java
@@ -19,7 +19,9 @@
package org.apache.paimon.hive;
import org.apache.paimon.CoreOptions;
+import org.apache.paimon.catalog.AbstractCatalog;
import org.apache.paimon.catalog.CatalogContext;
+import org.apache.paimon.catalog.Identifier;
import org.apache.paimon.fs.FileIO;
import org.apache.paimon.fs.Path;
import org.apache.paimon.hive.mapred.PaimonInputFormat;
@@ -30,6 +32,7 @@ import org.apache.paimon.schema.SchemaManager;
import org.apache.paimon.schema.TableSchema;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -49,7 +52,9 @@ import static
org.apache.paimon.hive.HiveTypeUtils.typeInfoToLogicalType;
* formats.
*/
public class PaimonMetaHook implements HiveMetaHook {
+
private static final Logger LOG =
LoggerFactory.getLogger(PaimonMetaHook.class);
+
private static final String COMMENT = "comment";
private final Configuration conf;
@@ -62,8 +67,8 @@ public class PaimonMetaHook implements HiveMetaHook {
if (table.getPartitionKeysSize() != 0) {
throw new MetaException(
"Paimon currently does not support creating partitioned
table "
- + "with PARTITIONED BY clause. If you want to
query from a partitioned table, "
- + "please add partition columns into the ordinary
table columns.");
+ + "with PARTITIONED BY clause. If you want to
create a partitioned table, "
+ + "please set partition fields in properties.");
}
// hive ql parse cannot recognize input near '$' in table name, no
need to add paimon system
@@ -72,7 +77,15 @@ public class PaimonMetaHook implements HiveMetaHook {
table.getSd().setInputFormat(PaimonInputFormat.class.getCanonicalName());
table.getSd().setOutputFormat(PaimonOutputFormat.class.getCanonicalName());
- Path path = new Path(table.getSd().getLocation());
+ String location = table.getSd().getLocation();
+ if (location == null) {
+ String warehouse =
conf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname);
+ Identifier identifier = Identifier.create(table.getDbName(),
table.getTableName());
+ location = AbstractCatalog.dataTableLocation(warehouse,
identifier).toUri().toString();
+ table.getSd().setLocation(location);
+ }
+
+ Path path = new Path(location);
CatalogContext context = catalogContext(table);
FileIO fileIO;
try {
diff --git
a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/CreateTableITCase.java
b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/CreateTableITCase.java
index 37f2dfc8d..a831dccca 100644
---
a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/CreateTableITCase.java
+++
b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/CreateTableITCase.java
@@ -130,8 +130,8 @@ public class CreateTableITCase extends HiveTestBase {
.hasRootCauseInstanceOf(MetaException.class)
.hasRootCauseMessage(
"Paimon currently does not support creating
partitioned table "
- + "with PARTITIONED BY clause. If you want to
query from a partitioned table, "
- + "please add partition columns into the
ordinary table columns.");
+ + "with PARTITIONED BY clause. If you want to
create a partitioned table, "
+ + "please set partition fields in
properties.");
}
@Test
diff --git
a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveCatalogITCaseBase.java
b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveCatalogITCaseBase.java
index 530f856c7..a39bdf2f9 100644
---
a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveCatalogITCaseBase.java
+++
b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveCatalogITCaseBase.java
@@ -266,14 +266,12 @@ public abstract class HiveCatalogITCaseBase {
@Test
public void testHiveCreateAndFlinkRead() throws Exception {
+ hiveShell.execute("SET hive.metastore.warehouse.dir=" + path);
hiveShell.execute(
- "CREATE EXTERNAL TABLE hive_test_table ( a INT, b STRING ) "
+ "CREATE TABLE hive_test_table ( a INT, b STRING ) "
+ "STORED BY '"
+ PaimonStorageHandler.class.getName()
- + "'"
- + "LOCATION '"
- + path
- + "/test_db.db/hive_test_table'");
+ + "'");
hiveShell.execute("INSERT INTO hive_test_table VALUES (1, 'Apache'),
(2, 'Paimon')");
List<Row> actual = collect("SELECT * FROM hive_test_table");
Assertions.assertThat(actual).contains(Row.of(1, "Apache"), Row.of(2,
"Paimon"));