JingsongLi commented on a change in pull request #1393:
URL: https://github.com/apache/iceberg/pull/1393#discussion_r479866703
##########
File path: flink/src/main/java/org/apache/iceberg/flink/FlinkCatalog.java
##########
@@ -320,19 +339,167 @@ public void renameTable(ObjectPath tablePath, String newTableName, boolean ignor
}
}
- /**
- * TODO Add partitioning to the Flink DDL parser.
- */
@Override
  public void createTable(ObjectPath tablePath, CatalogBaseTable table, boolean ignoreIfExists)
- throws CatalogException {
- throw new UnsupportedOperationException("Not support createTable now.");
+ throws CatalogException, TableAlreadyExistException {
+ validateFlinkTable(table);
+
+ Schema icebergSchema = FlinkSchemaUtil.convert(table.getSchema());
+    PartitionSpec spec = toPartitionSpec(((CatalogTable) table).getPartitionKeys(), icebergSchema);
+
+ ImmutableMap.Builder<String, String> properties = ImmutableMap.builder();
+ String location = null;
+ for (Map.Entry<String, String> entry : table.getOptions().entrySet()) {
+ if ("location".equalsIgnoreCase(entry.getKey())) {
+ location = entry.getValue();
+ } else {
+ properties.put(entry.getKey(), entry.getValue());
+ }
+ }
+
+ try {
+ icebergCatalog.createTable(
+ toIdentifier(tablePath),
+ icebergSchema,
+ spec,
+ location,
+ properties.build());
+ } catch (AlreadyExistsException e) {
+ throw new TableAlreadyExistException(getName(), tablePath, e);
+ }
}
@Override
  public void alterTable(ObjectPath tablePath, CatalogBaseTable newTable, boolean ignoreIfNotExists)
- throws CatalogException {
- throw new UnsupportedOperationException("Not support alterTable now.");
+ throws CatalogException, TableNotExistException {
+ validateFlinkTable(newTable);
+ Table icebergTable = getIcebergTable(tablePath);
+ CatalogTable table = toCatalogTable(icebergTable);
+
+    // Currently, Flink SQL only supports altering table properties.
+
+ if (!table.getSchema().equals(newTable.getSchema())) {
+      throw new UnsupportedOperationException("Altering schema is not supported yet.");
+ }
+
+    if (!table.getPartitionKeys().equals(((CatalogTable) newTable).getPartitionKeys())) {
+      throw new UnsupportedOperationException("Altering partition keys is not supported yet.");
+ }
+
+ Map<String, String> oldOptions = table.getOptions();
+ Map<String, String> setProperties = Maps.newHashMap();
+
+ String setLocation = null;
+ String setSnapshotId = null;
+ String pickSnapshotId = null;
+
+ for (Map.Entry<String, String> entry : newTable.getOptions().entrySet()) {
+ String key = entry.getKey();
+ String value = entry.getValue();
+
+ if (Objects.equals(value, oldOptions.get(key))) {
+ continue;
+ }
+
+ if ("location".equalsIgnoreCase(key)) {
+ setLocation = value;
+ } else if ("current-snapshot-id".equalsIgnoreCase(key)) {
+ setSnapshotId = value;
+ } else if ("cherry-pick-snapshot-id".equalsIgnoreCase(key)) {
+ pickSnapshotId = value;
+ } else {
+ setProperties.put(key, value);
+ }
+ }
+
+ oldOptions.keySet().forEach(k -> {
+ if (!newTable.getOptions().containsKey(k)) {
+ setProperties.put(k, null);
+ }
+ });
+
+    commitChanges(icebergTable, setLocation, setSnapshotId, pickSnapshotId, setProperties);
+ }
+
+ private static void validateFlinkTable(CatalogBaseTable table) {
+    Preconditions.checkArgument(table instanceof CatalogTable, "The Table should be a CatalogTable.");
+
+ TableSchema schema = table.getSchema();
+ schema.getTableColumns().forEach(column -> {
+ if (column.isGenerated()) {
+        throw new UnsupportedOperationException("Creating table with computed columns is not supported yet.");
+ }
+ });
+
+ if (!schema.getWatermarkSpecs().isEmpty()) {
+      throw new UnsupportedOperationException("Creating table with watermark specs is not supported yet.");
+ }
+
+ if (schema.getPrimaryKey().isPresent()) {
Review comment:
Thanks @kbendick.
At present, in Flink, the primary key is mainly used for processing CDC streams.
- For example, when a user reads a CDC stream from a Kafka source, the user can define a primary key. The downstream can then perform efficient dynamic table / static table conversion (restoring the original static table) keyed on that primary key.
- For example, when the stream data (CDC) is written to a JDBC database, the user can define a primary key. Flink can then write the data to the database in upsert mode.
I think that if Iceberg supports native CDC processing in the future, we may be able to use it there as well.
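
To make the two bullet points above concrete, here is a minimal sketch of the pattern in Flink SQL, driven from Java. It assumes the Kafka connector with the debezium-json changelog format and the JDBC connector are on the classpath; the table names, topic, and endpoints are hypothetical.

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class CdcPrimaryKeyExample {
  public static void main(String[] args) {
    TableEnvironment tEnv = TableEnvironment.create(
        EnvironmentSettings.newInstance().inStreamingMode().build());

    // Kafka source carrying a CDC stream. PRIMARY KEY ... NOT ENFORCED tells
    // the planner which key to use when converting the changelog back into a
    // table.
    tEnv.executeSql(
        "CREATE TABLE orders_cdc (" +
        "  order_id BIGINT," +
        "  amount DECIMAL(10, 2)," +
        "  PRIMARY KEY (order_id) NOT ENFORCED" +
        ") WITH (" +
        "  'connector' = 'kafka'," +
        "  'topic' = 'orders'," +
        "  'properties.bootstrap.servers' = 'localhost:9092'," +
        "  'format' = 'debezium-json'" +
        ")");

    // JDBC sink declaring the same primary key, so Flink writes the changelog
    // in upsert mode instead of append-only inserts.
    tEnv.executeSql(
        "CREATE TABLE orders_mirror (" +
        "  order_id BIGINT," +
        "  amount DECIMAL(10, 2)," +
        "  PRIMARY KEY (order_id) NOT ENFORCED" +
        ") WITH (" +
        "  'connector' = 'jdbc'," +
        "  'url' = 'jdbc:mysql://localhost:3306/db'," +
        "  'table-name' = 'orders_mirror'" +
        ")");

    // Restore the static table from the CDC stream and upsert it downstream.
    tEnv.executeSql("INSERT INTO orders_mirror SELECT * FROM orders_cdc");
  }
}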
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]