aokolnychyi commented on a change in pull request #362: Support create and replace transactions in Catalog
URL: https://github.com/apache/incubator-iceberg/pull/362#discussion_r314714596
 
 

 ##########
 File path: hive/src/test/java/org/apache/iceberg/hive/HiveCreateReplaceTableTest.java
 ##########
 @@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.hive;
+
+import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.util.HashMap;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.Transaction;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.exceptions.AlreadyExistsException;
+import org.apache.iceberg.exceptions.NoSuchTableException;
+import org.apache.iceberg.types.Types;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+import org.junit.rules.TemporaryFolder;
+
+import static org.apache.iceberg.PartitionSpec.builderFor;
+import static org.apache.iceberg.types.Types.NestedField.required;
+
+public class HiveCreateReplaceTableTest {
+
+  private static final String DB_NAME = "hivedb";
+  private static final String TABLE_NAME = "tbl";
+  private static final TableIdentifier TABLE_IDENTIFIER = TableIdentifier.of(DB_NAME, TABLE_NAME);
+  private static final Schema SCHEMA = new Schema(
+      required(3, "id", Types.IntegerType.get()),
+      required(4, "data", Types.StringType.get())
+  );
+  private static final PartitionSpec SPEC = builderFor(SCHEMA)
+      .identity("id")
+      .build();
+
+  private static TestHiveMetastore metastore;
+  private static HiveMetaStoreClient metastoreClient;
+  private static HiveConf hiveConf;
+  private static HiveCatalog catalog;
+
+  @Rule
+  public ExpectedException exceptionRule = ExpectedException.none();
+  @Rule
+  public TemporaryFolder temp = new TemporaryFolder();
+
+  private String tableLocation;
+
+  @BeforeClass
+  public static void startMetastore() throws Exception {
+    HiveCreateReplaceTableTest.metastore = new TestHiveMetastore();
+    metastore.start();
+    HiveCreateReplaceTableTest.hiveConf = metastore.hiveConf();
+    HiveCreateReplaceTableTest.metastoreClient = new HiveMetaStoreClient(hiveConf);
+    String dbPath = metastore.getDatabasePath(DB_NAME);
+    Database db = new Database(DB_NAME, "description", dbPath, new HashMap<>());
+    metastoreClient.createDatabase(db);
+    HiveCreateReplaceTableTest.catalog = new HiveCatalog(hiveConf);
+  }
+
+  @AfterClass
+  public static void stopMetastore() {
+    catalog.close();
+    HiveCreateReplaceTableTest.catalog = null;
+
+    metastoreClient.close();
+    HiveCreateReplaceTableTest.metastoreClient = null;
+
+    metastore.stop();
+    HiveCreateReplaceTableTest.metastore = null;
+  }
+
+  @Before
+  public void createTableLocation() throws IOException {
+    tableLocation = temp.newFolder("hive-").getPath();
+  }
+
+  @After
+  public void cleanup() {
+    catalog.dropTable(TABLE_IDENTIFIER);
+  }
+
+  @Test
+  public void testCreateTableTxn() {
+    Assert.assertFalse("Table should not exist", catalog.tableExists(TABLE_IDENTIFIER));
+
+    Transaction txn = catalog.newCreateTableTransaction(
+        TABLE_IDENTIFIER, SCHEMA, SPEC, tableLocation, Maps.newHashMap());
+    txn.updateProperties()
+        .set("prop", "value")
+        .commit();
+
+    // verify the table is still not visible before the transaction is committed
+    Assert.assertFalse("Table should not exist", catalog.tableExists(TABLE_IDENTIFIER));
+
+    txn.commitTransaction();
+
+    Table table = catalog.loadTable(TABLE_IDENTIFIER);
+    Assert.assertEquals("Table props should match", "value", table.properties().get("prop"));
+  }
+
+  @Test
+  public void testCreateTableTxnTableCreatedConcurrently() {
+    exceptionRule.expect(RuntimeException.class);
+    exceptionRule.expectMessage("Metastore operation failed");
+
+    Assert.assertFalse("Table should not exist", catalog.tableExists(TABLE_IDENTIFIER));
+
+    Transaction txn = catalog.newCreateTableTransaction(
+        TABLE_IDENTIFIER, SCHEMA, SPEC, tableLocation, Maps.newHashMap());
+
+    // create the table concurrently
+    catalog.createTable(TABLE_IDENTIFIER, SCHEMA, SPEC);
+    Assert.assertTrue("Table should be created", catalog.tableExists(TABLE_IDENTIFIER));
+
+    // expect the transaction to fail
+    txn.commitTransaction();
+  }
+
+  @Test
+  public void testCreateTableTxnTableAlreadyExists() {
+    exceptionRule.expect(AlreadyExistsException.class);
+    exceptionRule.expectMessage("Table already exists: hivedb.tbl");
+
+    Assert.assertFalse("Table should not exist", catalog.tableExists(TABLE_IDENTIFIER));
+
+    // create a table before starting a transaction
+    catalog.createTable(TABLE_IDENTIFIER, SCHEMA, SPEC);
+    Assert.assertTrue("Table should be created", catalog.tableExists(TABLE_IDENTIFIER));
+
+    Transaction txn = catalog.newCreateTableTransaction(
+        TABLE_IDENTIFIER, SCHEMA, SPEC, tableLocation, Maps.newHashMap());
+    // expect the transaction to fail
+    txn.commitTransaction();
+  }
+
+  @Test
+  public void testReplaceTableTxn() {
+    catalog.createTable(TABLE_IDENTIFIER, SCHEMA, SPEC, tableLocation, Maps.newHashMap());
+    Assert.assertTrue("Table should exist", catalog.tableExists(TABLE_IDENTIFIER));
+
+    Transaction txn = catalog.newReplaceTableTransaction(
+        TABLE_IDENTIFIER, SCHEMA, PartitionSpec.unpartitioned(), tableLocation, Maps.newHashMap());
+    txn.commitTransaction();
+
+    Table table = catalog.loadTable(TABLE_IDENTIFIER);
+    Assert.assertEquals("Partition spec should match", PartitionSpec.unpartitioned(), table.spec());
+  }
+
+  @Test
+  public void testReplaceTableTxnTableNotExists() {
+    exceptionRule.expect(NoSuchTableException.class);
+    exceptionRule.expectMessage("No such table: hivedb.tbl");
 
 Review comment:
   I interpret the API in Spark as follows:
   
   **Case 1 (create)**
   1. if exists -> fail
   2. if created concurrently -> fail
   
   **Case 2 (replace)**
   1. if exists -> replace
   2. if not exists -> fail
   3. if deleted/modified concurrently -> up to the catalog
   
   **Case 3 (create or replace)**
   1. if exists -> replace
   2. if not exists -> create
   3. if deleted/modified concurrently -> up to the catalog
   
   It seems appropriate to handle 2.3 and 3.3 differently: I would expect 2.3 to fail rather than succeed, whereas 3.3 should succeed. So exposing a flag and adjusting the logic accordingly seems reasonable; a rough sketch follows below. What's your opinion?
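   
   For illustration, here is a minimal sketch of what a flag-based API could look like, reusing the constants from the test above. The extra `orCreate` parameter on `newReplaceTableTransaction` is an assumption for discussion, not something the PR exposes today:
   
   ```java
   // Hypothetical overload with an `orCreate` flag (assumed, not in this PR).
   // orCreate = false keeps case 2 semantics; orCreate = true gives case 3.
   Transaction replaceTxn = catalog.newReplaceTableTransaction(
       TABLE_IDENTIFIER, SCHEMA, SPEC, tableLocation, Maps.newHashMap(),
       false /* orCreate: fail if the table is dropped concurrently (2.3) */);
   
   Transaction createOrReplaceTxn = catalog.newReplaceTableTransaction(
       TABLE_IDENTIFIER, SCHEMA, SPEC, tableLocation, Maps.newHashMap(),
       true /* orCreate: create the table if it no longer exists (3.3) */);
   ```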
   
