jacques-n commented on a change in pull request #1849: URL: https://github.com/apache/iceberg/pull/1849#discussion_r535740503
########## File path: api/src/main/java/org/apache/iceberg/catalog/SupportsCatalogTransactions.java ########## @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.catalog; + +import java.util.Collections; +import java.util.Set; + +/** + * Catalog methods for working with catalog-level transactions. + * + * <p>Catalog implementations are not required to support catalog-level transactional state. + * If they do, they may support one or more {@code IsolationLevel}s and one or more + * {@code LockingMode}s. + */ +public interface SupportsCatalogTransactions { + + /** + * The level of isolation for a transaction. + */ + enum IsolationLevel { + + /** + * Only read committed data. Reading the same table multiple times + * may result in different committed reads for each read. + */ + READ_COMMITTED, + + /** + * Only read committed data. Reading the same table multiple times will + * result in the same view of those tables. + */ + REPEATED_READ, + + /** + * Only read committed data. A commit will only succeed if there have + * been no changes to data read during the course of the transaction + * prior to the commit operation. + */ + SERIALIZABLE; + } + + /** + * The type of locking mode used by the transaction. + */ + enum LockingMode { + + /** + * Pessimistically lock tables. In this situation, each table being worked on will grab a lock. + * This mode will typically result in a better chance for transactions to complete at the cost + * of throughput and resource consumption. + */ + PESSIMISTIC, + + /** + * Run the transaction with an optimistic behavior. This assumes that most operations will not + * conflict. It typically increases concurrency and throughput while reducing resource + * consumption. + */ + OPTIMISTIC; + } + + /** + * Get the list of {@link LockingMode}s this Catalog supports. + * @return A set of locking modes supported. + */ + default Set<LockingMode> lockingModes() { + return Collections.emptySet(); + } + + /** + * Get the list of {@link IsolationLevel}s this Catalog supports. + * @return A set of locking modes supported. + */ + default Set<IsolationLevel> isolationLevels() { + return Collections.emptySet(); + } + + /** + * Start a new transaction and return a {@link TransactionalCatalog} that supports + * cross table operations. If a {@link Catalog} supports {@link OPTIMISTIC} + * locking mode, this will use that mode. If it only supports {@link PESSIMISTIC}, + * it will use that mode with default values for lock times. + * + * @param isolationLevel The {@link IsolationLevel} to use for the transaction. + * @return A Catalog with an open transaction. + * @throws IllegalArgumentException If the IsolationLevel is not supported. + */ + TransactionalCatalog createTransaction(IsolationLevel isolationLevel); + + /** + * Start a new transaction that holds locks for the life of the transaction. + * + * <p>If supported, this type of transaction allows a much higher likelihood of + * transaction completion but can also result in more failures, lower concurrency + * and deadlocks. + * + * @param isolationLevel The {@link IsolationLevel} to use for the transaction. + * @param lockWaitInMillis How long to wait in milliseconds when grabbing a pessimistic lock. + * @param maxTimeMillis The maximum amount of time the transaction is allowed to be open. + * @param tables An optional list of tables that should be locked immediately for this + * transaction. Note that this can include tables that exist and tables that do not + * exist as this transaction may be adding new tables and want to grab pessimistic locks + * for those operations. + * + * @return A Catalog with an open transaction. + * + * @throws IllegalArgumentException If the IsolationLevel is not supported. Also throws if + * {@code lockWaitInMillis} or {@code maxTimeMillis} are not within supported limits + * (check specific {@link Catalog} documentation for any limits). + * @throws CommitFailedException If the list of {@code tables} cannot be locked. + * @throws ValidationException If the set of tables names includes tables names that are not + * valid. + */ + TransactionalCatalog createLockingTransaction( + IsolationLevel isolationLevel, + long lockWaitInMillis, + long maxTimeMillis, + TableIdentifier...tables); Review comment: I added a little more doc and renamed the argument tablesHint to help clarify the purpose. ########## File path: api/src/main/java/org/apache/iceberg/catalog/SupportsCatalogTransactions.java ########## @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.catalog; + +import java.util.Collections; +import java.util.Set; + +/** + * Catalog methods for working with catalog-level transactions. + * + * <p>Catalog implementations are not required to support catalog-level transactional state. + * If they do, they may support one or more {@code IsolationLevel}s and one or more + * {@code LockingMode}s. + */ +public interface SupportsCatalogTransactions { Review comment: Added some more context in TransactionalCatalog as it seemed like the better place to note this point. ########## File path: api/src/main/java/org/apache/iceberg/catalog/SupportsCatalogTransactions.java ########## @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.catalog; + +import java.util.Collections; +import java.util.Set; + +/** + * Catalog methods for working with catalog-level transactions. + * + * <p>Catalog implementations are not required to support catalog-level transactional state. + * If they do, they may support one or more {@code IsolationLevel}s and one or more + * {@code LockingMode}s. + */ +public interface SupportsCatalogTransactions { + + /** + * The level of isolation for a transaction. Review comment: > Maybe add here "all isolation levels can read only committed data" and omit it on the individual levels. The first sentence is the most important one. Agreed, updated as part of edits. >should there also be a level of of READ_UNCOMMITTED? This allows us to have a default with no transaction support in the isolationLevels() method below, and a default DefaultTransactionalCatalog implementation that can wrap any catalogs to support read uncommitted multi-table commits. I don't think one implies the other. A transactional catalog ensures that are all writes are visible at once so you can't use a DefaultTransactionalCatalog to get READ_UNCOMMITTED. IOW, READ_UNCOMMITTED changes the read behavior but it doesn't change the write behavior to show incomplete changes to other readers. In general, given the decentralized nature of Iceberg, I don't see a path to supporting READ_UNCOMMITTED. If people want to do single table operations, they can use a non-transactional catalog or create a transactional catalog per each operation. ########## File path: api/src/main/java/org/apache/iceberg/catalog/SupportsCatalogTransactions.java ########## @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.catalog; + +import java.util.Collections; +import java.util.Set; + +/** + * Catalog methods for working with catalog-level transactions. + * + * <p>Catalog implementations are not required to support catalog-level transactional state. + * If they do, they may support one or more {@code IsolationLevel}s and one or more + * {@code LockingMode}s. + */ +public interface SupportsCatalogTransactions { + + /** + * The level of isolation for a transaction. + */ + enum IsolationLevel { + + /** + * Only read committed data. Reading the same table multiple times + * may result in different committed reads for each read. + */ + READ_COMMITTED, Review comment: I've added a bunch more description of each operation. READ_COMMMITTED allows us to upgrade a table before re-reading it or mutating if that is okay for our long-lived transaction. Let me know if my updated descriptions clarify things. ########## File path: api/src/main/java/org/apache/iceberg/catalog/TransactionalCatalog.java ########## @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.catalog; + +import org.apache.iceberg.catalog.SupportsCatalogTransactions.IsolationLevel; +import org.apache.iceberg.catalog.SupportsCatalogTransactions.LockingMode; +import org.apache.iceberg.exceptions.CommitFailedException; + +/** + * A {@link Catalog} that applies all mutations within a single transaction. + * + * <p>A TransactionalCatalog can spawn child transactions for multiple operations on different + * tables. All operations will be done within the context of a single Catalog-level transaction + * and they will either all be successful or all fail. + * + * <p>A TransactionalCatalog is initially active upon creation and will remain so until one of + * the following terminal actions occurs: + * <ul> + * <li>{@link rollback} is called. + * <li>{@link commit} is called. + * <li>The transaction expires while using Pessimistic {@link LockingMode}. + * <li>The transaction is terminated externally (for example, when a locking arbitrator + * determines a deadlock between two transactions has occurred). + * <li>The underlying implementation determines that the transaction can no longer complete + * successfully. + * </ul> + * + * <p>When one of the items above occurs, the transaction is no longer valid. Further use + * of the transaction will result in a {@link IllegalStateException} being thrown. + * + * <p>Nested transactions such as creating a new table may fail. Those failures alone do + * not necessarily result in a failure of the catalog-level transaction. + * + */ +public interface TransactionalCatalog extends Catalog, AutoCloseable { + + /** + * An internal identifier associated with this transaction. + * @return An internal identifier. + */ + String transactionId(); + + /** + * Return the current {@code IsolationLevel} for this transaction. + * @return The IsolationLevel for this transaction. + */ + IsolationLevel isolationLevel(); + + /** + * Return the {@link LockingMode} for this transaction. + * @return The LockingMode for this transaction. + */ + LockingMode lockingMode(); + + /** + * Whether the current transaction is still active/open. + * @return True until a terminal action occurs. + */ + boolean active(); + + /** + * Aborts the set of operations here and makes this TransactionalCatalog inoperable. + * + * <p>Once called, no further operations can be done against this catalog. If any + * operations are attempted, {@link IllegalStateException} will be thrown. + */ + void rollback(); + + /** + * Commit the pending changes from all nested transactions against the Catalog. + * + * <p>Once called, no further operations can be done against this catalog. If any + * operations are attempted, {@link IllegalStateException} will be thrown. + * + * @throws CommitFailedException If the updates cannot be committed due to conflicts. + */ + void commit(); + + /** + * A shortcut for {@link commit} that allows users to use this catalog in try-with-resources + * block. + * + * @throws CommitFailedException If the updates cannot be committed due to conflicts. + */ + @Override + default void close() { Review comment: I'm confused by this comment. Can you expound? The model I'm describing is that nested transactions each have to validate or fail cleanly. If they fail, they are noops and shouldn't influence the catalog level transaction. If they validate, they are added as a pending operation to the global transaction. When the global transaction commits, either all the validated pending operations complete or they all fail. ########## File path: api/src/main/java/org/apache/iceberg/catalog/SupportsCatalogTransactions.java ########## @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.catalog; + +import java.util.Collections; +import java.util.Set; + +/** + * Catalog methods for working with catalog-level transactions. + * + * <p>Catalog implementations are not required to support catalog-level transactional state. + * If they do, they may support one or more {@code IsolationLevel}s and one or more + * {@code LockingMode}s. + */ +public interface SupportsCatalogTransactions { + + /** + * The level of isolation for a transaction. + */ + enum IsolationLevel { + + /** + * Only read committed data. Reading the same table multiple times + * may result in different committed reads for each read. + */ + READ_COMMITTED, + + /** + * Only read committed data. Reading the same table multiple times will + * result in the same view of those tables. + */ + REPEATED_READ, + + /** + * Only read committed data. A commit will only succeed if there have + * been no changes to data read during the course of the transaction + * prior to the commit operation. + */ + SERIALIZABLE; + } + + /** + * The type of locking mode used by the transaction. + */ + enum LockingMode { + + /** + * Pessimistically lock tables. In this situation, each table being worked on will grab a lock. + * This mode will typically result in a better chance for transactions to complete at the cost + * of throughput and resource consumption. + */ + PESSIMISTIC, + + /** + * Run the transaction with an optimistic behavior. This assumes that most operations will not + * conflict. It typically increases concurrency and throughput while reducing resource + * consumption. + */ + OPTIMISTIC; + } + + /** + * Get the list of {@link LockingMode}s this Catalog supports. + * @return A set of locking modes supported. + */ + default Set<LockingMode> lockingModes() { + return Collections.emptySet(); + } + + /** + * Get the list of {@link IsolationLevel}s this Catalog supports. + * @return A set of locking modes supported. + */ + default Set<IsolationLevel> isolationLevels() { + return Collections.emptySet(); Review comment: Removed defaults. ########## File path: api/src/main/java/org/apache/iceberg/catalog/SupportsCatalogTransactions.java ########## @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.catalog; + +import java.util.Collections; +import java.util.Set; + +/** + * Catalog methods for working with catalog-level transactions. + * + * <p>Catalog implementations are not required to support catalog-level transactional state. + * If they do, they may support one or more {@code IsolationLevel}s and one or more + * {@code LockingMode}s. + */ +public interface SupportsCatalogTransactions { + + /** + * The level of isolation for a transaction. + */ + enum IsolationLevel { + + /** + * Only read committed data. Reading the same table multiple times + * may result in different committed reads for each read. + */ + READ_COMMITTED, + + /** + * Only read committed data. Reading the same table multiple times will + * result in the same view of those tables. Review comment: I've added more content to clarify here. I've also increased the requirement of serializable slightly. In my first draft I did not require snapshot read isolation to implement serializable since snapshot is a harder requirement than what I was focused on which was ordered repeated read. I thought about adding SNAPSHOT independent of SERIALIZABLE but I think the nuance of the differences would probably be lost on most people. As such, I've increased the strictness of serializable to also include snapshot read isolation. Per Ryan's comments, a quick example of SERIALIZABLE versus REPEATED_READ: Imagine you read t1 and t2. REPEATED_READ guarantees that reading t1 multiple times always produces the same result. It doesn't guarantee that t1 and t2 come from the same point in time. SERIALIZABLE (with the added SNAPSHOT isolation) guarantees that t1 always returns the same result AND that reading t2 comes from same point in time as the read of t1. ########## File path: api/src/main/java/org/apache/iceberg/catalog/SupportsCatalogTransactions.java ########## @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.catalog; + +import java.util.Collections; +import java.util.Set; + +/** + * Catalog methods for working with catalog-level transactions. + * + * <p>Catalog implementations are not required to support catalog-level transactional state. + * If they do, they may support one or more {@code IsolationLevel}s and one or more + * {@code LockingMode}s. + */ +public interface SupportsCatalogTransactions { + + /** + * The level of isolation for a transaction. + */ + enum IsolationLevel { + + /** + * Only read committed data. Reading the same table multiple times + * may result in different committed reads for each read. + */ + READ_COMMITTED, + + /** + * Only read committed data. Reading the same table multiple times will + * result in the same view of those tables. + */ + REPEATED_READ, + + /** + * Only read committed data. A commit will only succeed if there have + * been no changes to data read during the course of the transaction + * prior to the commit operation. Review comment: Added more text on each. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
