alexr17 commented on code in PR #13103: URL: https://github.com/apache/hudi/pull/13103#discussion_r2038604273
########## hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/StorageBasedLockProvider.java: ########## @@ -0,0 +1,581 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.client.transaction.lock; + +import org.apache.hudi.client.transaction.lock.models.StorageLockData; +import org.apache.hudi.client.transaction.lock.models.StorageLockFile; +import org.apache.hudi.client.transaction.lock.models.HeartbeatManager; +import org.apache.hudi.client.transaction.lock.models.LockGetResult; +import org.apache.hudi.client.transaction.lock.models.LockProviderHeartbeatManager; +import org.apache.hudi.client.transaction.lock.models.LockUpdateResult; +import org.apache.hudi.common.config.LockConfiguration; +import org.apache.hudi.common.lock.LockProvider; +import org.apache.hudi.common.lock.LockState; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ReflectionUtils; +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.common.util.VisibleForTesting; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.common.util.hash.HashID; +import org.apache.hudi.config.StorageBasedLockConfig; +import org.apache.hudi.exception.HoodieLockException; +import org.apache.hudi.exception.HoodieNotSupportedException; +import org.apache.hudi.storage.StorageConfiguration; +import org.apache.hudi.storage.StoragePath; +import org.apache.hudi.storage.StorageSchemes; + +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.concurrent.GuardedBy; +import javax.annotation.concurrent.ThreadSafe; + +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Objects; +import java.util.Properties; +import java.util.UUID; +import java.util.concurrent.TimeUnit; + +import static org.apache.hudi.common.lock.LockState.ACQUIRED; +import static org.apache.hudi.common.lock.LockState.ACQUIRING; +import static org.apache.hudi.common.lock.LockState.FAILED_TO_ACQUIRE; +import static org.apache.hudi.common.lock.LockState.FAILED_TO_RELEASE; +import static org.apache.hudi.common.lock.LockState.RELEASED; +import static org.apache.hudi.common.lock.LockState.RELEASING; +import static org.apache.hudi.common.table.HoodieTableMetaClient.LOCKS_FOLDER_NAME; + +/** + * A distributed filesystem storage based lock provider. This {@link LockProvider} implementation + * leverages conditional writes to ensure transactional consistency for multi-writer scenarios. + * The underlying storage client interface {@link StorageLock} is pluggable so it can be implemented for any + * filesystem which supports conditional writes. + */ +@ThreadSafe +public class StorageBasedLockProvider implements LockProvider<StorageLockFile> { + + public static final String DEFAULT_TABLE_LOCK_FILE_NAME = "table_lock"; + // How long to wait before retrying lock acquisition in blocking calls. + private static final long DEFAULT_LOCK_ACQUISITION_BUFFER_MS = 1000; + // Maximum expected clock drift between two nodes. + // This is similar idea as SkewAdjustingTimeGenerator. + // In reality, within a single cloud provider all nodes share the same ntp + // server + // therefore we do not expect drift more than a few ms. + // However, since our lock leases are pretty long, we can use a high buffer. + private static final long CLOCK_DRIFT_BUFFER_MS = 500; + + // When we retry lock upserts, do so 5 times + private static final long LOCK_UPSERT_RETRY_COUNT = 5; + + private static final Logger LOGGER = LoggerFactory.getLogger(StorageBasedLockProvider.class); + + Logger logger; + + // The lock service implementation which interacts with storage + private final StorageLock lockService; + + private final long heartbeatInterval; + private final long lockValidity; + private final String ownerId; + private final String lockFilePath; + private final String bucketName; + private final HeartbeatManager heartbeatManager; + private transient Thread shutdownThread = null; + + @GuardedBy("this") + private StorageLockFile currentLockObj = null; + @GuardedBy("this") + private boolean isClosed = false; + + private synchronized void setLock(StorageLockFile lockObj) { + if (lockObj != null && !Objects.equals(lockObj.getOwner(), this.ownerId)) { + throw new HoodieLockException("Owners do not match. Current lock owner: " + this.ownerId + " lock path: " + + this.lockFilePath + " owner: " + lockObj.getOwner()); + } + this.currentLockObj = lockObj; + } + + /** + * Default constructor for StorageBasedLockProvider, required by LockManager + * to instantiate it using reflection. + * + * @param lockConfiguration The lock configuration, should be transformable into + * StorageBasedLockConfig + * @param conf Storage config, ignored. + */ + public StorageBasedLockProvider(final LockConfiguration lockConfiguration, final StorageConfiguration<?> conf) { + StorageBasedLockConfig config = new StorageBasedLockConfig.Builder() + .fromProperties(lockConfiguration.getConfig()).build(); + heartbeatInterval = config.getHeartbeatPoll(); + lockValidity = config.getLockValidityTimeout(); + + String configuredLocksLocation = config.getLocksLocation(); + + // If not configured, recalculate the locks location as .hoodie/.locks; + // otherwise (the lock location is configured), the configuration location is used as the folder + // to which the lock file is written to, and the lock file name is determined by the table's base path + String locksLocation = StringUtils.isNullOrEmpty(configuredLocksLocation) + ? String.format("%s%s%s", config.getHudiTableBasePath(), StoragePath.SEPARATOR, LOCKS_FOLDER_NAME) + : configuredLocksLocation; + + URI uri = parseURI(locksLocation); + bucketName = uri.getHost(); // For most schemes, the bucket/container is the host. + String folderName = uri.getPath(); // Path after the bucket/container. + + String fileName = StringUtils.isNullOrEmpty(configuredLocksLocation) + ? DEFAULT_TABLE_LOCK_FILE_NAME + : slugifyLockFolderFromBasePath(config.getHudiTableBasePath()); + + lockFilePath = buildLockObjectPath(folderName, fileName); + ownerId = UUID.randomUUID().toString(); + this.logger = LOGGER; + this.heartbeatManager = new LockProviderHeartbeatManager( + ownerId, + heartbeatInterval, + this::renewLock); + + try { + this.lockService = (StorageLock) ReflectionUtils.loadClass( + getLockServiceClassName(uri.getScheme()), + new Class<?>[] { String.class, String.class, String.class, Properties.class }, + new Object[] { ownerId, bucketName, lockFilePath, lockConfiguration.getConfig() }); + } catch (Throwable e) { + throw new HoodieLockException("Failed to load and initialize StorageLock", e); + } + shutdownThread = new Thread(() -> shutdown(true)); + Runtime.getRuntime().addShutdownHook(shutdownThread); + logger.info("Instantiated new storage-based lock provider, owner: {}, lockfilePath: {}", ownerId, lockFilePath); + } + + private URI parseURI(String location) { + try { + return new URI(location); + } catch (URISyntaxException e) { + throw new HoodieLockException("Unable to parse locks location as a URI: " + location, e); + } + } + + private static @NotNull String getLockServiceClassName(String scheme) { + Option<StorageSchemes> schemeOptional = StorageSchemes.getStorageLockImplementationIfExists(scheme); + if (schemeOptional.isPresent()) { + return schemeOptional.get().getStorageLockClass(); + } else { + throw new HoodieNotSupportedException("No implementation of StorageLock supports this scheme: " + scheme); + } + } + + @VisibleForTesting + StorageBasedLockProvider( Review Comment: It's difficult to do this since we need to catch the errors -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
