the-other-tim-brown commented on code in PR #9374:
URL: https://github.com/apache/hudi/pull/9374#discussion_r1285129506


##########
hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/SyncUtilHelpers.java:
##########
@@ -33,36 +32,56 @@
 
 import java.util.Map;
 import java.util.Properties;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+
+import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_FILE_FORMAT;
+import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_PATH;
+import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_TABLE_NAME;
 
 /**
  * Helper class for syncing Hudi commit data with external metastores.
  */
 public class SyncUtilHelpers {
   private static final Logger LOG = LoggerFactory.getLogger(SyncUtilHelpers.class);
+
+  // Locks for each table (base path) to avoid concurrent modification of the same underneath meta storage.
+  // Meta store such as Hive may encounter {@code ConcurrentModificationException} for #alter_table.
+  private static final ConcurrentHashMap<String, Lock> TABLE_LOCKS = new ConcurrentHashMap<>();
+
   /**
    * Create an instance of an implementation of {@link HoodieSyncTool} that will sync all the relevant meta information
    * with an external metastore such as Hive etc. to ensure Hoodie tables can be queried or read via external systems.
    *
-   * <p>IMPORTANT: make this method class level thread safe to avoid concurrent modification of the same underneath meta storage.
-   * Meta store such as Hive may encounter {@code ConcurrentModificationException} for #alter_table.
-   *
-   * @param syncToolClassName   Class name of the {@link HoodieSyncTool} implementation.
-   * @param props               property map.
-   * @param hadoopConfig        Hadoop confs.
-   * @param fs                  Filesystem used.
-   * @param targetBasePath      The target base path that contains the hoodie table.
-   * @param baseFileFormat      The file format used by the hoodie table (defaults to PARQUET).
+   * @param syncToolClassName Class name of the {@link HoodieSyncTool} implementation.
+   * @param props             property map.
+   * @param hadoopConfig      Hadoop confs.
+   * @param fs                Filesystem used.
+   * @param targetBasePath    The target base path that contains the hoodie table.
+   * @param baseFileFormat    The file format used by the hoodie table (defaults to PARQUET).
    */
-  public static synchronized void runHoodieMetaSync(String syncToolClassName,
+  public static void runHoodieMetaSync(String syncToolClassName,
                                        TypedProperties props,
                                        Configuration hadoopConfig,
                                        FileSystem fs,
                                        String targetBasePath,
                                        String baseFileFormat) {
-    try (HoodieSyncTool syncTool = instantiateMetaSyncTool(syncToolClassName, props, hadoopConfig, fs, targetBasePath, baseFileFormat)) {
-      syncTool.syncHoodieTable();
-    } catch (Throwable e) {
-      throw new HoodieMetaSyncException("Could not sync using the meta sync class " + syncToolClassName, e);
+    if (targetBasePath == null) {
+      throw new IllegalArgumentException("Target base path must not be null");
+    }
+
+    // Get or create a lock for the specific table
+    Lock tableLock = TABLE_LOCKS.computeIfAbsent(targetBasePath, k -> new ReentrantLock());

Review Comment:
   What are your thoughts on keying the lock off of both the table base path and the syncToolClassName, so that syncs to different metastores for the same table can proceed concurrently?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to