This is an automated email from the ASF dual-hosted git repository.

wlo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new 05c732c20 [GOBBLIN-1832] Emit warning instead of failing job for 
retention of Hive Table Views (#3695)
05c732c20 is described below

commit 05c732c2008e38d5fa6bf06dcad5fcd0eb4aba47
Author: umustafi <[email protected]>
AuthorDate: Thu May 11 14:46:06 2023 -0700

    [GOBBLIN-1832] Emit warning instead of failing job for retention of Hive 
Table Views (#3695)
    
    We should not allow Hive retention on a view, since a view shouldn't have 
access to delete the underlying data. Rather than failing the job, we now log a 
warning and skip the dataset when it is a view, because retention jobs may be 
configured to include both Hive tables and views. This lets us determine 
dynamically at runtime whether to skip retention on a given dataset, rather 
than statically allow/denylisting tables in the configurations.
    
    Co-authored-by: Urmi Mustafi <[email protected]>
---
 .../version/finder/AbstractHiveDatasetVersionFinder.java     | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/version/finder/AbstractHiveDatasetVersionFinder.java
 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/version/finder/AbstractHiveDatasetVersionFinder.java
index f12204862..a7a933bcf 100644
--- 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/version/finder/AbstractHiveDatasetVersionFinder.java
+++ 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/version/finder/AbstractHiveDatasetVersionFinder.java
@@ -18,11 +18,13 @@ package org.apache.gobblin.data.management.version.finder;
 
 import java.io.IOException;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.List;
 
 import lombok.extern.slf4j.Slf4j;
 
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 
 import com.google.common.base.Function;
@@ -56,6 +58,8 @@ public abstract class AbstractHiveDatasetVersionFinder 
implements VersionFinder<
    * Calls {@link #getDatasetVersion(Partition)} for every {@link Partition} 
found.
    * <p>
    * Note: If an exception occurs while processing a partition, that partition 
will be ignored in the returned collection
+   * Also note that if the dataset passed is a view type, we will return an 
empty list even if the underlying table is
+   * partitioned.
    * </p>
    *
    * @throws IllegalArgumentException if <code>dataset</code> is not a {@link 
HiveDataset}. Or if {@link HiveDataset#getTable()}
@@ -69,7 +73,13 @@ public abstract class AbstractHiveDatasetVersionFinder 
implements VersionFinder<
     final HiveDataset hiveDataset = (HiveDataset) dataset;
 
     if (!hiveDataset.getTable().isPartitioned()) {
-      throw new IllegalArgumentException("HiveDatasetVersionFinder is only 
compatible with partitioned hive tables");
+      if (hiveDataset.getTable().getTableType() == TableType.VIRTUAL_VIEW) {
+        log.warn("Skipping processing a view type dataset: ", ((HiveDataset) 
dataset).getTable().getTableName());
+        return Collections.emptyList();
+      } else {
+        throw new IllegalArgumentException("HiveDatasetVersionFinder is only 
compatible with partitioned hive tables. "
+            + "This is a snapshot hive table.");
+      }
     }
 
     try (AutoReturnableObject<IMetaStoreClient> client = 
hiveDataset.getClientPool().getClient()) {

Reply via email to