This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new becf5a581 ORC-1310: Allowlist Support for plugin filter
becf5a581 is described below

commit becf5a58107e37f939d5c3d56cb169c182af2ea1
Author: deshanxiao <[email protected]>
AuthorDate: Mon Dec 12 16:47:08 2022 -0800

    ORC-1310: Allowlist Support for plugin filter
    
    ### What changes were proposed in this pull request?
    This PR is aimed to add allowlist support for plugin filter.
    
    ### Why are the changes needed?
    **ServiceLoader** will load all the interfaces in current classpath. When 
we have two implementations of PluginFilterService, Both of them will be loaded 
as plugin filters. This PR will provide a configuration 
"orc.filter.plugin.allowlist". ORC reader will only load the class which is in 
the allowlist when the configuration is not null.
    
    ### How was this patch tested?
    UT
    
    Closes #1314 from deshanxiao/deshan/1310.
    
    Lead-authored-by: deshanxiao <[email protected]>
    Co-authored-by: Deshan Xiao <[email protected]>
    Co-authored-by: Dongjoon Hyun <[email protected]>
    Co-authored-by: Deshan Xiao <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 java/core/src/java/org/apache/orc/OrcConf.java     |  9 ++++
 java/core/src/java/org/apache/orc/Reader.java      | 12 +++++
 .../org/apache/orc/impl/filter/FilterFactory.java  | 30 +++++++++++
 .../orc/impl/filter/TestPluginFilterService.java   | 59 +++++++++++++++++++++-
 4 files changed, 108 insertions(+), 2 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/OrcConf.java 
b/java/core/src/java/org/apache/orc/OrcConf.java
index d2d22fc8f..3d3625916 100644
--- a/java/core/src/java/org/apache/orc/OrcConf.java
+++ b/java/core/src/java/org/apache/orc/OrcConf.java
@@ -193,6 +193,15 @@ public enum OrcConf {
                       + "determined, they are combined using AND. The order of 
application is "
                       + "non-deterministic and the filter functionality should 
not depend on the "
                       + "order of application."),
+
+  PLUGIN_FILTER_ALLOWLIST("orc.filter.plugin.allowlist",
+                          "orc.filter.plugin.allowlist",
+                          "*",
+                          "A list of comma-separated class names. If specified 
it restricts "
+                          + "the PluginFilters to just these classes as 
discovered by the "
+                          + "PluginFilterService. The default of * allows all 
discovered classes "
+                          + "and an empty string would not allow any plugins 
to be applied."),
+
   WRITE_VARIABLE_LENGTH_BLOCKS("orc.write.variable.length.blocks", null, false,
       "A boolean flag as to whether the ORC writer should write variable 
length\n"
       + "HDFS blocks."),
diff --git a/java/core/src/java/org/apache/orc/Reader.java 
b/java/core/src/java/org/apache/orc/Reader.java
index b22bdc50e..aac9bd77f 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -24,6 +24,7 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import java.io.Closeable;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.Arrays;
 import java.util.List;
 import java.util.function.Consumer;
 
@@ -234,6 +235,7 @@ public interface Reader extends Closeable {
     private boolean allowSARGToFilter = false;
     private boolean useSelected = false;
     private boolean allowPluginFilters = false;
+    private List<String> pluginAllowListFilters = null;
     private int minSeekSize = (int) 
OrcConf.ORC_MIN_DISK_SEEK_SIZE.getDefaultValue();
     private double minSeekSizeTolerance = (double) 
OrcConf.ORC_MIN_DISK_SEEK_SIZE_TOLERANCE
         .getDefaultValue();
@@ -260,6 +262,7 @@ public interface Reader extends Closeable {
       allowSARGToFilter = OrcConf.ALLOW_SARG_TO_FILTER.getBoolean(conf);
       useSelected = OrcConf.READER_USE_SELECTED.getBoolean(conf);
       allowPluginFilters = OrcConf.ALLOW_PLUGIN_FILTER.getBoolean(conf);
+      pluginAllowListFilters = 
OrcConf.PLUGIN_FILTER_ALLOWLIST.getStringAsList(conf);
       minSeekSize = OrcConf.ORC_MIN_DISK_SEEK_SIZE.getInt(conf);
       minSeekSizeTolerance = 
OrcConf.ORC_MIN_DISK_SEEK_SIZE_TOLERANCE.getDouble(conf);
       rowBatchSize = OrcConf.ROW_BATCH_SIZE.getInt(conf);
@@ -656,6 +659,15 @@ public interface Reader extends Closeable {
       return this;
     }
 
+    public List<String> pluginAllowListFilters() {
+      return pluginAllowListFilters;
+    }
+
+    public Options pluginAllowListFilters(String... allowLists) {
+      this.pluginAllowListFilters = Arrays.asList(allowLists);
+      return this;
+    }
+
     /**
      * @since 1.8.0
      */
diff --git a/java/core/src/java/org/apache/orc/impl/filter/FilterFactory.java 
b/java/core/src/java/org/apache/orc/impl/filter/FilterFactory.java
index 7cb3ddc24..4fceb97b9 100644
--- a/java/core/src/java/org/apache/orc/impl/filter/FilterFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/filter/FilterFactory.java
@@ -71,6 +71,7 @@ public class FilterFactory {
     // 2. Process PluginFilter
     if (opts.allowPluginFilters()) {
       List<BatchFilter> pluginFilters = findPluginFilters(filePath, conf);
+      pluginFilters = getAllowedFilters(pluginFilters, 
opts.pluginAllowListFilters());
       if (!pluginFilters.isEmpty()) {
         LOG.debug("Added plugin filters {} to the read", pluginFilters);
         filters.addAll(pluginFilters);
@@ -184,4 +185,33 @@ public class FilterFactory {
     }
     return filters;
   }
+
+  /**
+   * Filter BatchFilter which is in the allowList.
+   *
+   * @param filters whole BatchFilter list we load from class path.
+   * @param allowList a Class-Name list that we want to load in.
+   */
+  private static List<BatchFilter> getAllowedFilters(
+      List<BatchFilter> filters, List<String> allowList) {
+    List<BatchFilter> allowBatchFilters = new ArrayList<>();
+
+    if (allowList != null && allowList.contains("*")) {
+      return filters;
+    }
+
+    if (allowList == null || allowList.isEmpty() || filters == null) {
+      LOG.debug("Disable all PluginFilter.");
+      return allowBatchFilters;
+    }
+
+    for (BatchFilter filter: filters) {
+      if (allowList.contains(filter.getClass().getName())) {
+        allowBatchFilters.add(filter);
+      } else {
+        LOG.debug("Ignoring filter service {}", filter);
+      }
+    }
+    return allowBatchFilters;
+  }
 }
diff --git 
a/java/core/src/test/org/apache/orc/impl/filter/TestPluginFilterService.java 
b/java/core/src/test/org/apache/orc/impl/filter/TestPluginFilterService.java
index 757ea5d35..b1824f207 100644
--- a/java/core/src/test/org/apache/orc/impl/filter/TestPluginFilterService.java
+++ b/java/core/src/test/org/apache/orc/impl/filter/TestPluginFilterService.java
@@ -19,12 +19,18 @@
 package org.apache.orc.impl.filter;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.orc.filter.BatchFilter;
 import org.junit.jupiter.api.Test;
 
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.List;
+
 import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 public class TestPluginFilterService {
   private final Configuration conf;
@@ -56,4 +62,53 @@ public class TestPluginFilterService {
   public void testMissingFilter() {
     assertTrue(FilterFactory.findPluginFilters("file://db/table11/file1", 
conf).isEmpty());
   }
-}
\ No newline at end of file
+
+  private Method getAllowedFilters() {
+    Method method = null;
+    try {
+      method = FilterFactory.class.getDeclaredMethod("getAllowedFilters", 
List.class, List.class);
+    } catch (NoSuchMethodException e) {
+      assert(false);
+    }
+    method.setAccessible(true);
+    return method;
+  }
+
+  @Test
+  public void testHitAllowListFilter() throws Exception {
+    conf.set("my.filter.name", "my_str_i_eq");
+    // Hit the allowlist.
+    List<String> allowListHit = new ArrayList<>();
+    
allowListHit.add("org.apache.orc.impl.filter.BatchFilterFactory$BatchFilterImpl");
+
+    List<BatchFilter> pluginFilters = 
FilterFactory.findPluginFilters("file://db/table1/file1", conf);
+    List<BatchFilter> allowListFilter = 
(List<BatchFilter>)getAllowedFilters().invoke(null, pluginFilters, 
allowListHit);
+
+    assertEquals(1, allowListFilter.size());
+  }
+
+  @Test
+  public void testAllowListFilterAllowAll() throws Exception {
+    conf.set("my.filter.name", "my_str_i_eq");
+    // Hit the allowlist.
+    List<String> allowListHit = new ArrayList<>();
+    allowListHit.add("*");
+
+    List<BatchFilter> pluginFilters = 
FilterFactory.findPluginFilters("file://db/table1/file1", conf);
+    List<BatchFilter> allowListFilter = 
(List<BatchFilter>)getAllowedFilters().invoke(null, pluginFilters, 
allowListHit);
+
+    assertEquals(1, allowListFilter.size());
+  }
+
+  @Test
+  public void testAllowListFilterDisallowAll() throws Exception {
+    conf.set("my.filter.name", "my_str_i_eq");
+
+    List<BatchFilter> pluginFilters = 
FilterFactory.findPluginFilters("file://db/table1/file1", conf);
+    List<BatchFilter> allowListFilter = 
(List<BatchFilter>)getAllowedFilters().invoke(null, pluginFilters, new 
ArrayList<>());
+    List<BatchFilter> allowListFilterWithNull = 
(List<BatchFilter>)getAllowedFilters().invoke(null, pluginFilters, null);
+
+    assertEquals(0, allowListFilter.size());
+    assertEquals(0, allowListFilterWithNull.size());
+  }
+}

Reply via email to