This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new becf5a581 ORC-1310: Allowlist Support for plugin filter
becf5a581 is described below
commit becf5a58107e37f939d5c3d56cb169c182af2ea1
Author: deshanxiao <[email protected]>
AuthorDate: Mon Dec 12 16:47:08 2022 -0800
ORC-1310: Allowlist Support for plugin filter
### What changes were proposed in this pull request?
This PR is aimed to add allowlist support for plugin filter.
### Why are the changes needed?
**ServiceLoader** will load all the interfaces in current classpath. When
we have two implementations of PluginFilterService, Both of them will be loaded
as plugin filters. This PR will provide a configuration
"orc.filter.plugin.allowlist". ORC reader will only load the class which is in
the allowlist when the configuration is not null.
### How was this patch tested?
UT
Closes #1314 from deshanxiao/deshan/1310.
Lead-authored-by: deshanxiao <[email protected]>
Co-authored-by: Deshan Xiao <[email protected]>
Co-authored-by: Dongjoon Hyun <[email protected]>
Co-authored-by: Deshan Xiao <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
java/core/src/java/org/apache/orc/OrcConf.java | 9 ++++
java/core/src/java/org/apache/orc/Reader.java | 12 +++++
.../org/apache/orc/impl/filter/FilterFactory.java | 30 +++++++++++
.../orc/impl/filter/TestPluginFilterService.java | 59 +++++++++++++++++++++-
4 files changed, 108 insertions(+), 2 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/OrcConf.java
b/java/core/src/java/org/apache/orc/OrcConf.java
index d2d22fc8f..3d3625916 100644
--- a/java/core/src/java/org/apache/orc/OrcConf.java
+++ b/java/core/src/java/org/apache/orc/OrcConf.java
@@ -193,6 +193,15 @@ public enum OrcConf {
+ "determined, they are combined using AND. The order of
application is "
+ "non-deterministic and the filter functionality should
not depend on the "
+ "order of application."),
+
+ PLUGIN_FILTER_ALLOWLIST("orc.filter.plugin.allowlist",
+ "orc.filter.plugin.allowlist",
+ "*",
+ "A list of comma-separated class names. If specified
it restricts "
+ + "the PluginFilters to just these classes as
discovered by the "
+ + "PluginFilterService. The default of * allows all
discovered classes "
+ + "and an empty string would not allow any plugins
to be applied."),
+
WRITE_VARIABLE_LENGTH_BLOCKS("orc.write.variable.length.blocks", null, false,
"A boolean flag as to whether the ORC writer should write variable
length\n"
+ "HDFS blocks."),
diff --git a/java/core/src/java/org/apache/orc/Reader.java
b/java/core/src/java/org/apache/orc/Reader.java
index b22bdc50e..aac9bd77f 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -24,6 +24,7 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import java.io.Closeable;
import java.io.IOException;
import java.nio.ByteBuffer;
+import java.util.Arrays;
import java.util.List;
import java.util.function.Consumer;
@@ -234,6 +235,7 @@ public interface Reader extends Closeable {
private boolean allowSARGToFilter = false;
private boolean useSelected = false;
private boolean allowPluginFilters = false;
+ private List<String> pluginAllowListFilters = null;
private int minSeekSize = (int)
OrcConf.ORC_MIN_DISK_SEEK_SIZE.getDefaultValue();
private double minSeekSizeTolerance = (double)
OrcConf.ORC_MIN_DISK_SEEK_SIZE_TOLERANCE
.getDefaultValue();
@@ -260,6 +262,7 @@ public interface Reader extends Closeable {
allowSARGToFilter = OrcConf.ALLOW_SARG_TO_FILTER.getBoolean(conf);
useSelected = OrcConf.READER_USE_SELECTED.getBoolean(conf);
allowPluginFilters = OrcConf.ALLOW_PLUGIN_FILTER.getBoolean(conf);
+ pluginAllowListFilters =
OrcConf.PLUGIN_FILTER_ALLOWLIST.getStringAsList(conf);
minSeekSize = OrcConf.ORC_MIN_DISK_SEEK_SIZE.getInt(conf);
minSeekSizeTolerance =
OrcConf.ORC_MIN_DISK_SEEK_SIZE_TOLERANCE.getDouble(conf);
rowBatchSize = OrcConf.ROW_BATCH_SIZE.getInt(conf);
@@ -656,6 +659,15 @@ public interface Reader extends Closeable {
return this;
}
+ public List<String> pluginAllowListFilters() {
+ return pluginAllowListFilters;
+ }
+
+ public Options pluginAllowListFilters(String... allowLists) {
+ this.pluginAllowListFilters = Arrays.asList(allowLists);
+ return this;
+ }
+
/**
* @since 1.8.0
*/
diff --git a/java/core/src/java/org/apache/orc/impl/filter/FilterFactory.java
b/java/core/src/java/org/apache/orc/impl/filter/FilterFactory.java
index 7cb3ddc24..4fceb97b9 100644
--- a/java/core/src/java/org/apache/orc/impl/filter/FilterFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/filter/FilterFactory.java
@@ -71,6 +71,7 @@ public class FilterFactory {
// 2. Process PluginFilter
if (opts.allowPluginFilters()) {
List<BatchFilter> pluginFilters = findPluginFilters(filePath, conf);
+ pluginFilters = getAllowedFilters(pluginFilters,
opts.pluginAllowListFilters());
if (!pluginFilters.isEmpty()) {
LOG.debug("Added plugin filters {} to the read", pluginFilters);
filters.addAll(pluginFilters);
@@ -184,4 +185,33 @@ public class FilterFactory {
}
return filters;
}
+
+ /**
+ * Filter BatchFilter which is in the allowList.
+ *
+ * @param filters whole BatchFilter list we load from class path.
+ * @param allowList a Class-Name list that we want to load in.
+ */
+ private static List<BatchFilter> getAllowedFilters(
+ List<BatchFilter> filters, List<String> allowList) {
+ List<BatchFilter> allowBatchFilters = new ArrayList<>();
+
+ if (allowList != null && allowList.contains("*")) {
+ return filters;
+ }
+
+ if (allowList == null || allowList.isEmpty() || filters == null) {
+ LOG.debug("Disable all PluginFilter.");
+ return allowBatchFilters;
+ }
+
+ for (BatchFilter filter: filters) {
+ if (allowList.contains(filter.getClass().getName())) {
+ allowBatchFilters.add(filter);
+ } else {
+ LOG.debug("Ignoring filter service {}", filter);
+ }
+ }
+ return allowBatchFilters;
+ }
}
diff --git
a/java/core/src/test/org/apache/orc/impl/filter/TestPluginFilterService.java
b/java/core/src/test/org/apache/orc/impl/filter/TestPluginFilterService.java
index 757ea5d35..b1824f207 100644
--- a/java/core/src/test/org/apache/orc/impl/filter/TestPluginFilterService.java
+++ b/java/core/src/test/org/apache/orc/impl/filter/TestPluginFilterService.java
@@ -19,12 +19,18 @@
package org.apache.orc.impl.filter;
import org.apache.hadoop.conf.Configuration;
+import org.apache.orc.filter.BatchFilter;
import org.junit.jupiter.api.Test;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.List;
+
import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
public class TestPluginFilterService {
private final Configuration conf;
@@ -56,4 +62,53 @@ public class TestPluginFilterService {
public void testMissingFilter() {
assertTrue(FilterFactory.findPluginFilters("file://db/table11/file1",
conf).isEmpty());
}
-}
\ No newline at end of file
+
+ private Method getAllowedFilters() {
+ Method method = null;
+ try {
+ method = FilterFactory.class.getDeclaredMethod("getAllowedFilters",
List.class, List.class);
+ } catch (NoSuchMethodException e) {
+ assert(false);
+ }
+ method.setAccessible(true);
+ return method;
+ }
+
+ @Test
+ public void testHitAllowListFilter() throws Exception {
+ conf.set("my.filter.name", "my_str_i_eq");
+ // Hit the allowlist.
+ List<String> allowListHit = new ArrayList<>();
+
allowListHit.add("org.apache.orc.impl.filter.BatchFilterFactory$BatchFilterImpl");
+
+ List<BatchFilter> pluginFilters =
FilterFactory.findPluginFilters("file://db/table1/file1", conf);
+ List<BatchFilter> allowListFilter =
(List<BatchFilter>)getAllowedFilters().invoke(null, pluginFilters,
allowListHit);
+
+ assertEquals(1, allowListFilter.size());
+ }
+
+ @Test
+ public void testAllowListFilterAllowAll() throws Exception {
+ conf.set("my.filter.name", "my_str_i_eq");
+ // Hit the allowlist.
+ List<String> allowListHit = new ArrayList<>();
+ allowListHit.add("*");
+
+ List<BatchFilter> pluginFilters =
FilterFactory.findPluginFilters("file://db/table1/file1", conf);
+ List<BatchFilter> allowListFilter =
(List<BatchFilter>)getAllowedFilters().invoke(null, pluginFilters,
allowListHit);
+
+ assertEquals(1, allowListFilter.size());
+ }
+
+ @Test
+ public void testAllowListFilterDisallowAll() throws Exception {
+ conf.set("my.filter.name", "my_str_i_eq");
+
+ List<BatchFilter> pluginFilters =
FilterFactory.findPluginFilters("file://db/table1/file1", conf);
+ List<BatchFilter> allowListFilter =
(List<BatchFilter>)getAllowedFilters().invoke(null, pluginFilters, new
ArrayList<>());
+ List<BatchFilter> allowListFilterWithNull =
(List<BatchFilter>)getAllowedFilters().invoke(null, pluginFilters, null);
+
+ assertEquals(0, allowListFilter.size());
+ assertEquals(0, allowListFilterWithNull.size());
+ }
+}