This is an automated email from the ASF dual-hosted git repository.
dzamo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new 398c114 DRILL-8057: INFORMATION_SCHEMA filter push down is
inefficient (#2388)
398c114 is described below
commit 398c114b2d920adf1840338bde9b55bd7cb602d3
Author: James Turton <[email protected]>
AuthorDate: Wed Jan 19 08:34:25 2022 +0200
DRILL-8057: INFORMATION_SCHEMA filter push down is inefficient (#2388)
* Add schema search tree pruning capability to InfoSchema.
* Fix checkstyle, address review comments.
* WIP.
* WIP.
* Address review comments.
* Clean up and add a unit test of lazy schema registration.
* Remove call to registerSchemas() in PlanningBase.
---
.../org/apache/calcite/jdbc/DynamicRootSchema.java | 26 +++--
.../org/apache/calcite/jdbc/DynamicSchema.java | 12 ++-
.../apache/drill/exec/ops/FragmentContextImpl.java | 6 +-
.../org/apache/drill/exec/ops/QueryContext.java | 8 --
.../drill/exec/store/DrillSchemaFactory.java | 68 +++----------
.../drill/exec/store/SchemaTreeProvider.java | 68 +------------
.../exec/store/StoragePluginRegistryImpl.java | 24 +----
.../drill/exec/store/ischema/FilterEvaluator.java | 29 +++++-
.../drill/exec/store/ischema/InfoSchemaFilter.java | 106 ++++++++++++++++-----
.../store/ischema/InfoSchemaRecordGenerator.java | 11 ++-
.../drill/exec/store/mock/MockBreakageStorage.java | 3 +
.../drill/exec/work/metadata/MetadataProvider.java | 2 +-
.../test/java/org/apache/drill/PlanningBase.java | 1 -
.../drill/exec/physical/impl/TestSchema.java | 2 +-
.../org/apache/drill/exec/sql/TestInfoSchema.java | 11 +++
.../ischema/TestInfoSchemaFilterPushDown.java | 35 +++++++
16 files changed, 219 insertions(+), 193 deletions(-)
diff --git
a/exec/java-exec/src/main/java/org/apache/calcite/jdbc/DynamicRootSchema.java
b/exec/java-exec/src/main/java/org/apache/calcite/jdbc/DynamicRootSchema.java
index 1dfc9d6..1f8b162 100644
---
a/exec/java-exec/src/main/java/org/apache/calcite/jdbc/DynamicRootSchema.java
+++
b/exec/java-exec/src/main/java/org/apache/calcite/jdbc/DynamicRootSchema.java
@@ -42,6 +42,7 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
+import java.util.Set;
/**
* Loads schemas from storage plugins later when {@link #getSubSchema(String,
boolean)}
@@ -58,7 +59,7 @@ public class DynamicRootSchema extends DynamicSchema {
/** Creates a root schema. */
DynamicRootSchema(StoragePluginRegistry storages, SchemaConfig schemaConfig,
AliasRegistryProvider aliasRegistryProvider) {
- super(null, new RootSchema(), ROOT_SCHEMA_NAME);
+ super(null, new RootSchema(storages), ROOT_SCHEMA_NAME);
this.schemaConfig = schemaConfig;
this.storages = storages;
this.aliasRegistryProvider = aliasRegistryProvider;
@@ -78,13 +79,13 @@ public class DynamicRootSchema extends DynamicSchema {
private CalciteSchema getSchema(String schemaName, boolean caseSensitive) {
// Drill registers schemas in lower case, see AbstractSchema constructor
schemaName = schemaName == null ? null : schemaName.toLowerCase();
- CalciteSchema retSchema = getSubSchemaMap().get(schemaName);
+ CalciteSchema retSchema = subSchemaMap.map().get(schemaName);
if (retSchema != null) {
return retSchema;
}
loadSchemaFactory(schemaName, caseSensitive);
- retSchema = getSubSchemaMap().get(schemaName);
+ retSchema = subSchemaMap.map().get(schemaName);
return retSchema;
}
@@ -133,7 +134,7 @@ public class DynamicRootSchema extends DynamicSchema {
for (SchemaPlus schema : secondLevelSchemas) {
org.apache.drill.exec.store.AbstractSchema drillSchema;
try {
- drillSchema =
schema.unwrap(org.apache.drill.exec.store.AbstractSchema.class);
+ drillSchema = schema.unwrap(AbstractSchema.class);
} catch (ClassCastException e) {
throw new RuntimeException(String.format("Schema '%s' is not
expected under root schema", schema.getName()));
}
@@ -154,10 +155,18 @@ public class DynamicRootSchema extends DynamicSchema {
}
}
- static class RootSchema extends AbstractSchema {
+ public static class RootSchema extends AbstractSchema {
- public RootSchema() {
+ private StoragePluginRegistry storages;
+
+ public RootSchema(StoragePluginRegistry storages) {
super(Collections.emptyList(), ROOT_SCHEMA_NAME);
+ this.storages = storages;
+ }
+
+ @Override
+ public Set<String> getSubSchemaNames() {
+ return storages.availablePlugins();
}
@Override
@@ -171,6 +180,11 @@ public class DynamicRootSchema extends DynamicSchema {
DataContext.ROOT,
BuiltInMethod.DATA_CONTEXT_GET_ROOT_SCHEMA.method);
}
+
+ @Override
+ public boolean showInInformationSchema() {
+ return false;
+ }
}
}
diff --git
a/exec/java-exec/src/main/java/org/apache/calcite/jdbc/DynamicSchema.java
b/exec/java-exec/src/main/java/org/apache/calcite/jdbc/DynamicSchema.java
index e315f4a..2a7987b 100644
--- a/exec/java-exec/src/main/java/org/apache/calcite/jdbc/DynamicSchema.java
+++ b/exec/java-exec/src/main/java/org/apache/calcite/jdbc/DynamicSchema.java
@@ -19,7 +19,9 @@ package org.apache.calcite.jdbc;
import org.apache.calcite.schema.Schema;
import org.apache.calcite.schema.SchemaPlus;
+import org.apache.drill.common.AutoCloseables;
import org.apache.drill.exec.alias.AliasRegistryProvider;
+import org.apache.drill.exec.store.AbstractSchema;
import org.apache.drill.exec.store.SchemaConfig;
import org.apache.drill.exec.store.StoragePluginRegistry;
@@ -28,7 +30,7 @@ import org.apache.drill.exec.store.StoragePluginRegistry;
* Unlike SimpleCalciteSchema, DynamicSchema could have an empty or partial
schemaMap, but it could maintain a map of
* name->SchemaFactory, and only register schema when the correspondent name
is requested.
*/
-public class DynamicSchema extends SimpleCalciteSchema {
+public class DynamicSchema extends SimpleCalciteSchema implements
AutoCloseable {
public DynamicSchema(CalciteSchema parent, Schema schema, String name) {
super(parent, schema, name);
@@ -49,4 +51,12 @@ public class DynamicSchema extends SimpleCalciteSchema {
DynamicRootSchema rootSchema = new DynamicRootSchema(storages,
schemaConfig, aliasRegistryProvider);
return rootSchema.plus();
}
+
+ @Override
+ public void close() throws Exception {
+ for (CalciteSchema cs : subSchemaMap.map().values()) {
+
AutoCloseables.closeWithUserException(cs.plus().unwrap(AbstractSchema.class));
+ }
+ }
+
}
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/ops/FragmentContextImpl.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/ops/FragmentContextImpl.java
index 0a30c2b..7ce944d 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/ops/FragmentContextImpl.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/ops/FragmentContextImpl.java
@@ -311,7 +311,7 @@ public class FragmentContextImpl extends
BaseFragmentContext implements Executor
.setIgnoreAuthErrors(isImpersonationEnabled)
.build();
- return queryContext.getFullRootSchema(schemaConfig);
+ return queryContext.getRootSchema(schemaConfig);
}
private SchemaPlus getFragmentContextRootSchema() {
@@ -323,7 +323,7 @@ public class FragmentContextImpl extends
BaseFragmentContext implements Executor
.setIgnoreAuthErrors(isImpersonationEnabled)
.build();
- return schemaTreeProvider.createFullRootSchema(schemaConfig);
+ return schemaTreeProvider.createRootSchema(schemaConfig);
}
@Override
@@ -696,7 +696,7 @@ public class FragmentContextImpl extends
BaseFragmentContext implements Executor
@Override
public SchemaPlus getRootSchema(String userName) {
- return schemaTreeProvider.createFullRootSchema(userName, this);
+ return schemaTreeProvider.createRootSchema(userName, this);
}
@Override
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/ops/QueryContext.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/ops/QueryContext.java
index 694b4e2..82e9994 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/ops/QueryContext.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ops/QueryContext.java
@@ -198,15 +198,7 @@ public class QueryContext implements AutoCloseable,
OptimizerRulesContext, Schem
public SchemaPlus getRootSchema(SchemaConfig schemaConfig) {
return schemaTreeProvider.createRootSchema(schemaConfig);
}
- /**
- * Create and return a fully initialized SchemaTree with given
<i>schemaConfig</i>.
- * @param schemaConfig
- * @return A fully initialized SchemaTree with given <i>schemaConfig</i>.
- */
- public SchemaPlus getFullRootSchema(SchemaConfig schemaConfig) {
- return schemaTreeProvider.createFullRootSchema(schemaConfig);
- }
/**
* Get the user name of the user who issued the query that is managed by
this QueryContext.
* @return The user name of the user who issued the query that is managed by
this QueryContext.
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/DrillSchemaFactory.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/DrillSchemaFactory.java
index 40e67c4..1fc5461 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/DrillSchemaFactory.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/DrillSchemaFactory.java
@@ -18,74 +18,30 @@
package org.apache.drill.exec.store;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.TimeUnit;
import org.apache.calcite.schema.SchemaPlus;
-import org.apache.drill.shaded.guava.com.google.common.base.Stopwatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class DrillSchemaFactory extends AbstractSchemaFactory {
private static final Logger logger =
LoggerFactory.getLogger(DrillSchemaFactory.class);
- private final StoragePluginRegistryImpl registry;
-
- public DrillSchemaFactory(String name, StoragePluginRegistryImpl registry) {
+ public DrillSchemaFactory(String name) {
super(name);
- this.registry = registry;
}
+ /**
+ * Historical note. This method used to eagerly register schemas for every
+ * enabled storage plugin instance, an operation that is expensive at best
+ * and unreliable in the presence of unresponsive storage plugins at worst.
+ * Now the schemas under the root are registered lazily by DynamicRootSchema.
+ *
+ * @param schemaConfig Configuration for schema objects.
+ * @param parent Reference to parent schema.
+ * @throws IOException
+ */
@Override
public void registerSchemas(SchemaConfig schemaConfig, SchemaPlus parent)
throws IOException {
- Stopwatch watch = Stopwatch.createStarted();
- registry.registerSchemas(schemaConfig, parent);
-
- // Add second level schema as top level schema with name qualified with
parent schema name
- // Ex: "dfs" schema has "default" and "tmp" as sub schemas. Add following
extra schemas "dfs.default" and
- // "dfs.tmp" under root schema.
- //
- // Before change, schema tree looks like below:
- // "root"
- // -- "dfs"
- // -- "default"
- // -- "tmp"
- // -- "hive"
- // -- "default"
- // -- "hivedb1"
- //
- // After the change, the schema tree looks like below:
- // "root"
- // -- "dfs"
- // -- "default"
- // -- "tmp"
- // -- "dfs.default"
- // -- "dfs.tmp"
- // -- "hive"
- // -- "default"
- // -- "hivedb1"
- // -- "hive.default"
- // -- "hive.hivedb1"
- List<SchemaPlus> secondLevelSchemas = new ArrayList<>();
- for (String firstLevelSchemaName : parent.getSubSchemaNames()) {
- SchemaPlus firstLevelSchema = parent.getSubSchema(firstLevelSchemaName);
- for (String secondLevelSchemaName :
firstLevelSchema.getSubSchemaNames()) {
-
secondLevelSchemas.add(firstLevelSchema.getSubSchema(secondLevelSchemaName));
- }
- }
-
- for (SchemaPlus schema : secondLevelSchemas) {
- AbstractSchema drillSchema;
- try {
- drillSchema = schema.unwrap(AbstractSchema.class);
- } catch (ClassCastException e) {
- throw new RuntimeException(String.format("Schema '%s' is not expected
under root schema", schema.getName()));
- }
- SubSchemaWrapper wrapper = new SubSchemaWrapper(drillSchema);
- parent.add(wrapper.getName(), wrapper);
- }
-
- logger.debug("Took {} ms to register schemas.",
watch.elapsed(TimeUnit.MILLISECONDS));
+ throw new UnsupportedOperationException("Only lazy schema registration by
DynamicRootSchema is supported.");
}
}
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/SchemaTreeProvider.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/SchemaTreeProvider.java
index d0e29dd..e28a898 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/SchemaTreeProvider.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/SchemaTreeProvider.java
@@ -17,15 +17,13 @@
*/
package org.apache.drill.exec.store;
-import java.io.IOException;
import java.util.List;
-import org.apache.calcite.schema.SchemaPlus;
import org.apache.drill.common.AutoCloseables;
-import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.ops.ViewExpansionContext;
import org.apache.calcite.jdbc.DynamicSchema;
+import org.apache.calcite.schema.SchemaPlus;
import org.apache.drill.exec.server.DrillbitContext;
import org.apache.drill.exec.server.options.OptionManager;
import org.apache.drill.exec.server.options.OptionValue;
@@ -104,77 +102,19 @@ public class SchemaTreeProvider implements AutoCloseable {
/**
* Create and return a SchemaTree with given <i>schemaConfig</i>.
* @param schemaConfig
- * @return
- */
- public SchemaPlus createRootSchema(SchemaConfig schemaConfig) {
- SchemaPlus rootSchema =
DynamicSchema.createRootSchema(dContext.getStorage(), schemaConfig,
- dContext.getAliasRegistryProvider());
- schemaTreesToClose.add(rootSchema);
- return rootSchema;
- }
-
- /**
- * Return full root schema with schema owner as the given user.
- *
- * @param userName Name of the user who is accessing the storage sources.
- * @param provider {@link SchemaConfigInfoProvider} instance
* @return Root of the schema tree.
*/
- public SchemaPlus createFullRootSchema(final String userName, final
SchemaConfigInfoProvider provider) {
- final String schemaUser = isImpersonationEnabled ? userName :
ImpersonationUtil.getProcessUserName();
- final SchemaConfig schemaConfig = SchemaConfig.newBuilder(schemaUser,
provider).build();
- return createFullRootSchema(schemaConfig);
- }
-
- /**
- * Create and return a Full SchemaTree with given <i>schemaConfig</i>.
- * @param schemaConfig
- * @return
- */
- public SchemaPlus createFullRootSchema(SchemaConfig schemaConfig) {
- try {
+ public SchemaPlus createRootSchema(SchemaConfig schemaConfig) {
SchemaPlus rootSchema =
DynamicSchema.createRootSchema(dContext.getStorage(), schemaConfig,
dContext.getAliasRegistryProvider());
- dContext.getSchemaFactory().registerSchemas(schemaConfig, rootSchema);
schemaTreesToClose.add(rootSchema);
return rootSchema;
- }
- catch(IOException e) {
- // We can't proceed further without a schema, throw a runtime exception.
- // Improve the error message for client side.
-
- final String contextString = isImpersonationEnabled ? "[Hint: Username
is absent in connection URL or doesn't " +
- "exist on Drillbit node. Please specify a username in connection URL
which is present on Drillbit node.]" :
- "";
- throw UserException
- .resourceError(e)
- .message("Failed to create schema tree.")
- .addContext("IOException: ", e.getMessage())
- .addContext(contextString)
- .build(logger);
- }
}
@Override
public void close() throws Exception {
- List<AutoCloseable> toClose = Lists.newArrayList();
- for(SchemaPlus tree : schemaTreesToClose) {
- addSchemasToCloseList(tree, toClose);
- }
-
- AutoCloseables.close(toClose);
- }
-
- private static void addSchemasToCloseList(final SchemaPlus tree, final
List<AutoCloseable> toClose) {
- for(String subSchemaName : tree.getSubSchemaNames()) {
- addSchemasToCloseList(tree.getSubSchema(subSchemaName), toClose);
- }
-
- try {
- AbstractSchema drillSchemaImpl = tree.unwrap(AbstractSchema.class);
- toClose.add(drillSchemaImpl);
- } catch (ClassCastException e) {
- // Ignore as the SchemaPlus is not an implementation of Drill schema.
+ for (SchemaPlus sp : schemaTreesToClose) {
+ AutoCloseables.closeWithUserException(sp.unwrap(AbstractSchema.class));
}
}
}
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java
index 38a489e..2e9b2d0 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java
@@ -30,7 +30,6 @@ import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
-import org.apache.calcite.schema.SchemaPlus;
import org.apache.drill.common.collections.ImmutableEntry;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.exceptions.UserException;
@@ -198,7 +197,7 @@ public class StoragePluginRegistryImpl implements
StoragePluginRegistry {
public StoragePluginRegistryImpl(DrillbitContext context) {
this.context = new DrillbitPluginRegistryContext(context);
this.pluginCache = new StoragePluginMap();
- this.schemaFactory = new DrillSchemaFactory(null, this);
+ this.schemaFactory = new DrillSchemaFactory(null);
locators.add(new ClassicConnectorLocator(this.context));
locators.add(new SystemPluginLocator(this.context));
this.pluginStore = new StoragePluginStoreImpl(context);
@@ -907,27 +906,6 @@ public class StoragePluginRegistryImpl implements
StoragePluginRegistry {
return connector.pluginEntryFor(name, pluginConfig, type);
}
- // TODO: Replace this. Inefficient to obtain schemas we don't need.
- protected void registerSchemas(SchemaConfig schemaConfig, SchemaPlus parent)
{
- // Refresh against the persistent store.
- // TODO: This will hammer the system if queries come in rapidly.
- // Need some better solution: grace period, alert from ZK that there
- // is something new, etc. Even better, don't register all the schemas.
- refresh();
-
- // Register schemas with the refreshed plugins
- // TODO: this code requires instantiating all plugins, even though
- // the query won't use them. Need a way to do deferred registration.
- for (PluginHandle plugin : pluginCache.plugins()) {
- try {
- plugin.plugin().registerSchemas(schemaConfig, parent);
- } catch (Exception e) {
- logger.warn("Error during `{}` schema initialization: {}",
- plugin.name(), e.getMessage(), e.getCause());
- }
- }
- }
-
@Override
public ObjectMapper mapper() {
return context.mapper();
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/FilterEvaluator.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/FilterEvaluator.java
index 34219fb..5fce27c 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/FilterEvaluator.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/FilterEvaluator.java
@@ -51,6 +51,16 @@ public interface FilterEvaluator {
boolean shouldVisitCatalog();
/**
+ * Prune the given schema.
+ *
+ * @param schemaName name of the schema
+ * @param schema schema object
+ * @return whether to prune this schema and all its descendants from the
+ * search tree.
+ */
+ boolean shouldPruneSchema(String schemaName);
+
+ /**
* Visit the given schema.
*
* @param schemaName name of the schema
@@ -106,11 +116,12 @@ public interface FilterEvaluator {
}
@Override
- public boolean shouldVisitSchema(String schemaName, SchemaPlus schema) {
- if (schemaName == null || schemaName.isEmpty()) {
- return false;
- }
+ public boolean shouldPruneSchema(String schemaName) {
+ return false;
+ }
+ @Override
+ public boolean shouldVisitSchema(String schemaName, SchemaPlus schema) {
try {
AbstractSchema drillSchema = schema.unwrap(AbstractSchema.class);
return drillSchema.showInInformationSchema();
@@ -159,6 +170,16 @@ public interface FilterEvaluator {
}
@Override
+ public boolean shouldPruneSchema(String schemaName) {
+ Map<String, String> recordValues = ImmutableMap.of(
+ CATS_COL_CATALOG_NAME, IS_CATALOG_NAME,
+ SHRD_COL_TABLE_SCHEMA, schemaName,
+ SCHS_COL_SCHEMA_NAME, schemaName);
+
+ return filter.evaluate(recordValues, true) ==
InfoSchemaFilter.Result.FALSE;
+ }
+
+ @Override
public boolean shouldVisitSchema(String schemaName, SchemaPlus schema) {
if (!super.shouldVisitSchema(schemaName, schema)) {
return false;
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java
index 8b31c07..bc71c98 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java
@@ -24,8 +24,11 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.fasterxml.jackson.annotation.JsonTypeName;
import org.apache.drill.common.FunctionNames;
+import org.apache.drill.exec.expr.fn.impl.RegexpUtil;
+import org.apache.drill.exec.expr.fn.impl.RegexpUtil.SqlPatternType;
import org.apache.drill.exec.store.ischema.InfoSchemaFilter.ExprNode.Type;
import org.apache.drill.shaded.guava.com.google.common.base.Joiner;
+import org.apache.drill.shaded.guava.com.google.common.base.Strings;
import java.util.List;
import java.util.Map;
@@ -137,54 +140,106 @@ public class InfoSchemaFilter {
*/
@JsonIgnore
public Result evaluate(Map<String, String> recordValues) {
- return evaluateHelper(recordValues, getExprRoot());
+ return evaluateHelper(recordValues, false, getExprRoot());
}
- private Result evaluateHelper(Map<String, String> recordValues, ExprNode
exprNode) {
+ /**
+ * Evaluate the filter for given <COLUMN NAME, VALUE> pairs.
+ *
+ * @param recordValues map of field names and their values
+ * @param prefixMatchesInconclusive whether a prefix match between a column
value and a filter value
+ * results in Result.INCONCLUSIVE. Used
for pruning the schema search
+ * tree, e.g. "dfs" need not be recursed to
find a schema of "cp.default"
+ * @return evaluation result
+ */
+ @JsonIgnore
+ public Result evaluate(Map<String, String> recordValues, boolean
prefixMatchesInconclusive) {
+ return evaluateHelper(recordValues, prefixMatchesInconclusive,
getExprRoot());
+ }
+
+ private Result evaluateHelper(
+ Map<String, String> recordValues,
+ boolean prefixMatchesInconclusive,
+ ExprNode exprNode
+ ) {
if (exprNode.type == Type.FUNCTION) {
- return evaluateHelperFunction(recordValues, (FunctionExprNode) exprNode);
+ return evaluateHelperFunction(
+ recordValues,
+ prefixMatchesInconclusive,
+ (FunctionExprNode) exprNode
+ );
}
throw new UnsupportedOperationException(
String.format("Unknown expression type '%s' in InfoSchemaFilter",
exprNode.type));
}
- private Result evaluateHelperFunction(Map<String, String> recordValues,
FunctionExprNode exprNode) {
+ private Result evaluateHelperFunction(
+ Map<String, String> recordValues,
+ boolean prefixMatchesInconclusive,
+ FunctionExprNode exprNode
+ ) {
switch (exprNode.function) {
case FunctionNames.LIKE: {
FieldExprNode col = (FieldExprNode) exprNode.args.get(0);
ConstantExprNode pattern = (ConstantExprNode) exprNode.args.get(1);
ConstantExprNode escape = exprNode.args.size() > 2 ?
(ConstantExprNode) exprNode.args.get(2) : null;
final String fieldValue = recordValues.get(col.field);
- if (fieldValue != null) {
- if (escape == null) {
- return
Pattern.matches(sqlToRegexLike(pattern.value).getJavaPatternString(),
fieldValue) ?
- Result.TRUE : Result.FALSE;
- } else {
- return Pattern.matches(sqlToRegexLike(pattern.value,
escape.value).getJavaPatternString(), fieldValue) ?
- Result.TRUE : Result.FALSE;
- }
+ if (fieldValue == null) {
+ return Result.INCONCLUSIVE;
}
+ RegexpUtil.SqlPatternInfo spi = escape == null
+ ? sqlToRegexLike(pattern.value)
+ : sqlToRegexLike(pattern.value, escape.value);
+
+ if (Pattern.matches(spi.getJavaPatternString(), fieldValue)) {
+ return Result.TRUE;
+ }
+ if (!prefixMatchesInconclusive) {
+ return Result.FALSE;
+ }
+ if ((spi.getPatternType() == SqlPatternType.STARTS_WITH ||
spi.getPatternType() == SqlPatternType.CONSTANT) &&
+ !pattern.value.startsWith(fieldValue)) {
+ return Result.FALSE;
+ }
return Result.INCONCLUSIVE;
}
case FunctionNames.EQ:
case "not equal": // TODO: Is this name correct?
case "notequal": // TODO: Is this name correct?
case FunctionNames.NE: {
- FieldExprNode arg0 = (FieldExprNode) exprNode.args.get(0);
- ConstantExprNode arg1 = (ConstantExprNode) exprNode.args.get(1);
+ FieldExprNode col = (FieldExprNode) exprNode.args.get(0);
+ ConstantExprNode arg = (ConstantExprNode) exprNode.args.get(1);
+ final String value = recordValues.get(col.field);
+ if (Strings.isNullOrEmpty(value)) {
+ return Result.INCONCLUSIVE;
+ }
+
+ boolean prefixMatch = arg.value.startsWith(value);
+ boolean exactMatch = prefixMatch && arg.value.equals(value);
- final String value = recordValues.get(arg0.field);
- if (value != null) {
- if (exprNode.function.equals(FunctionNames.EQ)) {
- return arg1.value.equals(value) ? Result.TRUE : Result.FALSE;
+ if (exprNode.function.equals(FunctionNames.EQ)) {
+ if (exactMatch) {
+ return Result.TRUE;
} else {
- return arg1.value.equals(value) ? Result.FALSE : Result.TRUE;
+ if (prefixMatchesInconclusive && prefixMatch) {
+ return Result.INCONCLUSIVE;
+ } else {
+ return Result.FALSE;
+ }
+ }
+ } else {
+ if (exactMatch) {
+ return Result.FALSE;
+ } else {
+ if (prefixMatchesInconclusive && prefixMatch) {
+ return Result.INCONCLUSIVE;
+ } else {
+ return Result.TRUE;
+ }
}
}
-
- return Result.INCONCLUSIVE;
}
case FunctionNames.OR:
@@ -194,7 +249,7 @@ public class InfoSchemaFilter {
// For all other cases, return INCONCLUSIVE
Result result = Result.FALSE;
for(ExprNode arg : exprNode.args) {
- Result exprResult = evaluateHelper(recordValues, arg);
+ Result exprResult = evaluateHelper(recordValues,
prefixMatchesInconclusive, arg);
if (exprResult == Result.TRUE) {
return Result.TRUE;
} else if (exprResult == Result.INCONCLUSIVE) {
@@ -213,7 +268,7 @@ public class InfoSchemaFilter {
Result result = Result.TRUE;
for(ExprNode arg : exprNode.args) {
- Result exprResult = evaluateHelper(recordValues, arg);
+ Result exprResult = evaluateHelper(recordValues,
prefixMatchesInconclusive, arg);
if (exprResult == Result.FALSE) {
return exprResult;
}
@@ -226,6 +281,11 @@ public class InfoSchemaFilter {
}
case "in": {
+ // This case will probably only ever run if the user submits a manually
+ // crafted plan because the IN operator is compiled either to a chain
+ // of boolean ORs, or to a hash join with a relation which uses VALUES
+ // to generate the list of constants provided to the IN operator in
+ // the query. See the planner.in_subquery_threshold option.
FieldExprNode col = (FieldExprNode) exprNode.args.get(0);
List<ExprNode> args = exprNode.args.subList(1, exprNode.args.size());
final String fieldValue = recordValues.get(col.field);
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaRecordGenerator.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaRecordGenerator.java
index 8943721..210d49e 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaRecordGenerator.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaRecordGenerator.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.store.ischema;
+import org.apache.calcite.jdbc.DynamicRootSchema;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.drill.exec.store.pojo.PojoRecordReader;
@@ -72,9 +73,15 @@ public abstract class InfoSchemaRecordGenerator<S> {
* @param visitedPaths set used to ensure same path won't be visited twice
*/
private void scanSchemaImpl(String schemaPath, SchemaPlus schema,
Set<String> visitedPaths) {
- for (String name: schema.getSubSchemaNames()) {
+ Set<String> subSchemaNames = schema.getParentSchema() == null
+ ? schema.unwrap(DynamicRootSchema.class).schema.getSubSchemaNames()
+ : schema.getSubSchemaNames();
+
+ for (String name: subSchemaNames) {
String subSchemaPath = schemaPath.isEmpty() ? name : schemaPath + "." +
name;
- scanSchemaImpl(subSchemaPath, schema.getSubSchema(name), visitedPaths);
+ if (!filterEvaluator.shouldPruneSchema(subSchemaPath)) {
+ scanSchemaImpl(subSchemaPath, schema.getSubSchema(name), visitedPaths);
+ }
}
if (filterEvaluator.shouldVisitSchema(schemaPath, schema) &&
visitedPaths.add(schemaPath)) {
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockBreakageStorage.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockBreakageStorage.java
index d31b061..e6a53ee 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockBreakageStorage.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockBreakageStorage.java
@@ -39,6 +39,8 @@ public class MockBreakageStorage extends MockStorageEngine {
private boolean breakRegister;
+ public int registerAttemptCount = 0;
+
public MockBreakageStorage(MockBreakageStorageEngineConfig configuration,
DrillbitContext context, String name) {
super(configuration, context, name);
breakRegister = false;
@@ -51,6 +53,7 @@ public class MockBreakageStorage extends MockStorageEngine {
@Override
public void registerSchemas(SchemaConfig schemaConfig, SchemaPlus parent)
throws IOException {
if (breakRegister) {
+ registerAttemptCount++;
throw new IOException("mock breakRegister!");
}
super.registerSchemas(schemaConfig, parent);
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/work/metadata/MetadataProvider.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/work/metadata/MetadataProvider.java
index 6ff2383..08d1bab 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/work/metadata/MetadataProvider.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/work/metadata/MetadataProvider.java
@@ -569,7 +569,7 @@ public class MetadataProvider {
private static <S> PojoRecordReader<S> getPojoRecordReader(final
InfoSchemaTableType tableType, final InfoSchemaFilter filter, final DrillConfig
config,
final SchemaTreeProvider provider, final UserSession userSession, final
MetastoreRegistry metastoreRegistry) {
final SchemaPlus rootSchema =
-
provider.createFullRootSchema(userSession.getCredentials().getUserName(),
newSchemaConfigInfoProvider(config, userSession, provider));
+ provider.createRootSchema(userSession.getCredentials().getUserName(),
newSchemaConfigInfoProvider(config, userSession, provider));
return tableType.getRecordReader(rootSchema, filter,
userSession.getOptions(), metastoreRegistry);
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/PlanningBase.java
b/exec/java-exec/src/test/java/org/apache/drill/PlanningBase.java
index 130fe58..8f5c8fd 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/PlanningBase.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/PlanningBase.java
@@ -108,7 +108,6 @@ public class PlanningBase extends ExecTest {
final DrillOperatorTable table = new DrillOperatorTable(functionRegistry,
systemOptions);
SchemaConfig schemaConfig = SchemaConfig.newBuilder("foo",
context).build();
SchemaPlus root = DynamicSchema.createRootSchema(registry, schemaConfig,
new AliasRegistryProvider(dbContext));
- registry.getSchemaFactory().registerSchemas(schemaConfig, root);
when(context.getNewDefaultSchema()).thenReturn(root);
when(context.getLpPersistence()).thenReturn(new
LogicalPlanPersistence(config, ClassPathScanner.fromPrescan(config)));
diff --git
a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestSchema.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestSchema.java
index f6d8bca..a7d27ad 100644
---
a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestSchema.java
+++
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestSchema.java
@@ -41,7 +41,7 @@ public class TestSchema extends DrillTest {
public static void setup() throws Exception {
cluster = ClusterFixture.builder(dirTestWatcher).buildCustomMockStorage();
boolean breakRegisterSchema = true;
- // With a broken storage which will throw exception in regiterSchema,
every query (even on other storage)
+ // With a broken storage which will throw exception in registerSchema,
every query (even on other storage)
// shall fail if Drill is still loading all schemas (include the broken
schema) before a query.
cluster.insertMockStorage("mock_broken", breakRegisterSchema);
cluster.insertMockStorage("mock_good", !breakRegisterSchema);
diff --git
a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestInfoSchema.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestInfoSchema.java
index 11692d7..fa616e6 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestInfoSchema.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestInfoSchema.java
@@ -164,6 +164,17 @@ public class TestInfoSchema extends BaseTestQuery {
}
@Test
+ public void showDatabasesWhereIn() throws Exception {
+ testBuilder()
+ .sqlQuery("SHOW DATABASES WHERE SCHEMA_NAME in ('dfs.tmp',
'dfs.root')")
+ .unOrdered()
+ .baselineColumns("SCHEMA_NAME")
+ .baselineValues("dfs.tmp")
+ .baselineValues("dfs.root")
+ .go();
+ }
+
+ @Test
public void showDatabasesLike() throws Exception{
testBuilder()
.sqlQuery("SHOW DATABASES LIKE '%y%'")
diff --git
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/ischema/TestInfoSchemaFilterPushDown.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/ischema/TestInfoSchemaFilterPushDown.java
index 04a7635..008730f 100644
---
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/ischema/TestInfoSchemaFilterPushDown.java
+++
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/ischema/TestInfoSchemaFilterPushDown.java
@@ -17,7 +17,14 @@
*/
package org.apache.drill.exec.store.ischema;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
import org.apache.drill.PlanTestBase;
+import org.apache.drill.exec.store.StoragePluginRegistry;
+import org.apache.drill.exec.store.mock.MockBreakageStorage;
+import org.apache.drill.test.ClientFixture;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.ClusterMockStorageFixture;
import org.junit.Test;
public class TestInfoSchemaFilterPushDown extends PlanTestBase {
@@ -112,6 +119,34 @@ public class TestInfoSchemaFilterPushDown extends
PlanTestBase {
testHelper(query, scan, true);
}
+ /**
+ * As reported in DRILL-8057, even queries against INFORMATION_SCHEMA that
+ * included a filter that could be pushed down would be penalised by eager
+ * schema registration executed on every enabled storage plugin instance.
+ * This resulted in slow or even failed INFORMATION_SCHEMA schema queries
+ * when some storage plugin was unresponsive, even if it was not needed for
+ * the query being run. This test checks that lazy schema registration is
+ * working by installing a mock storage plugin to act as a canary.
+ */
+ @Test
+ public void testLazySchemaRegistration() throws Exception {
+ ClusterMockStorageFixture cluster =
ClusterFixture.builder(dirTestWatcher).buildCustomMockStorage();
+ // This mock storage plugin throws an exception from registerSchemas which
+ // would have been enough to correctly make this test fail were it not that
+ // the exception gets swallowed by StoragePluginRegistryImpl so we need to
+ // inspect a counter instead.
+ cluster.insertMockStorage("registerSchema canary", true);
+ ClientFixture client = cluster.clientFixture();
+
+ final String query = "SELECT * FROM INFORMATION_SCHEMA.`TABLES` WHERE
TABLE_SCHEMA='INFORMATION_SCHEMA'";
+ client.queryBuilder().sql(query).run();
+ StoragePluginRegistry spr = cluster.drillbit().getContext().getStorage();
+ MockBreakageStorage mbs = (MockBreakageStorage)
spr.getPlugin("registerSchema canary");
+
+ // no attempt to register schemas on the canary should have been made
+ assertEquals(0, mbs.registerAttemptCount);
+ }
+
private void testHelper(final String query, String filterInScan, boolean
filterPrelExpected) throws Exception {
String[] expectedPatterns;
String[] excludedPatterns;