This is an automated email from the ASF dual-hosted git repository.

dzamo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new 398c114  DRILL-8057: INFORMATION_SCHEMA filter push down is 
inefficient (#2388)
398c114 is described below

commit 398c114b2d920adf1840338bde9b55bd7cb602d3
Author: James Turton <[email protected]>
AuthorDate: Wed Jan 19 08:34:25 2022 +0200

    DRILL-8057: INFORMATION_SCHEMA filter push down is inefficient (#2388)
    
    * Add schema search tree pruning capability to InfoSchema.
    
    * Fix checkstyle, address review comments.
    
    * WIP.
    
    * WIP.
    
    * Address review comments.
    
    * Clean up and add a unit test of lazy schema registration.
    
    * Remove call to registerSchemas() in PlanningBase.
---
 .../org/apache/calcite/jdbc/DynamicRootSchema.java |  26 +++--
 .../org/apache/calcite/jdbc/DynamicSchema.java     |  12 ++-
 .../apache/drill/exec/ops/FragmentContextImpl.java |   6 +-
 .../org/apache/drill/exec/ops/QueryContext.java    |   8 --
 .../drill/exec/store/DrillSchemaFactory.java       |  68 +++----------
 .../drill/exec/store/SchemaTreeProvider.java       |  68 +------------
 .../exec/store/StoragePluginRegistryImpl.java      |  24 +----
 .../drill/exec/store/ischema/FilterEvaluator.java  |  29 +++++-
 .../drill/exec/store/ischema/InfoSchemaFilter.java | 106 ++++++++++++++++-----
 .../store/ischema/InfoSchemaRecordGenerator.java   |  11 ++-
 .../drill/exec/store/mock/MockBreakageStorage.java |   3 +
 .../drill/exec/work/metadata/MetadataProvider.java |   2 +-
 .../test/java/org/apache/drill/PlanningBase.java   |   1 -
 .../drill/exec/physical/impl/TestSchema.java       |   2 +-
 .../org/apache/drill/exec/sql/TestInfoSchema.java  |  11 +++
 .../ischema/TestInfoSchemaFilterPushDown.java      |  35 +++++++
 16 files changed, 219 insertions(+), 193 deletions(-)

diff --git 
a/exec/java-exec/src/main/java/org/apache/calcite/jdbc/DynamicRootSchema.java 
b/exec/java-exec/src/main/java/org/apache/calcite/jdbc/DynamicRootSchema.java
index 1dfc9d6..1f8b162 100644
--- 
a/exec/java-exec/src/main/java/org/apache/calcite/jdbc/DynamicRootSchema.java
+++ 
b/exec/java-exec/src/main/java/org/apache/calcite/jdbc/DynamicRootSchema.java
@@ -42,6 +42,7 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Optional;
+import java.util.Set;
 
 /**
  * Loads schemas from storage plugins later when {@link #getSubSchema(String, 
boolean)}
@@ -58,7 +59,7 @@ public class DynamicRootSchema extends DynamicSchema {
 
   /** Creates a root schema. */
   DynamicRootSchema(StoragePluginRegistry storages, SchemaConfig schemaConfig, 
AliasRegistryProvider aliasRegistryProvider) {
-    super(null, new RootSchema(), ROOT_SCHEMA_NAME);
+    super(null, new RootSchema(storages), ROOT_SCHEMA_NAME);
     this.schemaConfig = schemaConfig;
     this.storages = storages;
     this.aliasRegistryProvider = aliasRegistryProvider;
@@ -78,13 +79,13 @@ public class DynamicRootSchema extends DynamicSchema {
   private CalciteSchema getSchema(String schemaName, boolean caseSensitive) {
     // Drill registers schemas in lower case, see AbstractSchema constructor
     schemaName = schemaName == null ? null : schemaName.toLowerCase();
-    CalciteSchema retSchema = getSubSchemaMap().get(schemaName);
+    CalciteSchema retSchema = subSchemaMap.map().get(schemaName);
     if (retSchema != null) {
       return retSchema;
     }
 
     loadSchemaFactory(schemaName, caseSensitive);
-    retSchema = getSubSchemaMap().get(schemaName);
+    retSchema = subSchemaMap.map().get(schemaName);
     return retSchema;
   }
 
@@ -133,7 +134,7 @@ public class DynamicRootSchema extends DynamicSchema {
         for (SchemaPlus schema : secondLevelSchemas) {
           org.apache.drill.exec.store.AbstractSchema drillSchema;
           try {
-            drillSchema = 
schema.unwrap(org.apache.drill.exec.store.AbstractSchema.class);
+            drillSchema = schema.unwrap(AbstractSchema.class);
           } catch (ClassCastException e) {
             throw new RuntimeException(String.format("Schema '%s' is not 
expected under root schema", schema.getName()));
           }
@@ -154,10 +155,18 @@ public class DynamicRootSchema extends DynamicSchema {
     }
   }
 
-  static class RootSchema extends AbstractSchema {
+  public static class RootSchema extends AbstractSchema {
 
-    public RootSchema() {
+    private StoragePluginRegistry storages;
+
+    public RootSchema(StoragePluginRegistry storages) {
       super(Collections.emptyList(), ROOT_SCHEMA_NAME);
+      this.storages = storages;
+    }
+
+    @Override
+    public Set<String> getSubSchemaNames() {
+      return storages.availablePlugins();
     }
 
     @Override
@@ -171,6 +180,11 @@ public class DynamicRootSchema extends DynamicSchema {
           DataContext.ROOT,
           BuiltInMethod.DATA_CONTEXT_GET_ROOT_SCHEMA.method);
     }
+
+    @Override
+    public boolean showInInformationSchema() {
+      return false;
+    }
   }
 }
 
diff --git 
a/exec/java-exec/src/main/java/org/apache/calcite/jdbc/DynamicSchema.java 
b/exec/java-exec/src/main/java/org/apache/calcite/jdbc/DynamicSchema.java
index e315f4a..2a7987b 100644
--- a/exec/java-exec/src/main/java/org/apache/calcite/jdbc/DynamicSchema.java
+++ b/exec/java-exec/src/main/java/org/apache/calcite/jdbc/DynamicSchema.java
@@ -19,7 +19,9 @@ package org.apache.calcite.jdbc;
 
 import org.apache.calcite.schema.Schema;
 import org.apache.calcite.schema.SchemaPlus;
+import org.apache.drill.common.AutoCloseables;
 import org.apache.drill.exec.alias.AliasRegistryProvider;
+import org.apache.drill.exec.store.AbstractSchema;
 import org.apache.drill.exec.store.SchemaConfig;
 import org.apache.drill.exec.store.StoragePluginRegistry;
 
@@ -28,7 +30,7 @@ import org.apache.drill.exec.store.StoragePluginRegistry;
  * Unlike SimpleCalciteSchema, DynamicSchema could have an empty or partial 
schemaMap, but it could maintain a map of
  * name->SchemaFactory, and only register schema when the correspondent name 
is requested.
  */
-public class DynamicSchema extends SimpleCalciteSchema {
+public class DynamicSchema extends SimpleCalciteSchema implements 
AutoCloseable {
 
   public DynamicSchema(CalciteSchema parent, Schema schema, String name) {
     super(parent, schema, name);
@@ -49,4 +51,12 @@ public class DynamicSchema extends SimpleCalciteSchema {
     DynamicRootSchema rootSchema = new DynamicRootSchema(storages, 
schemaConfig, aliasRegistryProvider);
     return rootSchema.plus();
   }
+
+  @Override
+  public void close() throws Exception {
+    for (CalciteSchema cs : subSchemaMap.map().values()) {
+      
AutoCloseables.closeWithUserException(cs.plus().unwrap(AbstractSchema.class));
+    }
+  }
+
 }
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/ops/FragmentContextImpl.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/ops/FragmentContextImpl.java
index 0a30c2b..7ce944d 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/ops/FragmentContextImpl.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/ops/FragmentContextImpl.java
@@ -311,7 +311,7 @@ public class FragmentContextImpl extends 
BaseFragmentContext implements Executor
         .setIgnoreAuthErrors(isImpersonationEnabled)
         .build();
 
-    return queryContext.getFullRootSchema(schemaConfig);
+    return queryContext.getRootSchema(schemaConfig);
   }
 
   private SchemaPlus getFragmentContextRootSchema() {
@@ -323,7 +323,7 @@ public class FragmentContextImpl extends 
BaseFragmentContext implements Executor
         .setIgnoreAuthErrors(isImpersonationEnabled)
         .build();
 
-    return schemaTreeProvider.createFullRootSchema(schemaConfig);
+    return schemaTreeProvider.createRootSchema(schemaConfig);
   }
 
   @Override
@@ -696,7 +696,7 @@ public class FragmentContextImpl extends 
BaseFragmentContext implements Executor
 
     @Override
     public SchemaPlus getRootSchema(String userName) {
-      return schemaTreeProvider.createFullRootSchema(userName, this);
+      return schemaTreeProvider.createRootSchema(userName, this);
     }
 
     @Override
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/ops/QueryContext.java 
b/exec/java-exec/src/main/java/org/apache/drill/exec/ops/QueryContext.java
index 694b4e2..82e9994 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/ops/QueryContext.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ops/QueryContext.java
@@ -198,15 +198,7 @@ public class QueryContext implements AutoCloseable, 
OptimizerRulesContext, Schem
   public SchemaPlus getRootSchema(SchemaConfig schemaConfig) {
     return schemaTreeProvider.createRootSchema(schemaConfig);
   }
-  /**
-   *  Create and return a fully initialized SchemaTree with given 
<i>schemaConfig</i>.
-   * @param schemaConfig
-   * @return A fully initialized SchemaTree with given <i>schemaConfig</i>.
-   */
 
-  public SchemaPlus getFullRootSchema(SchemaConfig schemaConfig) {
-    return schemaTreeProvider.createFullRootSchema(schemaConfig);
-  }
   /**
    * Get the user name of the user who issued the query that is managed by 
this QueryContext.
    * @return The user name of the user who issued the query that is managed by 
this QueryContext.
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/DrillSchemaFactory.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/DrillSchemaFactory.java
index 40e67c4..1fc5461 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/DrillSchemaFactory.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/DrillSchemaFactory.java
@@ -18,74 +18,30 @@
 package org.apache.drill.exec.store;
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.TimeUnit;
 
 import org.apache.calcite.schema.SchemaPlus;
-import org.apache.drill.shaded.guava.com.google.common.base.Stopwatch;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 public class DrillSchemaFactory extends AbstractSchemaFactory {
   private static final Logger logger = 
LoggerFactory.getLogger(DrillSchemaFactory.class);
 
-  private final StoragePluginRegistryImpl registry;
-
-  public DrillSchemaFactory(String name, StoragePluginRegistryImpl registry) {
+  public DrillSchemaFactory(String name) {
     super(name);
-    this.registry = registry;
   }
 
+  /**
+   * Historical note.  This method used to eagerly register schemas for every
+   * enabled storage plugin instance, an operation that is expensive at best
+   * and unreliable in the presence of unresponsive storage plugins at worst.
+   * Now the schemas under the root are registered lazily by DynamicRootSchema.
+   *
+   * @param schemaConfig Configuration for schema objects.
+   * @param parent Reference to parent schema.
+   * @throws IOException never thrown by this implementation; declared for interface compatibility
+   */
   @Override
   public void registerSchemas(SchemaConfig schemaConfig, SchemaPlus parent) 
throws IOException {
-    Stopwatch watch = Stopwatch.createStarted();
-    registry.registerSchemas(schemaConfig, parent);
-
-    // Add second level schema as top level schema with name qualified with 
parent schema name
-    // Ex: "dfs" schema has "default" and "tmp" as sub schemas. Add following 
extra schemas "dfs.default" and
-    // "dfs.tmp" under root schema.
-    //
-    // Before change, schema tree looks like below:
-    // "root"
-    // -- "dfs"
-    // -- "default"
-    // -- "tmp"
-    // -- "hive"
-    // -- "default"
-    // -- "hivedb1"
-    //
-    // After the change, the schema tree looks like below:
-    // "root"
-    // -- "dfs"
-    // -- "default"
-    // -- "tmp"
-    // -- "dfs.default"
-    // -- "dfs.tmp"
-    // -- "hive"
-    // -- "default"
-    // -- "hivedb1"
-    // -- "hive.default"
-    // -- "hive.hivedb1"
-    List<SchemaPlus> secondLevelSchemas = new ArrayList<>();
-    for (String firstLevelSchemaName : parent.getSubSchemaNames()) {
-      SchemaPlus firstLevelSchema = parent.getSubSchema(firstLevelSchemaName);
-      for (String secondLevelSchemaName : 
firstLevelSchema.getSubSchemaNames()) {
-        
secondLevelSchemas.add(firstLevelSchema.getSubSchema(secondLevelSchemaName));
-      }
-    }
-
-    for (SchemaPlus schema : secondLevelSchemas) {
-      AbstractSchema drillSchema;
-      try {
-        drillSchema = schema.unwrap(AbstractSchema.class);
-      } catch (ClassCastException e) {
-        throw new RuntimeException(String.format("Schema '%s' is not expected 
under root schema", schema.getName()));
-      }
-      SubSchemaWrapper wrapper = new SubSchemaWrapper(drillSchema);
-      parent.add(wrapper.getName(), wrapper);
-    }
-
-    logger.debug("Took {} ms to register schemas.", 
watch.elapsed(TimeUnit.MILLISECONDS));
+    throw new UnsupportedOperationException("Only lazy schema registration by 
DynamicRootSchema is supported.");
   }
 }
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/SchemaTreeProvider.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/SchemaTreeProvider.java
index d0e29dd..e28a898 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/SchemaTreeProvider.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/SchemaTreeProvider.java
@@ -17,15 +17,13 @@
  */
 package org.apache.drill.exec.store;
 
-import java.io.IOException;
 import java.util.List;
 
-import org.apache.calcite.schema.SchemaPlus;
 import org.apache.drill.common.AutoCloseables;
-import org.apache.drill.common.exceptions.UserException;
 import org.apache.drill.exec.ExecConstants;
 import org.apache.drill.exec.ops.ViewExpansionContext;
 import org.apache.calcite.jdbc.DynamicSchema;
+import org.apache.calcite.schema.SchemaPlus;
 import org.apache.drill.exec.server.DrillbitContext;
 import org.apache.drill.exec.server.options.OptionManager;
 import org.apache.drill.exec.server.options.OptionValue;
@@ -104,77 +102,19 @@ public class SchemaTreeProvider implements AutoCloseable {
   /**
    * Create and return a SchemaTree with given <i>schemaConfig</i>.
    * @param schemaConfig
-   * @return
-   */
-  public SchemaPlus createRootSchema(SchemaConfig schemaConfig) {
-      SchemaPlus rootSchema = 
DynamicSchema.createRootSchema(dContext.getStorage(), schemaConfig,
-        dContext.getAliasRegistryProvider());
-      schemaTreesToClose.add(rootSchema);
-      return rootSchema;
-  }
-
-  /**
-   * Return full root schema with schema owner as the given user.
-   *
-   * @param userName Name of the user who is accessing the storage sources.
-   * @param provider {@link SchemaConfigInfoProvider} instance
    * @return Root of the schema tree.
    */
-  public SchemaPlus createFullRootSchema(final String userName, final 
SchemaConfigInfoProvider provider) {
-    final String schemaUser = isImpersonationEnabled ? userName : 
ImpersonationUtil.getProcessUserName();
-    final SchemaConfig schemaConfig = SchemaConfig.newBuilder(schemaUser, 
provider).build();
-    return createFullRootSchema(schemaConfig);
-  }
-
-  /**
-   * Create and return a Full SchemaTree with given <i>schemaConfig</i>.
-   * @param schemaConfig
-   * @return
-   */
-  public SchemaPlus createFullRootSchema(SchemaConfig schemaConfig) {
-    try {
+  public SchemaPlus createRootSchema(SchemaConfig schemaConfig) {
       SchemaPlus rootSchema = 
DynamicSchema.createRootSchema(dContext.getStorage(), schemaConfig,
         dContext.getAliasRegistryProvider());
-      dContext.getSchemaFactory().registerSchemas(schemaConfig, rootSchema);
       schemaTreesToClose.add(rootSchema);
       return rootSchema;
-    }
-    catch(IOException e) {
-      // We can't proceed further without a schema, throw a runtime exception.
-      // Improve the error message for client side.
-
-      final String contextString = isImpersonationEnabled ? "[Hint: Username 
is absent in connection URL or doesn't " +
-          "exist on Drillbit node. Please specify a username in connection URL 
which is present on Drillbit node.]" :
-          "";
-      throw UserException
-          .resourceError(e)
-          .message("Failed to create schema tree.")
-          .addContext("IOException: ", e.getMessage())
-          .addContext(contextString)
-          .build(logger);
-    }
   }
 
   @Override
   public void close() throws Exception {
-    List<AutoCloseable> toClose = Lists.newArrayList();
-    for(SchemaPlus tree : schemaTreesToClose) {
-      addSchemasToCloseList(tree, toClose);
-    }
-
-    AutoCloseables.close(toClose);
-  }
-
-  private static void addSchemasToCloseList(final SchemaPlus tree, final 
List<AutoCloseable> toClose) {
-    for(String subSchemaName : tree.getSubSchemaNames()) {
-      addSchemasToCloseList(tree.getSubSchema(subSchemaName), toClose);
-    }
-
-    try {
-      AbstractSchema drillSchemaImpl =  tree.unwrap(AbstractSchema.class);
-      toClose.add(drillSchemaImpl);
-    } catch (ClassCastException e) {
-      // Ignore as the SchemaPlus is not an implementation of Drill schema.
+    for (SchemaPlus sp : schemaTreesToClose) {
+      AutoCloseables.closeWithUserException(sp.unwrap(AbstractSchema.class));
     }
   }
 }
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java
index 38a489e..2e9b2d0 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java
@@ -30,7 +30,6 @@ import java.util.Set;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
 
-import org.apache.calcite.schema.SchemaPlus;
 import org.apache.drill.common.collections.ImmutableEntry;
 import org.apache.drill.common.exceptions.ExecutionSetupException;
 import org.apache.drill.common.exceptions.UserException;
@@ -198,7 +197,7 @@ public class StoragePluginRegistryImpl implements 
StoragePluginRegistry {
   public StoragePluginRegistryImpl(DrillbitContext context) {
     this.context = new DrillbitPluginRegistryContext(context);
     this.pluginCache = new StoragePluginMap();
-    this.schemaFactory = new DrillSchemaFactory(null, this);
+    this.schemaFactory = new DrillSchemaFactory(null);
     locators.add(new ClassicConnectorLocator(this.context));
     locators.add(new SystemPluginLocator(this.context));
     this.pluginStore = new StoragePluginStoreImpl(context);
@@ -907,27 +906,6 @@ public class StoragePluginRegistryImpl implements 
StoragePluginRegistry {
     return connector.pluginEntryFor(name, pluginConfig, type);
   }
 
-  // TODO: Replace this. Inefficient to obtain schemas we don't need.
-  protected void registerSchemas(SchemaConfig schemaConfig, SchemaPlus parent) 
{
-    // Refresh against the persistent store.
-    // TODO: This will hammer the system if queries come in rapidly.
-    // Need some better solution: grace period, alert from ZK that there
-    // is something new, etc. Even better, don't register all the schemas.
-    refresh();
-
-    // Register schemas with the refreshed plugins
-    // TODO: this code requires instantiating all plugins, even though
-    // the query won't use them. Need a way to do deferred registration.
-    for (PluginHandle plugin : pluginCache.plugins()) {
-      try {
-        plugin.plugin().registerSchemas(schemaConfig, parent);
-      } catch (Exception e) {
-        logger.warn("Error during `{}` schema initialization: {}",
-            plugin.name(), e.getMessage(), e.getCause());
-      }
-    }
-  }
-
   @Override
   public ObjectMapper mapper() {
     return context.mapper();
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/FilterEvaluator.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/FilterEvaluator.java
index 34219fb..5fce27c 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/FilterEvaluator.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/FilterEvaluator.java
@@ -51,6 +51,16 @@ public interface FilterEvaluator {
   boolean shouldVisitCatalog();
 
   /**
+   * Reports whether the named schema, together with all of its
+   * descendants, should be pruned from the schema search tree.
+   *
+   * @param schemaName name of the schema to test for pruning
+   * @return whether to prune this schema and all its descendants from the
+   * search tree.
+   */
+  boolean shouldPruneSchema(String schemaName);
+
+  /**
    * Visit the given schema.
    *
    * @param schemaName name of the schema
@@ -106,11 +116,12 @@ public interface FilterEvaluator {
     }
 
     @Override
-    public boolean shouldVisitSchema(String schemaName, SchemaPlus schema) {
-      if (schemaName == null || schemaName.isEmpty()) {
-        return false;
-      }
+    public boolean shouldPruneSchema(String schemaName) {
+      return false;
+    }
 
+    @Override
+    public boolean shouldVisitSchema(String schemaName, SchemaPlus schema) {
       try {
         AbstractSchema drillSchema = schema.unwrap(AbstractSchema.class);
         return drillSchema.showInInformationSchema();
@@ -159,6 +170,16 @@ public interface FilterEvaluator {
     }
 
     @Override
+    public boolean shouldPruneSchema(String schemaName) {
+      Map<String, String> recordValues = ImmutableMap.of(
+        CATS_COL_CATALOG_NAME, IS_CATALOG_NAME,
+        SHRD_COL_TABLE_SCHEMA, schemaName,
+        SCHS_COL_SCHEMA_NAME, schemaName);
+
+      return filter.evaluate(recordValues, true) == 
InfoSchemaFilter.Result.FALSE;
+    }
+
+    @Override
     public boolean shouldVisitSchema(String schemaName, SchemaPlus schema) {
       if (!super.shouldVisitSchema(schemaName, schema)) {
         return false;
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java
index 8b31c07..bc71c98 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java
@@ -24,8 +24,11 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
 import com.fasterxml.jackson.annotation.JsonTypeName;
 
 import org.apache.drill.common.FunctionNames;
+import org.apache.drill.exec.expr.fn.impl.RegexpUtil;
+import org.apache.drill.exec.expr.fn.impl.RegexpUtil.SqlPatternType;
 import org.apache.drill.exec.store.ischema.InfoSchemaFilter.ExprNode.Type;
 import org.apache.drill.shaded.guava.com.google.common.base.Joiner;
+import org.apache.drill.shaded.guava.com.google.common.base.Strings;
 
 import java.util.List;
 import java.util.Map;
@@ -137,54 +140,106 @@ public class InfoSchemaFilter {
    */
   @JsonIgnore
   public Result evaluate(Map<String, String> recordValues) {
-    return evaluateHelper(recordValues, getExprRoot());
+    return evaluateHelper(recordValues, false, getExprRoot());
   }
 
-  private Result evaluateHelper(Map<String, String> recordValues, ExprNode 
exprNode) {
+  /**
+   * Evaluate the filter for given <COLUMN NAME, VALUE> pairs.
+   *
+   * @param recordValues map of field names and their values
+   * @param prefixMatchesInconclusive whether a prefix match between a column 
value and a filter value
+   *                                  results in Result.INCONCLUSIVE.  Used 
for pruning the schema search
+   *                                  tree, e.g. "dfs" need not be recursed to 
find a schema of "cp.default"
+   * @return evaluation result
+   */
+  @JsonIgnore
+  public Result evaluate(Map<String, String> recordValues, boolean 
prefixMatchesInconclusive) {
+    return evaluateHelper(recordValues, prefixMatchesInconclusive, 
getExprRoot());
+  }
+
+  private Result evaluateHelper(
+    Map<String, String> recordValues,
+    boolean prefixMatchesInconclusive,
+    ExprNode exprNode
+  ) {
     if (exprNode.type == Type.FUNCTION) {
-      return evaluateHelperFunction(recordValues, (FunctionExprNode) exprNode);
+      return evaluateHelperFunction(
+        recordValues,
+        prefixMatchesInconclusive,
+        (FunctionExprNode) exprNode
+      );
     }
 
     throw new UnsupportedOperationException(
         String.format("Unknown expression type '%s' in InfoSchemaFilter", 
exprNode.type));
   }
 
-  private Result evaluateHelperFunction(Map<String, String> recordValues, 
FunctionExprNode exprNode) {
+  private Result evaluateHelperFunction(
+    Map<String, String> recordValues,
+    boolean prefixMatchesInconclusive,
+    FunctionExprNode exprNode
+  ) {
     switch (exprNode.function) {
       case FunctionNames.LIKE: {
         FieldExprNode col = (FieldExprNode) exprNode.args.get(0);
         ConstantExprNode pattern = (ConstantExprNode) exprNode.args.get(1);
         ConstantExprNode escape = exprNode.args.size() > 2 ? 
(ConstantExprNode) exprNode.args.get(2) : null;
         final String fieldValue = recordValues.get(col.field);
-        if (fieldValue != null) {
-          if (escape == null) {
-            return 
Pattern.matches(sqlToRegexLike(pattern.value).getJavaPatternString(), 
fieldValue) ?
-                Result.TRUE : Result.FALSE;
-          } else {
-            return Pattern.matches(sqlToRegexLike(pattern.value, 
escape.value).getJavaPatternString(), fieldValue) ?
-                Result.TRUE : Result.FALSE;
-          }
+        if (fieldValue == null) {
+          return Result.INCONCLUSIVE;
         }
 
+        RegexpUtil.SqlPatternInfo spi = escape == null
+          ? sqlToRegexLike(pattern.value)
+          : sqlToRegexLike(pattern.value, escape.value);
+
+        if (Pattern.matches(spi.getJavaPatternString(), fieldValue)) {
+          return Result.TRUE;
+        }
+        if (!prefixMatchesInconclusive) {
+          return Result.FALSE;
+        }
+        if ((spi.getPatternType() == SqlPatternType.STARTS_WITH || 
spi.getPatternType() == SqlPatternType.CONSTANT) &&
+          !pattern.value.startsWith(fieldValue)) {
+            return Result.FALSE;
+          }
         return Result.INCONCLUSIVE;
       }
       case FunctionNames.EQ:
       case "not equal": // TODO: Is this name correct?
       case "notequal":  // TODO: Is this name correct?
       case FunctionNames.NE: {
-        FieldExprNode arg0 = (FieldExprNode) exprNode.args.get(0);
-        ConstantExprNode arg1 = (ConstantExprNode) exprNode.args.get(1);
+        FieldExprNode col = (FieldExprNode) exprNode.args.get(0);
+        ConstantExprNode arg = (ConstantExprNode) exprNode.args.get(1);
+        final String value = recordValues.get(col.field);
+        if (Strings.isNullOrEmpty(value)) {
+          return Result.INCONCLUSIVE;
+        }
+
+        boolean prefixMatch = arg.value.startsWith(value);
+        boolean exactMatch = prefixMatch && arg.value.equals(value);
 
-        final String value = recordValues.get(arg0.field);
-        if (value != null) {
-          if (exprNode.function.equals(FunctionNames.EQ)) {
-            return arg1.value.equals(value) ? Result.TRUE : Result.FALSE;
+        if (exprNode.function.equals(FunctionNames.EQ)) {
+          if (exactMatch) {
+            return Result.TRUE;
           } else {
-            return arg1.value.equals(value) ? Result.FALSE : Result.TRUE;
+            if (prefixMatchesInconclusive && prefixMatch) {
+              return Result.INCONCLUSIVE;
+            } else {
+              return Result.FALSE;
+            }
+          }
+        } else {
+          if (exactMatch) {
+            return Result.FALSE;
+          } else {
+            if (prefixMatchesInconclusive && prefixMatch) {
+              return Result.INCONCLUSIVE;
+            } else {
+              return Result.TRUE;
+            }
           }
         }
-
-        return Result.INCONCLUSIVE;
       }
 
       case FunctionNames.OR:
@@ -194,7 +249,7 @@ public class InfoSchemaFilter {
         // For all other cases, return INCONCLUSIVE
         Result result = Result.FALSE;
         for(ExprNode arg : exprNode.args) {
-          Result exprResult = evaluateHelper(recordValues, arg);
+          Result exprResult = evaluateHelper(recordValues, 
prefixMatchesInconclusive, arg);
           if (exprResult == Result.TRUE) {
             return Result.TRUE;
           } else if (exprResult == Result.INCONCLUSIVE) {
@@ -213,7 +268,7 @@ public class InfoSchemaFilter {
         Result result = Result.TRUE;
 
         for(ExprNode arg : exprNode.args) {
-          Result exprResult = evaluateHelper(recordValues, arg);
+          Result exprResult = evaluateHelper(recordValues, 
prefixMatchesInconclusive, arg);
           if (exprResult == Result.FALSE) {
             return exprResult;
           }
@@ -226,6 +281,11 @@ public class InfoSchemaFilter {
       }
 
       case "in": {
+        // This case will probably only ever run if the user submits a manually
+        // crafted plan because the IN operator is compiled either to a chain
+        // of boolean ORs, or to a hash join with a relation which uses VALUES
+        // to generate the list of constants provided to the IN operator in
+        // the query. See the planner.in_subquery_threshold option.
         FieldExprNode col = (FieldExprNode) exprNode.args.get(0);
         List<ExprNode> args = exprNode.args.subList(1, exprNode.args.size());
         final String fieldValue = recordValues.get(col.field);
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaRecordGenerator.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaRecordGenerator.java
index 8943721..210d49e 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaRecordGenerator.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaRecordGenerator.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.store.ischema;
 
+import org.apache.calcite.jdbc.DynamicRootSchema;
 import org.apache.calcite.schema.SchemaPlus;
 import org.apache.drill.exec.store.pojo.PojoRecordReader;
 
@@ -72,9 +73,15 @@ public abstract class InfoSchemaRecordGenerator<S> {
    * @param visitedPaths set used to ensure same path won't be visited twice
    */
   private void scanSchemaImpl(String schemaPath, SchemaPlus schema, 
Set<String> visitedPaths) {
-    for (String name: schema.getSubSchemaNames()) {
+    Set<String> subSchemaNames = schema.getParentSchema() == null
+      ? schema.unwrap(DynamicRootSchema.class).schema.getSubSchemaNames()
+      : schema.getSubSchemaNames();
+
+    for (String name: subSchemaNames) {
       String subSchemaPath = schemaPath.isEmpty() ? name : schemaPath + "." + 
name;
-      scanSchemaImpl(subSchemaPath, schema.getSubSchema(name), visitedPaths);
+      if (!filterEvaluator.shouldPruneSchema(subSchemaPath)) {
+        scanSchemaImpl(subSchemaPath, schema.getSubSchema(name), visitedPaths);
+      }
     }
 
     if (filterEvaluator.shouldVisitSchema(schemaPath, schema) && 
visitedPaths.add(schemaPath)) {
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockBreakageStorage.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockBreakageStorage.java
index d31b061..e6a53ee 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockBreakageStorage.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockBreakageStorage.java
@@ -39,6 +39,8 @@ public class MockBreakageStorage extends MockStorageEngine {
 
   private boolean breakRegister;
 
+  public int registerAttemptCount = 0;
+
   public MockBreakageStorage(MockBreakageStorageEngineConfig configuration, 
DrillbitContext context, String name) {
     super(configuration, context, name);
     breakRegister = false;
@@ -51,6 +53,7 @@ public class MockBreakageStorage extends MockStorageEngine {
   @Override
   public void registerSchemas(SchemaConfig schemaConfig, SchemaPlus parent) 
throws IOException {
     if (breakRegister) {
+      registerAttemptCount++;
       throw new IOException("mock breakRegister!");
     }
     super.registerSchemas(schemaConfig, parent);
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/work/metadata/MetadataProvider.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/work/metadata/MetadataProvider.java
index 6ff2383..08d1bab 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/work/metadata/MetadataProvider.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/work/metadata/MetadataProvider.java
@@ -569,7 +569,7 @@ public class MetadataProvider {
   private static <S> PojoRecordReader<S> getPojoRecordReader(final 
InfoSchemaTableType tableType, final InfoSchemaFilter filter, final DrillConfig 
config,
       final SchemaTreeProvider provider, final UserSession userSession, final 
MetastoreRegistry metastoreRegistry) {
     final SchemaPlus rootSchema =
-        
provider.createFullRootSchema(userSession.getCredentials().getUserName(), 
newSchemaConfigInfoProvider(config, userSession, provider));
+        provider.createRootSchema(userSession.getCredentials().getUserName(), 
newSchemaConfigInfoProvider(config, userSession, provider));
     return tableType.getRecordReader(rootSchema, filter, 
userSession.getOptions(), metastoreRegistry);
   }
 
diff --git a/exec/java-exec/src/test/java/org/apache/drill/PlanningBase.java 
b/exec/java-exec/src/test/java/org/apache/drill/PlanningBase.java
index 130fe58..8f5c8fd 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/PlanningBase.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/PlanningBase.java
@@ -108,7 +108,6 @@ public class PlanningBase extends ExecTest {
     final DrillOperatorTable table = new DrillOperatorTable(functionRegistry, 
systemOptions);
     SchemaConfig schemaConfig = SchemaConfig.newBuilder("foo", 
context).build();
     SchemaPlus root = DynamicSchema.createRootSchema(registry, schemaConfig, 
new AliasRegistryProvider(dbContext));
-    registry.getSchemaFactory().registerSchemas(schemaConfig, root);
 
     when(context.getNewDefaultSchema()).thenReturn(root);
     when(context.getLpPersistence()).thenReturn(new 
LogicalPlanPersistence(config, ClassPathScanner.fromPrescan(config)));
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestSchema.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestSchema.java
index f6d8bca..a7d27ad 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestSchema.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestSchema.java
@@ -41,7 +41,7 @@ public class TestSchema extends DrillTest {
   public static void setup() throws Exception {
     cluster = ClusterFixture.builder(dirTestWatcher).buildCustomMockStorage();
     boolean breakRegisterSchema = true;
-    // With a broken storage which will throw exception in regiterSchema, 
every query (even on other storage)
+    // With a broken storage which will throw exception in registerSchema, 
every query (even on other storage)
     // shall fail if Drill is still loading all schemas (include the broken 
schema) before a query.
     cluster.insertMockStorage("mock_broken", breakRegisterSchema);
     cluster.insertMockStorage("mock_good", !breakRegisterSchema);
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestInfoSchema.java 
b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestInfoSchema.java
index 11692d7..fa616e6 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestInfoSchema.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestInfoSchema.java
@@ -164,6 +164,17 @@ public class TestInfoSchema extends BaseTestQuery {
   }
 
   @Test
+  public void showDatabasesWhereIn() throws Exception {
+      testBuilder()
+        .sqlQuery("SHOW DATABASES WHERE SCHEMA_NAME in ('dfs.tmp', 
'dfs.root')")
+        .unOrdered()
+        .baselineColumns("SCHEMA_NAME")
+        .baselineValues("dfs.tmp")
+        .baselineValues("dfs.root")
+        .go();
+  }
+
+  @Test
   public void showDatabasesLike() throws Exception{
     testBuilder()
         .sqlQuery("SHOW DATABASES LIKE '%y%'")
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/ischema/TestInfoSchemaFilterPushDown.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/ischema/TestInfoSchemaFilterPushDown.java
index 04a7635..008730f 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/ischema/TestInfoSchemaFilterPushDown.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/ischema/TestInfoSchemaFilterPushDown.java
@@ -17,7 +17,14 @@
  */
 package org.apache.drill.exec.store.ischema;
 
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
 import org.apache.drill.PlanTestBase;
+import org.apache.drill.exec.store.StoragePluginRegistry;
+import org.apache.drill.exec.store.mock.MockBreakageStorage;
+import org.apache.drill.test.ClientFixture;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.ClusterMockStorageFixture;
 import org.junit.Test;
 
 public class TestInfoSchemaFilterPushDown extends PlanTestBase {
@@ -112,6 +119,34 @@ public class TestInfoSchemaFilterPushDown extends 
PlanTestBase {
     testHelper(query, scan, true);
   }
 
+  /**
+    * As reported in DRILL-8057, even queries against INFORMATION_SCHEMA that
+    * included a filter that could be pushed down would be penalised by eager
+    * schema registration executed on every enabled storage plugin instance.
+    * This resulted in slow or even failed INFORMATION_SCHEMA schema queries
+    * when some storage plugin was unresponsive, even if it was not needed for
+    * the query being run.  This test checks that lazy schema registration is
+    * working by installing a mock storage plugin to act as a canary.
+    */
+  @Test
+  public void testLazySchemaRegistration() throws Exception {
+    ClusterMockStorageFixture cluster = 
ClusterFixture.builder(dirTestWatcher).buildCustomMockStorage();
+    // This mock storage plugin throws an exception from registerSchemas which
+    // would have been enough to correctly make this test fail were it not that
+    // the exception gets swallowed by StoragePluginRegistryImpl so we need to
+    // inspect a counter instead.
+    cluster.insertMockStorage("registerSchema canary", true);
+    ClientFixture client = cluster.clientFixture();
+
+    final String query = "SELECT * FROM INFORMATION_SCHEMA.`TABLES` WHERE 
TABLE_SCHEMA='INFORMATION_SCHEMA'";
+    client.queryBuilder().sql(query).run();
+    StoragePluginRegistry spr = cluster.drillbit().getContext().getStorage();
+    MockBreakageStorage mbs = (MockBreakageStorage) 
spr.getPlugin("registerSchema canary");
+
+    // no attempt to register schemas on the canary should have been made
+    assertEquals(0, mbs.registerAttemptCount);
+  }
+
   private void testHelper(final String query, String filterInScan, boolean 
filterPrelExpected) throws Exception {
     String[] expectedPatterns;
     String[] excludedPatterns;

Reply via email to