Re: [PR] DRILL-8543: Add Support for Materialized Views (drill)

via GitHub Mon, 22 Jun 2026 14:24:04 -0700


cgivre commented on code in PR #3036:
URL: https://github.com/apache/drill/pull/3036#discussion_r3455525536



##########
exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/MaterializedView.java:
##########
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.dotdrill;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+
+/**
+ * Represents a materialized view definition stored as a JSON file with
+ * .materialized_view.drill extension. The actual data is stored separately
+ * in Parquet format in the workspace directory.
+ */
+@JsonTypeName("materialized_view")
+public class MaterializedView {
+
+  /**
+   * Represents the refresh status of the materialized view.
+   */
+  public enum RefreshStatus {
+    /** The materialized view data is complete and up-to-date with its 
definition */
+    COMPLETE,
+    /** The materialized view data needs to be refreshed */
+    INCOMPLETE
+  }
+
+  private final String name;
+  private String sql;
+  private List<View.Field> fields;
+
+  /** Current schema when materialized view is created (not the schema to 
which view belongs to) */
+  private List<String> workspaceSchemaPath;
+
+  /** The relative path where the materialized data is stored (typically the 
view name) */
+  @JsonInclude(Include.NON_NULL)
+  private String dataStoragePath;

Review Comment:
   Good catch — these were inconsistent. The data is physically written to 
`{name}_mv_data`, so that's the pattern we keep. `dataStoragePath` now defaults 
to `name + DATA_DIR_SUFFIX` (new constant) and is the single source of truth — 
every reader/writer goes through `getDataStoragePath()` instead of re-deriving 
the suffix. The stray `setDataStoragePath(viewName)` that stored the wrong 
value is gone.



##########
exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/MaterializedView.java:
##########
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.dotdrill;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+
+/**
+ * Represents a materialized view definition stored as a JSON file with
+ * .materialized_view.drill extension. The actual data is stored separately
+ * in Parquet format in the workspace directory.
+ */
+@JsonTypeName("materialized_view")
+public class MaterializedView {
+
+  /**
+   * Represents the refresh status of the materialized view.
+   */
+  public enum RefreshStatus {
+    /** The materialized view data is complete and up-to-date with its 
definition */
+    COMPLETE,
+    /** The materialized view data needs to be refreshed */
+    INCOMPLETE
+  }
+
+  private final String name;
+  private String sql;
+  private List<View.Field> fields;
+
+  /** Current schema when materialized view is created (not the schema to 
which view belongs to) */
+  private List<String> workspaceSchemaPath;
+
+  /** The relative path where the materialized data is stored (typically the 
view name) */
+  @JsonInclude(Include.NON_NULL)
+  private String dataStoragePath;
+
+  /** Timestamp of the last successful refresh in milliseconds since epoch */
+  @JsonInclude(Include.NON_NULL)
+  private Long lastRefreshTime;
+
+  /** Current refresh status of the materialized view */
+  @JsonInclude(Include.NON_NULL)
+  private RefreshStatus refreshStatus;
+
+  public MaterializedView(String name, String sql, RelDataType rowType, 
List<String> workspaceSchemaPath) {
+    this(name,
+        sql,
+        rowType.getFieldList().stream()
+            .map(f -> new View.Field(f.getName(), f.getType()))
+            .collect(Collectors.toList()),
+        workspaceSchemaPath,
+        name,  // data storage path defaults to view name

Review Comment:
   Settled on `{name}_mv_data` (where the data is actually written). The 
default is now `name + DATA_DIR_SUFFIX` and all call sites use 
`getDataStoragePath()`.



##########
exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillMaterializedViewTable.java:
##########
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.planner.logical;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.calcite.config.CalciteConnectionConfig;
+import org.apache.calcite.plan.RelOptTable;
+import org.apache.calcite.plan.RelOptTable.ToRelContext;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.schema.Schema.TableType;
+import org.apache.calcite.schema.Statistic;
+import org.apache.calcite.schema.Statistics;
+import org.apache.calcite.schema.TranslatableTable;
+import org.apache.calcite.sql.SqlCall;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.drill.exec.dotdrill.MaterializedView;
+import org.apache.drill.exec.ops.ViewExpansionContext;
+import org.apache.drill.exec.planner.sql.conversion.DrillViewExpander;
+
+/**
+ * Represents a materialized view in the Drill query planning.
+ * <p>
+ * A materialized view stores:
+ * <ul>
+ *   <li>Definition file (.materialized_view.drill) - JSON with name, SQL, 
schema info</li>
+ *   <li>Data directory ({name}_mv_data/) - Parquet files with pre-computed 
results</li>
+ * </ul>
+ * <p>
+ * <b>Behavior:</b>
+ * <ul>
+ *   <li>Before REFRESH: queries expand the SQL definition (like a view)</li>
+ *   <li>After REFRESH: queries scan from pre-computed Parquet data</li>
+ * </ul>
+ *
+ * @see org.apache.drill.exec.dotdrill.MaterializedView
+ */
+public class DrillMaterializedViewTable implements TranslatableTable, 
DrillViewInfoProvider {
+
+  private final MaterializedView materializedView;
+  private final String viewOwner;
+  private final ViewExpansionContext viewExpansionContext;
+  private final String workspaceLocation;
+
+  public DrillMaterializedViewTable(MaterializedView materializedView, String 
viewOwner,
+                                    ViewExpansionContext viewExpansionContext, 
String workspaceLocation) {
+    this.materializedView = materializedView;
+    this.viewOwner = viewOwner;
+    this.viewExpansionContext = viewExpansionContext;
+    this.workspaceLocation = workspaceLocation;
+  }
+
+  @Override
+  public RelDataType getRowType(RelDataTypeFactory typeFactory) {
+    return materializedView.getRowType(typeFactory);
+  }
+
+  @Override
+  public Statistic getStatistic() {
+    return Statistics.UNKNOWN;
+  }
+
+  /**
+   * Converts this materialized view to a RelNode for query planning.
+   * <p>
+   * If the MV has been refreshed (data exists), scans from the pre-computed 
Parquet data.
+   * Otherwise, expands the SQL definition like a regular view.
+   */
+  @Override
+  public RelNode toRel(ToRelContext context, RelOptTable relOptTable) {
+    DrillViewExpander viewExpander = viewExpansionContext.getViewExpander();
+    ViewExpansionContext.ViewExpansionToken token = null;
+    try {
+      RelDataType rowType = relOptTable.getRowType();
+      RelNode rel;
+
+      // Check if materialized data exists (REFRESH has been called)
+      boolean hasData = materializedView.getRefreshStatus() == 
MaterializedView.RefreshStatus.COMPLETE;
+
+      // Build the SQL to execute - either scan data or expand definition
+      String sqlToExpand;
+      if (hasData) {
+        // Scan from the pre-computed data directory
+        sqlToExpand = buildDataScanSql();
+      } else {
+        // No data yet - expand the SQL definition like a view
+        sqlToExpand = materializedView.getSql();
+      }
+
+      // Always use the workspace schema path for context - needed for table 
resolution
+      List<String> schemaPath = materializedView.getWorkspaceSchemaPath();
+
+      if (viewExpansionContext.isImpersonationEnabled()) {
+        token = viewExpansionContext.reserveViewExpansionToken(viewOwner);
+        rel = viewExpander.expandView(sqlToExpand, token.getSchemaTree(), 
schemaPath).rel;
+      } else {
+        // When scanning data, pass null for rowType to let Parquet schema be 
inferred
+        // When expanding SQL definition, use the MV's row type
+        RelDataType typeHint = hasData ? null : rowType;
+        rel = viewExpander.expandView(typeHint, sqlToExpand, schemaPath, 
Collections.emptyList()).rel;
+      }
+
+      return rel;
+    } finally {
+      if (token != null) {
+        token.release();
+      }
+    }
+  }
+
+  /**
+   * Builds SQL to scan the materialized data directory.
+   * The data is stored in {workspace}/{mvName}_mv_data/ directory.
+   * We explicitly select the MV's columns to ensure proper schema matching.
+   */
+  private String buildDataScanSql() {
+    String dataTableName = materializedView.getName() + "_mv_data";

Review Comment:
   Done — `buildDataScanSql()` now uses `materializedView.getDataStoragePath()`.



##########
exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillMaterializedViewTable.java:
##########
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.planner.logical;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.calcite.config.CalciteConnectionConfig;
+import org.apache.calcite.plan.RelOptTable;
+import org.apache.calcite.plan.RelOptTable.ToRelContext;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.schema.Schema.TableType;
+import org.apache.calcite.schema.Statistic;
+import org.apache.calcite.schema.Statistics;
+import org.apache.calcite.schema.TranslatableTable;
+import org.apache.calcite.sql.SqlCall;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.drill.exec.dotdrill.MaterializedView;
+import org.apache.drill.exec.ops.ViewExpansionContext;
+import org.apache.drill.exec.planner.sql.conversion.DrillViewExpander;
+
+/**
+ * Represents a materialized view in the Drill query planning.
+ * <p>
+ * A materialized view stores:
+ * <ul>
+ *   <li>Definition file (.materialized_view.drill) - JSON with name, SQL, 
schema info</li>
+ *   <li>Data directory ({name}_mv_data/) - Parquet files with pre-computed 
results</li>
+ * </ul>
+ * <p>
+ * <b>Behavior:</b>
+ * <ul>
+ *   <li>Before REFRESH: queries expand the SQL definition (like a view)</li>
+ *   <li>After REFRESH: queries scan from pre-computed Parquet data</li>
+ * </ul>
+ *
+ * @see org.apache.drill.exec.dotdrill.MaterializedView
+ */
+public class DrillMaterializedViewTable implements TranslatableTable, 
DrillViewInfoProvider {
+
+  private final MaterializedView materializedView;
+  private final String viewOwner;
+  private final ViewExpansionContext viewExpansionContext;
+  private final String workspaceLocation;
+
+  public DrillMaterializedViewTable(MaterializedView materializedView, String 
viewOwner,
+                                    ViewExpansionContext viewExpansionContext, 
String workspaceLocation) {
+    this.materializedView = materializedView;
+    this.viewOwner = viewOwner;
+    this.viewExpansionContext = viewExpansionContext;
+    this.workspaceLocation = workspaceLocation;
+  }
+
+  @Override
+  public RelDataType getRowType(RelDataTypeFactory typeFactory) {
+    return materializedView.getRowType(typeFactory);
+  }
+
+  @Override
+  public Statistic getStatistic() {
+    return Statistics.UNKNOWN;
+  }
+
+  /**
+   * Converts this materialized view to a RelNode for query planning.
+   * <p>
+   * If the MV has been refreshed (data exists), scans from the pre-computed 
Parquet data.
+   * Otherwise, expands the SQL definition like a regular view.
+   */
+  @Override
+  public RelNode toRel(ToRelContext context, RelOptTable relOptTable) {
+    DrillViewExpander viewExpander = viewExpansionContext.getViewExpander();
+    ViewExpansionContext.ViewExpansionToken token = null;
+    try {
+      RelDataType rowType = relOptTable.getRowType();
+      RelNode rel;
+
+      // Check if materialized data exists (REFRESH has been called)
+      boolean hasData = materializedView.getRefreshStatus() == 
MaterializedView.RefreshStatus.COMPLETE;
+
+      // Build the SQL to execute - either scan data or expand definition
+      String sqlToExpand;
+      if (hasData) {
+        // Scan from the pre-computed data directory
+        sqlToExpand = buildDataScanSql();
+      } else {
+        // No data yet - expand the SQL definition like a view
+        sqlToExpand = materializedView.getSql();
+      }
+
+      // Always use the workspace schema path for context - needed for table 
resolution
+      List<String> schemaPath = materializedView.getWorkspaceSchemaPath();
+
+      if (viewExpansionContext.isImpersonationEnabled()) {
+        token = viewExpansionContext.reserveViewExpansionToken(viewOwner);
+        rel = viewExpander.expandView(sqlToExpand, token.getSchemaTree(), 
schemaPath).rel;
+      } else {
+        // When scanning data, pass null for rowType to let Parquet schema be 
inferred
+        // When expanding SQL definition, use the MV's row type
+        RelDataType typeHint = hasData ? null : rowType;
+        rel = viewExpander.expandView(typeHint, sqlToExpand, schemaPath, 
Collections.emptyList()).rel;
+      }
+
+      return rel;
+    } finally {
+      if (token != null) {
+        token.release();
+      }
+    }
+  }
+
+  /**
+   * Builds SQL to scan the materialized data directory.
+   * The data is stored in {workspace}/{mvName}_mv_data/ directory.
+   * We explicitly select the MV's columns to ensure proper schema matching.
+   */
+  private String buildDataScanSql() {
+    String dataTableName = materializedView.getName() + "_mv_data";
+
+    // Build explicit column list from the MV's field definitions
+    List<String> fieldNames = materializedView.getFields().stream()
+        .map(f -> f.getName())
+        .collect(java.util.stream.Collectors.toList());
+    if (fieldNames.isEmpty()) {
+      // Fallback to SELECT * if no fields defined (shouldn't happen for 
non-dynamic MVs)
+      return "SELECT * FROM `" + dataTableName + "`";

Review Comment:
   Fixed. Added `quoteIdentifier(Quoting, id)` which wraps using the session's 
configured quoting char (escaping embedded quotes); the session `Quoting` is 
threaded in from `WorkspaceSchemaFactory`. On the workspace-name point: this 
SQL is expanded against `materializedView.getWorkspaceSchemaPath()`, so the 
table is already resolved within the correct workspace and doesn't need 
re-qualifying here.



##########
exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillMaterializedViewTable.java:
##########
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.planner.logical;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.calcite.config.CalciteConnectionConfig;
+import org.apache.calcite.plan.RelOptTable;
+import org.apache.calcite.plan.RelOptTable.ToRelContext;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.schema.Schema.TableType;
+import org.apache.calcite.schema.Statistic;
+import org.apache.calcite.schema.Statistics;
+import org.apache.calcite.schema.TranslatableTable;
+import org.apache.calcite.sql.SqlCall;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.drill.exec.dotdrill.MaterializedView;
+import org.apache.drill.exec.ops.ViewExpansionContext;
+import org.apache.drill.exec.planner.sql.conversion.DrillViewExpander;
+
+/**
+ * Represents a materialized view in the Drill query planning.
+ * <p>
+ * A materialized view stores:
+ * <ul>
+ *   <li>Definition file (.materialized_view.drill) - JSON with name, SQL, 
schema info</li>
+ *   <li>Data directory ({name}_mv_data/) - Parquet files with pre-computed 
results</li>
+ * </ul>
+ * <p>
+ * <b>Behavior:</b>
+ * <ul>
+ *   <li>Before REFRESH: queries expand the SQL definition (like a view)</li>
+ *   <li>After REFRESH: queries scan from pre-computed Parquet data</li>
+ * </ul>
+ *
+ * @see org.apache.drill.exec.dotdrill.MaterializedView
+ */
+public class DrillMaterializedViewTable implements TranslatableTable, 
DrillViewInfoProvider {
+
+  private final MaterializedView materializedView;
+  private final String viewOwner;
+  private final ViewExpansionContext viewExpansionContext;
+  private final String workspaceLocation;
+
+  public DrillMaterializedViewTable(MaterializedView materializedView, String 
viewOwner,
+                                    ViewExpansionContext viewExpansionContext, 
String workspaceLocation) {
+    this.materializedView = materializedView;
+    this.viewOwner = viewOwner;
+    this.viewExpansionContext = viewExpansionContext;
+    this.workspaceLocation = workspaceLocation;
+  }
+
+  @Override
+  public RelDataType getRowType(RelDataTypeFactory typeFactory) {
+    return materializedView.getRowType(typeFactory);
+  }
+
+  @Override
+  public Statistic getStatistic() {
+    return Statistics.UNKNOWN;
+  }
+
+  /**
+   * Converts this materialized view to a RelNode for query planning.
+   * <p>
+   * If the MV has been refreshed (data exists), scans from the pre-computed 
Parquet data.
+   * Otherwise, expands the SQL definition like a regular view.
+   */
+  @Override
+  public RelNode toRel(ToRelContext context, RelOptTable relOptTable) {
+    DrillViewExpander viewExpander = viewExpansionContext.getViewExpander();
+    ViewExpansionContext.ViewExpansionToken token = null;
+    try {
+      RelDataType rowType = relOptTable.getRowType();
+      RelNode rel;
+
+      // Check if materialized data exists (REFRESH has been called)
+      boolean hasData = materializedView.getRefreshStatus() == 
MaterializedView.RefreshStatus.COMPLETE;
+
+      // Build the SQL to execute - either scan data or expand definition
+      String sqlToExpand;
+      if (hasData) {
+        // Scan from the pre-computed data directory
+        sqlToExpand = buildDataScanSql();
+      } else {
+        // No data yet - expand the SQL definition like a view
+        sqlToExpand = materializedView.getSql();
+      }
+
+      // Always use the workspace schema path for context - needed for table 
resolution
+      List<String> schemaPath = materializedView.getWorkspaceSchemaPath();
+
+      if (viewExpansionContext.isImpersonationEnabled()) {
+        token = viewExpansionContext.reserveViewExpansionToken(viewOwner);
+        rel = viewExpander.expandView(sqlToExpand, token.getSchemaTree(), 
schemaPath).rel;
+      } else {
+        // When scanning data, pass null for rowType to let Parquet schema be 
inferred
+        // When expanding SQL definition, use the MV's row type
+        RelDataType typeHint = hasData ? null : rowType;
+        rel = viewExpander.expandView(typeHint, sqlToExpand, schemaPath, 
Collections.emptyList()).rel;
+      }
+
+      return rel;
+    } finally {
+      if (token != null) {
+        token.release();
+      }
+    }
+  }
+
+  /**
+   * Builds SQL to scan the materialized data directory.
+   * The data is stored in {workspace}/{mvName}_mv_data/ directory.
+   * We explicitly select the MV's columns to ensure proper schema matching.
+   */
+  private String buildDataScanSql() {
+    String dataTableName = materializedView.getName() + "_mv_data";
+
+    // Build explicit column list from the MV's field definitions
+    List<String> fieldNames = materializedView.getFields().stream()
+        .map(f -> f.getName())
+        .collect(java.util.stream.Collectors.toList());
+    if (fieldNames.isEmpty()) {
+      // Fallback to SELECT * if no fields defined (shouldn't happen for 
non-dynamic MVs)
+      return "SELECT * FROM `" + dataTableName + "`";
+    }
+
+    StringBuilder sql = new StringBuilder("SELECT ");
+    for (int i = 0; i < fieldNames.size(); i++) {
+      if (i > 0) {
+        sql.append(", ");
+      }
+      sql.append("`").append(fieldNames.get(i)).append("`");
+    }
+    sql.append(" FROM `").append(dataTableName).append("`");

Review Comment:
   Same fix — now quoted via `quoteIdentifier(...)` using the session quoting 
character, so it no longer breaks under double-quote quoting.



##########
exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/MaterializedViewRewriter.java:
##########
@@ -0,0 +1,324 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.planner.logical;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptMaterialization;
+import org.apache.calcite.plan.RelOptMaterializations;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelRoot;
+import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.util.Pair;
+import org.apache.drill.exec.dotdrill.MaterializedView;
+import org.apache.drill.exec.ops.QueryContext;
+import org.apache.drill.exec.planner.sql.conversion.SqlConverter;
+import org.apache.drill.exec.store.AbstractSchema;
+import org.apache.drill.exec.store.StoragePlugin;
+import org.apache.drill.exec.store.StoragePluginRegistry;
+import org.apache.drill.exec.store.StoragePluginRegistry.PluginException;
+import org.apache.drill.exec.store.dfs.FileSystemPlugin;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility class for materialized view query rewriting.
+ * <p>
+ * When enabled via planner.enable_materialized_view_rewrite, this class 
attempts
+ * to rewrite queries to use materialized views when beneficial.
+ * <p>
+ * Uses Calcite's {@link RelOptMaterializations#useMaterializedViews} API which
+ * normalizes both the query and MV definitions before performing structural
+ * matching via {@link org.apache.calcite.plan.SubstitutionVisitor}.
+ * <p>
+ * Materialized views are discovered by iterating over enabled file-based
+ * storage plugins (the only plugin type that supports MVs) and force-loading
+ * their schemas to find .materialized_view.drill files.
+ */
+public class MaterializedViewRewriter {
+  private static final Logger logger = 
LoggerFactory.getLogger(MaterializedViewRewriter.class);
+
+  private final QueryContext context;
+  private final SchemaPlus rootSchema;
+  private final SqlConverter sqlConverter;
+
+  public MaterializedViewRewriter(QueryContext context, SchemaPlus rootSchema, 
SqlConverter sqlConverter) {
+    this.context = context;
+    this.rootSchema = rootSchema;
+    this.sqlConverter = sqlConverter;
+  }
+
+  /**
+   * Attempts to rewrite the given RelNode to use a materialized view.
+   *
+   * @param queryRel the query plan to potentially rewrite
+   * @return the rewritten plan using an MV, or the original plan if no 
rewrite is possible
+   */
+  public RelNode rewrite(RelNode queryRel) {
+    if (!context.getPlannerSettings().isMaterializedViewRewriteEnabled()) {
+      return queryRel;
+    }
+
+    // Find all available materialized views that have been refreshed

Review Comment:
   Right — reworded the comment; the unrefreshed candidates are filtered out a 
few lines below before building materializations.



##########
exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/MaterializedViewRewriter.java:
##########
@@ -0,0 +1,324 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.planner.logical;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptMaterialization;
+import org.apache.calcite.plan.RelOptMaterializations;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelRoot;
+import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.util.Pair;
+import org.apache.drill.exec.dotdrill.MaterializedView;
+import org.apache.drill.exec.ops.QueryContext;
+import org.apache.drill.exec.planner.sql.conversion.SqlConverter;
+import org.apache.drill.exec.store.AbstractSchema;
+import org.apache.drill.exec.store.StoragePlugin;
+import org.apache.drill.exec.store.StoragePluginRegistry;
+import org.apache.drill.exec.store.StoragePluginRegistry.PluginException;
+import org.apache.drill.exec.store.dfs.FileSystemPlugin;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility class for materialized view query rewriting.
+ * <p>
+ * When enabled via planner.enable_materialized_view_rewrite, this class 
attempts
+ * to rewrite queries to use materialized views when beneficial.
+ * <p>
+ * Uses Calcite's {@link RelOptMaterializations#useMaterializedViews} API which
+ * normalizes both the query and MV definitions before performing structural
+ * matching via {@link org.apache.calcite.plan.SubstitutionVisitor}.
+ * <p>
+ * Materialized views are discovered by iterating over enabled file-based
+ * storage plugins (the only plugin type that supports MVs) and force-loading
+ * their schemas to find .materialized_view.drill files.
+ */
+public class MaterializedViewRewriter {
+  private static final Logger logger = 
LoggerFactory.getLogger(MaterializedViewRewriter.class);
+
+  private final QueryContext context;
+  private final SchemaPlus rootSchema;
+  private final SqlConverter sqlConverter;
+
+  public MaterializedViewRewriter(QueryContext context, SchemaPlus rootSchema, 
SqlConverter sqlConverter) {
+    this.context = context;
+    this.rootSchema = rootSchema;
+    this.sqlConverter = sqlConverter;
+  }
+
+  /**
+   * Attempts to rewrite the given RelNode to use a materialized view.
+   *
+   * @param queryRel the query plan to potentially rewrite
+   * @return the rewritten plan using an MV, or the original plan if no 
rewrite is possible
+   */
+  public RelNode rewrite(RelNode queryRel) {
+    if (!context.getPlannerSettings().isMaterializedViewRewriteEnabled()) {
+      return queryRel;
+    }
+
+    // Find all available materialized views that have been refreshed
+    List<MaterializedViewCandidate> candidates = 
findCandidateMaterializedViews();
+
+    if (candidates.isEmpty()) {
+      logger.debug("No refreshed materialized views available for rewriting");
+      return queryRel;
+    }

Review Comment:
   Fixed the debug log wording to not imply the candidates are all refreshed.



##########
exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/MaterializedViewRewriter.java:
##########
@@ -0,0 +1,324 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.planner.logical;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptMaterialization;
+import org.apache.calcite.plan.RelOptMaterializations;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelRoot;
+import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.util.Pair;
+import org.apache.drill.exec.dotdrill.MaterializedView;
+import org.apache.drill.exec.ops.QueryContext;
+import org.apache.drill.exec.planner.sql.conversion.SqlConverter;
+import org.apache.drill.exec.store.AbstractSchema;
+import org.apache.drill.exec.store.StoragePlugin;
+import org.apache.drill.exec.store.StoragePluginRegistry;
+import org.apache.drill.exec.store.StoragePluginRegistry.PluginException;
+import org.apache.drill.exec.store.dfs.FileSystemPlugin;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility class for materialized view query rewriting.
+ * <p>
+ * When enabled via planner.enable_materialized_view_rewrite, this class 
attempts
+ * to rewrite queries to use materialized views when beneficial.
+ * <p>
+ * Uses Calcite's {@link RelOptMaterializations#useMaterializedViews} API which
+ * normalizes both the query and MV definitions before performing structural
+ * matching via {@link org.apache.calcite.plan.SubstitutionVisitor}.
+ * <p>
+ * Materialized views are discovered by iterating over enabled file-based
+ * storage plugins (the only plugin type that supports MVs) and force-loading
+ * their schemas to find .materialized_view.drill files.
+ */
+public class MaterializedViewRewriter {
+  private static final Logger logger = 
LoggerFactory.getLogger(MaterializedViewRewriter.class);
+
+  private final QueryContext context;
+  private final SchemaPlus rootSchema;
+  private final SqlConverter sqlConverter;
+
+  public MaterializedViewRewriter(QueryContext context, SchemaPlus rootSchema, 
SqlConverter sqlConverter) {
+    this.context = context;
+    this.rootSchema = rootSchema;
+    this.sqlConverter = sqlConverter;
+  }
+
+  /**
+   * Attempts to rewrite the given RelNode to use a materialized view.
+   *
+   * @param queryRel the query plan to potentially rewrite
+   * @return the rewritten plan using an MV, or the original plan if no 
rewrite is possible
+   */
+  public RelNode rewrite(RelNode queryRel) {
+    if (!context.getPlannerSettings().isMaterializedViewRewriteEnabled()) {
+      return queryRel;
+    }
+
+    // Find all available materialized views that have been refreshed
+    List<MaterializedViewCandidate> candidates = 
findCandidateMaterializedViews();
+
+    if (candidates.isEmpty()) {
+      logger.debug("No refreshed materialized views available for rewriting");
+      return queryRel;
+    }
+
+    logger.debug("Found {} materialized view candidates for potential 
rewriting", candidates.size());
+
+    // Build Calcite RelOptMaterialization objects for each refreshed candidate
+    List<RelOptMaterialization> materializations = new ArrayList<>();
+    for (MaterializedViewCandidate candidate : candidates) {
+      if (!candidate.isRefreshed()) {
+        logger.debug("Skipping MV {} - not refreshed", candidate.getName());
+        continue;
+      }
+
+      try {
+        RelOptMaterialization mat = buildMaterialization(candidate);
+        if (mat != null) {
+          materializations.add(mat);
+        }
+      } catch (Exception e) {
+        logger.debug("Failed to build materialization for MV {}: {}", 
candidate.getName(), e.getMessage());
+      }
+    }
+
+    if (materializations.isEmpty()) {
+      logger.debug("No valid materializations could be built");
+      return queryRel;
+    }
+
+    // Use Calcite's materialized view matching API which normalizes both the
+    // query and MV definitions (trimming unused fields, converting 
Filter/Project
+    // to Calc, merging, etc.) before performing structural matching.
+    try {
+      List<Pair<RelNode, List<RelOptMaterialization>>> results =
+          RelOptMaterializations.useMaterializedViews(queryRel, 
materializations);
+
+      if (!results.isEmpty()) {
+        RelNode rewritten = results.get(0).left;
+        if (logger.isInfoEnabled()) {
+          List<RelOptMaterialization> usedMVs = results.get(0).right;
+          logger.info("Query rewritten to use materialized view(s): {}",
+              !usedMVs.isEmpty() ? usedMVs.get(0).qualifiedTableName : 
"unknown");
+        }
+        return rewritten;
+      }
+    } catch (Exception e) {
+      logger.debug("Materialized view rewriting failed: {}", e.getMessage());
+    }
+
+    logger.debug("No materialized view matched the query");
+    return queryRel;
+  }
+
+  /**
+   * Builds a Calcite {@link RelOptMaterialization} for a candidate MV.
+   */
+  private RelOptMaterialization buildMaterialization(MaterializedViewCandidate 
candidate) {
+    RelNode mvQueryRel = parseMvSql(candidate);
+    if (mvQueryRel == null) {
+      return null;
+    }
+
+    RelNode mvTableRel = buildMvScanRel(candidate);
+    if (mvTableRel == null) {
+      return null;
+    }
+
+    List<String> qualifiedTableName = java.util.Arrays.asList(
+        candidate.getSchemaPath().split("\\."));
+
+    return new RelOptMaterialization(
+        mvTableRel,
+        mvQueryRel,
+        null,
+        qualifiedTableName
+    );
+  }
+
+  /**
+   * Parses the MV's SQL definition into a RelNode.
+   */
+  private RelNode parseMvSql(MaterializedViewCandidate candidate) {
+    try {
+      String mvSql = candidate.getSql();
+      org.apache.calcite.sql.SqlNode parsedNode = sqlConverter.parse(mvSql);
+      org.apache.calcite.sql.SqlNode validatedNode = 
sqlConverter.validate(parsedNode);

Review Comment:
   Yes — intentional. The rewrite builds RelNodes against the root schema, so 
the MV data table must be referenced by its schema-qualified name; a 
bare/relative name wouldn't resolve at this stage.



##########
exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/MaterializedViewRewriter.java:
##########
@@ -0,0 +1,324 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.planner.logical;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptMaterialization;
+import org.apache.calcite.plan.RelOptMaterializations;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelRoot;
+import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.util.Pair;
+import org.apache.drill.exec.dotdrill.MaterializedView;
+import org.apache.drill.exec.ops.QueryContext;
+import org.apache.drill.exec.planner.sql.conversion.SqlConverter;
+import org.apache.drill.exec.store.AbstractSchema;
+import org.apache.drill.exec.store.StoragePlugin;
+import org.apache.drill.exec.store.StoragePluginRegistry;
+import org.apache.drill.exec.store.StoragePluginRegistry.PluginException;
+import org.apache.drill.exec.store.dfs.FileSystemPlugin;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility class for materialized view query rewriting.
+ * <p>
+ * When enabled via planner.enable_materialized_view_rewrite, this class 
attempts
+ * to rewrite queries to use materialized views when beneficial.
+ * <p>
+ * Uses Calcite's {@link RelOptMaterializations#useMaterializedViews} API which
+ * normalizes both the query and MV definitions before performing structural
+ * matching via {@link org.apache.calcite.plan.SubstitutionVisitor}.
+ * <p>
+ * Materialized views are discovered by iterating over enabled file-based
+ * storage plugins (the only plugin type that supports MVs) and force-loading
+ * their schemas to find .materialized_view.drill files.
+ */
+public class MaterializedViewRewriter {
+  private static final Logger logger = 
LoggerFactory.getLogger(MaterializedViewRewriter.class);
+
+  private final QueryContext context;
+  private final SchemaPlus rootSchema;
+  private final SqlConverter sqlConverter;
+
+  public MaterializedViewRewriter(QueryContext context, SchemaPlus rootSchema, 
SqlConverter sqlConverter) {
+    this.context = context;
+    this.rootSchema = rootSchema;
+    this.sqlConverter = sqlConverter;
+  }
+
+  /**
+   * Attempts to rewrite the given RelNode to use a materialized view.
+   *
+   * @param queryRel the query plan to potentially rewrite
+   * @return the rewritten plan using an MV, or the original plan if no 
rewrite is possible
+   */
+  public RelNode rewrite(RelNode queryRel) {
+    if (!context.getPlannerSettings().isMaterializedViewRewriteEnabled()) {
+      return queryRel;
+    }
+
+    // Find all available materialized views that have been refreshed
+    List<MaterializedViewCandidate> candidates = 
findCandidateMaterializedViews();
+
+    if (candidates.isEmpty()) {
+      logger.debug("No refreshed materialized views available for rewriting");
+      return queryRel;
+    }
+
+    logger.debug("Found {} materialized view candidates for potential 
rewriting", candidates.size());
+
+    // Build Calcite RelOptMaterialization objects for each refreshed candidate
+    List<RelOptMaterialization> materializations = new ArrayList<>();
+    for (MaterializedViewCandidate candidate : candidates) {
+      if (!candidate.isRefreshed()) {
+        logger.debug("Skipping MV {} - not refreshed", candidate.getName());
+        continue;
+      }
+
+      try {
+        RelOptMaterialization mat = buildMaterialization(candidate);
+        if (mat != null) {
+          materializations.add(mat);
+        }
+      } catch (Exception e) {
+        logger.debug("Failed to build materialization for MV {}: {}", 
candidate.getName(), e.getMessage());
+      }
+    }
+
+    if (materializations.isEmpty()) {
+      logger.debug("No valid materializations could be built");
+      return queryRel;
+    }
+
+    // Use Calcite's materialized view matching API which normalizes both the
+    // query and MV definitions (trimming unused fields, converting 
Filter/Project
+    // to Calc, merging, etc.) before performing structural matching.
+    try {
+      List<Pair<RelNode, List<RelOptMaterialization>>> results =
+          RelOptMaterializations.useMaterializedViews(queryRel, 
materializations);
+
+      if (!results.isEmpty()) {
+        RelNode rewritten = results.get(0).left;
+        if (logger.isInfoEnabled()) {
+          List<RelOptMaterialization> usedMVs = results.get(0).right;
+          logger.info("Query rewritten to use materialized view(s): {}",
+              !usedMVs.isEmpty() ? usedMVs.get(0).qualifiedTableName : 
"unknown");
+        }
+        return rewritten;
+      }
+    } catch (Exception e) {
+      logger.debug("Materialized view rewriting failed: {}", e.getMessage());
+    }
+
+    logger.debug("No materialized view matched the query");
+    return queryRel;
+  }
+
+  /**
+   * Builds a Calcite {@link RelOptMaterialization} for a candidate MV.
+   */
+  private RelOptMaterialization buildMaterialization(MaterializedViewCandidate 
candidate) {
+    RelNode mvQueryRel = parseMvSql(candidate);
+    if (mvQueryRel == null) {
+      return null;
+    }
+
+    RelNode mvTableRel = buildMvScanRel(candidate);
+    if (mvTableRel == null) {
+      return null;
+    }
+
+    List<String> qualifiedTableName = java.util.Arrays.asList(

Review Comment:
   Intentional — same reason: the `qualifiedTableName` for 
`RelOptMaterialization` is split from the full schema path because the rewrite 
operates against the root schema.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] DRILL-8543: Add Support for Materialized Views (drill)

Reply via email to