cgivre commented on code in PR #3036: URL: https://github.com/apache/drill/pull/3036#discussion_r3455525536
########## exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/MaterializedView.java: ########## @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.dotdrill; + +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonInclude.Include; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; + +/** + * Represents a materialized view definition stored as a JSON file with + * .materialized_view.drill extension. The actual data is stored separately + * in Parquet format in the workspace directory. + */ +@JsonTypeName("materialized_view") +public class MaterializedView { + + /** + * Represents the refresh status of the materialized view. + */ + public enum RefreshStatus { + /** The materialized view data is complete and up-to-date with its definition */ + COMPLETE, + /** The materialized view data needs to be refreshed */ + INCOMPLETE + } + + private final String name; + private String sql; + private List<View.Field> fields; + + /** Current schema when materialized view is created (not the schema to which view belongs to) */ + private List<String> workspaceSchemaPath; + + /** The relative path where the materialized data is stored (typically the view name) */ + @JsonInclude(Include.NON_NULL) + private String dataStoragePath; Review Comment: Good catch — these were inconsistent. The data is physically written to `{name}_mv_data`, so that's the pattern we keep. `dataStoragePath` now defaults to `name + DATA_DIR_SUFFIX` (new constant) and is the single source of truth — every reader/writer goes through `getDataStoragePath()` instead of re-deriving the suffix. The stray `setDataStoragePath(viewName)` that stored the wrong value is gone. ########## exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/MaterializedView.java: ########## @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.dotdrill; + +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonInclude.Include; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; + +/** + * Represents a materialized view definition stored as a JSON file with + * .materialized_view.drill extension. The actual data is stored separately + * in Parquet format in the workspace directory. + */ +@JsonTypeName("materialized_view") +public class MaterializedView { + + /** + * Represents the refresh status of the materialized view. + */ + public enum RefreshStatus { + /** The materialized view data is complete and up-to-date with its definition */ + COMPLETE, + /** The materialized view data needs to be refreshed */ + INCOMPLETE + } + + private final String name; + private String sql; + private List<View.Field> fields; + + /** Current schema when materialized view is created (not the schema to which view belongs to) */ + private List<String> workspaceSchemaPath; + + /** The relative path where the materialized data is stored (typically the view name) */ + @JsonInclude(Include.NON_NULL) + private String dataStoragePath; + + /** Timestamp of the last successful refresh in milliseconds since epoch */ + @JsonInclude(Include.NON_NULL) + private Long lastRefreshTime; + + /** Current refresh status of the materialized view */ + @JsonInclude(Include.NON_NULL) + private RefreshStatus refreshStatus; + + public MaterializedView(String name, String sql, RelDataType rowType, List<String> workspaceSchemaPath) { + this(name, + sql, + rowType.getFieldList().stream() + .map(f -> new View.Field(f.getName(), f.getType())) + .collect(Collectors.toList()), + workspaceSchemaPath, + name, // data storage path defaults to view name Review Comment: Settled on `{name}_mv_data` (where the data is actually written). The default is now `name + DATA_DIR_SUFFIX` and all call sites use `getDataStoragePath()`. ########## exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillMaterializedViewTable.java: ########## @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner.logical; + +import java.util.Collections; +import java.util.List; + +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.plan.RelOptTable.ToRelContext; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.schema.Schema.TableType; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.schema.TranslatableTable; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.drill.exec.dotdrill.MaterializedView; +import org.apache.drill.exec.ops.ViewExpansionContext; +import org.apache.drill.exec.planner.sql.conversion.DrillViewExpander; + +/** + * Represents a materialized view in the Drill query planning. + * <p> + * A materialized view stores: + * <ul> + * <li>Definition file (.materialized_view.drill) - JSON with name, SQL, schema info</li> + * <li>Data directory ({name}_mv_data/) - Parquet files with pre-computed results</li> + * </ul> + * <p> + * <b>Behavior:</b> + * <ul> + * <li>Before REFRESH: queries expand the SQL definition (like a view)</li> + * <li>After REFRESH: queries scan from pre-computed Parquet data</li> + * </ul> + * + * @see org.apache.drill.exec.dotdrill.MaterializedView + */ +public class DrillMaterializedViewTable implements TranslatableTable, DrillViewInfoProvider { + + private final MaterializedView materializedView; + private final String viewOwner; + private final ViewExpansionContext viewExpansionContext; + private final String workspaceLocation; + + public DrillMaterializedViewTable(MaterializedView materializedView, String viewOwner, + ViewExpansionContext viewExpansionContext, String workspaceLocation) { + this.materializedView = materializedView; + this.viewOwner = viewOwner; + this.viewExpansionContext = viewExpansionContext; + this.workspaceLocation = workspaceLocation; + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return materializedView.getRowType(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.UNKNOWN; + } + + /** + * Converts this materialized view to a RelNode for query planning. + * <p> + * If the MV has been refreshed (data exists), scans from the pre-computed Parquet data. + * Otherwise, expands the SQL definition like a regular view. + */ + @Override + public RelNode toRel(ToRelContext context, RelOptTable relOptTable) { + DrillViewExpander viewExpander = viewExpansionContext.getViewExpander(); + ViewExpansionContext.ViewExpansionToken token = null; + try { + RelDataType rowType = relOptTable.getRowType(); + RelNode rel; + + // Check if materialized data exists (REFRESH has been called) + boolean hasData = materializedView.getRefreshStatus() == MaterializedView.RefreshStatus.COMPLETE; + + // Build the SQL to execute - either scan data or expand definition + String sqlToExpand; + if (hasData) { + // Scan from the pre-computed data directory + sqlToExpand = buildDataScanSql(); + } else { + // No data yet - expand the SQL definition like a view + sqlToExpand = materializedView.getSql(); + } + + // Always use the workspace schema path for context - needed for table resolution + List<String> schemaPath = materializedView.getWorkspaceSchemaPath(); + + if (viewExpansionContext.isImpersonationEnabled()) { + token = viewExpansionContext.reserveViewExpansionToken(viewOwner); + rel = viewExpander.expandView(sqlToExpand, token.getSchemaTree(), schemaPath).rel; + } else { + // When scanning data, pass null for rowType to let Parquet schema be inferred + // When expanding SQL definition, use the MV's row type + RelDataType typeHint = hasData ? null : rowType; + rel = viewExpander.expandView(typeHint, sqlToExpand, schemaPath, Collections.emptyList()).rel; + } + + return rel; + } finally { + if (token != null) { + token.release(); + } + } + } + + /** + * Builds SQL to scan the materialized data directory. + * The data is stored in {workspace}/{mvName}_mv_data/ directory. + * We explicitly select the MV's columns to ensure proper schema matching. + */ + private String buildDataScanSql() { + String dataTableName = materializedView.getName() + "_mv_data"; Review Comment: Done — `buildDataScanSql()` now uses `materializedView.getDataStoragePath()`. ########## exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillMaterializedViewTable.java: ########## @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner.logical; + +import java.util.Collections; +import java.util.List; + +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.plan.RelOptTable.ToRelContext; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.schema.Schema.TableType; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.schema.TranslatableTable; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.drill.exec.dotdrill.MaterializedView; +import org.apache.drill.exec.ops.ViewExpansionContext; +import org.apache.drill.exec.planner.sql.conversion.DrillViewExpander; + +/** + * Represents a materialized view in the Drill query planning. + * <p> + * A materialized view stores: + * <ul> + * <li>Definition file (.materialized_view.drill) - JSON with name, SQL, schema info</li> + * <li>Data directory ({name}_mv_data/) - Parquet files with pre-computed results</li> + * </ul> + * <p> + * <b>Behavior:</b> + * <ul> + * <li>Before REFRESH: queries expand the SQL definition (like a view)</li> + * <li>After REFRESH: queries scan from pre-computed Parquet data</li> + * </ul> + * + * @see org.apache.drill.exec.dotdrill.MaterializedView + */ +public class DrillMaterializedViewTable implements TranslatableTable, DrillViewInfoProvider { + + private final MaterializedView materializedView; + private final String viewOwner; + private final ViewExpansionContext viewExpansionContext; + private final String workspaceLocation; + + public DrillMaterializedViewTable(MaterializedView materializedView, String viewOwner, + ViewExpansionContext viewExpansionContext, String workspaceLocation) { + this.materializedView = materializedView; + this.viewOwner = viewOwner; + this.viewExpansionContext = viewExpansionContext; + this.workspaceLocation = workspaceLocation; + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return materializedView.getRowType(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.UNKNOWN; + } + + /** + * Converts this materialized view to a RelNode for query planning. + * <p> + * If the MV has been refreshed (data exists), scans from the pre-computed Parquet data. + * Otherwise, expands the SQL definition like a regular view. + */ + @Override + public RelNode toRel(ToRelContext context, RelOptTable relOptTable) { + DrillViewExpander viewExpander = viewExpansionContext.getViewExpander(); + ViewExpansionContext.ViewExpansionToken token = null; + try { + RelDataType rowType = relOptTable.getRowType(); + RelNode rel; + + // Check if materialized data exists (REFRESH has been called) + boolean hasData = materializedView.getRefreshStatus() == MaterializedView.RefreshStatus.COMPLETE; + + // Build the SQL to execute - either scan data or expand definition + String sqlToExpand; + if (hasData) { + // Scan from the pre-computed data directory + sqlToExpand = buildDataScanSql(); + } else { + // No data yet - expand the SQL definition like a view + sqlToExpand = materializedView.getSql(); + } + + // Always use the workspace schema path for context - needed for table resolution + List<String> schemaPath = materializedView.getWorkspaceSchemaPath(); + + if (viewExpansionContext.isImpersonationEnabled()) { + token = viewExpansionContext.reserveViewExpansionToken(viewOwner); + rel = viewExpander.expandView(sqlToExpand, token.getSchemaTree(), schemaPath).rel; + } else { + // When scanning data, pass null for rowType to let Parquet schema be inferred + // When expanding SQL definition, use the MV's row type + RelDataType typeHint = hasData ? null : rowType; + rel = viewExpander.expandView(typeHint, sqlToExpand, schemaPath, Collections.emptyList()).rel; + } + + return rel; + } finally { + if (token != null) { + token.release(); + } + } + } + + /** + * Builds SQL to scan the materialized data directory. + * The data is stored in {workspace}/{mvName}_mv_data/ directory. + * We explicitly select the MV's columns to ensure proper schema matching. + */ + private String buildDataScanSql() { + String dataTableName = materializedView.getName() + "_mv_data"; + + // Build explicit column list from the MV's field definitions + List<String> fieldNames = materializedView.getFields().stream() + .map(f -> f.getName()) + .collect(java.util.stream.Collectors.toList()); + if (fieldNames.isEmpty()) { + // Fallback to SELECT * if no fields defined (shouldn't happen for non-dynamic MVs) + return "SELECT * FROM `" + dataTableName + "`"; Review Comment: Fixed. Added `quoteIdentifier(Quoting, id)` which wraps using the session's configured quoting char (escaping embedded quotes); the session `Quoting` is threaded in from `WorkspaceSchemaFactory`. On the workspace-name point: this SQL is expanded against `materializedView.getWorkspaceSchemaPath()`, so the table is already resolved within the correct workspace and doesn't need re-qualifying here. ########## exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillMaterializedViewTable.java: ########## @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner.logical; + +import java.util.Collections; +import java.util.List; + +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.plan.RelOptTable.ToRelContext; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.schema.Schema.TableType; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.schema.TranslatableTable; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.drill.exec.dotdrill.MaterializedView; +import org.apache.drill.exec.ops.ViewExpansionContext; +import org.apache.drill.exec.planner.sql.conversion.DrillViewExpander; + +/** + * Represents a materialized view in the Drill query planning. + * <p> + * A materialized view stores: + * <ul> + * <li>Definition file (.materialized_view.drill) - JSON with name, SQL, schema info</li> + * <li>Data directory ({name}_mv_data/) - Parquet files with pre-computed results</li> + * </ul> + * <p> + * <b>Behavior:</b> + * <ul> + * <li>Before REFRESH: queries expand the SQL definition (like a view)</li> + * <li>After REFRESH: queries scan from pre-computed Parquet data</li> + * </ul> + * + * @see org.apache.drill.exec.dotdrill.MaterializedView + */ +public class DrillMaterializedViewTable implements TranslatableTable, DrillViewInfoProvider { + + private final MaterializedView materializedView; + private final String viewOwner; + private final ViewExpansionContext viewExpansionContext; + private final String workspaceLocation; + + public DrillMaterializedViewTable(MaterializedView materializedView, String viewOwner, + ViewExpansionContext viewExpansionContext, String workspaceLocation) { + this.materializedView = materializedView; + this.viewOwner = viewOwner; + this.viewExpansionContext = viewExpansionContext; + this.workspaceLocation = workspaceLocation; + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return materializedView.getRowType(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.UNKNOWN; + } + + /** + * Converts this materialized view to a RelNode for query planning. + * <p> + * If the MV has been refreshed (data exists), scans from the pre-computed Parquet data. + * Otherwise, expands the SQL definition like a regular view. + */ + @Override + public RelNode toRel(ToRelContext context, RelOptTable relOptTable) { + DrillViewExpander viewExpander = viewExpansionContext.getViewExpander(); + ViewExpansionContext.ViewExpansionToken token = null; + try { + RelDataType rowType = relOptTable.getRowType(); + RelNode rel; + + // Check if materialized data exists (REFRESH has been called) + boolean hasData = materializedView.getRefreshStatus() == MaterializedView.RefreshStatus.COMPLETE; + + // Build the SQL to execute - either scan data or expand definition + String sqlToExpand; + if (hasData) { + // Scan from the pre-computed data directory + sqlToExpand = buildDataScanSql(); + } else { + // No data yet - expand the SQL definition like a view + sqlToExpand = materializedView.getSql(); + } + + // Always use the workspace schema path for context - needed for table resolution + List<String> schemaPath = materializedView.getWorkspaceSchemaPath(); + + if (viewExpansionContext.isImpersonationEnabled()) { + token = viewExpansionContext.reserveViewExpansionToken(viewOwner); + rel = viewExpander.expandView(sqlToExpand, token.getSchemaTree(), schemaPath).rel; + } else { + // When scanning data, pass null for rowType to let Parquet schema be inferred + // When expanding SQL definition, use the MV's row type + RelDataType typeHint = hasData ? null : rowType; + rel = viewExpander.expandView(typeHint, sqlToExpand, schemaPath, Collections.emptyList()).rel; + } + + return rel; + } finally { + if (token != null) { + token.release(); + } + } + } + + /** + * Builds SQL to scan the materialized data directory. + * The data is stored in {workspace}/{mvName}_mv_data/ directory. + * We explicitly select the MV's columns to ensure proper schema matching. + */ + private String buildDataScanSql() { + String dataTableName = materializedView.getName() + "_mv_data"; + + // Build explicit column list from the MV's field definitions + List<String> fieldNames = materializedView.getFields().stream() + .map(f -> f.getName()) + .collect(java.util.stream.Collectors.toList()); + if (fieldNames.isEmpty()) { + // Fallback to SELECT * if no fields defined (shouldn't happen for non-dynamic MVs) + return "SELECT * FROM `" + dataTableName + "`"; + } + + StringBuilder sql = new StringBuilder("SELECT "); + for (int i = 0; i < fieldNames.size(); i++) { + if (i > 0) { + sql.append(", "); + } + sql.append("`").append(fieldNames.get(i)).append("`"); + } + sql.append(" FROM `").append(dataTableName).append("`"); Review Comment: Same fix — now quoted via `quoteIdentifier(...)` using the session quoting character, so it no longer breaks under double-quote quoting. ########## exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/MaterializedViewRewriter.java: ########## @@ -0,0 +1,324 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner.logical; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.calcite.plan.RelOptMaterialization; +import org.apache.calcite.plan.RelOptMaterializations; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.util.Pair; +import org.apache.drill.exec.dotdrill.MaterializedView; +import org.apache.drill.exec.ops.QueryContext; +import org.apache.drill.exec.planner.sql.conversion.SqlConverter; +import org.apache.drill.exec.store.AbstractSchema; +import org.apache.drill.exec.store.StoragePlugin; +import org.apache.drill.exec.store.StoragePluginRegistry; +import org.apache.drill.exec.store.StoragePluginRegistry.PluginException; +import org.apache.drill.exec.store.dfs.FileSystemPlugin; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utility class for materialized view query rewriting. + * <p> + * When enabled via planner.enable_materialized_view_rewrite, this class attempts + * to rewrite queries to use materialized views when beneficial. + * <p> + * Uses Calcite's {@link RelOptMaterializations#useMaterializedViews} API which + * normalizes both the query and MV definitions before performing structural + * matching via {@link org.apache.calcite.plan.SubstitutionVisitor}. + * <p> + * Materialized views are discovered by iterating over enabled file-based + * storage plugins (the only plugin type that supports MVs) and force-loading + * their schemas to find .materialized_view.drill files. + */ +public class MaterializedViewRewriter { + private static final Logger logger = LoggerFactory.getLogger(MaterializedViewRewriter.class); + + private final QueryContext context; + private final SchemaPlus rootSchema; + private final SqlConverter sqlConverter; + + public MaterializedViewRewriter(QueryContext context, SchemaPlus rootSchema, SqlConverter sqlConverter) { + this.context = context; + this.rootSchema = rootSchema; + this.sqlConverter = sqlConverter; + } + + /** + * Attempts to rewrite the given RelNode to use a materialized view. + * + * @param queryRel the query plan to potentially rewrite + * @return the rewritten plan using an MV, or the original plan if no rewrite is possible + */ + public RelNode rewrite(RelNode queryRel) { + if (!context.getPlannerSettings().isMaterializedViewRewriteEnabled()) { + return queryRel; + } + + // Find all available materialized views that have been refreshed Review Comment: Right — reworded the comment; the unrefreshed candidates are filtered out a few lines below before building materializations. ########## exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/MaterializedViewRewriter.java: ########## @@ -0,0 +1,324 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner.logical; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.calcite.plan.RelOptMaterialization; +import org.apache.calcite.plan.RelOptMaterializations; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.util.Pair; +import org.apache.drill.exec.dotdrill.MaterializedView; +import org.apache.drill.exec.ops.QueryContext; +import org.apache.drill.exec.planner.sql.conversion.SqlConverter; +import org.apache.drill.exec.store.AbstractSchema; +import org.apache.drill.exec.store.StoragePlugin; +import org.apache.drill.exec.store.StoragePluginRegistry; +import org.apache.drill.exec.store.StoragePluginRegistry.PluginException; +import org.apache.drill.exec.store.dfs.FileSystemPlugin; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utility class for materialized view query rewriting. + * <p> + * When enabled via planner.enable_materialized_view_rewrite, this class attempts + * to rewrite queries to use materialized views when beneficial. + * <p> + * Uses Calcite's {@link RelOptMaterializations#useMaterializedViews} API which + * normalizes both the query and MV definitions before performing structural + * matching via {@link org.apache.calcite.plan.SubstitutionVisitor}. + * <p> + * Materialized views are discovered by iterating over enabled file-based + * storage plugins (the only plugin type that supports MVs) and force-loading + * their schemas to find .materialized_view.drill files. + */ +public class MaterializedViewRewriter { + private static final Logger logger = LoggerFactory.getLogger(MaterializedViewRewriter.class); + + private final QueryContext context; + private final SchemaPlus rootSchema; + private final SqlConverter sqlConverter; + + public MaterializedViewRewriter(QueryContext context, SchemaPlus rootSchema, SqlConverter sqlConverter) { + this.context = context; + this.rootSchema = rootSchema; + this.sqlConverter = sqlConverter; + } + + /** + * Attempts to rewrite the given RelNode to use a materialized view. + * + * @param queryRel the query plan to potentially rewrite + * @return the rewritten plan using an MV, or the original plan if no rewrite is possible + */ + public RelNode rewrite(RelNode queryRel) { + if (!context.getPlannerSettings().isMaterializedViewRewriteEnabled()) { + return queryRel; + } + + // Find all available materialized views that have been refreshed + List<MaterializedViewCandidate> candidates = findCandidateMaterializedViews(); + + if (candidates.isEmpty()) { + logger.debug("No refreshed materialized views available for rewriting"); + return queryRel; + } Review Comment: Fixed the debug log wording to not imply the candidates are all refreshed. ########## exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/MaterializedViewRewriter.java: ########## @@ -0,0 +1,324 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner.logical; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.calcite.plan.RelOptMaterialization; +import org.apache.calcite.plan.RelOptMaterializations; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.util.Pair; +import org.apache.drill.exec.dotdrill.MaterializedView; +import org.apache.drill.exec.ops.QueryContext; +import org.apache.drill.exec.planner.sql.conversion.SqlConverter; +import org.apache.drill.exec.store.AbstractSchema; +import org.apache.drill.exec.store.StoragePlugin; +import org.apache.drill.exec.store.StoragePluginRegistry; +import org.apache.drill.exec.store.StoragePluginRegistry.PluginException; +import org.apache.drill.exec.store.dfs.FileSystemPlugin; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utility class for materialized view query rewriting. + * <p> + * When enabled via planner.enable_materialized_view_rewrite, this class attempts + * to rewrite queries to use materialized views when beneficial. + * <p> + * Uses Calcite's {@link RelOptMaterializations#useMaterializedViews} API which + * normalizes both the query and MV definitions before performing structural + * matching via {@link org.apache.calcite.plan.SubstitutionVisitor}. + * <p> + * Materialized views are discovered by iterating over enabled file-based + * storage plugins (the only plugin type that supports MVs) and force-loading + * their schemas to find .materialized_view.drill files. + */ +public class MaterializedViewRewriter { + private static final Logger logger = LoggerFactory.getLogger(MaterializedViewRewriter.class); + + private final QueryContext context; + private final SchemaPlus rootSchema; + private final SqlConverter sqlConverter; + + public MaterializedViewRewriter(QueryContext context, SchemaPlus rootSchema, SqlConverter sqlConverter) { + this.context = context; + this.rootSchema = rootSchema; + this.sqlConverter = sqlConverter; + } + + /** + * Attempts to rewrite the given RelNode to use a materialized view. + * + * @param queryRel the query plan to potentially rewrite + * @return the rewritten plan using an MV, or the original plan if no rewrite is possible + */ + public RelNode rewrite(RelNode queryRel) { + if (!context.getPlannerSettings().isMaterializedViewRewriteEnabled()) { + return queryRel; + } + + // Find all available materialized views that have been refreshed + List<MaterializedViewCandidate> candidates = findCandidateMaterializedViews(); + + if (candidates.isEmpty()) { + logger.debug("No refreshed materialized views available for rewriting"); + return queryRel; + } + + logger.debug("Found {} materialized view candidates for potential rewriting", candidates.size()); + + // Build Calcite RelOptMaterialization objects for each refreshed candidate + List<RelOptMaterialization> materializations = new ArrayList<>(); + for (MaterializedViewCandidate candidate : candidates) { + if (!candidate.isRefreshed()) { + logger.debug("Skipping MV {} - not refreshed", candidate.getName()); + continue; + } + + try { + RelOptMaterialization mat = buildMaterialization(candidate); + if (mat != null) { + materializations.add(mat); + } + } catch (Exception e) { + logger.debug("Failed to build materialization for MV {}: {}", candidate.getName(), e.getMessage()); + } + } + + if (materializations.isEmpty()) { + logger.debug("No valid materializations could be built"); + return queryRel; + } + + // Use Calcite's materialized view matching API which normalizes both the + // query and MV definitions (trimming unused fields, converting Filter/Project + // to Calc, merging, etc.) before performing structural matching. + try { + List<Pair<RelNode, List<RelOptMaterialization>>> results = + RelOptMaterializations.useMaterializedViews(queryRel, materializations); + + if (!results.isEmpty()) { + RelNode rewritten = results.get(0).left; + if (logger.isInfoEnabled()) { + List<RelOptMaterialization> usedMVs = results.get(0).right; + logger.info("Query rewritten to use materialized view(s): {}", + !usedMVs.isEmpty() ? usedMVs.get(0).qualifiedTableName : "unknown"); + } + return rewritten; + } + } catch (Exception e) { + logger.debug("Materialized view rewriting failed: {}", e.getMessage()); + } + + logger.debug("No materialized view matched the query"); + return queryRel; + } + + /** + * Builds a Calcite {@link RelOptMaterialization} for a candidate MV. + */ + private RelOptMaterialization buildMaterialization(MaterializedViewCandidate candidate) { + RelNode mvQueryRel = parseMvSql(candidate); + if (mvQueryRel == null) { + return null; + } + + RelNode mvTableRel = buildMvScanRel(candidate); + if (mvTableRel == null) { + return null; + } + + List<String> qualifiedTableName = java.util.Arrays.asList( + candidate.getSchemaPath().split("\\.")); + + return new RelOptMaterialization( + mvTableRel, + mvQueryRel, + null, + qualifiedTableName + ); + } + + /** + * Parses the MV's SQL definition into a RelNode. + */ + private RelNode parseMvSql(MaterializedViewCandidate candidate) { + try { + String mvSql = candidate.getSql(); + org.apache.calcite.sql.SqlNode parsedNode = sqlConverter.parse(mvSql); + org.apache.calcite.sql.SqlNode validatedNode = sqlConverter.validate(parsedNode); Review Comment: Yes — intentional. The rewrite builds RelNodes against the root schema, so the MV data table must be referenced by its schema-qualified name; a bare/relative name wouldn't resolve at this stage. ########## exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/MaterializedViewRewriter.java: ########## @@ -0,0 +1,324 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner.logical; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.calcite.plan.RelOptMaterialization; +import org.apache.calcite.plan.RelOptMaterializations; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.util.Pair; +import org.apache.drill.exec.dotdrill.MaterializedView; +import org.apache.drill.exec.ops.QueryContext; +import org.apache.drill.exec.planner.sql.conversion.SqlConverter; +import org.apache.drill.exec.store.AbstractSchema; +import org.apache.drill.exec.store.StoragePlugin; +import org.apache.drill.exec.store.StoragePluginRegistry; +import org.apache.drill.exec.store.StoragePluginRegistry.PluginException; +import org.apache.drill.exec.store.dfs.FileSystemPlugin; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utility class for materialized view query rewriting. + * <p> + * When enabled via planner.enable_materialized_view_rewrite, this class attempts + * to rewrite queries to use materialized views when beneficial. + * <p> + * Uses Calcite's {@link RelOptMaterializations#useMaterializedViews} API which + * normalizes both the query and MV definitions before performing structural + * matching via {@link org.apache.calcite.plan.SubstitutionVisitor}. + * <p> + * Materialized views are discovered by iterating over enabled file-based + * storage plugins (the only plugin type that supports MVs) and force-loading + * their schemas to find .materialized_view.drill files. + */ +public class MaterializedViewRewriter { + private static final Logger logger = LoggerFactory.getLogger(MaterializedViewRewriter.class); + + private final QueryContext context; + private final SchemaPlus rootSchema; + private final SqlConverter sqlConverter; + + public MaterializedViewRewriter(QueryContext context, SchemaPlus rootSchema, SqlConverter sqlConverter) { + this.context = context; + this.rootSchema = rootSchema; + this.sqlConverter = sqlConverter; + } + + /** + * Attempts to rewrite the given RelNode to use a materialized view. + * + * @param queryRel the query plan to potentially rewrite + * @return the rewritten plan using an MV, or the original plan if no rewrite is possible + */ + public RelNode rewrite(RelNode queryRel) { + if (!context.getPlannerSettings().isMaterializedViewRewriteEnabled()) { + return queryRel; + } + + // Find all available materialized views that have been refreshed + List<MaterializedViewCandidate> candidates = findCandidateMaterializedViews(); + + if (candidates.isEmpty()) { + logger.debug("No refreshed materialized views available for rewriting"); + return queryRel; + } + + logger.debug("Found {} materialized view candidates for potential rewriting", candidates.size()); + + // Build Calcite RelOptMaterialization objects for each refreshed candidate + List<RelOptMaterialization> materializations = new ArrayList<>(); + for (MaterializedViewCandidate candidate : candidates) { + if (!candidate.isRefreshed()) { + logger.debug("Skipping MV {} - not refreshed", candidate.getName()); + continue; + } + + try { + RelOptMaterialization mat = buildMaterialization(candidate); + if (mat != null) { + materializations.add(mat); + } + } catch (Exception e) { + logger.debug("Failed to build materialization for MV {}: {}", candidate.getName(), e.getMessage()); + } + } + + if (materializations.isEmpty()) { + logger.debug("No valid materializations could be built"); + return queryRel; + } + + // Use Calcite's materialized view matching API which normalizes both the + // query and MV definitions (trimming unused fields, converting Filter/Project + // to Calc, merging, etc.) before performing structural matching. + try { + List<Pair<RelNode, List<RelOptMaterialization>>> results = + RelOptMaterializations.useMaterializedViews(queryRel, materializations); + + if (!results.isEmpty()) { + RelNode rewritten = results.get(0).left; + if (logger.isInfoEnabled()) { + List<RelOptMaterialization> usedMVs = results.get(0).right; + logger.info("Query rewritten to use materialized view(s): {}", + !usedMVs.isEmpty() ? usedMVs.get(0).qualifiedTableName : "unknown"); + } + return rewritten; + } + } catch (Exception e) { + logger.debug("Materialized view rewriting failed: {}", e.getMessage()); + } + + logger.debug("No materialized view matched the query"); + return queryRel; + } + + /** + * Builds a Calcite {@link RelOptMaterialization} for a candidate MV. + */ + private RelOptMaterialization buildMaterialization(MaterializedViewCandidate candidate) { + RelNode mvQueryRel = parseMvSql(candidate); + if (mvQueryRel == null) { + return null; + } + + RelNode mvTableRel = buildMvScanRel(candidate); + if (mvTableRel == null) { + return null; + } + + List<String> qualifiedTableName = java.util.Arrays.asList( Review Comment: Intentional — same reason: the `qualifiedTableName` for `RelOptMaterialization` is split from the full schema path because the rewrite operates against the root schema. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
