Copilot commented on code in PR #6322: URL: https://github.com/apache/hive/pull/6322#discussion_r2818922401
########## itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestQueryRewrite.java: ########## @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.txn.compactor; + +import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.metastore.api.EnvironmentContext; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.execSelectAndDumpData; +import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriver; +import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriverSilently; + +public class TestQueryRewrite extends CompactorOnTezTest { + + private static final String DB = "default"; + private static final String TABLE1 = "t1"; + private static final String MV1 = "mat1"; + + private static final List<String> ORIGINAL_QUERY_PLAN = Arrays.asList( + "CBO PLAN:", + "HiveProject(a=[$0], b=[$1])", + " HiveFilter(condition=[>($0, 0)])", + " HiveTableScan(table=[[" + DB + ", " + TABLE1 + "]], table:alias=[" + TABLE1 + "])", + "" + ); + + @Override + public void setup() throws Exception { + super.setup(); + + executeStatementOnDriverSilently("drop materialized view if exists " + MV1, driver); + executeStatementOnDriverSilently("drop table if exists" + TABLE1 , driver); + + executeStatementOnDriver("create table " + TABLE1 + "(a int, b string, c float) stored as orc TBLPROPERTIES ('transactional'='true')", driver); + executeStatementOnDriver("insert into " + TABLE1 + "(a,b, c) values (1, 'one', 1.1), (2, 'two', 2.2), (NULL, NULL, NULL)", driver); + executeStatementOnDriver("create materialized view " + MV1 + " stored by iceberg tblproperties('format-version'='2') as " + + "select a,b,c from " + TABLE1 + " where a > 0 or a is null", driver); + } + + @Override + public void tearDown() { + executeStatementOnDriverSilently("drop materialized view " + MV1, driver); + executeStatementOnDriverSilently("drop table " + TABLE1 , driver); + + super.tearDown(); + } + + @Test + public void testQueryIsNotRewrittenWhenMVIsDropped() throws Exception { + + // Simulate a multi HS2 cluster. + // Drop the MV using a direct API call to HMS. This is similar what happening when the drop MV is executed by + // another HS2. + // In this case the MV is not removed from HiveMaterializedViewsRegistry of HS2 which runs the explain query. + msClient.dropTable(DB, MV1); + + List<String> result = execSelectAndDumpData("explain cbo select a, b from " + TABLE1 + " where a > 0", driver, ""); + Assert.assertEquals(ORIGINAL_QUERY_PLAN, result); + } Review Comment: The test testQueryIsNotRewrittenWhenMVIsDropped simulates a multi-HS2 scenario where an MV is dropped via HMS but the registry is not updated. However, the test doesn't explicitly call HiveMaterializedViewsRegistry.get().refresh(Hive.get()) to verify that the refresh mechanism correctly removes the dropped MV from the registry. Without this explicit refresh call, the test may not be validating the core functionality added by this PR. Consider adding a refresh call before the query execution to ensure the registry cleanup logic is properly tested. ########## itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestQueryRewrite.java: ########## @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.txn.compactor; + +import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.metastore.api.EnvironmentContext; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.execSelectAndDumpData; +import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriver; +import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriverSilently; + +public class TestQueryRewrite extends CompactorOnTezTest { + + private static final String DB = "default"; + private static final String TABLE1 = "t1"; + private static final String MV1 = "mat1"; + + private static final List<String> ORIGINAL_QUERY_PLAN = Arrays.asList( + "CBO PLAN:", + "HiveProject(a=[$0], b=[$1])", + " HiveFilter(condition=[>($0, 0)])", + " HiveTableScan(table=[[" + DB + ", " + TABLE1 + "]], table:alias=[" + TABLE1 + "])", + "" + ); + + @Override + public void setup() throws Exception { + super.setup(); + + executeStatementOnDriverSilently("drop materialized view if exists " + MV1, driver); + executeStatementOnDriverSilently("drop table if exists" + TABLE1 , driver); + + executeStatementOnDriver("create table " + TABLE1 + "(a int, b string, c float) stored as orc TBLPROPERTIES ('transactional'='true')", driver); + executeStatementOnDriver("insert into " + TABLE1 + "(a,b, c) values (1, 'one', 1.1), (2, 'two', 2.2), (NULL, NULL, NULL)", driver); + executeStatementOnDriver("create materialized view " + MV1 + " stored by iceberg tblproperties('format-version'='2') as " + + "select a,b,c from " + TABLE1 + " where a > 0 or a is null", driver); + } + + @Override + public void tearDown() { + executeStatementOnDriverSilently("drop materialized view " + MV1, driver); + executeStatementOnDriverSilently("drop table " + TABLE1 , driver); + + super.tearDown(); + } + + @Test + public void testQueryIsNotRewrittenWhenMVIsDropped() throws Exception { + + // Simulate a multi HS2 cluster. + // Drop the MV using a direct API call to HMS. This is similar what happening when the drop MV is executed by + // another HS2. + // In this case the MV is not removed from HiveMaterializedViewsRegistry of HS2 which runs the explain query. + msClient.dropTable(DB, MV1); + + List<String> result = execSelectAndDumpData("explain cbo select a, b from " + TABLE1 + " where a > 0", driver, ""); + Assert.assertEquals(ORIGINAL_QUERY_PLAN, result); + } + + @Test + public void testQueryIsNotRewrittenWhenMVIsDisabledForRewrite() throws Exception { + Table mvTable = Hive.get().getTable(DB, MV1); + mvTable.setRewriteEnabled(false); + + EnvironmentContext environmentContext = new EnvironmentContext(); + environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); + Hive.get().alterTable(mvTable, false, environmentContext, true); + + List<String> result = execSelectAndDumpData("explain cbo select a, b from " + TABLE1 + " where a > 0", driver, ""); + Assert.assertEquals(ORIGINAL_QUERY_PLAN, result); + } Review Comment: The test testQueryIsNotRewrittenWhenMVIsDisabledForRewrite disables an MV for rewrite but doesn't explicitly call HiveMaterializedViewsRegistry.get().refresh(Hive.get()) to trigger the registry update. The test may be relying on implicit behavior or scheduled refresh. Consider adding an explicit refresh call to ensure the test validates that the registry correctly removes MVs disabled for rewrite, which is a key feature of this PR. ########## itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestQueryRewrite.java: ########## @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.txn.compactor; + +import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.metastore.api.EnvironmentContext; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.execSelectAndDumpData; +import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriver; +import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriverSilently; + +public class TestQueryRewrite extends CompactorOnTezTest { + + private static final String DB = "default"; + private static final String TABLE1 = "t1"; + private static final String MV1 = "mat1"; + + private static final List<String> ORIGINAL_QUERY_PLAN = Arrays.asList( + "CBO PLAN:", + "HiveProject(a=[$0], b=[$1])", + " HiveFilter(condition=[>($0, 0)])", + " HiveTableScan(table=[[" + DB + ", " + TABLE1 + "]], table:alias=[" + TABLE1 + "])", + "" + ); + + @Override + public void setup() throws Exception { + super.setup(); + + executeStatementOnDriverSilently("drop materialized view if exists " + MV1, driver); + executeStatementOnDriverSilently("drop table if exists" + TABLE1 , driver); Review Comment: Missing space in SQL statement. The string should be "drop table if exists " + TABLE1 with a space after "exists" to ensure proper SQL syntax. Currently it would generate "drop table if existst1" instead of "drop table if exists t1". ```suggestion executeStatementOnDriverSilently("drop table if exists " + TABLE1, driver); ``` ########## itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestHiveMaterializedViewRegistry.java: ########## @@ -0,0 +1,219 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.txn.compactor; + +import org.apache.hadoop.hive.common.MaterializationSnapshot; +import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.metastore.api.EnvironmentContext; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.ddl.view.create.CreateMaterializedViewDesc; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveMaterializedViewsRegistry; +import org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization; +import org.apache.hadoop.hive.ql.metadata.MaterializedViewMetadata; +import org.apache.hadoop.hive.ql.metadata.RewriteAlgorithm; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils; +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; + +import static java.util.Arrays.asList; +import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriver; +import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriverSilently; + +public class TestHiveMaterializedViewRegistry extends CompactorOnTezTest { + + private static final String DB = "default"; + private static final String TABLE1 = "t1"; + private static final String MV1 = "mat1"; + + @Override + public void setup() throws Exception { + super.setup(); + + executeStatementOnDriverSilently("drop materialized view if exists " + MV1, driver); + executeStatementOnDriverSilently("drop table if exists" + TABLE1 , driver); Review Comment: Missing space in SQL statement. The string should be "drop table if exists " + TABLE1 with a space after "exists" to ensure proper SQL syntax. Currently it would generate "drop table if existst1" instead of "drop table if exists t1". ```suggestion executeStatementOnDriverSilently("drop table if exists " + TABLE1 , driver); ``` ########## ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java: ########## @@ -168,44 +171,60 @@ private Loader(Hive db) { @Override public void run() { - PerfLogger perfLogger = SessionState.getPerfLogger(); - try { - DriverUtils.setUpAndStartSessionState(db.getConf()); - perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.MATERIALIZED_VIEWS_REGISTRY_REFRESH); - if (initialized.get()) { - for (Table mvTable : db.getAllMaterializedViewObjectsForRewriting()) { - RelOptMaterialization existingMV = getRewritingMaterializedView( - mvTable.getDbName(), mvTable.getTableName(), ALL); - if (existingMV != null) { - // We replace if the existing MV is not newer - Table existingMVTable = HiveMaterializedViewUtils.extractTable(existingMV); - if (existingMVTable.getCreateTime() < mvTable.getCreateTime() || - (existingMVTable.getCreateTime() == mvTable.getCreateTime() && - existingMVTable.getMVMetadata().getMaterializationTime() <= mvTable.getMVMetadata().getMaterializationTime())) { - refreshMaterializedView(db.getConf(), existingMVTable, mvTable); - } - } else { - // Simply replace if it still does not exist - refreshMaterializedView(db.getConf(), null, mvTable); + DriverUtils.setUpAndStartSessionState(db.getConf()); + refresh(db); + } + } + + public void refresh(Hive db) { + PerfLogger perfLogger = SessionState.getPerfLogger(); + perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.MATERIALIZED_VIEWS_REGISTRY_REFRESH); + try { + if (initialized.get()) { + Set<TableName> remaining = getRewritingMaterializedViews().stream() + .map(materialization -> new TableName( + "hive", materialization.qualifiedTableName.get(0), materialization.qualifiedTableName.get(1))) + .collect(Collectors.toSet()); + + for (Table mvTable : db.getAllMaterializedViewObjectsForRewriting()) { + RelOptMaterialization existingMV = getRewritingMaterializedView( + mvTable.getDbName(), mvTable.getTableName(), ALL); + if (existingMV != null) { + // We replace if the existing MV is not newer + Table existingMVTable = HiveMaterializedViewUtils.extractTable(existingMV); + remaining.remove(new TableName( + existingMVTable.getCatName(), existingMVTable.getDbName(), existingMVTable.getTableName())); Review Comment: There's an inconsistency in how the 'remaining' set is populated versus how items are removed from it. On line 186, new MVs are added with a hardcoded catalog name "hive", but on line 195-196, existing MVs are removed using the actual catalog name from the table (existingMVTable.getCatName()). If the catalog name is not "hive", the removal won't find a match, and these MVs will be incorrectly dropped from the registry. Consider using the actual catalog name consistently or verifying that all MVs in the registry have the same catalog name. ```suggestion "hive", existingMVTable.getDbName(), existingMVTable.getTableName())); ``` ########## itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestQueryRewrite.java: ########## @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.txn.compactor; + +import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.metastore.api.EnvironmentContext; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.execSelectAndDumpData; +import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriver; +import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriverSilently; + +public class TestQueryRewrite extends CompactorOnTezTest { + + private static final String DB = "default"; + private static final String TABLE1 = "t1"; + private static final String MV1 = "mat1"; + + private static final List<String> ORIGINAL_QUERY_PLAN = Arrays.asList( + "CBO PLAN:", + "HiveProject(a=[$0], b=[$1])", + " HiveFilter(condition=[>($0, 0)])", + " HiveTableScan(table=[[" + DB + ", " + TABLE1 + "]], table:alias=[" + TABLE1 + "])", + "" + ); + + @Override + public void setup() throws Exception { + super.setup(); + + executeStatementOnDriverSilently("drop materialized view if exists " + MV1, driver); + executeStatementOnDriverSilently("drop table if exists" + TABLE1 , driver); + + executeStatementOnDriver("create table " + TABLE1 + "(a int, b string, c float) stored as orc TBLPROPERTIES ('transactional'='true')", driver); + executeStatementOnDriver("insert into " + TABLE1 + "(a,b, c) values (1, 'one', 1.1), (2, 'two', 2.2), (NULL, NULL, NULL)", driver); + executeStatementOnDriver("create materialized view " + MV1 + " stored by iceberg tblproperties('format-version'='2') as " + + "select a,b,c from " + TABLE1 + " where a > 0 or a is null", driver); + } + + @Override + public void tearDown() { + executeStatementOnDriverSilently("drop materialized view " + MV1, driver); + executeStatementOnDriverSilently("drop table " + TABLE1 , driver); + + super.tearDown(); + } + + @Test + public void testQueryIsNotRewrittenWhenMVIsDropped() throws Exception { + + // Simulate a multi HS2 cluster. + // Drop the MV using a direct API call to HMS. This is similar what happening when the drop MV is executed by Review Comment: Grammar error in comment. "This is similar what happening" should be "This is similar to what happens" for proper English grammar. ```suggestion // Drop the MV using a direct API call to HMS. This is similar to what happens when the drop MV is executed by ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
