maCloudera commented on code in PR #6545:
URL: https://github.com/apache/hive/pull/6545#discussion_r3427771209


##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/PostgresIndexRebuilder.java:
##########
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.tools.schematool;
+
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.metastore.HiveMetaException;
+
+/**
+ * Postgres implementation of {@link IndexRebuilder}.
+ *
+ * <p>Uses catalog-sourced DDL via {@code pg_get_indexdef} / {@code 
pg_get_constraintdef}.
+ * Constraint-backed indexes use DROP/ADD CONSTRAINT; standalone indexes use 
DROP/CREATE INDEX.
+ */
+class PostgresIndexRebuilder extends AbstractIndexRebuilder {
+
+  // ic = index class, tc = table class. Restrict to btree and use 
pg_constraint to
+  // distinguish constraint-backed indexes from standalone indexes.
+  // relkind: 'i' = index, 'r' = ordinary table (excludes views, partitions, 
etc.).
+  private static final String QUERY_INDEXES = """
+    SELECT ic.relname AS indexname, tc.relname AS tablename, ix.indisunique,
+           (con.conname IS NOT NULL) AS constraint_backed,  -- true when index 
is owned by a table constraint
+           pg_get_indexdef(ic.oid)       AS index_def,      -- 
catalog-generated CREATE INDEX statement
+           con.conname                   AS constraint_name,
+           pg_get_constraintdef(con.oid) AS constraint_def  -- 
catalog-generated constraint definition fragment
+      FROM pg_index ix
+      JOIN pg_class ic ON ic.oid = ix.indexrelid AND ic.relkind = 'i'
+      JOIN pg_class tc ON tc.oid = ix.indrelid   AND tc.relkind = 'r'
+      JOIN pg_am am    ON am.oid = ic.relam      AND am.amname = 'btree'  -- 
restrict to btree indexes only
+ LEFT JOIN pg_constraint con ON con.conindid = ic.oid  -- links index to 
PK/UNIQUE constraint when present
+     WHERE ic.relnamespace = current_schema()::regnamespace  -- only objects 
in the active schema
+    """;
+
+private static final String QUERY_INDEX_COLUMNS = """
+    SELECT a.attname
+      FROM pg_index ix
+      JOIN pg_class ic    ON ic.oid = ix.indexrelid AND ic.relkind = 'i'
+      JOIN pg_attribute a ON a.attrelid = ix.indrelid  -- read columns from 
the base table of the index
+                         AND a.attnum = ANY(ix.indkey) -- keep attrs whose 
attnum appears in index key vector
+                         AND a.attnum > 0  -- system columns have negative 
attnum; exclude them
+     WHERE ic.relname = ? AND ic.relnamespace = current_schema()::regnamespace 
 -- scope name lookup to active schema
+     ORDER BY array_position(ix.indkey, a.attnum)  -- ix.indkey stores attr 
numbers in key order
+    """;
+
+  private record PgDdl(String dropDdl, String createDdl) {}
+
+  private final Map<String, PgDdl> ddlMap = new LinkedHashMap<>();
+
+  PostgresIndexRebuilder(Connection conn, boolean needsQuotedIdentifier, 
String quoteCharacter) {
+    super(conn, needsQuotedIdentifier, quoteCharacter);
+  }
+
+  @Override
+  public List<IndexInfo> loadIndexes() throws HiveMetaException {
+    ddlMap.clear();
+    try (Statement stmt = conn.createStatement();
+        ResultSet rs = stmt.executeQuery(QUERY_INDEXES)) {
+      List<IndexInfo> indexes = new ArrayList<>();
+      while (rs.next()) {
+        String indexName = rs.getString("indexname");
+        String tableName = rs.getString("tablename");
+        boolean isUnique = rs.getBoolean("indisunique");
+        boolean isConstraintBacked = rs.getBoolean("constraint_backed");
+        String indexDef = rs.getString("index_def");
+        String constraintName = rs.getString("constraint_name");
+        String constraintDef = rs.getString("constraint_def");
+        List<String> columns = loadIndexColumns(indexName);
+
+        String dropDdl;
+        String createDdl;
+        if (isConstraintBacked) {
+          dropDdl = MetastoreSchemaTool.quote(
+              "ALTER TABLE ONLY <qa>" + tableName + "<qa>"
+              + " DROP CONSTRAINT <qa>" + constraintName + "<qa>",
+              needsQuotedIdentifier, quoteCharacter);
+          createDdl = MetastoreSchemaTool.quote(
+              "ALTER TABLE ONLY <qa>" + tableName + "<qa>"
+              + " ADD CONSTRAINT <qa>" + constraintName + "<qa> " + 
constraintDef,
+              needsQuotedIdentifier, quoteCharacter);
+        } else {
+          dropDdl = MetastoreSchemaTool.quote(
+              "DROP INDEX IF EXISTS <qa>" + indexName + "<qa>",
+              needsQuotedIdentifier, quoteCharacter);
+          createDdl = indexDef;
+        }
+        ddlMap.put(indexName, new PgDdl(dropDdl, createDdl));
+        indexes.add(new IndexInfo(indexName, tableName, isUnique, 
isConstraintBacked, columns));
+      }
+      return indexes;
+    } catch (SQLException e) {
+      throw new HiveMetaException("Failed to load indexes from Postgres 
catalog", e);
+    }
+  }
+
+  @Override
+  public void rebuildIndex(IndexInfo index) throws HiveMetaException {
+    PgDdl ddl = ddlMap.get(index.indexName());
+    executeRebuild(index, ddl.dropDdl(), ddl.createDdl());

Review Comment:
   I think, drop+create ddl should be atomic.
   For constraint-backed indexes (PKs and UNIQUE constraints), the Postgres 
path runs two separate DDL statements: DROP CONSTRAINT followed by ADD 
CONSTRAINT. If the connection has autocommit enabled and the second statement 
fails, the constraint is permanently dropped with no rollback. It would be 
safer to ensure these two steps run inside an explicit BEGIN/COMMIT block, or 
autocommit=false.



##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/IndexInfo.java:
##########
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.tools.schematool;
+
+import java.util.List;
+import org.jetbrains.annotations.NotNull;
+
+/**
+ * Record for a DB index, used by {@link IndexRebuilder}.
+ */
+public record IndexInfo(
+    String indexName,
+    String tableName,
+    boolean unique,
+    boolean constraintBacked,
+    List<String> columns) {
+
+  public IndexInfo {
+    columns = List.copyOf(columns);
+  }
+
+  @Override
+  public @NotNull String toString() {

Review Comment:
   Do we need @NotNull annotation here ? I think, record will never return null.



##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/SchemaToolCommandLine.java:
##########
@@ -82,6 +82,8 @@ private Options createOptions(OptionGroup additionalOptions) {
       .hasArg()
       .withDescription("Create table for Hive warehouse/compute logs")
       .create("createLogsTable");
+    Option rebuildIndexesOpt = new Option("rebuildIndexes",
+        "Detect and rebuild corrupt indexes in the metastore backend DB 
(Postgres only).");

Review Comment:
   I think we are supporting all DB's, we can remove this "Postgres only" 
otherwise it can confuse.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to