wecharyu commented on code in PR #5539: URL: https://github.com/apache/hive/pull/5539#discussion_r1890470526
########## ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/formatter/ShowTableStatusFormatter.java: ########## @@ -22,9 +22,15 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.GetPartitionsFilterSpec; Review Comment: nit: remove unused imports. ########## ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java: ########## @@ -4466,6 +4526,81 @@ public List<Partition> getPartitionsByFilter(Table tbl, String filter) return convertFromMetastore(tbl, tParts); } + public List<Partition> getPartitionsWithSpecs(Table tbl, GetPartitionsRequest request) + throws HiveException, TException { + + if (!tbl.isPartitioned()) { + throw new HiveException(ErrorMsg.TABLE_NOT_PARTITIONED, tbl.getTableName()); + } + int batchSize= MetastoreConf.getIntVar(Hive.get().getConf(), MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX); + if(batchSize > 0){ + return new ArrayList<>(getAllPartitionsWithSpecsInBatches(tbl, batchSize, DEFAULT_BATCH_DECAYING_FACTOR, MetastoreConf.getIntVar( + Hive.get().getConf(), MetastoreConf.ConfVars.GETPARTITIONS_BATCH_MAX_RETRIES), request)); + }else{ + return getPartitionsWithSpecsInternal(tbl, request); + } + } + + public List<Partition> getPartitionsWithSpecsInternal(Table tbl, GetPartitionsRequest request) + throws HiveException, TException { + + if (!tbl.isPartitioned()) { + throw new HiveException(ErrorMsg.TABLE_NOT_PARTITIONED, tbl.getTableName()); + } + GetPartitionsResponse response = getMSC().getPartitionsWithSpecs(request); + List<org.apache.hadoop.hive.metastore.api.PartitionSpec> partitionSpecs = response.getPartitionSpec(); + List<Partition> partitions = new ArrayList<>(); + partitions.addAll(convertFromPartSpec(partitionSpecs.iterator(), tbl)); + + return partitions; + } + + List<Partition> getPartitionsWithSpecsByNames(Table tbl, List<String> partNames, GetPartitionsRequest request) + throws HiveException, TException { 
+ + if (!tbl.isPartitioned()) { + throw new HiveException(ErrorMsg.TABLE_NOT_PARTITIONED, tbl.getTableName()); + } + List<Partition> partitions = new ArrayList<Partition>(partNames.size()); + + int batchSize = HiveConf.getIntVar(conf, HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX); + int nParts = partNames.size(); + int nBatches = nParts / batchSize; + // I do not want to modify the original request when implementing batching, hence we will know what actual request was being made + GetPartitionsRequest req = request; + if (!req.isSetFilterSpec()) { + req.setFilterSpec(new GetPartitionsFilterSpec()); + } + + try { + for (int i = 0; i < nBatches; ++i) { + req.getFilterSpec().setFilters(partNames.subList(i*batchSize, (i+1)*batchSize)); + req.getFilterSpec().setFilterMode(PartitionFilterMode.BY_NAMES); + List<Partition> tParts = getPartitionsWithSpecsInternal(tbl, request); + + if (tParts != null) { + for (Partition tpart: tParts) { + partitions.add(new Partition(tbl, tpart.getTPartition())); Review Comment: can we use `partitions.addAll(tParts)` directly here? ########## ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplExternalTables.java: ########## @@ -97,15 +100,22 @@ void dataLocationDump(Table table, FileList fileList, HashMap<String, Boolean> s } if (table.isPartitioned()) { List<Partition> partitions; + GetPartitionsRequest request = new GetPartitionsRequest(table.getDbName(), table.getTableName(), + null, null); + request.setCatName(table.getCatName()); + request.setProjectionSpec( + new org.apache.hadoop.hive.metastore.client.builder.GetPartitionProjectionsSpecBuilder() + .addProjectField("catName").addProjectField("tableName") Review Comment: use `addProjectFieldList()` here. 
########## ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java: ########## @@ -4086,6 +4091,37 @@ public List<String> getPartitionNames(String dbName, String tblName, return names; } + // get partition names from provided partition values + public List<String> getPartitionNames(String dbName, String tblName, + short max, List<String> pVals) throws HiveException { + List<String> names = null; + Table t = getTable(dbName, tblName); + + try { + GetPartitionNamesPsRequest req = new GetPartitionNamesPsRequest(); + req.setTblName(tblName); + req.setDbName(dbName); + req.setPartValues(pVals); + req.setMaxParts(max); + if (AcidUtils.isTransactionalTable(t)) { + ValidWriteIdList validWriteIdList = getValidWriteIdList(dbName, tblName); + req.setValidWriteIdList(validWriteIdList != null ? validWriteIdList.toString() : null); + req.setId(t.getTTable().getId()); + } + GetPartitionNamesPsResponse res = getMSC().listPartitionNamesRequest(req); + names = res.getNames(); + } catch (NoSuchObjectException nsoe) { + // this means no partition exists for the given partition spec + // key value pairs - thrift cannot handle null return values, hence + // listPartitionNames() throws NoSuchObjectException to indicate null partitions + return Lists.newArrayList(); Review Comment: The `NoSuchObjectException` is thrown when the table does not exist; it does not mean the partition does not exist.
########## ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/exchange/AlterTableExchangePartitionAnalyzer.java: ########## @@ -77,9 +85,24 @@ protected void analyzeCommand(TableName tableName, Map<String, String> partition if (AcidUtils.isTransactionalTable(sourceTable) || AcidUtils.isTransactionalTable(destTable)) { throw new SemanticException(ErrorMsg.EXCHANGE_PARTITION_NOT_ALLOWED_WITH_TRANSACTIONAL_TABLES.getMsg()); } + List<String> sourceProjectFilters = MetaStoreUtils.getPvals(sourceTable.getPartCols(), partitionSpecs); // check if source partition exists - PartitionUtils.getPartitions(db, sourceTable, partitionSpecs, true); + GetPartitionsFilterSpec getSourcePartitionsFilterSpec = new GetPartitionsFilterSpec(); + getSourcePartitionsFilterSpec.setFilters(sourceProjectFilters); + getSourcePartitionsFilterSpec.setFilterMode(PartitionFilterMode.BY_VALUES); + + GetProjectionsSpec getProjectionsSpec = new GetPartitionProjectionsSpecBuilder() + .addProjectFieldList(Arrays.asList("values","lastAccessTime")).build(); + + GetPartitionsRequest request = new GetPartitionsRequest(sourceTable.getDbName(), sourceTable.getTableName(), + getProjectionsSpec, getSourcePartitionsFilterSpec); + request.setCatName(sourceTable.getCatName()); + try { + PartitionUtils.getPartitionsWithSpecs(db, sourceTable, request, true); + } catch (SemanticException ex) { Review Comment: The `try ... catch()` clause is unnecessary here. ########## standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/client/builder/GetPartitionProjectionsSpecBuilder.java: ########## @@ -41,6 +42,12 @@ public GetPartitionProjectionsSpecBuilder addProjectField(String field) { return this; } + public GetPartitionProjectionsSpecBuilder addProjectFieldList(List<String> fields) { + fieldList.addAll(Arrays.asList("catName","dbName","tableName")); Review Comment: why are these fields needed here?
########## ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/exchange/AlterTableExchangePartitionAnalyzer.java: ########## @@ -77,9 +85,24 @@ protected void analyzeCommand(TableName tableName, Map<String, String> partition if (AcidUtils.isTransactionalTable(sourceTable) || AcidUtils.isTransactionalTable(destTable)) { throw new SemanticException(ErrorMsg.EXCHANGE_PARTITION_NOT_ALLOWED_WITH_TRANSACTIONAL_TABLES.getMsg()); } + List<String> sourceProjectFilters = MetaStoreUtils.getPvals(sourceTable.getPartCols(), partitionSpecs); // check if source partition exists - PartitionUtils.getPartitions(db, sourceTable, partitionSpecs, true); + GetPartitionsFilterSpec getSourcePartitionsFilterSpec = new GetPartitionsFilterSpec(); + getSourcePartitionsFilterSpec.setFilters(sourceProjectFilters); + getSourcePartitionsFilterSpec.setFilterMode(PartitionFilterMode.BY_VALUES); + + GetProjectionsSpec getProjectionsSpec = new GetPartitionProjectionsSpecBuilder() + .addProjectFieldList(Arrays.asList("values","lastAccessTime")).build(); Review Comment: the project field list seems unnecessary since we are just checking if partition exists? 
########## ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java: ########## @@ -4466,6 +4526,81 @@ public List<Partition> getPartitionsByFilter(Table tbl, String filter) return convertFromMetastore(tbl, tParts); } + public List<Partition> getPartitionsWithSpecs(Table tbl, GetPartitionsRequest request) + throws HiveException, TException { + + if (!tbl.isPartitioned()) { + throw new HiveException(ErrorMsg.TABLE_NOT_PARTITIONED, tbl.getTableName()); + } + int batchSize= MetastoreConf.getIntVar(Hive.get().getConf(), MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX); + if(batchSize > 0){ + return new ArrayList<>(getAllPartitionsWithSpecsInBatches(tbl, batchSize, DEFAULT_BATCH_DECAYING_FACTOR, MetastoreConf.getIntVar( + Hive.get().getConf(), MetastoreConf.ConfVars.GETPARTITIONS_BATCH_MAX_RETRIES), request)); + }else{ + return getPartitionsWithSpecsInternal(tbl, request); + } + } + + public List<Partition> getPartitionsWithSpecsInternal(Table tbl, GetPartitionsRequest request) + throws HiveException, TException { + + if (!tbl.isPartitioned()) { + throw new HiveException(ErrorMsg.TABLE_NOT_PARTITIONED, tbl.getTableName()); + } + GetPartitionsResponse response = getMSC().getPartitionsWithSpecs(request); + List<org.apache.hadoop.hive.metastore.api.PartitionSpec> partitionSpecs = response.getPartitionSpec(); + List<Partition> partitions = new ArrayList<>(); + partitions.addAll(convertFromPartSpec(partitionSpecs.iterator(), tbl)); + + return partitions; + } + + List<Partition> getPartitionsWithSpecsByNames(Table tbl, List<String> partNames, GetPartitionsRequest request) + throws HiveException, TException { + + if (!tbl.isPartitioned()) { + throw new HiveException(ErrorMsg.TABLE_NOT_PARTITIONED, tbl.getTableName()); + } + List<Partition> partitions = new ArrayList<Partition>(partNames.size()); + + int batchSize = HiveConf.getIntVar(conf, HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX); + int nParts = partNames.size(); + int nBatches = nParts / batchSize; + // I do not want to 
modify the original request when implementing batching, hence we will know what actual request was being made + GetPartitionsRequest req = request; + if (!req.isSetFilterSpec()) { + req.setFilterSpec(new GetPartitionsFilterSpec()); + } + + try { + for (int i = 0; i < nBatches; ++i) { Review Comment: use `org.apache.hadoop.hive.metastore.Batchable` to run batches. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org