codope commented on code in PR #12653:
URL: https://github.com/apache/hudi/pull/12653#discussion_r1926587331
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java:
##########
@@ -460,4 +473,66 @@ public static <R> HoodieRecord<R>
createNewTaggedHoodieRecord(HoodieRecord<R> ol
throw new HoodieIndexException("Unsupported record type: " +
recordType);
}
}
+
+ /**
+ * Register a metadata index.
+ * Index definitions are stored in user-specified path or, by default, in
.hoodie/.index_defs/index.json.
+ * For the first time, the index definition file will be created if not
exists.
+ * For the second time, the index definition file will be updated if exists.
+ * Table Config is updated if necessary.
+ */
+ public static void register(HoodieTableMetaClient metaClient,
HoodieIndexDefinition indexDefinition) {
+ LOG.info("Registering index {} of using {}",
indexDefinition.getIndexName(), indexDefinition.getIndexType());
+ // build HoodieIndexMetadata and then add to index definition file
+ boolean indexDefnUpdated =
metaClient.buildIndexDefinition(indexDefinition);
+ if (indexDefnUpdated) {
+ String indexMetaPath = metaClient.getIndexDefinitionPath();
+ // update table config if necessary
+ if
(!metaClient.getTableConfig().getProps().containsKey(HoodieTableConfig.RELATIVE_INDEX_DEFINITION_PATH.key())
+ ||
!metaClient.getTableConfig().getRelativeIndexDefinitionPath().isPresent()) {
+
metaClient.getTableConfig().setValue(HoodieTableConfig.RELATIVE_INDEX_DEFINITION_PATH,
FSUtils.getRelativePartitionPath(metaClient.getBasePath(), new
StoragePath(indexMetaPath)));
+ HoodieTableConfig.update(metaClient.getStorage(),
metaClient.getMetaPath(), metaClient.getTableConfig().getProps());
+ }
+ }
+ }
+
+ public static HoodieIndexDefinition
getSecondaryOrExpressionIndexDefinition(HoodieTableMetaClient metaClient,
String userIndexName, String indexType, Map<String, Map<String, String>>
columns,
+
Map<String, String> options, Map<String, String> tableProperties) throws
Exception {
+ String fullIndexName = indexType.equals(PARTITION_NAME_SECONDARY_INDEX)
+ ? PARTITION_NAME_SECONDARY_INDEX_PREFIX + userIndexName
+ : PARTITION_NAME_EXPRESSION_INDEX_PREFIX + userIndexName;
+ if (indexExists(metaClient, fullIndexName)) {
+ throw new HoodieMetadataIndexException("Index already exists: " +
userIndexName);
+ }
+ checkArgument(columns.size() == 1, "Only one column can be indexed for
functional or secondary index.");
+
+ if (!isEligibleForIndexing(metaClient, indexType, tableProperties,
columns)) {
+ throw new HoodieMetadataIndexException("Not eligible for indexing: " +
indexType + ", indexName: " + userIndexName);
+ }
+
+ return HoodieIndexDefinition.newBuilder()
+ .withIndexName(fullIndexName)
+ .withIndexType(indexType)
+ .withIndexFunction(options.getOrDefault(EXPRESSION_OPTION,
IDENTITY_TRANSFORM))
+ .withSourceFields(new ArrayList<>(columns.keySet()))
+ .withIndexOptions(options)
+ .build();
+ }
+
+ public static boolean indexExists(HoodieTableMetaClient metaClient, String
indexName) {
+ return
metaClient.getTableConfig().getMetadataPartitions().stream().anyMatch(partition
-> partition.equals(indexName));
+ }
+
+ private static boolean isEligibleForIndexing(HoodieTableMetaClient
metaClient, String indexType, Map<String, String> options, Map<String,
Map<String, String>> columns) throws Exception {
+ if (!validateDataTypeForSecondaryIndex(new ArrayList<>(columns.keySet()),
new TableSchemaResolver(metaClient).getTableAvroSchema())) {
+ return false;
+ }
+ // for secondary index, record index is a must
+ if (indexType.equals(PARTITION_NAME_SECONDARY_INDEX)) {
+ // either record index is enabled or record index partition is already
present
+ return
metaClient.getTableConfig().getMetadataPartitions().stream().anyMatch(partition
-> partition.equals(MetadataPartitionType.RECORD_INDEX.getPartitionPath()))
Review Comment:
This applies to EI (expression index) as well. I will rename the methods accordingly.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
users@infra.apache.org