codope commented on code in PR #12266:
URL: https://github.com/apache/hudi/pull/12266#discussion_r1844936596
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestSecondaryIndex.scala:
##########
@@ -80,40 +83,46 @@ class TestSecondaryIndex extends HoodieSparkSqlTestBase {
spark.sql(s"insert into $tableName values(3, 'a3', 10, 1002)")
checkAnswer(s"show indexes from default.$tableName")()
- checkAnswer(s"create index idx_name on $tableName using lucene
(name) options(block_size=1024)")()
- checkAnswer(s"create index idx_price on $tableName using lucene
(price options(order='desc')) options(block_size=512)")()
-
- // Create an index with multiple columns
- checkException(s"create index idx_id_ts on $tableName using lucene
(id, ts)")("Lucene index only support single column")
+ spark.sql(s"create index idx_name on $tableName using
secondary_index(name)")
+ checkAnswer(s"show indexes from default.$tableName")(
Review Comment:
since record index is also enabled, shouldn't we show that as well?
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/functional/BaseHoodieIndexClient.java:
##########
@@ -44,13 +43,10 @@ public BaseHoodieIndexClient() {
*/
public void register(HoodieTableMetaClient metaClient, String indexName,
String indexType, Map<String, Map<String, String>> columns, Map<String, String>
options) {
LOG.info("Registering index {} of using {}", indexName, indexType);
- String indexMetaPath = metaClient.getTableConfig().getIndexDefinitionPath()
- .orElseGet(() -> metaClient.getMetaPath()
- + StoragePath.SEPARATOR +
HoodieTableMetaClient.INDEX_DEFINITION_FOLDER_NAME
- + StoragePath.SEPARATOR +
HoodieTableMetaClient.INDEX_DEFINITION_FILE_NAME);
// build HoodieFunctionalIndexMetadata and then add to index definition
file
Review Comment:
while you're at it, maybe you can also update the comment to reflect the correct
class name - HoodieIndexMetadata
##########
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/IndexCommands.scala:
##########
@@ -90,31 +94,33 @@ case class DropIndexCommand(table: CatalogTable,
}
}
+/**
+ * Command to show available indexes in hudi. The corresponding logical plan
is available at
+ * org.apache.spark.sql.catalyst.plans.logical.ShowIndexes
+ */
case class ShowIndexesCommand(table: CatalogTable,
override val output: Seq[Attribute]) extends
IndexBaseCommand {
override def run(sparkSession: SparkSession): Seq[Row] = {
val metaClient = createHoodieTableMetaClient(table.identifier,
sparkSession)
- val secondaryIndexes = SecondaryIndexManager.getInstance().show(metaClient)
-
- val mapper = JsonUtils.getObjectMapper
- toScalaOption(secondaryIndexes).map(x =>
- x.asScala.map(i => {
- val colOptions =
- if (i.getColumns.values().asScala.forall(_.isEmpty)) "" else
mapper.writeValueAsString(i.getColumns)
- val options = if (i.getOptions.isEmpty) "" else
mapper.writeValueAsString(i.getOptions)
- Row(i.getIndexName, i.getColumns.keySet().asScala.mkString(","),
- i.getIndexType.name().toLowerCase, colOptions, options)
- }).toSeq).getOrElse(Seq.empty[Row])
+ // need to ensure that the index name is for a valid partition type
+ val indexMetadataOpt = metaClient.getIndexMetadata
Review Comment:
we should first get the indexes available from the table config and show those
indexes (excluding files). For secondary and functional indexes, we can populate
the relevant details as you have done here. But we should not miss the record
index, partition stats, and others if those are available (except files).
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestSecondaryIndex.scala:
##########
@@ -80,40 +83,46 @@ class TestSecondaryIndex extends HoodieSparkSqlTestBase {
spark.sql(s"insert into $tableName values(3, 'a3', 10, 1002)")
checkAnswer(s"show indexes from default.$tableName")()
- checkAnswer(s"create index idx_name on $tableName using lucene
(name) options(block_size=1024)")()
- checkAnswer(s"create index idx_price on $tableName using lucene
(price options(order='desc')) options(block_size=512)")()
-
- // Create an index with multiple columns
- checkException(s"create index idx_id_ts on $tableName using lucene
(id, ts)")("Lucene index only support single column")
+ spark.sql(s"create index idx_name on $tableName using
secondary_index(name)")
+ checkAnswer(s"show indexes from default.$tableName")(
+ Seq("secondary_index_idx_name", "secondary_index", "name")
+ )
+ spark.sql(s"create index idx_price on $tableName using
secondary_index(price)")
// Create an index with the occupied name
- checkException(s"create index idx_price on $tableName using lucene
(price)")(
Review Comment:
Do we have a validation on keywords following `USING` in create index
command? If someone indeed tries to create index .. using lucene(price), then
we should throw an error.
##########
hudi-common/src/main/java/org/apache/hudi/common/model/HoodieIndexDefinition.java:
##########
@@ -81,6 +81,10 @@ public String getIndexType() {
return indexType;
}
+ public String getIndexNameWithoutPrefix() {
+ return indexName.substring(indexType.length() + 1);
Review Comment:
usually the prefix is MetadataPartitionType.getPartitionPath. Should we move
the method to that enum?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]