This is an automated email from the ASF dual-hosted git repository.
codope pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new fda52a0cfd3 [HUDI-7706] Improve validation of partition stats index
include key (#11158)
fda52a0cfd3 is described below
commit fda52a0cfd3b8cad251bef7f890b782993e32ea8
Author: Sagar Sumit <[email protected]>
AuthorDate: Mon May 6 16:11:29 2024 +0530
[HUDI-7706] Improve validation of partition stats index include key (#11158)
---
.../hudi/metadata/HoodieMetadataPayload.java | 2 +-
.../hudi/dml/TestHoodieTableValuedFunction.scala | 22 +++++++---------------
2 files changed, 8 insertions(+), 16 deletions(-)
diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
index 1d1c7778057..a0be17fb49a 100644
--- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
+++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
@@ -674,7 +674,7 @@ public class HoodieMetadataPayload implements HoodieRecordPayload<HoodieMetadata
     });
   }
 
-  private static String getPartitionStatsIndexKey(String partitionPath, String columnName) {
+  public static String getPartitionStatsIndexKey(String partitionPath, String columnName) {
     final PartitionIndexID partitionIndexID = new PartitionIndexID(HoodieTableMetadataUtil.getColumnStatsIndexPartitionIdentifier(partitionPath));
     final ColumnIndexID columnIndexID = new ColumnIndexID(columnName);
     return columnIndexID.asBase64EncodedString().concat(partitionIndexID.asBase64EncodedString());
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestHoodieTableValuedFunction.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestHoodieTableValuedFunction.scala
index 21527d297ff..17b5b558b0a 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestHoodieTableValuedFunction.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestHoodieTableValuedFunction.scala
@@ -19,9 +19,7 @@ package org.apache.spark.sql.hudi.dml
 
 import org.apache.hudi.DataSourceWriteOptions.SPARK_SQL_INSERT_INTO_OPERATION
 import org.apache.hudi.HoodieSparkUtils
-import org.apache.hudi.common.util.hash.PartitionIndexID
-import org.apache.hudi.metadata.HoodieTableMetadataUtil
-
+import org.apache.hudi.metadata.HoodieMetadataPayload.getPartitionStatsIndexKey
 import org.apache.spark.sql.functions.{col, from_json}
 import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase
@@ -679,25 +677,19 @@ class TestHoodieTableValuedFunction extends HoodieSparkSqlTestBase {
           s"select * from hudi_metadata('$identifier') where type=3"
         )
         assert(result4DF.count() == 3)
-        // TODO(HUDI-7705): use partition column name to generate the prefix after the bug fix
-        val columnNameKeyPrefix = "XV1ds8/f890="
         checkAnswer(s"select key, ColumnStatsMetadata.minValue.member1.value from hudi_metadata('$identifier') where type=3")(
-          Seq(columnNameKeyPrefix + getPartitionIndexKey("ts=10"), 1000),
-          Seq(columnNameKeyPrefix + getPartitionIndexKey("ts=20"), 2000),
-          Seq(columnNameKeyPrefix + getPartitionIndexKey("ts=30"), 3000)
+          Seq(getPartitionStatsIndexKey("ts=10", "price"), 1000),
+          Seq(getPartitionStatsIndexKey("ts=20", "price"), 2000),
+          Seq(getPartitionStatsIndexKey("ts=30", "price"), 3000)
         )
         checkAnswer(s"select key, ColumnStatsMetadata.maxValue.member1.value from hudi_metadata('$identifier') where type=3")(
-          Seq(columnNameKeyPrefix + getPartitionIndexKey("ts=10"), 2000),
-          Seq(columnNameKeyPrefix + getPartitionIndexKey("ts=20"), 3000),
-          Seq(columnNameKeyPrefix + getPartitionIndexKey("ts=30"), 4000)
+          Seq(getPartitionStatsIndexKey("ts=10", "price"), 2000),
+          Seq(getPartitionStatsIndexKey("ts=20", "price"), 3000),
+          Seq(getPartitionStatsIndexKey("ts=30", "price"), 4000)
         )
       }
     }
   }
 
     spark.sessionState.conf.unsetConf(SPARK_SQL_INSERT_INTO_OPERATION.key)
   }
-
-  def getPartitionIndexKey(partitionPath: String): String = {
-    new PartitionIndexID(HoodieTableMetadataUtil.getColumnStatsIndexPartitionIdentifier(partitionPath)).asBase64EncodedString
-  }
 }