zhangyue19921010 commented on code in PR #13017:
URL: https://github.com/apache/hudi/pull/13017#discussion_r2011357105
##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/BucketPartitionUtils.scala:
##########
@@ -21,16 +21,27 @@ package org.apache.spark.sql
import org.apache.hudi.common.model.HoodieRecord
import org.apache.hudi.common.util.Functions
import org.apache.hudi.common.util.hash.BucketIndexUtil
-import org.apache.hudi.index.bucket.BucketIdentifier
+import org.apache.hudi.index.bucket.{BucketIdentifier,
PartitionBucketIndexCalculator}
import org.apache.spark.Partitioner
import org.apache.spark.sql.catalyst.InternalRow
object BucketPartitionUtils {
- def createDataFrame(df: DataFrame, indexKeyFields: String, bucketNum: Int,
partitionNum: Int): DataFrame = {
+ def createDataFrame(df: DataFrame, indexKeyFields: String, bucketNum: Int,
partitionNum: Int,
Review Comment:
As mentioned afterwards, use `numBucketsFunction` to encapsulate access to
both the regular buckets and the new one.
##########
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java:
##########
@@ -474,6 +474,28 @@ private FlinkOptions() {
.defaultValue(4) // default 4 buckets per partition
.withDescription("Hudi bucket number per partition. Only affected if
using Hudi bucket index.");
+ @AdvancedConfig
+ public static final ConfigOption<String> BUCKET_INDEX_PARTITION_RULE =
ConfigOptions
+ .key(HoodieIndexConfig.BUCKET_INDEX_PARTITION_RULE_TYPE.key())
+ .stringType()
+
.defaultValue(HoodieIndexConfig.BUCKET_INDEX_PARTITION_RULE_TYPE.defaultValue())
+ .withDescription("Rule parser for expressions when using partition level
bucket index, default regex.");
+
+ @AdvancedConfig
+ public static final ConfigOption<String> BUCKET_INDEX_PARTITION_EXPRESSIONS
= ConfigOptions
+ .key(HoodieIndexConfig.BUCKET_INDEX_PARTITION_EXPRESSIONS.key())
+ .stringType()
+ .noDefaultValue()
+ .withDescription("Users can use this parameter to specify expression and
the corresponding bucket "
+ + "numbers (separated by commas).Multiple rules are separated by
semicolons like "
+ +
"hoodie.bucket.index.partition.expressions=expression1,bucket-number1;expression2,bucket-number2");
+
+ public static final ConfigOption<String> BUCKET_INDEX_PARTITION_LOAD_INSTANT
= ConfigOptions
+ .key(HoodieIndexConfig.BUCKET_INDEX_PARTITION_LOAD_INSTANT.key())
Review Comment:
done.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]