This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 819788f8651 [MINOR] Remove repetitive words in docs (#10844)
819788f8651 is described below

commit 819788f865160ab6445b8ad8c707631a82577c13
Author: studystill <[email protected]>
AuthorDate: Mon Mar 11 08:34:32 2024 +0800

    [MINOR] Remove repetitive words in docs (#10844)
    
    Signed-off-by: studystill <[email protected]>
---
 .../src/main/scala/org/apache/spark/sql/HoodieCatalystPlansUtils.scala  | 2 +-
 .../src/main/java/org/apache/hudi/common/bloom/InternalBloomFilter.java | 2 +-
 .../src/main/java/org/apache/hudi/sink/bulk/sort/SortOperator.java      | 2 +-
 .../src/main/scala/org/apache/hudi/HoodieFileIndex.scala                | 2 +-
 rfc/rfc-76/rfc-76.md                                                    | 2 +-
 scripts/pr_compliance.py                                                | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystPlansUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystPlansUtils.scala
index b9110f1ed93..40e62ddd0ef 100644
--- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystPlansUtils.scala
+++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystPlansUtils.scala
@@ -144,7 +144,7 @@ trait HoodieCatalystPlansUtils {
   def createMITJoin(left: LogicalPlan, right: LogicalPlan, joinType: JoinType, condition: Option[Expression], hint: String): LogicalPlan
 
   /**
-   * true if both plans produce the same attributes in the the same order
+   * true if both plans produce the same attributes in the same order
    */
   def produceSameOutput(a: LogicalPlan, b: LogicalPlan): Boolean
 }
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalBloomFilter.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalBloomFilter.java
index ac93de2d58f..7ef766a2a3c 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalBloomFilter.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalBloomFilter.java
@@ -199,7 +199,7 @@ public class InternalBloomFilter extends InternalFilter {
   }
 
   /**
-   * @return size of the the bloomfilter
+   * @return size of the bloomfilter
    */
   public int getVectorSize() {
     return this.vectorSize;
diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/sort/SortOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/sort/SortOperator.java
index e91535a2473..357bc07160d 100644
--- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/sort/SortOperator.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/sort/SortOperator.java
@@ -100,7 +100,7 @@ public class SortOperator extends TableStreamOperator<RowData>
 
     collector = new StreamRecordCollector<>(output);
 
-    // register the the metrics.
+    // register the metrics.
     getMetricGroup().gauge("memoryUsedSizeInBytes", (Gauge<Long>) 
sorter::getUsedMemoryInBytes);
     getMetricGroup().gauge("numSpillFiles", (Gauge<Long>) 
sorter::getNumSpillFiles);
     getMetricGroup().gauge("spillInBytes", (Gauge<Long>) 
sorter::getSpillInBytes);
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala
index 3444feaecff..affed871cad 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala
@@ -63,7 +63,7 @@ import scala.util.{Failure, Success, Try}
  * who's directory level is 3).We can still read it as a partitioned table. We will mapping the
  * partition path (e.g. 2021/03/10) to the only partition column (e.g. "dt").
  *
- * 3、Else the the partition columns size is not equal to the partition directory level and the
+ * 3、Else the partition columns size is not equal to the partition directory level and the
  * size is great than "1" (e.g. partition column is "dt,hh", the partition path is "2021/03/10/12")
  * , we read it as a Non-Partitioned table because we cannot know how to mapping the partition
  * path with the partition columns in this case.
diff --git a/rfc/rfc-76/rfc-76.md b/rfc/rfc-76/rfc-76.md
index 1ddc107b5ce..e9f176f1d5f 100644
--- a/rfc/rfc-76/rfc-76.md
+++ b/rfc/rfc-76/rfc-76.md
@@ -61,7 +61,7 @@ Let's consider following scenario: while persisting the dataset, writing one of
 To provide for aforementioned requirement of the records obtaining globally unique synthetic keys either of the 2 following properties have to hold true:
 Key generation has to be deterministic and reproducible (so that upon Spark retries we could be certain same records will be obtaining the identity value they did during previous pass)
 Records have to be getting globally unique identity value every time (such that key collisions are simply impossible)
-Note that, deterministic and reproducible identity value association is is only feasible for the incoming datasets represented as "determinate" RDDs. However, It's worth pointing out that other RDD classes (such as "unordered", "indeterminate") are very rare occurrences involving some inherent non-determinism (varying content, order, etc), and pose challenges in terms of their respective handling by Hudi even w/o auto-generation (for ex, for such RDDs Hudi can't provide for uniqueness gu [...]
+Note that, deterministic and reproducible identity value association is only feasible for the incoming datasets represented as "determinate" RDDs. However, It's worth pointing out that other RDD classes (such as "unordered", "indeterminate") are very rare occurrences involving some inherent non-determinism (varying content, order, etc), and pose challenges in terms of their respective handling by Hudi even w/o auto-generation (for ex, for such RDDs Hudi can't provide for uniqueness guara [...]
 For achieving our goal of providing globally unique keys we're planning on relying on the following synthetic key format comprised of 2 components
 (Reserved) Commit timestamp: Use reserved commit timestamp as prefix (to provide for global uniqueness of rows)
 Row id: unique identifier of the row (record) w/in the provided batch
diff --git a/scripts/pr_compliance.py b/scripts/pr_compliance.py
index b9a7aaffe57..dcd3c4c0caf 100644
--- a/scripts/pr_compliance.py
+++ b/scripts/pr_compliance.py
@@ -108,7 +108,7 @@ def test_title():
 #                                                                             #
 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
     
-#Enums for the the outcome of parsing a single line
+#Enums for the outcome of parsing a single line
 class Outcomes:
     #error was found so we should stop parsing and exit with error
     ERROR = 0

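For context on the rfc-76 hunk above: the RFC describes synthetic keys built from a reserved commit timestamp prefix plus a row id that is unique within the batch. Below is a minimal sketch of that composition; it is not Hudi's actual key generator, and the helper name, separator, and zero-padding width are illustrative assumptions.

    // Sketch only: composes a globally unique synthetic key as described in
    // rfc-76 -- a reserved commit timestamp prefix plus a per-batch row id.
    // Name, separator, and padding width are assumptions, not Hudi's API.
    object SyntheticKeySketch {
      def makeKey(commitTimestamp: String, rowId: Long): String =
        // The commit timestamp prefix scopes uniqueness globally; the rowId
        // only needs to be unique within the provided batch.
        s"${commitTimestamp}_${"%019d".format(rowId)}"

      def main(args: Array[String]): Unit = {
        // prints e.g. "20240311083432000_0000000000000000042"
        println(makeKey("20240311083432000", 42L))
      }
    }

Because both components are deterministic for a "determinate" RDD, a Spark retry regenerating the same batch would reproduce the same keys, which is the property the RFC excerpt calls out.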