This is an automated email from the ASF dual-hosted git repository.
codope pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new d0c78bf6752 Reduce the data amount collected on spark driver (#9995)
d0c78bf6752 is described below
commit d0c78bf6752e6e18d7b19602618b26918161155c
Author: Lin Liu <[email protected]>
AuthorDate: Mon Nov 6 22:28:03 2023 -0800
Reduce the data amount collected on spark driver (#9995)
When building the profile, the Spark driver should only care about the
data distribution over (partition, instant_time, file_id), rather than
(partition, instant_time, file_id, record_position).
---
.../main/java/org/apache/hudi/common/model/HoodieRecordLocation.java | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecordLocation.java
b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecordLocation.java
index 15f72918096..16417db63da 100644
---
a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecordLocation.java
+++
b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecordLocation.java
@@ -61,13 +61,12 @@ public class HoodieRecordLocation implements Serializable,
KryoSerializable {
return false;
}
HoodieRecordLocation otherLoc = (HoodieRecordLocation) o;
- return Objects.equals(instantTime, otherLoc.instantTime) &&
Objects.equals(fileId, otherLoc.fileId)
- && Objects.equals(position, otherLoc.position);
+ return Objects.equals(instantTime, otherLoc.instantTime) &&
Objects.equals(fileId, otherLoc.fileId);
}
@Override
public int hashCode() {
- return Objects.hash(instantTime, fileId, position);
+ return Objects.hash(instantTime, fileId);
}
@Override