zeroshade commented on code in PR #622:
URL: https://github.com/apache/iceberg-go/pull/622#discussion_r2529114279
##########
table/partitioned_fanout_writer.go:
##########
@@ -210,22 +211,103 @@ func (p *partitionedFanoutWriter) getPartitionMap(record
arrow.RecordBatch) (map
transformedLiteral :=
sourceField.Transform.Apply(iceberg.Optional[iceberg.Literal]{Valid: true, Val:
val})
if transformedLiteral.Valid {
partitionRec[i] =
transformedLiteral.Val.Any()
- partitionValues[sourceField.FieldID] =
transformedLiteral.Val.Any()
} else {
- partitionRec[i],
partitionValues[sourceField.FieldID] = nil, nil
+ partitionRec[i] = nil
}
} else {
- partitionRec[i],
partitionValues[partitionFieldsInfo[i].fieldID] = nil, nil
+ partitionRec[i] = nil
}
}
- partitionKey := p.partitionPath(partitionRec)
- partVal := partitionMap[partitionKey]
- partVal.rows = append(partitionMap[partitionKey].rows, row)
- partVal.partitionValues = partitionValues
- partitionMap[partitionKey] = partVal
+
+ // Get or create partition info for this partition key
+ partVal := partitionMap.getOrCreate(partitionRec,
partitionFieldsInfo)
+ partVal.rows = append(partVal.rows, row)
+ }
+
+ return partitionMap.collectPartitions(p.partitionSpec, p.schema), nil
+}
+
+// partitionMapNode represents a simple tree structure for storing
partitionInfo.
+//
+// Each key is the partition value at that level of the tree, and the key
hierarchy
+// is in the order of the partition spec.
+// The value is either a *partitionMapNode or a *partitionInfo.
+type partitionMapNode struct {
+ children map[any]any
+ leafCount int
+}
+
+func newPartitionMapNode() *partitionMapNode {
+ return &partitionMapNode{
+ children: make(map[any]any),
+ leafCount: 0,
+ }
+}
+
+// getOrCreate navigates the tree and returns the partitionInfo for the given
partition key,
+// creating nodes along the way if they don't exist
+func (n *partitionMapNode) getOrCreate(partitionRec partitionRecord, fieldInfo
[]struct {
+ sourceField *iceberg.PartitionField
+ fieldID int
+},
+) *partitionInfo {
+ // Navigate through all but the last partition field
+ node := n
+ i := 0
+ for ; i < len(partitionRec)-1; i++ {
+ val, ok := node.children[partitionRec[i]]
+ if !ok {
+ newNode := newPartitionMapNode()
+ node.children[partitionRec[i]] = newNode
+ node = newNode
+ } else {
+ node = val.(*partitionMapNode)
+ }
+ }
Review Comment:
can we just use `for _, part := range partitionRec[:len(partitionRec)-1] {` ?
Then below for the last key you can just use `lastKey :=
partitionRec[len(partitionRec)-1]`
This would simplify the the loop and logic
##########
table/partitioned_fanout_writer.go:
##########
Review Comment:
Given the additional cases we added above, can we reduce the cases here at
all?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]