This is an automated email from the ASF dual-hosted git repository.
laskoviymishka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new 4950803d feat(cli): add partition-stats command (#1065)
4950803d is described below
commit 4950803db409505bf310fd144d78f22c5b716057
Author: Tanmay Rauth <[email protected]>
AuthorDate: Tue May 12 13:41:11 2026 -0700
feat(cli): add partition-stats command (#1065)
Add `iceberg partition-stats TABLE_ID` to list partition statistics
files with snapshot ID, path, and size. Supports --snapshot-id filter
and --all flag.
Related: #957
Depends On: #1073
---
cmd/iceberg/partition_stats.go | 82 +++++++++++++++--
cmd/iceberg/partition_stats_test.go | 177 ++++++++++++++++++++++++++++++++++++
2 files changed, 253 insertions(+), 6 deletions(-)
diff --git a/cmd/iceberg/partition_stats.go b/cmd/iceberg/partition_stats.go
index f4a30ae7..738a9d9d 100644
--- a/cmd/iceberg/partition_stats.go
+++ b/cmd/iceberg/partition_stats.go
@@ -19,17 +19,87 @@ package main
import (
"context"
- "errors"
+ "encoding/json"
"os"
+ "strconv"
"github.com/apache/iceberg-go/catalog"
"github.com/apache/iceberg-go/table"
+ "github.com/pterm/pterm"
)
-func runPartitionStats(_ context.Context, output Output, _ catalog.Catalog, _
*PartitionStatsCmd) {
- output.Error(errors.New("partition-stats: not yet implemented"))
- os.Exit(1)
+func runPartitionStats(ctx context.Context, output Output, cat
catalog.Catalog, cmd *PartitionStatsCmd) {
+ tbl := loadTable(ctx, output, cat, cmd.TableID)
+ output.PartitionStats(tbl, cmd.SnapshotID, cmd.All)
}
-func (textOutput) PartitionStats(_ *table.Table, _ *int64, _ bool) {}
-func (jsonOutput) PartitionStats(_ *table.Table, _ *int64, _ bool) {}
+func buildPartitionStatsEntries(tbl *table.Table, snapshotID *int64, all bool)
[]PartitionStatsEntry {
+ var entries []PartitionStatsEntry
+
+ meta := tbl.Metadata()
+
+ if !all && snapshotID == nil {
+ snap := meta.CurrentSnapshot()
+ if snap == nil {
+ return entries
+ }
+
+ sid := snap.SnapshotID
+ snapshotID = &sid
+ }
+
+ for ps := range meta.PartitionStatistics() {
+ if snapshotID != nil && ps.SnapshotID != *snapshotID {
+ continue
+ }
+
+ entries = append(entries, PartitionStatsEntry{
+ SnapshotID: ps.SnapshotID,
+ Path: ps.StatisticsPath,
+ SizeBytes: ps.FileSizeInBytes,
+ })
+ }
+
+ return entries
+}
+
+func (t textOutput) PartitionStats(tbl *table.Table, snapshotID *int64, all
bool) {
+ entries := buildPartitionStatsEntries(tbl, snapshotID, all)
+
+ if len(entries) == 0 {
+ pterm.Println("No partition statistics files found.")
+
+ return
+ }
+
+ data := pterm.TableData{{"SNAPSHOT ID", "PATH", "SIZE"}}
+
+ for _, e := range entries {
+ data = append(data, []string{
+ strconv.FormatInt(e.SnapshotID, 10),
+ e.Path,
+ formatBytes(e.SizeBytes),
+ })
+ }
+
+ pterm.DefaultTable.
+ WithHasHeader(true).
+ WithHeaderRowSeparator("-").
+ WithData(data).Render()
+}
+
+func (j jsonOutput) PartitionStats(tbl *table.Table, snapshotID *int64, all
bool) {
+ entries := buildPartitionStatsEntries(tbl, snapshotID, all)
+
+ result := struct {
+ Table string `json:"table"`
+ PartitionStatisticsFiles []PartitionStatsEntry
`json:"partition_statistics_files"`
+ }{
+ Table: tableIDString(tbl),
+ PartitionStatisticsFiles: entries,
+ }
+
+ if err := json.NewEncoder(os.Stdout).Encode(result); err != nil {
+ j.Error(err)
+ }
+}
diff --git a/cmd/iceberg/partition_stats_test.go
b/cmd/iceberg/partition_stats_test.go
new file mode 100644
index 00000000..ec185529
--- /dev/null
+++ b/cmd/iceberg/partition_stats_test.go
@@ -0,0 +1,177 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package main
+
+import (
+ "bytes"
+ "os"
+ "testing"
+
+ "github.com/apache/iceberg-go/table"
+ "github.com/pterm/pterm"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+const partitionStatsTestMetadata = `{
+ "format-version": 2,
+ "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+ "location": "s3://bucket/test/location",
+ "last-sequence-number": 2,
+ "last-updated-ms": 1602638573590,
+ "last-column-id": 1,
+ "current-schema-id": 0,
+ "schemas": [
+ {"type": "struct", "schema-id": 0, "fields": [{"id": 1, "name": "x",
"required": true, "type": "long"}]}
+ ],
+ "default-spec-id": 0,
+ "partition-specs": [{"spec-id": 0, "fields": []}],
+ "last-partition-id": 0,
+ "default-sort-order-id": 0,
+ "sort-orders": [{"order-id": 0, "fields": []}],
+ "properties": {},
+ "current-snapshot-id": 2000,
+ "snapshots": [
+ {"snapshot-id": 1000, "timestamp-ms": 1615100955770,
"sequence-number": 1, "summary": {"operation": "append"}, "manifest-list":
"s3://a/b/1.avro", "schema-id": 0},
+ {"snapshot-id": 2000, "parent-snapshot-id": 1000, "timestamp-ms":
1625100955770, "sequence-number": 2, "summary": {"operation": "append"},
"manifest-list": "s3://a/b/2.avro", "schema-id": 0}
+ ],
+ "snapshot-log": [],
+ "metadata-log": [],
+ "refs": {"main": {"snapshot-id": 2000, "type": "branch"}},
+ "partition-statistics": [
+ {"snapshot-id": 1000, "statistics-path": "s3://bucket/stats/1000.bin",
"file-size-in-bytes": 1024},
+ {"snapshot-id": 2000, "statistics-path": "s3://bucket/stats/2000.bin",
"file-size-in-bytes": 2048}
+ ]
+}`
+
+func TestBuildPartitionStatsEntriesAll(t *testing.T) {
+ meta, err :=
table.ParseMetadataBytes([]byte(partitionStatsTestMetadata))
+ require.NoError(t, err)
+
+ tbl := table.New([]string{"db", "events"}, meta, "", nil, nil)
+ entries := buildPartitionStatsEntries(tbl, nil, true)
+
+ assert.Len(t, entries, 2)
+}
+
+func TestBuildPartitionStatsEntriesCurrentOnly(t *testing.T) {
+ meta, err :=
table.ParseMetadataBytes([]byte(partitionStatsTestMetadata))
+ require.NoError(t, err)
+
+ tbl := table.New([]string{"db", "events"}, meta, "", nil, nil)
+ entries := buildPartitionStatsEntries(tbl, nil, false)
+
+ require.Len(t, entries, 1)
+ assert.Equal(t, int64(2000), entries[0].SnapshotID)
+ assert.Equal(t, "s3://bucket/stats/2000.bin", entries[0].Path)
+ assert.Equal(t, int64(2048), entries[0].SizeBytes)
+}
+
+func TestBuildPartitionStatsEntriesBySnapshotID(t *testing.T) {
+ meta, err :=
table.ParseMetadataBytes([]byte(partitionStatsTestMetadata))
+ require.NoError(t, err)
+
+ tbl := table.New([]string{"db", "events"}, meta, "", nil, nil)
+ sid := int64(1000)
+ entries := buildPartitionStatsEntries(tbl, &sid, false)
+
+ require.Len(t, entries, 1)
+ assert.Equal(t, int64(1000), entries[0].SnapshotID)
+}
+
+func TestTextOutputPartitionStatsEmpty(t *testing.T) {
+ var buf bytes.Buffer
+ pterm.SetDefaultOutput(&buf)
+ pterm.DisableColor()
+
+ const emptyMeta = `{
+ "format-version": 2,
+ "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+ "location": "s3://bucket/test/location",
+ "last-sequence-number": 0,
+ "last-updated-ms": 1602638573590,
+ "last-column-id": 1,
+ "current-schema-id": 0,
+ "schemas": [{"type": "struct", "schema-id": 0, "fields": [{"id": 1,
"name": "x", "required": true, "type": "long"}]}],
+ "default-spec-id": 0,
+ "partition-specs": [{"spec-id": 0, "fields": []}],
+ "last-partition-id": 0,
+ "default-sort-order-id": 0,
+ "sort-orders": [{"order-id": 0, "fields": []}],
+ "properties": {},
+ "current-snapshot-id": -1,
+ "snapshots": [],
+ "snapshot-log": [],
+ "metadata-log": [],
+ "refs": {}
+ }`
+
+ meta, err := table.ParseMetadataBytes([]byte(emptyMeta))
+ require.NoError(t, err)
+
+ tbl := table.New([]string{"db", "empty"}, meta, "", nil, nil)
+ buf.Reset()
+
+ textOutput{}.PartitionStats(tbl, nil, false)
+
+ assert.Contains(t, buf.String(), "No partition statistics files found.")
+}
+
+func TestTextOutputPartitionStats(t *testing.T) {
+ var buf bytes.Buffer
+ pterm.SetDefaultOutput(&buf)
+ pterm.DisableColor()
+
+ meta, err :=
table.ParseMetadataBytes([]byte(partitionStatsTestMetadata))
+ require.NoError(t, err)
+
+ tbl := table.New([]string{"db", "events"}, meta, "", nil, nil)
+ buf.Reset()
+
+ textOutput{}.PartitionStats(tbl, nil, true)
+
+ output := buf.String()
+ assert.Contains(t, output, "SNAPSHOT ID")
+ assert.Contains(t, output, "PATH")
+ assert.Contains(t, output, "SIZE")
+ assert.Contains(t, output, "1000")
+ assert.Contains(t, output, "2000")
+}
+
+func TestJSONOutputPartitionStats(t *testing.T) {
+ oldStdout := os.Stdout
+ r, w, _ := os.Pipe()
+ os.Stdout = w
+ defer func() { os.Stdout = oldStdout }()
+
+ meta, err :=
table.ParseMetadataBytes([]byte(partitionStatsTestMetadata))
+ require.NoError(t, err)
+
+ tbl := table.New([]string{"db", "events"}, meta, "", nil, nil)
+
+ jsonOutput{}.PartitionStats(tbl, nil, true)
+
+ w.Close()
+ var buf bytes.Buffer
+ _, _ = buf.ReadFrom(r)
+
+ output := buf.String()
+ assert.Contains(t, output, `"table":"db.events"`)
+ assert.Contains(t, output, `"partition_statistics_files":[`)
+ assert.Contains(t, output, `"snapshot_id":1000`)
+}