This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new 8be5e69 feat(arrow/_examples): enhance library examples (#394)
8be5e69 is described below
commit 8be5e697a21578d61d4b4ea75516099d4a34a451
Author: Kristofer Gaudel <[email protected]>
AuthorDate: Thu Jun 12 12:47:04 2025 -0400
feat(arrow/_examples): enhance library examples (#394)
### Rationale for this change
Enhancing the library examples
### What changes are included in this PR?
3 new examples on how to use the `arrow-go` library:
- Reading from a CSV file
- Type conversions
- Managing performance
### Are these changes tested?
Not applicable
### Are there any user-facing changes?
No, just enhancing examples
---
arrow/array/example_type_conversion_test.go | 147 +++++++++++++++++++++
arrow/csv/example_csv_test.go | 112 ++++++++++++++++
.../main.go => example_table_creation_test.go} | 46 +++++--
.../helloworld => examples/table_creation}/main.go | 30 +++--
arrow/memory/example_memory_perf_test.go | 71 ++++++++++
ci/scripts/tinygo_example.sh | 2 +-
6 files changed, 389 insertions(+), 19 deletions(-)
diff --git a/arrow/array/example_type_conversion_test.go
b/arrow/array/example_type_conversion_test.go
new file mode 100644
index 0000000..94516dc
--- /dev/null
+++ b/arrow/array/example_type_conversion_test.go
@@ -0,0 +1,147 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array_test
+
+import (
+ "fmt"
+
+ "github.com/apache/arrow-go/v18/arrow"
+ "github.com/apache/arrow-go/v18/arrow/array"
+ "github.com/apache/arrow-go/v18/arrow/memory"
+)
+
+// Example_typeConversion demonstrates three element-wise conversions between
+// Arrow arrays: widening Int32 to Int64, rendering a nullable Float64 array
+// as strings while carrying the nulls across, and printing each row of a
+// List<Int32> array by slicing its flat child values with the list offsets.
+//
+// Lines that are printed with a leading space are written in the Output block
+// with two spaces after "//": go/doc removes exactly one space after the
+// comment marker, so the second one is what matches the printed space.
+func Example_typeConversion() {
+	pool := memory.NewGoAllocator()
+
+	// 1. Basic type conversion (Int32 to Int64)
+	fmt.Println("Example 1: Converting Int32 to Int64")
+	int32Builder := array.NewInt32Builder(pool)
+	defer int32Builder.Release()
+
+	int32Builder.AppendValues([]int32{1, 2, 3, 4, 5}, nil)
+	int32Array := int32Builder.NewInt32Array()
+	defer int32Array.Release()
+
+	// Convert to Int64 by widening one element at a time
+	int64Builder := array.NewInt64Builder(pool)
+	defer int64Builder.Release()
+
+	for i := 0; i < int32Array.Len(); i++ {
+		int64Builder.Append(int64(int32Array.Value(i)))
+	}
+	int64Array := int64Builder.NewInt64Array()
+	defer int64Array.Release()
+
+	fmt.Printf("Original Int32 values: %v\n", int32Array.Int32Values())
+	fmt.Printf("Converted Int64 values: %v\n", int64Array.Int64Values())
+
+	// 2. Handling nullable fields
+	fmt.Println("\nExample 2: Working with nullable fields")
+	float64Builder := array.NewFloat64Builder(pool)
+	defer float64Builder.Release()
+
+	// valid[i] == false marks element i as null; its entry in values is a
+	// placeholder that is never printed.
+	values := []float64{1.1, 2.2, 3.3, 4.4, 5.5}
+	valid := []bool{true, true, false, true, false}
+	float64Builder.AppendValues(values, valid)
+	float64Array := float64Builder.NewFloat64Array()
+	defer float64Array.Release()
+
+	stringBuilder := array.NewStringBuilder(pool)
+	defer stringBuilder.Release()
+
+	// Format every non-null float with two decimals; a null stays a null so
+	// both arrays have nulls at the same positions.
+	for i := 0; i < float64Array.Len(); i++ {
+		if float64Array.IsNull(i) {
+			stringBuilder.AppendNull()
+		} else {
+			stringBuilder.Append(fmt.Sprintf("%.2f", float64Array.Value(i)))
+		}
+	}
+	stringArray := stringBuilder.NewStringArray()
+	defer stringArray.Release()
+
+	fmt.Println("Original Float64 values (with nulls):")
+	for i := 0; i < float64Array.Len(); i++ {
+		if float64Array.IsNull(i) {
+			fmt.Printf(" [%d]: null\n", i)
+		} else {
+			fmt.Printf(" [%d]: %.2f\n", i, float64Array.Value(i))
+		}
+	}
+
+	fmt.Println("\nConverted String values (with nulls):")
+	for i := 0; i < stringArray.Len(); i++ {
+		if stringArray.IsNull(i) {
+			fmt.Printf(" [%d]: null\n", i)
+		} else {
+			fmt.Printf(" [%d]: %s\n", i, stringArray.Value(i))
+		}
+	}
+
+	// 3. Working with nested types (List)
+	fmt.Println("\nExample 3: Working with nested types (List)")
+	listBuilder := array.NewListBuilder(pool, arrow.PrimitiveTypes.Int32)
+	defer listBuilder.Release()
+
+	valueBuilder := listBuilder.ValueBuilder().(*array.Int32Builder)
+
+	// Each Append(true) starts a new non-null list; the values appended to
+	// the child builder right after it become that list's elements.
+	listBuilder.Append(true)
+	valueBuilder.AppendValues([]int32{1, 2}, nil)
+
+	listBuilder.Append(true)
+	valueBuilder.AppendValues([]int32{3, 4, 5}, nil)
+
+	listBuilder.Append(true)
+	valueBuilder.AppendValues([]int32{6}, nil)
+
+	listArray := listBuilder.NewListArray()
+	defer listArray.Release()
+
+	// Convert list to string representation. The child values and the
+	// offsets are the same for every row, so they are fetched once; row i
+	// covers child values [offsets[i], offsets[i+1]).
+	fmt.Println("List of lists:")
+	childValues := listArray.ListValues().(*array.Int32).Int32Values()
+	offsets := listArray.Offsets()
+	for i := 0; i < listArray.Len(); i++ {
+		fmt.Printf(" List %d: %v\n", i, childValues[offsets[i]:offsets[i+1]])
+	}
+
+	// Output:
+	// Example 1: Converting Int32 to Int64
+	// Original Int32 values: [1 2 3 4 5]
+	// Converted Int64 values: [1 2 3 4 5]
+	//
+	// Example 2: Working with nullable fields
+	// Original Float64 values (with nulls):
+	//  [0]: 1.10
+	//  [1]: 2.20
+	//  [2]: null
+	//  [3]: 4.40
+	//  [4]: null
+	//
+	// Converted String values (with nulls):
+	//  [0]: 1.10
+	//  [1]: 2.20
+	//  [2]: null
+	//  [3]: 4.40
+	//  [4]: null
+	//
+	// Example 3: Working with nested types (List)
+	// List of lists:
+	//  List 0: [1 2]
+	//  List 1: [3 4 5]
+	//  List 2: [6]
+}
diff --git a/arrow/csv/example_csv_test.go b/arrow/csv/example_csv_test.go
new file mode 100644
index 0000000..1fc3b18
--- /dev/null
+++ b/arrow/csv/example_csv_test.go
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package csv_test
+
+import (
+ "fmt"
+ "os"
+
+ "github.com/apache/arrow-go/v18/arrow"
+ "github.com/apache/arrow-go/v18/arrow/array"
+ arrowcsv "github.com/apache/arrow-go/v18/arrow/csv"
+)
+
+// Example_reader parses the aggregate_test_100.csv fixture with an explicit
+// 13-column schema and prints the dimensions of the resulting record along
+// with the average of each Int64 and Float64 column.
+func Example_reader() {
+	// Test csv file
+	const csvPath = "../../arrow-testing/data/csv/aggregate_test_100.csv"
+
+	src, err := os.Open(csvPath)
+	if err != nil {
+		fmt.Printf("Failed to open file: %v\n", err)
+		return
+	}
+	defer src.Close()
+
+	// Schema defined in the csv file: one string column, nine Int64
+	// columns, two Float64 columns and a trailing string column.
+	str := arrow.BinaryTypes.String
+	i64 := arrow.PrimitiveTypes.Int64
+	f64 := arrow.PrimitiveTypes.Float64
+	schema := arrow.NewSchema([]arrow.Field{
+		{Name: "c1", Type: str, Nullable: true},
+		{Name: "c2", Type: i64, Nullable: true},
+		{Name: "c3", Type: i64, Nullable: true},
+		{Name: "c4", Type: i64, Nullable: true},
+		{Name: "c5", Type: i64, Nullable: true},
+		{Name: "c6", Type: i64, Nullable: true},
+		{Name: "c7", Type: i64, Nullable: true},
+		{Name: "c8", Type: i64, Nullable: true},
+		{Name: "c9", Type: i64, Nullable: true},
+		{Name: "c10", Type: i64, Nullable: true},
+		{Name: "c11", Type: f64, Nullable: true},
+		{Name: "c12", Type: f64, Nullable: true},
+		{Name: "c13", Type: str, Nullable: true},
+	}, nil)
+
+	// The file starts with a header row; a negative chunk size asks the
+	// reader to deliver the whole file as a single record.
+	rdr := arrowcsv.NewReader(src, schema, arrowcsv.WithHeader(true), arrowcsv.WithChunk(-1))
+	defer rdr.Release()
+
+	// Read the first record
+	if !rdr.Next() {
+		switch err := rdr.Err(); {
+		case err != nil:
+			fmt.Printf("Error reading CSV: %v\n", err)
+		default:
+			fmt.Println("No records found")
+		}
+		return
+	}
+
+	// The record stays owned by the reader, so it is not released here.
+	rec := rdr.Record()
+
+	fmt.Printf("Number of rows: %d\n", rec.NumRows())
+	fmt.Printf("Number of columns: %d\n", rec.NumCols())
+	fmt.Println()
+
+	fmt.Println("Basic statistics for numeric columns:")
+	for idx := 1; idx < 10; idx++ { // cols c2 through c10 are Int64
+		ints := rec.Column(idx).(*array.Int64).Int64Values()
+		var total int64
+		for _, v := range ints {
+			total += v
+		}
+		fmt.Printf("Column c%d: Average = %.2f\n", idx+1, float64(total)/float64(len(ints)))
+	}
+
+	for idx := 10; idx < 12; idx++ { // cols c11 and c12 are Float64
+		floats := rec.Column(idx).(*array.Float64).Float64Values()
+		var total float64
+		for _, v := range floats {
+			total += v
+		}
+		fmt.Printf("Column c%d: Average = %.4f\n", idx+1, total/float64(len(floats)))
+	}
+
+	// Output:
+	// Number of rows: 100
+	// Number of columns: 13
+	//
+	// Basic statistics for numeric columns:
+	// Column c2: Average = 2.85
+	// Column c3: Average = 7.81
+	// Column c4: Average = 2319.97
+	// Column c5: Average = 158626279.61
+	// Column c6: Average = 59276376114661656.00
+	// Column c7: Average = 130.60
+	// Column c8: Average = 30176.41
+	// Column c9: Average = 2220897700.60
+	// Column c10: Average = -86834033398685392.00
+	// Column c11: Average = 0.4793
+	// Column c12: Average = 0.5090
+}
diff --git a/arrow/_examples/helloworld/main.go
b/arrow/example_table_creation_test.go
similarity index 58%
copy from arrow/_examples/helloworld/main.go
copy to arrow/example_table_creation_test.go
index c45c31b..4de47eb 100644
--- a/arrow/_examples/helloworld/main.go
+++ b/arrow/example_table_creation_test.go
@@ -8,16 +8,17 @@
//
// http://www.apache.org/licenses/LICENSE-2.0
//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
-package main
+package arrow_test
import (
- "os"
+ "fmt"
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/array"
@@ -25,28 +26,53 @@ import (
"github.com/apache/arrow-go/v18/arrow/memory"
)
-func main() {
+func Example_tableCreation() {
+ // Create a schema with three fields; only floatField is declared nullable
schema := arrow.NewSchema([]arrow.Field{
{Name: "intField", Type: arrow.PrimitiveTypes.Int64, Nullable:
false},
{Name: "stringField", Type: arrow.BinaryTypes.String, Nullable:
false},
{Name: "floatField", Type: arrow.PrimitiveTypes.Float64,
Nullable: true},
}, nil)
+ // Create a record builder with one field builder per schema field
builder := array.NewRecordBuilder(memory.DefaultAllocator, schema)
defer builder.Release()
+ // Append five values to each field; the validity mask marks floatField rows 1 and 3 as null
builder.Field(0).(*array.Int64Builder).AppendValues([]int64{1, 2, 3, 4,
5}, nil)
builder.Field(1).(*array.StringBuilder).AppendValues([]string{"a", "b",
"c", "d", "e"}, nil)
builder.Field(2).(*array.Float64Builder).AppendValues([]float64{1, 0,
3, 0, 5}, []bool{true, false, true, false, true})
+ // Create a record from the values appended so far
rec := builder.NewRecord()
defer rec.Release()
+ // Create a table from the record
tbl := array.NewTableFromRecords(schema, []arrow.Record{rec})
defer tbl.Release()
+ // Calculate sum of floatField over chunk 0, the only chunk since the table was built from one record
sum := math.Float64.Sum(tbl.Column(2).Data().Chunk(0).(*array.Float64))
- if sum != 9 {
- defer os.Exit(1)
+ fmt.Printf("Sum of floatField: %v\n", sum)
+
+ // Print the table contents. Column names are printed with one leading space, so their Output lines use two spaces after "//" (go/doc strips exactly one).
+ fmt.Println("\nTable contents:")
+ fmt.Printf("Number of rows: %d\n", tbl.NumRows())
+ fmt.Printf("Number of columns: %d\n", tbl.NumCols())
+ fmt.Println("\nColumn names:")
+ for i := 0; i < int(tbl.NumCols()); i++ {
+ fmt.Printf(" %s\n", tbl.Column(i).Name())
}
+
+ // Output:
+ // Sum of floatField: 9
+ //
+ // Table contents:
+ // Number of rows: 5
+ // Number of columns: 3
+ //
+ // Column names:
+ //  intField
+ //  stringField
+ //  floatField
}
diff --git a/arrow/_examples/helloworld/main.go
b/arrow/examples/table_creation/main.go
similarity index 64%
rename from arrow/_examples/helloworld/main.go
rename to arrow/examples/table_creation/main.go
index c45c31b..52e1782 100644
--- a/arrow/_examples/helloworld/main.go
+++ b/arrow/examples/table_creation/main.go
@@ -8,16 +8,17 @@
//
// http://www.apache.org/licenses/LICENSE-2.0
//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
package main
import (
- "os"
+ "fmt"
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/array"
@@ -26,27 +27,40 @@ import (
)
func main() {
+ // Create a schema with three fields; only floatField is declared nullable
schema := arrow.NewSchema([]arrow.Field{
{Name: "intField", Type: arrow.PrimitiveTypes.Int64, Nullable:
false},
{Name: "stringField", Type: arrow.BinaryTypes.String, Nullable:
false},
{Name: "floatField", Type: arrow.PrimitiveTypes.Float64,
Nullable: true},
}, nil)
+ // Create a record builder with one field builder per schema field
builder := array.NewRecordBuilder(memory.DefaultAllocator, schema)
defer builder.Release()
+ // Append five values to each field; the validity mask marks floatField rows 1 and 3 as null
builder.Field(0).(*array.Int64Builder).AppendValues([]int64{1, 2, 3, 4,
5}, nil)
builder.Field(1).(*array.StringBuilder).AppendValues([]string{"a", "b",
"c", "d", "e"}, nil)
builder.Field(2).(*array.Float64Builder).AppendValues([]float64{1, 0,
3, 0, 5}, []bool{true, false, true, false, true})
+ // Create a record from the values appended so far
rec := builder.NewRecord()
defer rec.Release()
+ // Create a table from the record
tbl := array.NewTableFromRecords(schema, []arrow.Record{rec})
defer tbl.Release()
+ // Calculate sum of floatField over chunk 0, the only chunk since the table was built from one record
sum := math.Float64.Sum(tbl.Column(2).Data().Chunk(0).(*array.Float64))
- if sum != 9 {
- defer os.Exit(1)
+ fmt.Printf("Sum of floatField: %v\n", sum)
+
+ // Print the table contents: row count, column count, then one line per column name
+ fmt.Println("\nTable contents:")
+ fmt.Printf("Number of rows: %d\n", tbl.NumRows())
+ fmt.Printf("Number of columns: %d\n", tbl.NumCols())
+ fmt.Println("\nColumn names:")
+ for i := 0; i < int(tbl.NumCols()); i++ {
+ fmt.Printf(" %s\n", tbl.Column(i).Name())
}
}
diff --git a/arrow/memory/example_memory_perf_test.go
b/arrow/memory/example_memory_perf_test.go
new file mode 100644
index 0000000..c7047e8
--- /dev/null
+++ b/arrow/memory/example_memory_perf_test.go
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package memory_test
+
+import (
+ "fmt"
+
+ "github.com/apache/arrow-go/v18/arrow/array"
+ "github.com/apache/arrow-go/v18/arrow/memory"
+)
+
+// Example_memoryPerf builds a 10,000,000-element Int64 array in one pass and
+// then rebuilds the same range of values in batches of 1,000,000, releasing
+// each batch's array and builder before the next batch starts.
+//
+// Every builder calls Reserve with the exact element count before appending,
+// so it can size its buffers up front instead of resizing while the loop
+// appends.
+func Example_memoryPerf() {
+	// Create a memory allocator
+	pool := memory.NewGoAllocator()
+
+	// Create a large Arrow array
+	fmt.Println("Creating a large Arrow Int64 array...")
+	const N = 10_000_000
+	builder := array.NewInt64Builder(pool)
+	defer builder.Release()
+
+	// The element count is known, so make room for all of it at once.
+	builder.Reserve(N)
+	for i := 0; i < N; i++ {
+		builder.Append(int64(i))
+	}
+	arr := builder.NewInt64Array()
+	defer arr.Release()
+
+	fmt.Printf("Built array of %d elements\n", N)
+
+	// Batch processing
+	fmt.Println("\nBatch processing large data in chunks...")
+	batchSize := 1_000_000
+	for batch := 0; batch < N; batch += batchSize {
+		// Clamp the final batch in case N is not a multiple of batchSize.
+		end := batch + batchSize
+		if end > N {
+			end = N
+		}
+		b := array.NewInt64Builder(pool)
+		b.Reserve(end - batch)
+		for i := batch; i < end; i++ {
+			b.Append(int64(i))
+		}
+		chunk := b.NewInt64Array()
+		// Simulate processing
+		_ = chunk.Value(0)
+		// Release inside the loop rather than with defer, so each batch is
+		// released before the next one is built.
+		chunk.Release()
+		b.Release()
+	}
+	fmt.Printf("Processed %d elements in batches of %d\n", N, batchSize)
+
+	// Output:
+	// Creating a large Arrow Int64 array...
+	// Built array of 10000000 elements
+	//
+	// Batch processing large data in chunks...
+	// Processed 10000000 elements in batches of 1000000
+}
diff --git a/ci/scripts/tinygo_example.sh b/ci/scripts/tinygo_example.sh
index 1b75a46..c811c8d 100755
--- a/ci/scripts/tinygo_example.sh
+++ b/ci/scripts/tinygo_example.sh
@@ -21,7 +21,7 @@ set -eux
cd ~
pushd /src
-tinygo build -tags noasm -o ~/example_tinygo arrow/_examples/helloworld/main.go
+tinygo build -tags noasm -o ~/example_tinygo
arrow/examples/table_creation/main.go
popd
./example_tinygo