This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git


The following commit(s) were added to refs/heads/main by this push:
     new 3160eef9 doc(parquet): document arrow parquet mappings (#561)
3160eef9 is described below

commit 3160eef9c227d94db67bfaf5225a2d6c1f48bc76
Author: Bryce Mecum <[email protected]>
AuthorDate: Sat Nov 8 11:43:23 2025 -0800

    doc(parquet): document arrow parquet mappings (#561)
    
    ### Rationale for this change
    
    Documents the Arrow to Parquet type conversions and documents which
    types have no conversion. Closes #403
    
    ### What changes are included in this PR?
    
    I'm sneaking this into the doc.go file for the parquet package so this
    PR just adds text there.
    
    ### Are these changes tested?
    
    Visually and partially with some unit tests (not included).
    
    ### Are there any user-facing changes?
    
    No
---
 parquet/doc.go | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/parquet/doc.go b/parquet/doc.go
index 751b792b..13fc3f43 100644
--- a/parquet/doc.go
+++ b/parquet/doc.go
@@ -75,6 +75,60 @@
 //
 // Tip: Some platforms don't necessarily support all kinds of encodings. If you're not
 // sure what to use, just use Plain and Dictionary encoding.
+//
+// # Arrow to Parquet Type Mappings
+//
+// When reading and writing Parquet, the parquet package converts between Arrow
+// and Parquet types in the manner described in the table below.
+//
+// When converting a Parquet type where a large and non-large offset Arrow type
+// would work, the non-large variant is chosen. If the Parquet file is written
+// with `WithStoreSchema`, types will be preserved and dictionaries will be
+// restored when round-tripping.
+//
+//     Arrow Type              Parquet Physical Type     Parquet Logical Type
+//     ----------              ---------------------     --------------------
+//     NULL                    Int32                     Null
+//     BOOL                    Boolean                   -
+//     INT8                    Int32                     Int(8, signed)
+//     UINT8                   Int32                     Int(8, unsigned)
+//     INT16                   Int32                     Int(16, signed)
+//     UINT16                  Int32                     Int(16, unsigned)
+//     INT32                   Int32                     Int(32, signed)
+//     UINT32                  Int32                     Int(32, unsigned)
+//     INT64                   Int64                     Int(64, signed)
+//     UINT64                  Int64                     Int(64, unsigned)
+//     FLOAT16                 FixedLenByteArray(2)      Float16
+//     FLOAT32                 Float                     -
+//     FLOAT64                 Double                    -
+//     STRING                  ByteArray                 String
+//     LARGE_STRING            ByteArray                 String
+//     BINARY                  ByteArray                 -
+//     LARGE_BINARY            ByteArray                 -
+//     FIXED_SIZE_BINARY       FixedLenByteArray         -
+//     DECIMAL128              Int32/Int64/FLBA*         Decimal
+//     DECIMAL256              Int32/Int64/FLBA*         Decimal
+//     DATE32                  Int32                     Date
+//     DATE64                  Int32                     Date
+//     TIMESTAMP               Int64 or Int96            Timestamp
+//     TIME32                  Int32                     Time(millis)
+//     TIME64                  Int64                     Time(micros/nanos)
+//     LIST                    Group (LIST)              -
+//     FIXED_SIZE_LIST         Group (LIST)              -
+//     STRUCT                  Group                     -
+//     MAP                     Group (MAP)               -
+//     DICTIONARY              (converted to value type) -
+//     EXTENSION               (depends on storage)      (may be custom)
+//
+// * FLBA means FixedLenByteArray
+//
+// Unsupported Arrow Types (will return arrow.ErrNotImplemented):
+//
+//     DURATION, INTERVAL_MONTHS, INTERVAL_DAY_TIME, INTERVAL_MONTH_DAY_NANO
+//     SPARSE_UNION, DENSE_UNION
+//     STRING_VIEW, BINARY_VIEW, LIST_VIEW, LARGE_LIST_VIEW
+//     LARGE_LIST, RUN_END_ENCODED
+//     DECIMAL32, DECIMAL64
 package parquet
 
 //go:generate go run golang.org/x/tools/cmd/stringer -type=Version -linecomment

Reply via email to