zeroshade commented on a change in pull request #10951:
URL: https://github.com/apache/arrow/pull/10951#discussion_r700342916



##########
File path: go/parquet/metadata/file.go
##########
@@ -0,0 +1,478 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package metadata
+
+import (
+       "bytes"
+       "context"
+       "io"
+       "reflect"
+
+       "github.com/apache/arrow/go/parquet"
+       "github.com/apache/arrow/go/parquet/compress"
+       "github.com/apache/arrow/go/parquet/internal/encryption"
+       format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+       "github.com/apache/arrow/go/parquet/internal/thrift"
+       "github.com/apache/arrow/go/parquet/schema"
+       "golang.org/x/xerrors"
+)
+
+// DefaultCompressionType is the codec used unless a different compression is
+// specified in the properties. It is initialized to compress.Codecs.Uncompressed.
+var DefaultCompressionType = compress.Codecs.Uncompressed
+
+// FileMetaDataBuilder is a proxy for more easily constructing file metadata
+// particularly used when writing a file out.
+//
+// Row groups are added one at a time with AppendRowGroup, and Finish folds
+// the accumulated state into the thrift FileMetaData structure.
+type FileMetaDataBuilder struct {
+       // metadata is the thrift structure that Finish populates.
+       metadata       *format.FileMetaData
+       // props supplies the format version, created-by string and file
+       // encryption properties consulted while building.
+       props          *parquet.WriterProperties
+       // schema is the file schema; Finish converts its root to thrift and
+       // uses its column count to size the column orders.
+       schema         *schema.Schema
+       // rowGroups collects the row groups created by AppendRowGroup.
+       rowGroups      []*format.RowGroup
+       // currentRgBldr is the builder returned by the most recent
+       // AppendRowGroup call.
+       currentRgBldr  *RowGroupMetaDataBuilder
+       // kvmeta is the key/value metadata Finish attaches to the file metadata.
+       kvmeta         KeyValueMetadata
+       // cryptoMetadata is non-nil only when the builder was created with file
+       // encryption properties that request an encrypted footer.
+       cryptoMetadata *format.FileCryptoMetaData
+}
+
+// NewFileMetadataBuilder returns a builder for the file metadata of the given
+// schema.
+//
+// If props is nil, the default writer properties (parquet.NewWriterProperties
+// with no options) are used. kvmeta may be nil.
+//
+// The file crypto metadata is only allocated when props carries file
+// encryption properties that request an encrypted footer.
+func NewFileMetadataBuilder(schema *schema.Schema, props *parquet.WriterProperties, kvmeta KeyValueMetadata) *FileMetaDataBuilder {
+       // Honor the documented contract: a nil props would otherwise be
+       // dereferenced below and again later when the metadata is finalized.
+       if props == nil {
+               props = parquet.NewWriterProperties()
+       }
+
+       var crypto *format.FileCryptoMetaData
+       if encProps := props.FileEncryptionProperties(); encProps != nil && encProps.EncryptedFooter() {
+               crypto = format.NewFileCryptoMetaData()
+       }
+       return &FileMetaDataBuilder{
+               metadata:       format.NewFileMetaData(),
+               props:          props,
+               schema:         schema,
+               kvmeta:         kvmeta,
+               cryptoMetadata: crypto,
+       }
+}
+
+// GetFileCryptoMetaData returns the cryptographic information for encrypting/
+// decrypting the file, or nil when the builder holds no crypto metadata.
+//
+// Each call refreshes the stored crypto metadata from the writer's file
+// encryption properties: the algorithm is always set, and the key metadata
+// is set only when the footer key metadata is non-empty.
+func (f *FileMetaDataBuilder) GetFileCryptoMetaData() *FileCryptoMetadata {
+       crypto := f.cryptoMetadata
+       if crypto == nil {
+               return nil
+       }
+
+       encProps := f.props.FileEncryptionProperties()
+       crypto.EncryptionAlgorithm = encProps.Algorithm().ToThrift()
+       if footerKey := encProps.FooterKeyMetadata(); footerKey != "" {
+               crypto.KeyMetadata = []byte(footerKey)
+       }
+
+       return &FileCryptoMetadata{crypto, 0}
+}
+
+// AppendRowGroup creates a new row group, appends it to the builder's list
+// and returns a metadata builder for it. The returned builder is also
+// remembered as the builder's current row group builder.
+func (f *FileMetaDataBuilder) AppendRowGroup() *RowGroupMetaDataBuilder {
+       group := format.NewRowGroup()
+       // append allocates the backing array when the slice is still nil.
+       f.rowGroups = append(f.rowGroups, group)
+
+       bldr := NewRowGroupMetaDataBuilder(f.props, f.schema, group)
+       f.currentRgBldr = bldr
+       return bldr
+}
+
+// Finish will finalize the metadata of the number of rows, row groups,
+// version etc. This will clear out this filemetadatabuilder so it can
+// be re-used
+func (f *FileMetaDataBuilder) Finish() (*FileMetaData, error) {
+       totalRows := int64(0)
+       for _, rg := range f.rowGroups {
+               totalRows += rg.NumRows
+       }
+       f.metadata.NumRows = totalRows
+       f.metadata.RowGroups = f.rowGroups
+       switch f.props.Version() {
+       case parquet.V1:
+               f.metadata.Version = 1
+       case parquet.V2:
+               f.metadata.Version = 2
+       default:
+               f.metadata.Version = 0
+       }
+       createdBy := f.props.CreatedBy()
+       f.metadata.CreatedBy = &createdBy
+
+       // Users cannot set the `ColumnOrder` since we do not have user-defined
+       // sort orders in the spec yet.
+       // We always default to `TYPE_DEFINED_ORDER`. We can expose it in
+       // the API once we have user defined sort orders in the Parquet format.
+       // TypeDefinedOrder implies choosing the SortOrder based on the
+       // ConvertedType/PhysicalType.
+       typeDefined := format.NewTypeDefinedOrder()
+       colOrder := &format.ColumnOrder{TYPE_ORDER: typeDefined}
+       f.metadata.ColumnOrders = make([]*format.ColumnOrder, 
f.schema.NumColumns())
+       for idx := range f.metadata.ColumnOrders {
+               f.metadata.ColumnOrders[idx] = colOrder
+       }
+
+       fileEncProps := f.props.FileEncryptionProperties()
+       if fileEncProps != nil && !fileEncProps.EncryptedFooter() {
+               var signingAlgo parquet.Algorithm
+               algo := fileEncProps.Algorithm()
+               signingAlgo.Aad.AadFileUnique = algo.Aad.AadFileUnique
+               signingAlgo.Aad.SupplyAadPrefix = algo.Aad.SupplyAadPrefix
+               if !algo.Aad.SupplyAadPrefix {
+                       signingAlgo.Aad.AadPrefix = algo.Aad.AadPrefix
+               }
+               signingAlgo.Algo = parquet.AesGcm
+               f.metadata.EncryptionAlgorithm = signingAlgo.ToThrift()
+               footerSigningMetadata := 
f.props.FileEncryptionProperties().FooterKeyMetadata()
+               if footerSigningMetadata != "" {
+                       f.metadata.FooterSigningKeyMetadata = 
[]byte(footerSigningMetadata)
+               }
+       }
+
+       f.metadata.Schema = schema.ToThrift(f.schema.Root())
+       f.metadata.KeyValueMetadata = f.kvmeta
+
+       out := &FileMetaData{
+               FileMetaData: f.metadata,
+               version:      NewAppVersion(f.metadata.GetCreatedBy()),

Review comment:
       At the top of `go/parquet/writer_properties.go` there is a constant
`DefaultCreatedBy`, which references `parquet-go`.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to