zeroshade commented on code in PR #37599:
URL: https://github.com/apache/arrow/pull/37599#discussion_r1339017226


##########
go/parquet/file/column_writer_types.gen.go.tmpl:
##########
@@ -212,7 +221,16 @@ func (w *{{.Name}}ColumnChunkWriter) 
writeValuesSpaced(spacedValues []{{.name}},
   }
   if w.pageStatistics != nil {
     nulls := numValues - numRead
+{{- if ne .Name "FixedLenByteArray"}}
     
w.pageStatistics.(*metadata.{{.Name}}Statistics).UpdateSpaced(spacedValues, 
validBits, validBitsOffset, nulls)
+{{- else}}
+    s, ok := w.pageStatistics.(*metadata.{{.Name}}Statistics)
+    if ok {
+      s.UpdateSpaced(spacedValues, validBits, validBitsOffset, nulls)
+    } else {
+      
w.pageStatistics.(*metadata.Float16Statistics).UpdateSpaced(spacedValues, 
validBits, validBitsOffset, nulls)
+    }
+{{- end}}

Review Comment:
   same as before



##########
go/parquet/file/column_writer_types.gen.go.tmpl:
##########
@@ -193,14 +193,23 @@ func (w *{{.Name}}ColumnChunkWriter) 
WriteDictIndices(indices arrow.Array, defLe
 
     valueOffset += info.numSpaced()
   })
-  
+
   return
 }
 
 func (w *{{.Name}}ColumnChunkWriter) writeValues(values []{{.name}}, numNulls 
int64) {
   w.currentEncoder.(encoding.{{.Name}}Encoder).Put(values)
   if w.pageStatistics != nil {
+{{- if ne .Name "FixedLenByteArray"}}
     w.pageStatistics.(*metadata.{{.Name}}Statistics).Update(values, numNulls)
+{{- else}}
+    s, ok := w.pageStatistics.(*metadata.{{.Name}}Statistics)
+    if ok {
+      s.Update(values, numNulls)
+    } else {
+      w.pageStatistics.(*metadata.Float16Statistics).Update(values, numNulls)
+    }
+{{- end}}

Review Comment:
   would probably be better to check `w.Descr().LogicalType()` and check if 
it's a Float16 column rather than make this assumption which might bite us in 
the future.



##########
go/arrow/float16/float16.go:
##########
@@ -161,5 +172,25 @@ func (n Num) Sign() int {
        return -1
 }
 
+func (n Num) Signbit() bool { return (n.bits & 0x8000) != 0 }

Review Comment:
   Should we use this to improve the `Sign` method?



##########
go/parquet/internal/testutils/random.go:
##########
@@ -369,6 +370,18 @@ func randFloat64(r *rand.Rand) float64 {
        }
 }
 
+// randFloat16 creates a random float value with a normal distribution
+// to better spread the values out and ensure we do not return any NaN or Inf 
values.
+func randFloat16(r *rand.Rand) float16.Num {
+       for {
+               f16 := float16.FromBits(uint16(r.Uint64n(math.MaxUint16 + 1)))
+               f64 := float64(f16.Float32())
+               if !math.IsNaN(f64) && !math.IsInf(f64, 0) {

Review Comment:
   It might make more sense to leave the NaN's and infinities to ensure we're 
covering those cases for testing. Alternately we could just make sure we test 
those cases separately



##########
go/arrow/float16/float16.go:
##########
@@ -29,6 +30,11 @@ type Num struct {
        bits uint16
 }
 
+var (
+       MaxNum = Num{bits: 0b0111101111111111}
+       MinNum = MaxNum.Negate()
+)

Review Comment:
   might make sense to add `func Inf() float16.Num` and `func NaN() 
float16.Num` functions to this package



##########
go/parquet/schema/reflection_test.go:
##########
@@ -152,6 +152,7 @@ func ExampleNewSchemaFromStruct_logicaltypes() {
                JSON                  string   `parquet:"logical=json"`
                BSON                  []byte   `parquet:"logical=BSON"`
                UUID                  [16]byte `parquet:"logical=uuid"`
+               Float16               [2]byte  `parquet:"logical=float16"`

Review Comment:
   can we add a test for an optional one (instead of required) which would use 
`*[2]byte` as the type?
   can we also add a test that uses `float16.Num` as the type?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to