daniel-adam-tfs commented on code in PR #547:
URL: https://github.com/apache/arrow-go/pull/547#discussion_r2716904592
##########
parquet/file/column_reader_test.go:
##########
@@ -813,6 +826,145 @@ func TestFullSeekRow(t *testing.T) {
}
}
+func checkDecryptedValues(t *testing.T, writerProps *parquet.WriterProperties,
readProps *parquet.ReaderProperties) {
+ sc := arrow.NewSchema([]arrow.Field{
+ {Name: "c0", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
+ {Name: "c1", Type: arrow.BinaryTypes.String, Nullable: true},
+ {Name: "c2", Type: arrow.ListOf(arrow.PrimitiveTypes.Int64),
Nullable: true},
+ }, nil)
+
+ tbl, err := array.TableFromJSON(mem, sc, []string{`[
+ {"c0": 1, "c1": "a", "c2": [1]},
+ {"c0": 2, "c1": "b", "c2": [1, 2]},
+ {"c0": 3, "c1": "c", "c2": [null]},
+ {"c0": null, "c1": "d", "c2": []},
+ {"c0": 5, "c1": null, "c2": [3, 3, 3]},
+ {"c0": 6, "c1": "f", "c2": null}
+ ]`})
+ require.NoError(t, err)
+ defer tbl.Release()
+
+ schema := tbl.Schema()
+ arrWriterProps := pqarrow.NewArrowWriterProperties()
+
+ var buf bytes.Buffer
+ wr, err := pqarrow.NewFileWriter(schema, &buf, writerProps,
arrWriterProps)
+ require.NoError(t, err)
+
+ require.NoError(t, wr.WriteTable(tbl, tbl.NumRows()))
+ require.NoError(t, wr.Close())
+
+ rdr, err := file.NewParquetReader(bytes.NewReader(buf.Bytes()),
file.WithReadProps(readProps))
+ require.NoError(t, err)
+ defer rdr.Close()
+
+ rgr := rdr.RowGroup(0)
+ col0, err := rgr.Column(0)
+ require.NoError(t, err)
+
+ icr := col0.(*file.Int64ColumnChunkReader)
+ // require.NoError(t, icr.SeekToRow(3)) // TODO: this causes a panic
currently
Review Comment:
It seems to be related to these lines:
https://github.com/apache/arrow-go/blob/38dc64bb93890aa0c4b292f58ac3344ced9d12da/parquet/file/page_reader.go#L699-L702
I think this is intended to skip past the dictionary page to the data page.
But for some reason after this Seek the
https://github.com/apache/arrow-go/blob/38dc64bb93890aa0c4b292f58ac3344ced9d12da/parquet/file/page_reader.go#L663
call fails. The offset to the data page is correct, so I suspect that
some internal state of the decryptor or the page reader is affected by skipping
past the parsing of the dictionary page.
But let me park this for now in
https://github.com/apache/arrow-go/issues/566. We are benchmarking
various formats + readers of unencrypted data, so I want to make some
optimizations (including this PR, which according to my benchmarks speeds things
up a little).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]