daniel-adam-tfs commented on code in PR #547:
URL: https://github.com/apache/arrow-go/pull/547#discussion_r2479063958
##########
parquet/file/column_reader_test.go:
##########
@@ -813,6 +826,145 @@ func TestFullSeekRow(t *testing.T) {
}
}
+func checkDecryptedValues(t *testing.T, writerProps *parquet.WriterProperties,
readProps *parquet.ReaderProperties) {
+ sc := arrow.NewSchema([]arrow.Field{
+ {Name: "c0", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
+ {Name: "c1", Type: arrow.BinaryTypes.String, Nullable: true},
+ {Name: "c2", Type: arrow.ListOf(arrow.PrimitiveTypes.Int64),
Nullable: true},
+ }, nil)
+
+ tbl, err := array.TableFromJSON(mem, sc, []string{`[
+ {"c0": 1, "c1": "a", "c2": [1]},
+ {"c0": 2, "c1": "b", "c2": [1, 2]},
+ {"c0": 3, "c1": "c", "c2": [null]},
+ {"c0": null, "c1": "d", "c2": []},
+ {"c0": 5, "c1": null, "c2": [3, 3, 3]},
+ {"c0": 6, "c1": "f", "c2": null}
+ ]`})
+ require.NoError(t, err)
+ defer tbl.Release()
+
+ schema := tbl.Schema()
+ arrWriterProps := pqarrow.NewArrowWriterProperties()
+
+ var buf bytes.Buffer
+ wr, err := pqarrow.NewFileWriter(schema, &buf, writerProps,
arrWriterProps)
Review Comment:
@zeroshade I think there is a bug in pqarrow writer. For the data page v2
buffer, it first writes the levels (definition and repetition) and then the
values. Only values are compressed. However, this whole buffer is then
encrypted. Unless ChatGPT is hallucinating on me, only the compressed
values should be encrypted; the levels should stay unencrypted and
uncompressed.
I'll check with some encrypted Parquet files created in a different way on
Monday and see what happens.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]