pitrou commented on code in PR #12829:
URL: https://github.com/apache/arrow/pull/12829#discussion_r860676066


##########
cpp/src/parquet/arrow/reader_internal.cc:
##########
@@ -434,14 +480,26 @@ Status TransferBinary(RecordReader* reader, MemoryPool* pool,
   DCHECK(binary_reader);
   auto chunks = binary_reader->GetBuilderChunks();
   for (auto& chunk : chunks) {
-    if (!chunk->type()->Equals(*logical_value_type)) {
+    if (!chunk->type()->Equals(*logical_type_field->type())) {
       // XXX: if a LargeBinary chunk is larger than 2GB, the MSBs of offsets
       // will be lost because they are first created as int32 and then cast to int64.
       ARROW_ASSIGN_OR_RAISE(
-          chunk, ::arrow::compute::Cast(*chunk, logical_value_type, cast_options, &ctx));
+          chunk,
+          ::arrow::compute::Cast(*chunk, logical_type_field->type(), cast_options, &ctx));
     }
   }
-  *out = std::make_shared<ChunkedArray>(chunks, logical_value_type);
+  if (!logical_type_field->nullable()) {

Review Comment:
   Perhaps extract this operation into a helper function and refactor the call sites to use it?



##########
cpp/src/parquet/arrow/reader_internal.cc:
##########
@@ -409,22 +441,36 @@ Status TransferDate64(RecordReader* reader, MemoryPool* pool,
 
 Status TransferDictionary(RecordReader* reader,
                           const std::shared_ptr<DataType>& logical_value_type,
-                          std::shared_ptr<ChunkedArray>* out) {
+                          bool nullable, std::shared_ptr<ChunkedArray>* out) {
   auto dict_reader = dynamic_cast<DictionaryRecordReader*>(reader);
   DCHECK(dict_reader);
   *out = dict_reader->GetResult();
   if (!logical_value_type->Equals(*(*out)->type())) {
     ARROW_ASSIGN_OR_RAISE(*out, (*out)->View(logical_value_type));
   }
+  if (!nullable) {
+    // Reconstruct each chunk without nulls.

Review Comment:
   Is this necessary for correctness, or is it just an optimization that elides
   the null buffer?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to