pitrou commented on code in PR #41346:
URL: https://github.com/apache/arrow/pull/41346#discussion_r1603740836


##########
cpp/src/parquet/column_reader.cc:
##########
@@ -1401,26 +1423,22 @@ class TypedRecordReader : public 
TypedColumnReaderImpl<DType>,
         int16_t* def_levels = this->def_levels() + levels_written_;
         int16_t* rep_levels = this->rep_levels() + levels_written_;
 
-        // Not present for non-repeated fields
-        int64_t levels_read = 0;
+        int64_t levels_read = this->ReadDefinitionLevels(batch_size, 
def_levels);
         if (this->max_rep_level_ > 0) {
-          levels_read = this->ReadDefinitionLevels(batch_size, def_levels);
-          if (this->ReadRepetitionLevels(batch_size, rep_levels) != 
levels_read) {
-            throw ParquetException("Number of decoded rep / def levels did not 
match");
+          int64_t rep_levels_read = this->ReadRepetitionLevels(batch_size, 
rep_levels);
+          if (rep_levels_read != levels_read) {
+            throw ParquetException(kErrorRepDefLevelNotMatchesNumValues);
           }
-        } else if (this->max_def_level_ > 0) {
-          levels_read = this->ReadDefinitionLevels(batch_size, def_levels);
         }
 
-        // Exhausted column chunk
-        if (levels_read == 0) {
-          break;
+        if (ARROW_PREDICT_FALSE(batch_size != levels_read)) {
+          throw ParquetException(kErrorRepDefLevelNotMatchesNumValues);
         }
 
         levels_written_ += levels_read;

Review Comment:
   Can it also be `levels_written_ += batch_size`? That is, we don't need 
`levels_read` at all.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to