emkornfield commented on code in PR #41346:
URL: https://github.com/apache/arrow/pull/41346#discussion_r1597546313


##########
cpp/src/parquet/column_reader.cc:
##########
@@ -1026,28 +1034,34 @@ class TypedColumnReaderImpl : public 
TypedColumnReader<DType>,
 
   // Read definition and repetition levels. Also return the number of 
definition levels
   // and number of values to read. This function is called before reading 
values.
+  //
+  // ReadLevels will throw exception when any num-levels read is not equal to 
the number
+  // of the levels can be read.
   void ReadLevels(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
-                  int64_t* num_def_levels, int64_t* values_to_read) {
-    batch_size =
-        std::min(batch_size, this->num_buffered_values_ - 
this->num_decoded_values_);
+                  int64_t* num_def_levels, int64_t* non_null_values_to_read) {
+    batch_size = std::min(batch_size, this->available_values_current_page());
 
     // If the field is required and non-repeated, there are no definition 
levels
     if (this->max_def_level_ > 0 && def_levels != nullptr) {
       *num_def_levels = this->ReadDefinitionLevels(batch_size, def_levels);
+      if (ARROW_PREDICT_FALSE(*num_def_levels != batch_size)) {
+        throw ParquetException(kErrorRepDefLevelNotMatchesNumValues);
+      }
       // TODO(wesm): this tallying of values-to-decode can be performed with 
better
       // cache-efficiency if fused with the level decoding.
-      *values_to_read +=
+      *non_null_values_to_read +=
           std::count(def_levels, def_levels + *num_def_levels, 
this->max_def_level_);
     } else {
       // Required field, read all values
-      *values_to_read = batch_size;
+      *num_def_levels = 0;

Review Comment:
   Do all call sites pass in a valid pointer if max_def_level_ == 0?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to