n3world commented on a change in pull request #10790:
URL: https://github.com/apache/arrow/pull/10790#discussion_r700693691



##########
File path: cpp/src/arrow/csv/parser.h
##########
@@ -61,25 +62,41 @@ class ARROW_EXPORT DataBatch {
   int32_t num_cols() const { return num_cols_; }
   /// \brief Return the total size in bytes of parsed data
   uint32_t num_bytes() const { return parsed_size_; }
+  /// \brief Return the total number of rows skipped
+  int32_t num_skipped_rows() const {
+    if (ARROW_PREDICT_TRUE(skipped_rows_.empty())) {
+      return 0;
+    }
+    int32_t skipped = 0;
+    for (const auto& skip_range : skipped_rows_) {
+      skipped += skip_range.second - skip_range.first + 1;
+    }
+    return skipped;
+  }
 
   template <typename Visitor>
   Status VisitColumn(int32_t col_index, int64_t first_row, Visitor&& visit) const {
     using detail::ParsedValueDesc;
 
-    int64_t row = first_row;
+    int32_t batch_row = 0;
     for (size_t buf_index = 0; buf_index < values_buffers_.size(); ++buf_index) {
       const auto& values_buffer = values_buffers_[buf_index];
       const auto values = reinterpret_cast<const ParsedValueDesc*>(values_buffer->data());
       const auto max_pos =
           static_cast<int32_t>(values_buffer->size() / sizeof(ParsedValueDesc)) - 1;
-      for (int32_t pos = col_index; pos < max_pos; pos += num_cols_, ++row) {
+      for (int32_t pos = col_index; pos < max_pos; pos += num_cols_, ++batch_row) {
         auto start = values[pos].offset;
         auto stop = values[pos + 1].offset;
         auto quoted = values[pos + 1].quoted;
         Status status = visit(parsed_ + start, stop - start, quoted);
         if (ARROW_PREDICT_FALSE(!status.ok())) {
           if (first_row >= 0) {

Review comment:
       Done




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to