bkietz commented on code in PR #39963:
URL: https://github.com/apache/arrow/pull/39963#discussion_r1481900836


##########
cpp/src/arrow/csv/reader.cc:
##########
@@ -719,66 +726,25 @@ class ReaderMixin {
     return Status::OK();
   }
 
-  struct ParseResult {
-    std::shared_ptr<BlockParser> parser;
-    int64_t parsed_bytes;
-  };
-
-  Result<ParseResult> Parse(const std::shared_ptr<Buffer>& partial,
-                            const std::shared_ptr<Buffer>& completion,
-                            const std::shared_ptr<Buffer>& block, int64_t 
block_index,
-                            bool is_final) {
-    static constexpr int32_t max_num_rows = 
std::numeric_limits<int32_t>::max();
-    auto parser = std::make_shared<BlockParser>(
-        io_context_.pool(), parse_options_, num_csv_cols_, num_rows_seen_, 
max_num_rows);
-
-    std::shared_ptr<Buffer> straddling;
-    std::vector<std::string_view> views;
-    if (partial->size() != 0 || completion->size() != 0) {
-      if (partial->size() == 0) {
-        straddling = completion;
-      } else if (completion->size() == 0) {
-        straddling = partial;
-      } else {
-        ARROW_ASSIGN_OR_RAISE(
-            straddling, ConcatenateBuffers({partial, completion}, 
io_context_.pool()));
-      }
-      views = {std::string_view(*straddling), std::string_view(*block)};
-    } else {
-      views = {std::string_view(*block)};
-    }
-    uint32_t parsed_size;
-    if (is_final) {
-      RETURN_NOT_OK(parser->ParseFinal(views, &parsed_size));
-    } else {
-      RETURN_NOT_OK(parser->Parse(views, &parsed_size));
-    }
-    // See BlockParsingOperator for explanation.
-    const int64_t bytes_before_buffer = partial->size() + completion->size();
-    if (static_cast<int64_t>(parsed_size) < bytes_before_buffer) {
-      return Status::Invalid(
-          "CSV parser got out of sync with chunker. This can mean the data 
file "
-          "contains cell values spanning multiple lines; please consider 
enabling "
-          "the option 'newlines_in_values'.");
-    }
+  Result<ParsedBlock> Parse(const CSVBlock& block) {
+    DCHECK(parsing_operator_.has_value());

Review Comment:
   It might be desirable to hoist the assertion from ReadAsync():
   ```suggestion
       DCHECK(parsing_operator_.has_value());
       DCHECK(!block.consume_bytes);
   ```
   
   It may also be worth adding a comment clarifying when `consume_bytes` will be null
(IIUC, when dealing with a final block, where we no longer need to track
consumption?)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to