morningman commented on a change in pull request #5774:
URL: https://github.com/apache/incubator-doris/pull/5774#discussion_r631911962



##########
File path: be/src/exec/json_scanner.cpp
##########
@@ -270,27 +339,20 @@ void JsonReader::_close() {
     if (_closed) {
         return;
     }
-    if (typeid(*_file_reader) == typeid(doris::BrokerReader) ||
-        typeid(*_file_reader) == typeid(doris::LocalFileReader)) {
-        _file_reader->close();
-        delete _file_reader;
-    }
     _closed = true;
 }
 
-// read one json string from file read and parse it to json doc.
+// read one json string from line read and parse it to json doc.
 // return Status::DataQualityError() if data has quality error.
 // return other error if encounter other problemes.
 // return Status::OK() if parse succeed or reach EOF.
-Status JsonReader::_parse_json_doc(bool* eof) {
-    // read a whole message, must be delete json_str by `delete[]`
+Status JsonReader::_parse_json_doc(size_t* size, bool* eof) {
+    // read a whole message
     SCOPED_TIMER(_file_read_timer);
-    std::unique_ptr<uint8_t[]> json_str;
-    size_t length = 0;
-    RETURN_IF_ERROR(_file_reader->read_one_message(&json_str, &length));
-    _bytes_read_counter += length;
-    if (length == 0) {
-        *eof = true;
+    const uint8_t* json_str = nullptr;
+    RETURN_IF_ERROR(_line_reader->read_line(&json_str, size, eof));

Review comment:
       What if the content of the load file is a JSON object that spans multiple lines? For example:
   ```
   {
       "name" : "zs",
       "sex" : "male"
   }
   ```
   Will the first call of `read_line()` return only `"{"`?

##########
File path: be/src/exec/json_scanner.cpp
##########
@@ -141,13 +172,43 @@ Status JsonScanner::open_next_reader() {
         return Status::InternalError(ss.str());
     }
     }
+    return Status::OK();
+}
+
+Status JsonScanner::open_line_reader() {
+    if (_cur_line_reader != nullptr) {
+        delete _cur_line_reader;
+        _cur_line_reader = nullptr;
+    }
+
+    const TBrokerRangeDesc& range = _ranges[_next_range];
+    int64_t size = range.size;
+    if (range.start_offset != 0) {
+        size += 1;

Review comment:
       Please add a comment explaining why `size` is incremented by 1 when `range.start_offset` is non-zero.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to