This is an automated email from the ASF dual-hosted git repository.
jamesge pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-brpc.git
The following commit(s) were added to refs/heads/master by this push:
new 958a4c3a json2pb::JsonToProtoMessage() supports parsing multiple jsons
958a4c3a is described below
commit 958a4c3a0c2b5811400e042a216d651fd11e02a1
Author: jamesge <[email protected]>
AuthorDate: Sun Jun 5 17:59:30 2022 +0800
json2pb::JsonToProtoMessage() supports parsing multiple jsons
---
src/butil/iobuf.h | 6 +-
src/butil/strings/string_util.h | 4 ++
src/butil/third_party/rapidjson/reader.h | 5 ++
src/json2pb/json_to_pb.cpp | 106 +++++++++++++++++++++----------
src/json2pb/json_to_pb.h | 33 ++++++++--
src/json2pb/rapidjson.h | 1 +
test/addressbook.proto | 2 +-
test/brpc_protobuf_json_unittest.cpp | 106 +++++++++++++++++++++++++++++++
8 files changed, 219 insertions(+), 44 deletions(-)
diff --git a/src/butil/iobuf.h b/src/butil/iobuf.h
index c2743096..8e2bfe47 100644
--- a/src/butil/iobuf.h
+++ b/src/butil/iobuf.h
@@ -296,9 +296,9 @@ public:
// Returns bytes copied.
size_t copy_to(void* buf, size_t n = (size_t)-1L, size_t pos = 0) const;
- // NOTE: first parameter is not std::string& because user may passes
- // a pointer of std::string by mistake, in which case, compiler would
- // call the void* version which crashes definitely.
+ // NOTE: first parameter is not std::string& because user may pass in
+ // a pointer of std::string by mistake, in which case, the void* overload
+ // would be wrongly called.
size_t copy_to(std::string* s, size_t n = (size_t)-1L, size_t pos = 0)
const;
size_t append_to(std::string* s, size_t n = (size_t)-1L, size_t pos = 0)
const;
diff --git a/src/butil/strings/string_util.h b/src/butil/strings/string_util.h
index 23bc01e4..4d78e20f 100644
--- a/src/butil/strings/string_util.h
+++ b/src/butil/strings/string_util.h
@@ -377,6 +377,10 @@ inline bool IsWhitespace(wchar_t c) {
return wcschr(butil::kWhitespaceWide, c) != NULL;
}
+inline bool IsBlankString(const butil::StringPiece &s) {
+ return butil::ContainsOnlyChars(s, " \r\n\t");
+}
+
// Return a byte string in human-readable format with a unit suffix. Not
// appropriate for use in any UI; use of FormatBytes and friends in ui/base is
// highly recommended instead. TODO(avi): Figure out how to get callers to use
diff --git a/src/butil/third_party/rapidjson/reader.h
b/src/butil/third_party/rapidjson/reader.h
index 552eca03..fd6398b5 100644
--- a/src/butil/third_party/rapidjson/reader.h
+++ b/src/butil/third_party/rapidjson/reader.h
@@ -430,6 +430,11 @@ public:
RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
}
+ } else {
+ // jge: Update parseResult_.Offset() when kParseStopWhenDoneFlag
+ // is set, which means the user needs to know where to resume
+ // parsing in the next call to JsonToProtoMessage()
+ SetParseError(kParseErrorNone, is.Tell());
}
}
diff --git a/src/json2pb/json_to_pb.cpp b/src/json2pb/json_to_pb.cpp
index e87c3922..fc9a31a0 100644
--- a/src/json2pb/json_to_pb.cpp
+++ b/src/json2pb/json_to_pb.cpp
@@ -25,6 +25,8 @@
#include <limits>
#include <google/protobuf/descriptor.h>
#include "butil/strings/string_number_conversions.h"
+#include "butil/third_party/rapidjson/error/error.h"
+#include "butil/third_party/rapidjson/rapidjson.h"
#include "json_to_pb.h"
#include "zero_copy_stream_reader.h" // ZeroCopyStreamReader
#include "encode_decode.h"
@@ -33,12 +35,19 @@
#include "protobuf_map.h"
#include "rapidjson.h"
-#define J2PERROR(perr, fmt, ...) \
+
+#define J2PERROR(perr, fmt, ...) \
+ J2PERROR_WITH_PB((::google::protobuf::Message*)nullptr, perr, fmt,
##__VA_ARGS__)
+
+#define J2PERROR_WITH_PB(pb, perr, fmt, ...) \
if (perr) { \
if (!perr->empty()) { \
perr->append(", ", 2); \
} \
- butil::string_appendf(perr, fmt, ##__VA_ARGS__); \
+ butil::string_appendf(perr, fmt, ##__VA_ARGS__); \
+ if ((pb) != nullptr) { \
+ butil::string_appendf(perr, " [%s]",
(pb)->GetDescriptor()->name().c_str()); \
+ } \
} else { }
namespace json2pb {
@@ -49,7 +58,8 @@ Json2PbOptions::Json2PbOptions()
#else
: base64_to_bytes(true)
#endif
- , array_to_single_repeated(false) {
+ , array_to_single_repeated(false)
+ , allow_remaining_bytes_after_parsing(false) {
}
enum MatchType {
@@ -412,7 +422,7 @@ static bool JsonValueToProtoField(const
BUTIL_RAPIDJSON_NAMESPACE::Value& value,
options.base64_to_bytes) {
std::string str_decoded;
if (!butil::Base64Decode(str, &str_decoded)) {
- J2PERROR(err, "Fail to decode base64 string=%s",
str.c_str());
+ J2PERROR_WITH_PB(message, err, "Fail to decode
base64 string=%s", str.c_str());
return false;
}
str = str_decoded;
@@ -426,7 +436,7 @@ static bool JsonValueToProtoField(const
BUTIL_RAPIDJSON_NAMESPACE::Value& value,
options.base64_to_bytes) {
std::string str_decoded;
if (!butil::Base64Decode(str, &str_decoded)) {
- J2PERROR(err, "Fail to decode base64 string=%s",
str.c_str());
+ J2PERROR_WITH_PB(message, err, "Fail to decode base64
string=%s", str.c_str());
return false;
}
str = str_decoded;
@@ -509,7 +519,7 @@ bool JsonValueToProtoMessage(const
BUTIL_RAPIDJSON_NAMESPACE::Value& json_value,
std::string* err) {
const google::protobuf::Descriptor* descriptor = message->GetDescriptor();
if (!json_value.IsObject() && !(json_value.IsArray() &&
options.array_to_single_repeated)) {
- J2PERROR(err, "`json_value' is not a json object. %s",
descriptor->name().c_str());
+ J2PERROR_WITH_PB(message, err, "The input is not a json object");
return false;
}
@@ -538,7 +548,7 @@ bool JsonValueToProtoMessage(const
BUTIL_RAPIDJSON_NAMESPACE::Value& json_value,
return JsonValueToProtoField(json_value, fields.front(), message,
options, err);
}
- J2PERROR(err, "`json_value' of type array is not allowed here.");
+ J2PERROR_WITH_PB(message, err, "the input json can't be array here");
return false;
}
@@ -589,55 +599,89 @@ bool JsonValueToProtoMessage(const
BUTIL_RAPIDJSON_NAMESPACE::Value& json_value,
return true;
}
-bool ZeroCopyStreamToJson(BUTIL_RAPIDJSON_NAMESPACE::Document *dest,
- google::protobuf::io::ZeroCopyInputStream *stream) {
- ZeroCopyStreamReader stream_reader(stream);
- dest->ParseStream<0, BUTIL_RAPIDJSON_NAMESPACE::UTF8<> >(stream_reader);
- return !dest->HasParseError();
-}
-
inline bool JsonToProtoMessageInline(const std::string& json_string,
google::protobuf::Message* message,
const Json2PbOptions& options,
- std::string* error) {
+ std::string* error,
+ size_t* parsed_offset) {
if (error) {
error->clear();
}
BUTIL_RAPIDJSON_NAMESPACE::Document d;
- d.Parse<0>(json_string.c_str());
+ if (options.allow_remaining_bytes_after_parsing) {
+
d.Parse<BUTIL_RAPIDJSON_NAMESPACE::kParseStopWhenDoneFlag>(json_string.c_str());
+ if (parsed_offset != nullptr) {
+ *parsed_offset = d.GetErrorOffset();
+ }
+ } else {
+ d.Parse<0>(json_string.c_str());
+ }
if (d.HasParseError()) {
- J2PERROR(error, "Invalid json format");
+ if (options.allow_remaining_bytes_after_parsing) {
+ if (d.GetParseError() ==
BUTIL_RAPIDJSON_NAMESPACE::kParseErrorDocumentEmpty) {
+ // This is usual when parsing multiple jsons, don't waste time
+ // on setting the `empty error'
+ return false;
+ }
+ }
+ J2PERROR_WITH_PB(message, error, "Invalid json: %s",
BUTIL_RAPIDJSON_NAMESPACE::GetParseError_En(d.GetParseError()));
return false;
}
- return json2pb::JsonValueToProtoMessage(d, message, options, error);
+ return JsonValueToProtoMessage(d, message, options, error);
}
bool JsonToProtoMessage(const std::string& json_string,
google::protobuf::Message* message,
const Json2PbOptions& options,
- std::string* error) {
- return JsonToProtoMessageInline(json_string, message, options, error);
+ std::string* error,
+ size_t* parsed_offset) {
+ return JsonToProtoMessageInline(json_string, message, options, error,
parsed_offset);
}
bool JsonToProtoMessage(google::protobuf::io::ZeroCopyInputStream* stream,
google::protobuf::Message* message,
const Json2PbOptions& options,
- std::string* error) {
+ std::string* error,
+ size_t* parsed_offset) {
+ ZeroCopyStreamReader reader(stream);
+ return JsonToProtoMessage(&reader, message, options, error, parsed_offset);
+}
+
+bool JsonToProtoMessage(ZeroCopyStreamReader* reader,
+ google::protobuf::Message* message,
+ const Json2PbOptions& options,
+ std::string* error,
+ size_t* parsed_offset) {
if (error) {
error->clear();
}
BUTIL_RAPIDJSON_NAMESPACE::Document d;
- if (!json2pb::ZeroCopyStreamToJson(&d, stream)) {
- J2PERROR(error, "Invalid json format");
+ if (options.allow_remaining_bytes_after_parsing) {
+ d.ParseStream<BUTIL_RAPIDJSON_NAMESPACE::kParseStopWhenDoneFlag,
BUTIL_RAPIDJSON_NAMESPACE::UTF8<>>(*reader);
+ if (parsed_offset != nullptr) {
+ *parsed_offset = d.GetErrorOffset();
+ }
+ } else {
+ d.ParseStream<0, BUTIL_RAPIDJSON_NAMESPACE::UTF8<>>(*reader);
+ }
+ if (d.HasParseError()) {
+ if (options.allow_remaining_bytes_after_parsing) {
+ if (d.GetParseError() ==
BUTIL_RAPIDJSON_NAMESPACE::kParseErrorDocumentEmpty) {
+ // This is usual when parsing multiple jsons, don't waste time
+ // on setting the `empty error'
+ return false;
+ }
+ }
+ J2PERROR_WITH_PB(message, error, "Invalid json: %s",
BUTIL_RAPIDJSON_NAMESPACE::GetParseError_En(d.GetParseError()));
return false;
}
- return json2pb::JsonValueToProtoMessage(d, message, options, error);
+ return JsonValueToProtoMessage(d, message, options, error);
}
bool JsonToProtoMessage(const std::string& json_string,
google::protobuf::Message* message,
std::string* error) {
- return JsonToProtoMessageInline(json_string, message, Json2PbOptions(),
error);
+ return JsonToProtoMessageInline(json_string, message, Json2PbOptions(),
error, nullptr);
}
// For ABI compatibility with 1.0.0.0
@@ -647,21 +691,13 @@ bool JsonToProtoMessage(const std::string& json_string,
bool JsonToProtoMessage(std::string json_string,
google::protobuf::Message* message,
std::string* error) {
- return JsonToProtoMessageInline(json_string, message, Json2PbOptions(),
error);
+ return JsonToProtoMessageInline(json_string, message, Json2PbOptions(),
error, nullptr);
}
bool JsonToProtoMessage(google::protobuf::io::ZeroCopyInputStream *stream,
google::protobuf::Message* message,
std::string* error) {
- if (error) {
- error->clear();
- }
- BUTIL_RAPIDJSON_NAMESPACE::Document d;
- if (!json2pb::ZeroCopyStreamToJson(&d, stream)) {
- J2PERROR(error, "Invalid json format");
- return false;
- }
- return json2pb::JsonValueToProtoMessage(d, message, Json2PbOptions(),
error);
+ return JsonToProtoMessage(stream, message, Json2PbOptions(), error,
nullptr);
}
} //namespace json2pb
diff --git a/src/json2pb/json_to_pb.h b/src/json2pb/json_to_pb.h
index 67f5bdf5..44203e08 100644
--- a/src/json2pb/json_to_pb.h
+++ b/src/json2pb/json_to_pb.h
@@ -20,6 +20,7 @@
#ifndef BRPC_JSON2PB_JSON_TO_PB_H
#define BRPC_JSON2PB_JSON_TO_PB_H
+#include "json2pb/zero_copy_stream_reader.h"
#include <google/protobuf/message.h>
#include <google/protobuf/io/zero_copy_stream.h> // ZeroCopyInputStream
@@ -36,30 +37,52 @@ struct Json2PbOptions {
// Allow decoding json array iff there is only one repeated field.
// Default: false.
bool array_to_single_repeated;
+
+ // Allow more bytes remaining in the input after parsing the first json
+ // object. Useful when the input contains more than one json object.
+ bool allow_remaining_bytes_after_parsing;
};
// Convert `json' to protobuf `message'.
// Returns true on success. `error' (if not NULL) will be set with error
// message on failure.
+//
+// [When options.allow_remaining_bytes_after_parsing is true]
+// * `parsed_offset' (if not NULL) will be set with #bytes parsed
+// * the function still returns false on empty document but the `error' is set
+// to empty string instead of `The document is empty'.
bool JsonToProtoMessage(const std::string& json,
google::protobuf::Message* message,
const Json2PbOptions& options,
- std::string* error = NULL);
+ std::string* error = nullptr,
+ size_t* parsed_offset = nullptr);
-// send output to ZeroCopyOutputStream instead of std::string.
+// Use ZeroCopyInputStream as input instead of std::string.
bool JsonToProtoMessage(google::protobuf::io::ZeroCopyInputStream *json,
+ google::protobuf::Message *message,
+ const Json2PbOptions &options,
+ std::string *error = nullptr,
+ size_t *parsed_offset = nullptr);
+
+// Use ZeroCopyStreamReader as input instead of std::string.
+// If you need to parse multiple jsons from IOBuf, you should use this
+// overload instead of the ZeroCopyInputStream one, which is built on this
+// overload and creates a new ZeroCopyStreamReader on every call, so the
+// reader can't be reused between consecutive calls.
+bool JsonToProtoMessage(ZeroCopyStreamReader *json,
google::protobuf::Message* message,
const Json2PbOptions& options,
- std::string* error = NULL);
+ std::string* error = nullptr,
+ size_t* parsed_offset = nullptr);
// Using default Json2PbOptions.
bool JsonToProtoMessage(const std::string& json,
google::protobuf::Message* message,
- std::string* error = NULL);
+ std::string* error = nullptr);
bool JsonToProtoMessage(google::protobuf::io::ZeroCopyInputStream* stream,
google::protobuf::Message* message,
- std::string* error = NULL);
+ std::string* error = nullptr);
} // namespace json2pb
#endif // BRPC_JSON2PB_JSON_TO_PB_H
diff --git a/src/json2pb/rapidjson.h b/src/json2pb/rapidjson.h
index d2cf3b68..fa5d354a 100644
--- a/src/json2pb/rapidjson.h
+++ b/src/json2pb/rapidjson.h
@@ -39,6 +39,7 @@
#include "butil/third_party/rapidjson/stringbuffer.h"
#include "butil/third_party/rapidjson/writer.h"
#include "butil/third_party/rapidjson/optimized_writer.h"
+#include "butil/third_party/rapidjson/error/en.h" // GetParseError_En
#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
#pragma GCC diagnostic pop
diff --git a/test/addressbook.proto b/test/addressbook.proto
index fdbda4fc..068334f8 100644
--- a/test/addressbook.proto
+++ b/test/addressbook.proto
@@ -44,7 +44,7 @@ message Person {
required double datadouble = 8;
- required float datafloat = 9;
+ optional float datafloat = 9;
optional uint32 datau32 = 10;
diff --git a/test/brpc_protobuf_json_unittest.cpp
b/test/brpc_protobuf_json_unittest.cpp
index 64af4541..1469f498 100644
--- a/test/brpc_protobuf_json_unittest.cpp
+++ b/test/brpc_protobuf_json_unittest.cpp
@@ -22,12 +22,15 @@
#include <string>
#include <google/protobuf/text_format.h>
#include "butil/iobuf.h"
+#include "butil/string_printf.h"
+#include "butil/strings/string_util.h"
#include "butil/third_party/rapidjson/rapidjson.h"
#include "butil/time.h"
#include "butil/gperftools_profiler.h"
#include "json2pb/pb_to_json.h"
#include "json2pb/json_to_pb.h"
#include "json2pb/encode_decode.h"
+#include "json2pb/zero_copy_stream_reader.h"
#include "message.pb.h"
#include "addressbook1.pb.h"
#include "addressbook.pb.h"
@@ -1540,4 +1543,107 @@ TEST_F(ProtobufJsonTest, string_to_int64) {
ASSERT_EQ(person.data(), 1234567);
}
+TEST_F(ProtobufJsonTest, parse_multiple_json) {
+ const int COUNT = 4;
+ std::vector<std::string> expectedNames = { "tom", "bob", "jerry", "lucy" };
+ std::vector<int> expectedIds = { 33, 12, 2432, 435 };
+ std::vector<double> expectedData = { 1.0, 2.0, 3.0, 4.0 };
+ std::string jsonStr;
+ butil::IOBuf jsonBuf;
+ for (int i = 0; i < COUNT; ++i) {
+ const std::string d =
+ butil::string_printf(R"( { "name":"%s", "id":%d, "datadouble":%f }
)",
+ expectedNames[i].c_str(),
+ expectedIds[i],
+ expectedData[i]);
+ jsonStr.append(d);
+ jsonBuf.append(d);
+ }
+
+ Person req;
+ json2pb::Json2PbOptions copt;
+ copt.allow_remaining_bytes_after_parsing = true;
+ std::string err;
+
+ for (int i = 0; true; ++i) {
+ req.Clear();
+ size_t offset;
+ if (json2pb::JsonToProtoMessage(jsonStr, &req, copt, &err, &offset)) {
+ jsonStr = jsonStr.substr(offset);
+ ASSERT_EQ(expectedNames[i], req.name());
+ ASSERT_EQ(expectedIds[i], req.id());
+ ASSERT_EQ(expectedData[i], req.datadouble());
+
+ std::cout << "parsed=" << req.ShortDebugString() << "
after_offset=" << jsonStr << std::endl;
+ } else {
+ if (err.empty()) {
+ // document is empty
+ break;
+ }
+ std::cerr << "error=" << err << " offset=" << offset << std::endl;
+ ASSERT_FALSE(true);
+ }
+ }
+
+ butil::IOBufAsZeroCopyInputStream stream(jsonBuf);
+ json2pb::ZeroCopyStreamReader reader(&stream);
+
+ for (int i = 0; true; ++i) {
+ req.Clear();
+ size_t offset;
+ auto res = json2pb::JsonToProtoMessage(&reader, &req, copt, &err,
&offset);
+ if (res) {
+ ASSERT_EQ(expectedNames[i], req.name());
+ ASSERT_EQ(expectedIds[i], req.id());
+ ASSERT_EQ(expectedData[i], req.datadouble());
+ std::string afterOffset;
+ jsonBuf.copy_to(&afterOffset, (size_t)-1L, offset);
+ std::cout << "parsed=" << req.ShortDebugString() << "
after_offset=" << afterOffset << std::endl;
+ } else {
+ if (err.empty()) {
+ // document is empty
+ break;
+ }
+ std::cerr << "error=" << err << " offset=" << offset << std::endl;
+ ASSERT_FALSE(true) << i;
+ }
+ }
}
+
+TEST_F(ProtobufJsonTest, parse_multiple_json_error) {
+ std::string jsonStr = R"( { "name":"tom", "id":323, "datadouble":3.2 }
abc )";
+ butil::IOBuf jsonBuf;
+ jsonBuf.append(jsonStr);
+
+ Person req;
+ json2pb::Json2PbOptions copt;
+ copt.allow_remaining_bytes_after_parsing = true;
+ std::string err;
+ size_t offset;
+
+ ASSERT_TRUE(json2pb::JsonToProtoMessage(jsonStr, &req, copt, &err,
&offset));
+ jsonStr = jsonStr.substr(offset);
+ ASSERT_STREQ("tom", req.name().c_str());
+ ASSERT_EQ(323, req.id());
+ ASSERT_EQ(3.2, req.datadouble());
+
+ req.Clear();
+ ASSERT_FALSE(json2pb::JsonToProtoMessage(jsonStr, &req, copt, &err,
&offset));
+ ASSERT_STREQ("Invalid json: Invalid value. [Person]", err.c_str());
+ ASSERT_EQ(2ul, offset);
+
+ butil::IOBufAsZeroCopyInputStream stream(jsonBuf);
+ json2pb::ZeroCopyStreamReader reader(&stream);
+ req.Clear();
+ ASSERT_TRUE(json2pb::JsonToProtoMessage(&reader, &req, copt, &err,
&offset));
+ ASSERT_STREQ("tom", req.name().c_str());
+ ASSERT_EQ(323, req.id());
+ ASSERT_EQ(3.2, req.datadouble());
+
+ req.Clear();
+ ASSERT_FALSE(json2pb::JsonToProtoMessage(&reader, &req, copt, &err,
&offset));
+ ASSERT_STREQ("Invalid json: Invalid value. [Person]", err.c_str());
+ ASSERT_EQ(47ul, offset);
+}
+
+} // namespace
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]