It looks like I hadn't spotted this issue, 
https://github.com/protocolbuffers/protobuf/issues/5883, that one of my 
colleagues has helped me find.

Hard to understand the justification for that, to be honest!
On Thursday, 18 August 2022 at 11:05:32 UTC+1 John McCabe wrote:

> I've started with one of the simple protobuf examples of writing to a 
> file, then reading back. I've extended it a little to try to use both 
> oneof, and serialisation with a length field in front so that I can read 
> multiple messages.
>
> The proto file is:
>
> syntax = "proto3";
>
> package pbuf;
>
> message GetRequest {
>     enum GetTarget {
>         FIRST = 0;
>         SECOND = 1;
>     }
>
>     GetTarget get_target = 1;
> }
>
> message SetRequest {
>     enum SetTarget {
>         FIRST = 0;
>         SECOND = 1;
>     }
>
>     SetTarget set_target = 1;
> }
>
> message Request {
>     oneof request_type {
>         SetRequest s_request = 1;
>         GetRequest g_request = 2;
>     }
> }
>
> I.e. there is a main message type that can hold one of the others. The 
> others have a single enum field, with enum values of 0 and 1 defined for 
> each one.
>
> The code I'm using to experiment with is compiled using gcc 7.5.0 on 
> 64-bit Ubuntu 16.04, with std=c++17, and is:
>
> #include <fstream>
> #include <iostream>
> #include <string>
> #include <cstdint>
>
> #include "oneof.pb.h"
>
> int main(int argc, char* argv[])
> {
>     GOOGLE_PROTOBUF_VERIFY_VERSION;
>
>     if (argc != 2)
>     {
>         std::cerr << "Usage:  " << argv[0] << " DATA_FILE" << std::endl;
>         return -1;
>     }
>
>     {
>         std::fstream input(argv[1], std::ios::in | std::ios::binary);
>         if (!input)
>         {
>             std::cout << argv[1] << ": File not found.  Creating a new 
> file." << std::endl;
>         }
>         else
>         {
>             while (input)
>             {
>                 pbuf::Request request;
>                 std::size_t serialisationLength { 0 };
>
>                 // Read the data size first; this is the most likely to 
> fail at end of file
>                 input.read(reinterpret_cast<char *>(&serialisationLength), 
> sizeof(serialisationLength));
>                 if (!input.eof())
>                 {
>                     std::cout << "Serialisation Length : " << 
> serialisationLength << std::endl;
>
>                     uint8_t serialisedData[serialisationLength];
>                     if (!input.read(reinterpret_cast<char 
> *>(serialisedData), serialisationLength))
>                     {
>                         std::cerr << "Failed to read request" << std::endl;
>                         return -1;
>                     }
>
>                     if (!request.ParseFromArray(serialisedData, 
> serialisationLength))
>                     {
>                         std::cerr << "Failed to parse request " << 
> std::endl;
>                         return -1;
>                     }
>
>                     std::cout << "Data: " << request.DebugString() << 
> std::endl;
>                 }
>             }
>         }
>     }
>
>     // Reset the output file
>     std::fstream output(argv[1], std::ios::out | std::ios::trunc | 
> std::ios::binary);
>
>     for (int32_t i = 0; i < 3; i++)
>     {
>         pbuf::Request request;
>         switch (i)
>         {
>         case 0:
>         {
>             pbuf::GetRequest* getRequest { request.mutable_g_request() };
>             
> getRequest->set_get_target(pbuf::GetRequest_GetTarget::GetRequest_GetTarget_FIRST);
>             break;
>         }
>
>         case 1:
>         {
>             pbuf::SetRequest* setRequest { request.mutable_s_request() };
>             
> setRequest->set_set_target(pbuf::SetRequest_SetTarget::SetRequest_SetTarget_FIRST);
>             break;
>         }
>
>         case 2:
>         {
>             pbuf::GetRequest* getRequest { request.mutable_g_request() };
>             
> getRequest->set_get_target(pbuf::GetRequest_GetTarget::GetRequest_GetTarget_SECOND);
>             break;
>         }
>         }
>
>         auto serialisationLength { request.ByteSizeLong() };
>         uint8_t serialisedData[serialisationLength];
>         if (!request.SerializeToArray(serialisedData, serialisationLength))
>         {
>             std::cerr << "Failed to serialise  request " << i << "." << 
> std::endl;
>             return -1;
>
>         }
>
>         if (!output.write(reinterpret_cast<const char 
> *>(&serialisationLength), sizeof(serialisationLength)))
>         {
>             std::cerr << "Failed to write request " << i << "." << 
> std::endl;
>             return -1;
>         }
>
>         if (!output.write(reinterpret_cast<const char *>(serialisedData), 
> serialisationLength))
>         {
>             std::cerr << "Failed to write request " << i << "." << 
> std::endl;
>             return -1;
>         }
>     }
>     output.close();
>
>     // Optional:  Delete all global objects allocated by libprotobuf.
>     google::protobuf::ShutdownProtobufLibrary();
>
>     return 0;
> }
>
> The gist of it is that it reads a given file and prints out the contents 
> using the DebugString() for each object, then overwrites the file, putting 
> new entries in (they're actually always the same anyway, but...) so, if 
> you're running this and passing "var.bin" as the file argument, the first 
> time round you should expect to see:
>
> var.bin: File not found.  Creating a new file.
> Serialisation Length: 2
> Serialisation Length: 2
> Serialisation Length: 4
>
> At this point you can, I guess, see the issue (more later). The second 
> time round, you should expect to see the contents of the 3 entries that 
> were added to the file. I.e.:
>
> Serialisation Length : 2
> Data: g_request {
> }
>
> Serialisation Length : 2
> Data: s_request {
> }
>
> Serialisation Length : 4
> Data: g_request {
>   get_target: SECOND
> }
>
> Serialisation Length: 2
> Serialisation Length: 2
> Serialisation Length: 4
>
> This further highlights the issue I've got; i.e. when the enum value is 
> set to the "0" version, the value isn't actually added to the serialized 
> data and, hence, doesn't appear in the de-serialized data.
>
> Is this expected behaviour because, to me, that seems weird? Saying that, 
> I've only got 35 years experience of professional software development, so 
> what would I know?!
>
> If I change the proto file to:
>
> syntax = "proto3";
>
> package pbuf;
>
> message GetRequest {
>     enum GetTarget {
>         DEFAULT = 0;
>         FIRST = 1;
>         SECOND = 2;
>     }
>
>     GetTarget get_target = 1;
> }
>
> message SetRequest {
>     enum SetTarget {
>         DEFAULT = 0;
>         FIRST = 1;
>         SECOND = 2;
>     }
>
>     SetTarget set_target = 1;
> }
>
> message Request {
>     oneof request_type {
>         SetRequest s_request = 1;
>         GetRequest g_request = 2;
>     }
> }
>
> i.e. add an extra enumeration literal of "DEFAULT = 0;" in both, and move 
> the others "up" 1, the output is:
>
> var.bin: File not found.  Creating a new file.
> Serialisation Length: 4
> Serialisation Length: 4
> Serialisation Length: 4
>
> the first time, and 
>
> Serialisation Length : 4
> Data: g_request {
>   get_target: FIRST
> }
>
> Serialisation Length : 4
> Data: s_request {
>   set_target: FIRST
> }
>
> Serialisation Length : 4
> Data: g_request {
>   get_target: SECOND
> }
>
> Serialisation Length: 4
> Serialisation Length: 4
> Serialisation Length: 4
>
> the second time, which is more like I would expect.
>
> The cause of this seems to be this, generated function:
>
> size_t GetRequest::ByteSizeLong() const {
> // @@protoc_insertion_point(message_byte_size_start:pbuf.GetRequest)
>   size_t total_size = 0;
>
>   uint32_t cached_has_bits = 0;
>   // Prevent compiler warnings about cached_has_bits being unused
>   (void) cached_has_bits;
>
>   // .pbuf.GetRequest.GetTarget get_target = 1;
>   if (this->_internal_get_target() != 0) {
>     total_size += 1 +
>       ::_pbi::WireFormatLite::EnumSize(this->_internal_get_target());
>   }
>
>   return MaybeComputeUnknownFieldsSize(total_size, &_impl_._cached_size_);
> }
>
> When the enum value is zero, it's (obviously) skipping over the total_size 
> update where EnumSize is called.
>
> Any comments would be appreciated; I may be missing something but this 
> doesn't seem to make sense to me.
>
>
>
>
>

-- 
You received this message because you are subscribed to the Google Groups 
"Protocol Buffers" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/protobuf/27775d01-5273-4cea-a946-2a276e87ef2cn%40googlegroups.com.

Reply via email to