This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 80cec581102f94d0d4630758d68e87c9605a9cba
Author: Benjamin Mahler <[email protected]>
AuthorDate: Mon Nov 25 17:25:43 2019 -0500

    Updated stout recordio encoder/decoder to be lower-level.
    
    These previously were templated to operate against typed records,
    which imposes a few limitations. Namely, the caller cannot encode
    records of different types without using some wrapper type.
    Similarly, on the decoding side if the caller expects different
    types of records based on some higher level protocol, it needs to
    use a wrapper type that just stores the record's bytes for the
    caller to decode.
    
    The actual motivation for this patch came from a case where the
    caller already has the record in bytes when encoding. We could
    use Encoder<string> but it imposes an extra copy of each record,
    which is not so straightforward to eliminate while keeping the
    interface simple and functional for generic T records.
    
    So, this patch makes the recordio encoder/decoder operate on
    records as bytes, and callers can do whatever they wish with
    the bytes.
    
    Review: https://reviews.apache.org/r/71824
---
 3rdparty/stout/include/stout/recordio.hpp | 49 ++++++++----------------------
 3rdparty/stout/tests/recordio_tests.cpp   | 50 +++++++++----------------------
 2 files changed, 27 insertions(+), 72 deletions(-)

diff --git a/3rdparty/stout/include/stout/recordio.hpp 
b/3rdparty/stout/include/stout/recordio.hpp
index 9d226c2..4dd0a87 100644
--- a/3rdparty/stout/include/stout/recordio.hpp
+++ b/3rdparty/stout/include/stout/recordio.hpp
@@ -44,11 +44,6 @@
  * other "Record-IO" implementations use a fixed-size header
  * of 4 bytes to directly encode an unsigned 32 bit length.
  *
- * TODO(bmahler): Make the encoder and decoder non-templated.
- * They can just take records as bytes and let a wrapper at
- * a higher level do what they like with the bytes of each
- * record.
- *
  * TODO(bmahler): Make the encoder and decoder zero-copy,
  * once they're just dealing with bytes. To make the encoder
  * zero-copy, we need to make "writes" directly to an output
@@ -60,40 +55,24 @@
 namespace recordio {
 
 /**
- * Given an encoding function for individual records, this
- * provides encoding from typed records into "Record-IO" data.
+ * Returns the "Record-IO" encoded record. Unlike the
+ * decoder, this can just be a stateless function since
+ * we're taking entire records and each encoded record
+ * is independent.
  */
-template <typename T>
-class Encoder
+inline std::string encode(const std::string& record)\
 {
-public:
-  Encoder(std::function<std::string(const T&)> _serialize)
-    : serialize(_serialize) {}
-
-  /**
-   * Returns the "Record-IO" encoded record.
-   */
-  std::string encode(const T& record) const
-  {
-    std::string s = serialize(record);
-    return stringify(s.size()) + "\n" + s;
-  }
-
-private:
-  std::function<std::string(const T&)> serialize;
-};
+  return stringify(record.size()) + "\n" + record;
+}
 
 
 /**
- * Given a decoding function for individual records, this
- * provides decoding from "Record-IO" data into typed records.
+ * Decodes records from "Record-IO" data (see above).
  */
-template <typename T>
 class Decoder
 {
 public:
-  Decoder(std::function<Try<T>(const std::string&)> _deserialize)
-    : state(HEADER), deserialize(_deserialize) {}
+  Decoder() : state(HEADER) {}
 
   /**
    * Decodes another chunk of data from the "Record-IO" stream
@@ -107,13 +86,13 @@ public:
    * TODO(bmahler): Allow the caller to signal EOF, this allows
    * detection of invalid partial data at the end of the input.
    */
-  Try<std::deque<Try<T>>> decode(const std::string& data)
+  Try<std::deque<std::string>> decode(const std::string& data)
   {
     if (state == FAILED) {
       return Error("Decoder is in a FAILED state");
     }
 
-    std::deque<Try<T>> records;
+    std::deque<std::string> records;
 
     foreach (char c, data) {
       if (state == HEADER) {
@@ -140,7 +119,7 @@ public:
 
         // Note that for 0 length records, we immediately decode.
         if (numify.get() <= 0) {
-          records.push_back(deserialize(buffer));
+          records.push_back(buffer);
           state = HEADER;
         }
       } else if (state == RECORD) {
@@ -150,7 +129,7 @@ public:
         buffer += c;
 
         if (buffer.size() == length.get()) {
-          records.push_back(deserialize(buffer));
+          records.push_back(std::move(buffer));
           buffer.clear();
           state = HEADER;
         }
@@ -172,8 +151,6 @@ private:
   // its underlying memory allocation when we clear it.
   std::string buffer;
   Option<size_t> length;
-
-  std::function<Try<T>(const std::string&)> deserialize;
 };
 
 } // namespace recordio {
diff --git a/3rdparty/stout/tests/recordio_tests.cpp 
b/3rdparty/stout/tests/recordio_tests.cpp
index 1779394..8818b0d 100644
--- a/3rdparty/stout/tests/recordio_tests.cpp
+++ b/3rdparty/stout/tests/recordio_tests.cpp
@@ -71,31 +71,25 @@ bool operator==(deque<T> rhs, deque<T> lhs)
 }
 
 
-TEST(RecordIOTest, Encoder)
+TEST(RecordIOTest, Encode)
 {
-  recordio::Encoder<string> encoder(strings::upper);
-
   string data;
 
-  data += encoder.encode("hello!");
-  data += encoder.encode("");
-  data += encoder.encode(" ");
-  data += encoder.encode("13 characters");
+  data += recordio::encode("hello!");
+  data += recordio::encode("");
+  data += recordio::encode(" ");
+  data += recordio::encode("13 characters");
 
   EXPECT_EQ(
-      "6\nHELLO!"
+      "6\nhello!"
       "0\n"
       "1\n "
-      "13\n13 CHARACTERS",
+      "13\n13 characters",
       data);
 
-  // Make sure these can be decoded.
-  recordio::Decoder<string> decoder(
-      [=](const string& data) {
-        return Try<string>(strings::lower(data));
-      });
+  recordio::Decoder decoder;
 
-  deque<Try<string>> records;
+  deque<string> records;
   records.push_back("hello!");
   records.push_back("");
   records.push_back(" ");
@@ -107,18 +101,9 @@ TEST(RecordIOTest, Encoder)
 
 TEST(RecordIOTest, Decoder)
 {
-  // Deserializing brings to lower case, but add an
-  // error case to test deserialization failures.
-  auto deserialize = [](const string& data) -> Try<string> {
-    if (data == "error") {
-      return Error("error");
-    }
-    return strings::lower(data);
-  };
-
-  recordio::Decoder<string> decoder(deserialize);
+  recordio::Decoder decoder;
 
-  deque<Try<string>> records;
+  deque<string> records;
 
   // Empty data should not result in an error.
   records.clear();
@@ -131,14 +116,7 @@ TEST(RecordIOTest, Decoder)
   records.push_back("");
   records.push_back(" ");
 
-  EXPECT_SOME_EQ(records, decoder.decode("6\nHELLO!0\n1\n "));
-
-  // An entry which cannot be decoded should not
-  // fail the decoder permanently.
-  records.clear();
-  records.push_back(Error("error"));
-
-  EXPECT_SOME_EQ(records, decoder.decode("5\nerror"));
+  EXPECT_SOME_EQ(records, decoder.decode("6\nhello!0\n1\n "));
 
   // Record should only be decoded once complete.
   records.clear();
@@ -146,12 +124,12 @@ TEST(RecordIOTest, Decoder)
   EXPECT_SOME_EQ(records, decoder.decode("1"));
   EXPECT_SOME_EQ(records, decoder.decode("3"));
   EXPECT_SOME_EQ(records, decoder.decode("\n"));
-  EXPECT_SOME_EQ(records, decoder.decode("13 CHARACTER"));
+  EXPECT_SOME_EQ(records, decoder.decode("13 character"));
 
   records.clear();
   records.push_back("13 characters");
 
-  EXPECT_SOME_EQ(records, decoder.decode("S"));
+  EXPECT_SOME_EQ(records, decoder.decode("s"));
 
   // If the format is bad, the decoder should fail permanently.
   EXPECT_ERROR(decoder.decode("not a number\n"));

Reply via email to