[GitHub] [incubator-zipkin] anuraaga commented on a change in pull request #2589: Makes SpanBytesDecoder work on ByteBuffer

GitBox Mon, 13 May 2019 23:18:29 -0700

anuraaga commented on a change in pull request #2589: Makes SpanBytesDecoder 
work on ByteBuffer
URL: https://github.com/apache/incubator-zipkin/pull/2589#discussion_r283638729


 ##########
 File path: zipkin/src/main/java/zipkin2/internal/WriteBuffer.java
 ##########
 @@ -369,86 +190,102 @@ void writeLongLe(long v) {
     writeByte((byte) ((v >> 56) & 0xff));
   }
 
-  long readLongLe() {
-    require(8);
-    int pos = this.pos;
-    this.pos = pos + 8;
-    return (buf[pos] & 0xffL)
-      | (buf[pos + 1] & 0xffL) << 8
-      | (buf[pos + 2] & 0xffL) << 16
-      | (buf[pos + 3] & 0xffL) << 24
-      | (buf[pos + 4] & 0xffL) << 32
-      | (buf[pos + 5] & 0xffL) << 40
-      | (buf[pos + 6] & 0xffL) << 48
-      | (buf[pos + 7] & 0xffL) << 56;
-  }
-
-  final byte readByte() {
-    require(1);
-    return buf[pos++];
-  }
-
   /**
-   * @return the value read. Use {@link #varintSizeInBytes(long)} to tell how 
many bytes.
-   * @throws IllegalArgumentException if more than 64 bits were encoded
+   * This returns the bytes needed to transcode a UTF-16 Java String to UTF-8 
bytes.
+   *
+   * <p>Originally based on
+   * 
http://stackoverflow.com/questions/8511490/calculating-length-in-utf-8-of-java-string-without-actually-encoding-it
+   *
+   * <p>Later, ASCII run and malformed surrogate logic borrowed from okio.Utf8
    */
-  // included in the main api as this is used commonly, for example reading 
proto tags
-  int readVarint32() {
-    byte b; // negative number implies MSB set
-    if ((b = readByte()) >= 0) {
-      return b;
-    }
-    int result = b & 0x7f;
-
-    if ((b = readByte()) >= 0) {
-      return result | b << 7;
-    }
-    result |= (b & 0x7f) << 7;
-
-    if ((b = readByte()) >= 0) {
-      return result | b << 14;
-    }
-    result |= (b & 0x7f) << 14;
-
-    if ((b = readByte()) >= 0) {
-      return result | b << 21;
-    }
-    result |= (b & 0x7f) << 21;
-
-    b = readByte();
-    if ((b & 0xf0) != 0) {
-      throw new IllegalArgumentException("Greater than 32-bit varint at 
position " + (pos - 1));
+  public static int utf8SizeInBytes(CharSequence string) {
+    int sizeInBytes = 0;
+    for (int i = 0, len = string.length(); i < len; i++) {
+      char ch = string.charAt(i);
+      if (ch < 0x80) {
+        sizeInBytes++; // 7-bit ASCII character
+        // This could be an ASCII run, or possibly entirely ASCII
+        while (i < len - 1) {
+          ch = string.charAt(i + 1);
+          if (ch >= 0x80) break;
+          i++;
+          sizeInBytes++; // another 7-bit ASCII character
+        }
+      } else if (ch < 0x800) {
+        sizeInBytes += 2; // 11-bit character
+      } else if (ch < 0xd800 || ch > 0xdfff) {
+        sizeInBytes += 3; // 16-bit character
+      } else {
+        int low = i + 1 < len ? string.charAt(i + 1) : 0;
+        if (ch > 0xdbff || low < 0xdc00 || low > 0xdfff) {
+          sizeInBytes++; // A malformed surrogate, which yields '?'.
+        } else {
+          // A 21-bit character
+          sizeInBytes += 4;
+          i++;
+        }
+      }
     }
-    return result | b << 28;
+    return sizeInBytes;
   }
 
-  long readVarint64() {
-    byte b; // negative number implies MSB set
-    if ((b = readByte()) >= 0) {
-      return b;
-    }
+  /**
+   * Binary search for character width which favors matching lower numbers.
+   *
+   * <p>Adapted from okio.Buffer
+   */
+  public static int asciiSizeInBytes(long v) {
+    if (v == 0) return 1;
+    if (v == Long.MIN_VALUE) return 20;
 
-    long result = b & 0x7f;
-    for (int i = 1; b < 0 && i < 10; i++) {
-      b = readByte();
-      if (i == 9 && (b & 0xf0) != 0) {
-        throw new IllegalArgumentException("Greater than 64-bit varint at 
position " + (pos - 1));
-      }
-      result |= (long) (b & 0x7f) << (i * 7);
+    boolean negative = false;
+    if (v < 0) {
+      v = -v; // making this positive allows us to compare using less-than
+      negative = true;
     }
-    return result;
+    int width =
+      v < 100000000L
+        ? v < 10000L
+        ? v < 100L ? v < 10L ? 1 : 2 : v < 1000L ? 3 : 4
+        : v < 1000000L ? v < 100000L ? 5 : 6 : v < 10000000L ? 7 : 8
+        : v < 1000000000000L
+          ? v < 10000000000L ? v < 1000000000L ? 9 : 10 : v < 100000000000L ? 
11 : 12
+          : v < 1000000000000000L
+            ? v < 10000000000000L ? 13 : v < 100000000000000L ? 14 : 15
+            : v < 100000000000000000L
+              ? v < 10000000000000000L ? 16 : 17
+              : v < 1000000000000000000L ? 18 : 19;
+    return negative ? width + 1 : width; // conditionally add room for 
negative sign
   }
 
-  public interface Writer<T> {
-    int sizeInBytes(T value);
-
-    void write(T value, UnsafeBuffer buffer);
+  /**
+   * A base 128 varint encodes 7 bits at a time, this checks how many bytes 
are needed to represent
+   * the value.
+   *
+   * <p>See 
https://developers.google.com/protocol-buffers/docs/encoding#varints
+   *
+   * <p>This logic is the same as {@code 
com.squareup.wire.ProtoWriter.varint32Size} v2.3.0 which
+   * benchmarked faster than loop variants of the frequently copy/pasted 
VarInt.varIntSize
+   */
+  public static int varintSizeInBytes(int value) {
+    if ((value & (0xffffffff << 7)) == 0) return 1;
+    if ((value & (0xffffffff << 14)) == 0) return 2;
+    if ((value & (0xffffffff << 21)) == 0) return 3;
+    if ((value & (0xffffffff << 28)) == 0) return 4;
+    return 5;
   }
 
-  void require(int byteCount) {
-    if (pos + byteCount > buf.length) {
-      throw new IllegalArgumentException(
-        "Truncated: length " + byteCount + " > bytes remaining " + 
remaining());
-    }
+  /** Like {@link #varintSizeInBytes(int)}, except for uint64. */
+  public static int varintSizeInBytes(long v) {
 
 Review comment:
   We could consider this variant with fewer ifs for the 64-bit case (the 
32-bit version is already the same as protobuf's). Since trace IDs are random, 
I guess they cover the entire spectrum of varint sizes and would probably 
benefit especially from this.
   
   
https://github.com/protocolbuffers/protobuf/blob/master/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java#L770

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-zipkin] anuraaga commented on a change in pull request #2589: Makes SpanBytesDecoder work on ByteBuffer

Reply via email to