adriancole commented on a change in pull request #2589: Makes SpanBytesDecoder work on ByteBuffer
URL: https://github.com/apache/incubator-zipkin/pull/2589#discussion_r283652272
##########
File path: zipkin/src/main/java/zipkin2/internal/WriteBuffer.java
##########
@@ -369,86 +190,102 @@ void writeLongLe(long v) {
writeByte((byte) ((v >> 56) & 0xff));
}
- long readLongLe() {
- require(8);
- int pos = this.pos;
- this.pos = pos + 8;
- return (buf[pos] & 0xffL)
- | (buf[pos + 1] & 0xffL) << 8
- | (buf[pos + 2] & 0xffL) << 16
- | (buf[pos + 3] & 0xffL) << 24
- | (buf[pos + 4] & 0xffL) << 32
- | (buf[pos + 5] & 0xffL) << 40
- | (buf[pos + 6] & 0xffL) << 48
- | (buf[pos + 7] & 0xffL) << 56;
- }
-
- final byte readByte() {
- require(1);
- return buf[pos++];
- }
-
/**
- * @return the value read. Use {@link #varintSizeInBytes(long)} to tell how many bytes.
- * @throws IllegalArgumentException if more than 64 bits were encoded
+ * This returns the bytes needed to transcode a UTF-16 Java String to UTF-8 bytes.
+ *
+ * <p>Originally based on
+ * http://stackoverflow.com/questions/8511490/calculating-length-in-utf-8-of-java-string-without-actually-encoding-it
+ *
+ * <p>Later, ASCII run and malformed surrogate logic borrowed from okio.Utf8
*/
- // included in the main api as this is used commonly, for example reading proto tags
- int readVarint32() {
- byte b; // negative number implies MSB set
- if ((b = readByte()) >= 0) {
- return b;
- }
- int result = b & 0x7f;
-
- if ((b = readByte()) >= 0) {
- return result | b << 7;
- }
- result |= (b & 0x7f) << 7;
-
- if ((b = readByte()) >= 0) {
- return result | b << 14;
- }
- result |= (b & 0x7f) << 14;
-
- if ((b = readByte()) >= 0) {
- return result | b << 21;
- }
- result |= (b & 0x7f) << 21;
-
- b = readByte();
- if ((b & 0xf0) != 0) {
- throw new IllegalArgumentException("Greater than 32-bit varint at position " + (pos - 1));
+ public static int utf8SizeInBytes(CharSequence string) {
+ int sizeInBytes = 0;
+ for (int i = 0, len = string.length(); i < len; i++) {
+ char ch = string.charAt(i);
+ if (ch < 0x80) {
+ sizeInBytes++; // 7-bit ASCII character
+ // This could be an ASCII run, or possibly entirely ASCII
+ while (i < len - 1) {
+ ch = string.charAt(i + 1);
+ if (ch >= 0x80) break;
+ i++;
+ sizeInBytes++; // another 7-bit ASCII character
+ }
+ } else if (ch < 0x800) {
+ sizeInBytes += 2; // 11-bit character
+ } else if (ch < 0xd800 || ch > 0xdfff) {
+ sizeInBytes += 3; // 16-bit character
+ } else {
+ int low = i + 1 < len ? string.charAt(i + 1) : 0;
+ if (ch > 0xdbff || low < 0xdc00 || low > 0xdfff) {
+ sizeInBytes++; // A malformed surrogate, which yields '?'.
+ } else {
+ // A 21-bit character
+ sizeInBytes += 4;
+ i++;
+ }
+ }
}
- return result | b << 28;
+ return sizeInBytes;
}
- long readVarint64() {
- byte b; // negative number implies MSB set
- if ((b = readByte()) >= 0) {
- return b;
- }
+ /**
+ * Binary search for character width which favors matching lower numbers.
+ *
+ * <p>Adapted from okio.Buffer
+ */
+ public static int asciiSizeInBytes(long v) {
+ if (v == 0) return 1;
+ if (v == Long.MIN_VALUE) return 20;
- long result = b & 0x7f;
- for (int i = 1; b < 0 && i < 10; i++) {
- b = readByte();
- if (i == 9 && (b & 0xf0) != 0) {
- throw new IllegalArgumentException("Greater than 64-bit varint at position " + (pos - 1));
- }
- result |= (long) (b & 0x7f) << (i * 7);
+ boolean negative = false;
+ if (v < 0) {
+ v = -v; // making this positive allows us to compare using less-than
+ negative = true;
}
- return result;
+ int width =
+ v < 100000000L
+ ? v < 10000L
+ ? v < 100L ? v < 10L ? 1 : 2 : v < 1000L ? 3 : 4
+ : v < 1000000L ? v < 100000L ? 5 : 6 : v < 10000000L ? 7 : 8
+ : v < 1000000000000L
+ ? v < 10000000000L ? v < 1000000000L ? 9 : 10 : v < 100000000000L ? 11 : 12
+ : v < 1000000000000000L
+ ? v < 10000000000000L ? 13 : v < 100000000000000L ? 14 : 15
+ : v < 100000000000000000L
+ ? v < 10000000000000000L ? 16 : 17
+ : v < 1000000000000000000L ? 18 : 19;
+ return negative ? width + 1 : width; // conditionally add room for negative sign
}
- public interface Writer<T> {
- int sizeInBytes(T value);
-
- void write(T value, UnsafeBuffer buffer);
+ /**
+ * A base 128 varint encodes 7 bits at a time, this checks how many bytes are needed to represent
+ * the value.
+ *
+ * <p>See https://developers.google.com/protocol-buffers/docs/encoding#varints
+ *
+ * <p>This logic is the same as {@code com.squareup.wire.ProtoWriter.varint32Size} v2.3.0 which
+ * benchmarked faster than loop variants of the frequently copy/pasted VarInt.varIntSize
+ */
+ public static int varintSizeInBytes(int value) {
+ if ((value & (0xffffffff << 7)) == 0) return 1;
+ if ((value & (0xffffffff << 14)) == 0) return 2;
+ if ((value & (0xffffffff << 21)) == 0) return 3;
+ if ((value & (0xffffffff << 28)) == 0) return 4;
+ return 5;
}
- void require(int byteCount) {
- if (pos + byteCount > buf.length) {
- throw new IllegalArgumentException(
- "Truncated: length " + byteCount + " > bytes remaining " + remaining());
- }
+ /** Like {@link #varintSizeInBytes(int)}, except for uint64. */
+ public static int varintSizeInBytes(long v) {
Review comment:
another TODO!
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services