This is an automated email from the ASF dual-hosted git repository. adriancole pushed a commit to branch cheap-ipv4 in repository https://gitbox.apache.org/repos/asf/incubator-zipkin.git
commit d723c53d6500e97cc2b912efe8034823ec2352f6 Author: Adrian Cole <[email protected]> AuthorDate: Sat May 11 17:11:58 2019 +0800 Adds cheap string encoding for IpV4 path used in protobuf Commonly, we parse protobuf, but write json. This implies reading bytes and encoding IP literals. This improves performance of IpV4 address encoding and tries to pay for the extra bytecode by removing some. --- .../src/main/java/zipkin2/EndpointBenchmarks.java | 13 ++++- .../java/zipkin2/codec/ProtoCodecBenchmarks.java | 2 +- zipkin/src/main/java/zipkin2/Endpoint.java | 67 +++++++++++++++------- zipkin/src/main/java/zipkin2/internal/Buffer.java | 6 -- .../src/main/java/zipkin2/internal/JsonCodec.java | 11 +--- .../main/java/zipkin2/internal/ThriftCodec.java | 1 + zipkin/src/test/java/zipkin2/EndpointTest.java | 28 ++++++++- 7 files changed, 87 insertions(+), 41 deletions(-) diff --git a/benchmarks/src/main/java/zipkin2/EndpointBenchmarks.java b/benchmarks/src/main/java/zipkin2/EndpointBenchmarks.java index afdc9a1..690c033 100644 --- a/benchmarks/src/main/java/zipkin2/EndpointBenchmarks.java +++ b/benchmarks/src/main/java/zipkin2/EndpointBenchmarks.java @@ -39,10 +39,10 @@ import org.openjdk.jmh.runner.options.OptionsBuilder; @Measurement(iterations = 5, time = 1) @Warmup(iterations = 10, time = 1) @Fork(3) -@BenchmarkMode(Mode.AverageTime) +@BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.MICROSECONDS) @State(Scope.Thread) -@Threads(1) +@Threads(2) public class EndpointBenchmarks { static final String IPV4 = "43.0.192.2", IPV6 = "2001:db8::c001"; static final InetAddress IPV4_ADDR, IPV6_ADDR; @@ -66,6 +66,10 @@ public class EndpointBenchmarks { return builder.parseIp(IPV4_ADDR); } + @Benchmark public boolean parseIpv4_bytes() { + return builder.parseIp(IPV4_ADDR.getAddress()); + } + @Benchmark public boolean parseIpv6_literal() { return builder.parseIp(IPV6); } @@ -74,9 +78,14 @@ public class EndpointBenchmarks { return builder.parseIp(IPV6_ADDR); } + @Benchmark public boolean parseIpv6_bytes() { + return builder.parseIp(IPV6_ADDR.getAddress()); + } + // Convenience main entry-point public static void main(String[] args) throws RunnerException { Options opt = new OptionsBuilder() + .addProfiler("gc") .include(".*" + EndpointBenchmarks.class.getSimpleName()) .build(); diff --git a/benchmarks/src/main/java/zipkin2/codec/ProtoCodecBenchmarks.java b/benchmarks/src/main/java/zipkin2/codec/ProtoCodecBenchmarks.java index 85fb28c..a863158 100644 --- a/benchmarks/src/main/java/zipkin2/codec/ProtoCodecBenchmarks.java +++ b/benchmarks/src/main/java/zipkin2/codec/ProtoCodecBenchmarks.java @@ -103,7 +103,7 @@ public class ProtoCodecBenchmarks { // Convenience main entry-point public static void main(String[] args) throws Exception { Options opt = new OptionsBuilder() - .include(".*" + ProtoCodecBenchmarks.class.getSimpleName() + ".*bytes_zipkin") + .include(".*" + ProtoCodecBenchmarks.class.getSimpleName() + ".*bytes.*") .addProfiler("gc") .build(); diff --git a/zipkin/src/main/java/zipkin2/Endpoint.java b/zipkin/src/main/java/zipkin2/Endpoint.java index ae17473..af29260 100644 --- a/zipkin/src/main/java/zipkin2/Endpoint.java +++ b/zipkin/src/main/java/zipkin2/Endpoint.java @@ -177,18 +177,13 @@ public final class Endpoint implements Serializable { // for Spark and Flink job /** * Like {@link #parseIp(String)} except this accepts a byte array. * - * @param ipBytes byte array whose ownership is exclusively transfered to this endpoint. + * @param ipBytes byte array whose ownership is exclusively transferred to this endpoint. */ public final boolean parseIp(byte[] ipBytes) { if (ipBytes == null) return false; if (ipBytes.length == 4) { ipv4Bytes = ipBytes; - ipv4 = String.valueOf( - ipBytes[0] & 0xff) + '.' - + (ipBytes[1] & 0xff) + '.' - + (ipBytes[2] & 0xff) + '.' - + (ipBytes[3] & 0xff - ); + ipv4 = writeIpV4(ipBytes); } else if (ipBytes.length == 16) { if (!parseEmbeddedIPv4(ipBytes)) { ipv6 = writeIpV6(ipBytes); @@ -200,6 +195,33 @@ public final class Endpoint implements Serializable { // for Spark and Flink job return true; } + static String writeIpV4(byte[] ipBytes) { + char[] buf = ipBuffer(); + int pos = 0; + pos = writeBackwards(ipBytes[0] & 0xff, pos, buf); + buf[pos++] = '.'; + pos = writeBackwards(ipBytes[1] & 0xff, pos, buf); + buf[pos++] = '.'; + pos = writeBackwards(ipBytes[2] & 0xff, pos, buf); + buf[pos++] = '.'; + pos = writeBackwards(ipBytes[3] & 0xff, pos, buf); + return new String(buf, 0, pos); + } + + static int writeBackwards(int b, int pos, char[] buf) { + if (b < 10) { + buf[pos] = HEX_DIGITS[b]; + return pos + 1; + } + int i = pos += b < 100 ? 2 : 3; // We write backwards from right to left. + while (b != 0) { + int digit = b % 10; + buf[--i] = HEX_DIGITS[digit]; + b /= 10; + } + return pos; + } + /** Chaining variant of {@link #parseIp(String)} */ public Builder ip(@Nullable String ipString) { parseIp(ipString); @@ -340,19 +362,24 @@ public final class Endpoint implements Serializable { // for Spark and Flink job return IpFamily.Unknown; } - private static boolean notHex(char c) { + static boolean notHex(char c) { return (c < '0' || c > '9') && (c < 'a' || c > 'f') && (c < 'A' || c > 'F'); } - private static final ThreadLocal<char[]> IPV6_TO_STRING = new ThreadLocal<char[]>() { - @Override protected char[] initialValue() { - return new char[39]; // maximum length of encoded ipv6 + static final ThreadLocal<char[]> IP_BUFFER = new ThreadLocal<>(); + + static char[] ipBuffer() { + char[] idBuffer = IP_BUFFER.get(); + if (idBuffer == null) { + idBuffer = new char[39]; // maximum length of encoded ipv6 + IP_BUFFER.set(idBuffer); } - }; + return idBuffer; + } static String writeIpV6(byte[] ipv6) { int pos = 0; - char[] buf = IPV6_TO_STRING.get(); + char[] buf = ipBuffer(); // Compress the longest string of zeros int zeroCompressionIndex = -1; @@ -414,10 +441,10 @@ public final class Endpoint implements Serializable { // for Spark and Flink job {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; // Begin code from com.google.common.net.InetAddresses 23 - private static final int IPV6_PART_COUNT = 8; + static final int IPV6_PART_COUNT = 8; @Nullable - private static byte[] textToNumericFormatV6(String ipString) { + static byte[] textToNumericFormatV6(String ipString) { // An address can have [2..8] colons, and N colons make N+1 parts. String[] parts = ipString.split(":", IPV6_PART_COUNT + 2); if (parts.length < 3 || parts.length > IPV6_PART_COUNT + 1) { @@ -480,7 +507,7 @@ public final class Endpoint implements Serializable { // for Spark and Flink job return rawBytes.array(); } - private static short parseHextet(String ipPart) { + static short parseHextet(String ipPart) { // Note: we already verified that this string contains only hex digits. int hextet = Integer.parseInt(ipPart, 16); if (hextet > 0xffff) { @@ -491,7 +518,7 @@ public final class Endpoint implements Serializable { // for Spark and Flink job // End code from com.google.common.net.InetAddresses 23 // Begin code from io.netty.util.NetUtil 4.1 - private static boolean isValidIpV4Address(String ip, int from, int toExcluded) { + static boolean isValidIpV4Address(String ip, int from, int toExcluded) { int len = toExcluded - from; int i; return len <= 15 && len >= 7 && @@ -501,7 +528,7 @@ public final class Endpoint implements Serializable { // for Spark and Flink job isValidIpV4Word(ip, i + 1, toExcluded); } - private static boolean isValidIpV4Word(CharSequence word, int from, int toExclusive) { + static boolean isValidIpV4Word(CharSequence word, int from, int toExclusive) { int len = toExclusive - from; char c0, c1, c2; if (len < 1 || len > 3 || (c0 = word.charAt(from)) < '0') { @@ -516,7 +543,7 @@ public final class Endpoint implements Serializable { // for Spark and Flink job return c0 <= '9' && (len == 1 || isValidNumericChar(word.charAt(from + 1))); } - private static boolean isValidNumericChar(char c) { + static boolean isValidNumericChar(char c) { return c >= '0' && c <= '9'; } // End code from io.netty.util.NetUtil 4.1 @@ -584,7 +611,7 @@ public final class Endpoint implements Serializable { // for Spark and Flink job } // TODO: replace this with native proto3 encoding - private static final class SerializedForm implements Serializable { + static final class SerializedForm implements Serializable { static final long serialVersionUID = 0L; final String serviceName, ipv4, ipv6; diff --git a/zipkin/src/main/java/zipkin2/internal/Buffer.java b/zipkin/src/main/java/zipkin2/internal/Buffer.java index 925f5ae..c5d9b8b 100644 --- a/zipkin/src/main/java/zipkin2/internal/Buffer.java +++ b/zipkin/src/main/java/zipkin2/internal/Buffer.java @@ -135,12 +135,6 @@ public abstract class Buffer { data[pos + 1] = (byte) HEX_DIGITS[b & 0xf]; } - static void checkNotTruncated(int pos, int lastIndex) { - if (pos > lastIndex) { - throw new IllegalArgumentException("Truncated reading position " + pos); - } - } - public abstract void writeByte(int v); public abstract void write(byte[] v); diff --git a/zipkin/src/main/java/zipkin2/internal/JsonCodec.java b/zipkin/src/main/java/zipkin2/internal/JsonCodec.java index 4fe624a..cce9f9a 100644 --- a/zipkin/src/main/java/zipkin2/internal/JsonCodec.java +++ b/zipkin/src/main/java/zipkin2/internal/JsonCodec.java @@ -182,15 +182,6 @@ public final class JsonCodec { } } - final byte[] bytesWritten; - if (lengthWritten == bytes.length) { - bytesWritten = bytes; - } else { - bytesWritten = new byte[lengthWritten]; - System.arraycopy(bytes, 0, bytesWritten, 0, lengthWritten); - } - - String written = new String(bytesWritten, UTF_8); // Don't use value directly in the message, as its toString might be implemented using this // method. If that's the case, we'd stack overflow. Instead, emit what we've written so far. String message = @@ -200,7 +191,7 @@ public final class JsonCodec { value.getClass().getSimpleName(), lengthWritten, bytes.length, - written); + new String(bytes, 0, lengthWritten, UTF_8)); throw Platform.get().assertionError(message, e); } return b.toByteArrayUnsafe(); diff --git a/zipkin/src/main/java/zipkin2/internal/ThriftCodec.java b/zipkin/src/main/java/zipkin2/internal/ThriftCodec.java index 3b70f15..ef38c5d 100644 --- a/zipkin/src/main/java/zipkin2/internal/ThriftCodec.java +++ b/zipkin/src/main/java/zipkin2/internal/ThriftCodec.java @@ -199,6 +199,7 @@ public final class ThriftCodec { } static String readUtf8(ByteBuffer bytes) { + // TODO: optimize out the array copy here return new String(readByteArray(bytes), UTF_8); } diff --git a/zipkin/src/test/java/zipkin2/EndpointTest.java b/zipkin/src/test/java/zipkin2/EndpointTest.java index defab78..4f28074 100644 --- a/zipkin/src/test/java/zipkin2/EndpointTest.java +++ b/zipkin/src/test/java/zipkin2/EndpointTest.java @@ -56,8 +56,15 @@ public class EndpointTest { assertExpectedIpv4(endpoint); } - @Test - public void ip_string_ipv4() { + @Test public void ip_bytes_ipv4() throws Exception { + Endpoint.Builder newBuilder = Endpoint.newBuilder(); + assertThat(newBuilder.parseIp(Inet4Address.getByName("43.0.192.2").getAddress())).isTrue(); + Endpoint endpoint = newBuilder.build(); + + assertExpectedIpv4(endpoint); + } + + @Test public void ip_string_ipv4() { Endpoint.Builder newBuilder = Endpoint.newBuilder(); assertThat(newBuilder.parseIp("43.0.192.2")).isTrue(); Endpoint endpoint = newBuilder.build(); @@ -93,6 +100,23 @@ public class EndpointTest { .containsExactly(Inet6Address.getByName(ipv6).getAddress()); } + @Test public void parseIp_ipv6_bytes() throws Exception { + String ipv6 = "2001:db8::c001"; + + Endpoint.Builder newBuilder = Endpoint.newBuilder(); + assertThat(newBuilder.parseIp(Inet6Address.getByName(ipv6))).isTrue(); + Endpoint endpoint = newBuilder.build(); + + assertThat(endpoint.ipv4()) + .isNull(); + assertThat(endpoint.ipv4Bytes()) + .isNull(); + assertThat(endpoint.ipv6()) + .isEqualTo(ipv6); + assertThat(endpoint.ipv6Bytes()) + .containsExactly(Inet6Address.getByName(ipv6).getAddress()); + } + @Test public void ip_ipv6_mappedIpv4() { String ipv6 = "::FFFF:43.0.192.2"; Endpoint endpoint = Endpoint.newBuilder().ip(ipv6).build();
