commit d4452b38183e50d20c29bad6fff51bae58ebde0c
Author: Karsten Loesing <[email protected]>
Date: Sun Jun 2 15:56:50 2019 +0200
Add OnionPerf throughput graph.
We calculate throughput from the time between receiving 0.5 and 1 MiB
of a response, which obviously excludes any measurements with
responses smaller than 1 MiB. From the FILESIZE and DATAPERC* fields
we can compute the number of milliseconds that have elapsed between
receiving bytes 524,288 and 1,048,576, which is a total of 524,288
bytes or 4,194,304 bits. We divide the value 4,194,304 by this time
difference to obtain throughput in bits per millisecond which happens
to be the same value as the number of kilobits per second.
Implements #29772.
---
src/main/R/rserver/rserve-init.R | 40 +++++++++++++++++++
.../torproject/metrics/stats/onionperf/Main.java | 30 +++++++++++++++
src/main/resources/web.xml | 4 ++
src/main/resources/web/json/categories.json | 3 +-
src/main/resources/web/json/metrics.json | 12 ++++++
.../resources/web/jsps/reproducible-metrics.jsp | 9 ++++-
src/main/resources/web/jsps/stats.jsp | 26 +++++++++++++
src/main/sql/onionperf/init-onionperf.sql | 45 ++++++++++++++++++++++
src/submods/metrics-lib | 2 +-
9 files changed, 168 insertions(+), 3 deletions(-)
diff --git a/src/main/R/rserver/rserve-init.R b/src/main/R/rserver/rserve-init.R
index 88aa5b9..f74fd03 100644
--- a/src/main/R/rserver/rserve-init.R
+++ b/src/main/R/rserver/rserve-init.R
@@ -697,6 +697,46 @@ plot_onionperf_latencies <- function(start_p, end_p,
server_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
+prepare_onionperf_throughput <- function(start_p = NULL, end_p = NULL,
+    server_p = NULL) {
+  # Reads the throughput statistics file and keeps only rows matching the
+  # given date range and server type; a NULL parameter disables its filter.
+  csv_file <- paste(stats_dir, "onionperf-throughput.csv", sep = "")
+  read_csv(file = csv_file, col_types = cols(
+      date = col_date(format = ""),
+      source = col_character(),
+      server = col_character(),
+      low = col_double(),
+      q1 = col_double(), md = col_double(), q3 = col_double(),
+      high = col_double())) %>%
+    filter(if (is.null(start_p)) TRUE else date >= as.Date(start_p)) %>%
+    filter(if (is.null(end_p)) TRUE else date <= as.Date(end_p)) %>%
+    filter(if (is.null(server_p)) TRUE else server == server_p)
+}
+
+plot_onionperf_throughput <- function(start_p, end_p, server_p, path_p) {
+  # Ribbon spans q1 to q3, thick line is md, thin lines are low and high.
+  throughput <- prepare_onionperf_throughput(start_p, end_p, server_p) %>%
+    complete(date = full_seq(date, period = 1), nesting(source))
+  ggplot(throughput, aes(x = date, fill = source,
+      ymin = q1 / 1000, ymax = q3 / 1000)) +
+    geom_ribbon(alpha = 0.5) +
+    geom_line(aes(y = md / 1000, colour = source), size = 0.75) +
+    geom_line(aes(y = high / 1000, colour = source), size = 0.375) +
+    geom_line(aes(y = low / 1000, colour = source), size = 0.375) +
+    facet_grid(source ~ ., scales = "free", space = "free") +
+    scale_x_date(name = "", breaks = custom_breaks, labels = custom_labels,
+      minor_breaks = custom_minor_breaks) +
+    scale_y_continuous(name = "", limits = c(0, NA),
+      labels = unit_format(unit = "Mbps")) +
+    scale_fill_hue(name = "Source") + scale_colour_hue(name = "Source") +
+    ggtitle(paste("Throughput when downloading from", server_p, "server")) +
+    labs(caption = copyright_notice) +
+    theme(legend.position = "none", strip.background = element_rect(fill = NA),
+      strip.text.y = element_text(angle = 0, hjust = 0))
+  ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
+}
+
prepare_connbidirect <- function(start_p = NULL, end_p = NULL) {
read_csv(file = paste(stats_dir, "connbidirect2.csv", sep = ""),
col_types = cols(
diff --git a/src/main/java/org/torproject/metrics/stats/onionperf/Main.java
b/src/main/java/org/torproject/metrics/stats/onionperf/Main.java
index a75cd1b..8fb762d 100644
--- a/src/main/java/org/torproject/metrics/stats/onionperf/Main.java
+++ b/src/main/java/org/torproject/metrics/stats/onionperf/Main.java
@@ -55,6 +55,9 @@ public class Main {
queryBuildTimes(connection));
writeStatistics(new File(baseDir, "stats/latencies.csv").toPath(),
queryLatencies(connection));
+ writeStatistics(
+ new File(baseDir, "stats/onionperf-throughput.csv").toPath(),
+ queryThroughput(connection));
disconnectFromDatabase(connection);
log.info("Terminated onionperf module.");
}
@@ -321,6 +324,33 @@ public class Main {
return statistics;
}
+  /** Queries the <code>throughput_stats</code> view and returns its rows as
+   * CSV lines after a header line; closes the statement when done. */
+  static List<String> queryThroughput(Connection connection)
+      throws SQLException {
+    log.info("Querying throughput statistics from database.");
+    List<String> statistics = new ArrayList<>();
+    statistics.add("date,source,server,low,q1,md,q3,high");
+    String queryString = "SELECT date, source, server, low, q1, md, q3, high "
+        + "FROM throughput_stats";
+    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd", Locale.US);
+    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
+    try (Statement st = connection.createStatement();
+        ResultSet rs = st.executeQuery(queryString)) {
+      while (rs.next()) {
+        /* NOTE(review): getInt() yields 0 for SQL NULL; confirm intended. */
+        statistics.add(String.format("%s,%s,%s,%d,%d,%d,%d,%d",
+            dateFormat.format(rs.getDate("date", calendar)),
+            getStringFromResultSet(rs, "source"),
+            rs.getString("server"),
+            rs.getInt("low"), rs.getInt("q1"), rs.getInt("md"),
+            rs.getInt("q3"), rs.getInt("high")));
+      }
+    }
+    return statistics;
+  }
+
/** Retrieves the <code>String</code> value of the designated column in the
* current row of the given <code>ResultSet</code> object, or returns the
* empty string if the retrieved value was <code>NULL</code>. */
diff --git a/src/main/resources/web.xml b/src/main/resources/web.xml
index b643b89..045dd2e 100644
--- a/src/main/resources/web.xml
+++ b/src/main/resources/web.xml
@@ -43,6 +43,7 @@
<url-pattern>/torperf-failures.html</url-pattern>
<url-pattern>/onionperf-buildtimes.html</url-pattern>
<url-pattern>/onionperf-latencies.html</url-pattern>
+ <url-pattern>/onionperf-throughput.html</url-pattern>
<url-pattern>/connbidirect.html</url-pattern>
<url-pattern>/hidserv-dir-onions-seen.html</url-pattern>
<url-pattern>/hidserv-rend-relayed-cells.html</url-pattern>
@@ -135,6 +136,9 @@
<url-pattern>/onionperf-latencies.png</url-pattern>
<url-pattern>/onionperf-latencies.pdf</url-pattern>
<url-pattern>/onionperf-latencies.csv</url-pattern>
+ <url-pattern>/onionperf-throughput.png</url-pattern>
+ <url-pattern>/onionperf-throughput.pdf</url-pattern>
+ <url-pattern>/onionperf-throughput.csv</url-pattern>
<url-pattern>/connbidirect.png</url-pattern>
<url-pattern>/connbidirect.pdf</url-pattern>
<url-pattern>/connbidirect.csv</url-pattern>
diff --git a/src/main/resources/web/json/categories.json
b/src/main/resources/web/json/categories.json
index d19aeca..ad0df11 100644
--- a/src/main/resources/web/json/categories.json
+++ b/src/main/resources/web/json/categories.json
@@ -62,7 +62,8 @@
"torperf",
"torperf-failures",
"onionperf-buildtimes",
- "onionperf-latencies"
+ "onionperf-latencies",
+ "onionperf-throughput"
]
},
{
diff --git a/src/main/resources/web/json/metrics.json
b/src/main/resources/web/json/metrics.json
index 006de8b..bfcde22 100644
--- a/src/main/resources/web/json/metrics.json
+++ b/src/main/resources/web/json/metrics.json
@@ -309,6 +309,18 @@
]
},
{
+ "id": "onionperf-throughput",
+ "title": "Throughput",
+ "type": "Graph",
+ "description": "<p>This graph shows throughput when downloading static
files of different sizes over Tor, either from a server on the public internet
or from a version 2 onion server. Throughput is calculated from the time
between receiving 0.5 and 1 MiB of the response. The graph shows the median of
measurements as a thick line, the range of measurements from first to third
quartile as a ribbon, and the highest and lowest non-outlier measurements as thin
lines.</p>",
+ "function": "onionperf_throughput",
+ "parameters": [
+ "start",
+ "end",
+ "server"
+ ]
+ },
+ {
"id": "connbidirect",
"title": "Fraction of connections used uni-/bidirectionally",
"type": "Graph",
diff --git a/src/main/resources/web/jsps/reproducible-metrics.jsp
b/src/main/resources/web/jsps/reproducible-metrics.jsp
index a833b31..aee4d5c 100644
--- a/src/main/resources/web/jsps/reproducible-metrics.jsp
+++ b/src/main/resources/web/jsps/reproducible-metrics.jsp
@@ -619,6 +619,7 @@ Here we explain how we evaluate Torperf/OnionPerf
measurement to obtain the same
<li>Timeouts and failures of downloading files over Tor <a
href="/torperf-failures.html" class="btn btn-primary btn-xs"><i class="fa
fa-chevron-right" aria-hidden="true"></i> graph</a></li>
<li>Circuit build times <a href="/onionperf-buildtimes.html" class="btn
btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i>
graph</a></li>
<li>Circuit round-trip latencies <a href="/onionperf-latencies.html"
class="btn btn-primary btn-xs"><i class="fa fa-chevron-right"
aria-hidden="true"></i> graph</a></li>
+<li>Throughput <a href="/onionperf-throughput.html" class="btn btn-primary
btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a></li>
</ul>
<h4>Step 1: Parse OnionPerf and/or Torperf measurement results</h4>
@@ -636,17 +637,23 @@ Here we explain how we evaluate Torperf/OnionPerf
measurement to obtain the same
<li><code>DATACOMPLETE</code>: Download end time that is only set if the
request succeeded.</li>
<li><code>READBYTES</code>: Total number of bytes read, which indicates
whether this request succeeded (if ≥ <code>FILESIZE</code>) or failed.</li>
<li><code>DIDTIMEOUT</code>: 1 if the request timed out, 0 otherwise.</li>
+<li><code>DATAPERCx</code>: Time when x% of expected bytes were read for x = {
10, 20, 50, 100 }.</li>
<li><code>BUILDTIMES</code>: Comma-separated list of times when circuit hops
were built, which includes all circuits used for making measurement requests,
successful or not.</li>
<li><code>ENDPOINTREMOTE</code>: Hostname, IP address, and port that was used
to connect to the remote server; we use this to distinguish a request to a
public server (if <code>ENDPOINTREMOTE</code> is not present or does not
contain <code>".onion"</code> as substring) or to an onion server.</li>
</ul>
<h4>Step 2: Aggregate measurement results</h4>
-<p>Each of the measurement results parsed in the previous steps constitutes a
single measurement.
+<p>Each of the measurement results parsed in the previous step constitutes a
single measurement.
We're first interested in statistics on download times for the <a
href="/torperf.html">Time to download files over Tor</a> graph.
Therefore we consider only measurements with <code>DATACOMPLETE >
START</code>, for which we calculate the download time as: <code>DATACOMPLETE -
START</code>.
We then compute the 25th, 50th, and 75th percentile of download times by
sorting download times, determining the percentile rank, and using linear
interpolation between adjacent ranks.</p>
+<p>Next we're interested in the average throughput of measurements for the <a
href="/onionperf-throughput.html">Throughput</a> graph.
+We calculate throughput from the time between receiving 0.5 and 1 MiB of a
response, which obviously excludes any measurements with responses smaller than
1 MiB.
+From <code>DATAPERC50</code> and <code>DATAPERC100</code> (if <code>FILESIZE =
1048576</code>) or <code>DATAPERC10</code> and <code>DATAPERC20</code> (if
<code>FILESIZE = 5242880</code>) we can compute the number of milliseconds that
have elapsed between receiving bytes 524,288 and 1,048,576, which is a total of
524,288 bytes or 4,194,304 bits.
+We divide the value 4,194,304 by this time difference to obtain throughput in
bits per millisecond which happens to be the same value as the number of
kilobits per second.</p>
+
<p>We're also interested in circuit round-trip latencies for the <a
href="/onionperf-latencies.html">Circuit round-trip latencies</a> graph.
We measure circuit latency as the time between sending the HTTP request and
receiving the HTTP response header.
We calculate latencies as <code>DATARESPONSE - DATAREQUEST</code> for
measurements with non-zero values for both timestamps.
diff --git a/src/main/resources/web/jsps/stats.jsp
b/src/main/resources/web/jsps/stats.jsp
index 443c292..68c4114 100644
--- a/src/main/resources/web/jsps/stats.jsp
+++ b/src/main/resources/web/jsps/stats.jsp
@@ -570,6 +570,32 @@ Performance <a href="#performance" name="performance"
class="anchor">#</a></h2>
<li><b>high:</b> Highest latency within 1.5 IQR of upper quartile (upper
whisker in a boxplot) of time in milliseconds between sending the HTTP request
and receiving the HTTP response header.</li>
</ul>
+<h3>Throughput
+<a href="/onionperf-throughput.html" class="btn btn-primary btn-xs"><i
class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
+<a href="/onionperf-throughput.csv" class="btn btn-primary btn-xs"><i
class="fa fa-chevron-right" aria-hidden="true"></i> data</a>
+<a href="#onionperf-throughput" name="onionperf-throughput"
class="anchor">#</a></h3>
+
+<h4>Parameters</h4>
+
+<ul>
+<li><b>start:</b> First UTC date (YYYY-MM-DD) to include in the file.</li>
+<li><b>end:</b> Last UTC date (YYYY-MM-DD) to include in the file.</li>
+<li><b>server:</b> Either <b>"public"</b> for requests to a server on the
public internet, or <b>"onion"</b> for requests to a version 2 onion
server.</li>
+</ul>
+
+<h4>Columns</h4>
+
+<ul>
+<li><b>date:</b> UTC date (YYYY-MM-DD) when download performance was
measured.</li>
+<li><b>source:</b> Name of the OnionPerf or Torperf service performing
measurements.</li>
+<li><b>server:</b> Either <b>"public"</b> if the request was made to a server
on the public internet, or <b>"onion"</b> if the request was made to a version
2 onion server.</li>
+<li><b>low:</b> Lowest measured throughput within 1.5 IQR of lower quartile
(lower whisker in a boxplot) in kilobits per second.</li>
+<li><b>q1:</b> First quartile of measured throughput in kilobits per
second.</li>
+<li><b>md:</b> Median of measured throughput in kilobits per second.</li>
+<li><b>q3:</b> Third quartile of measured throughput in kilobits per
second.</li>
+<li><b>high:</b> Highest measured throughput within 1.5 IQR of upper quartile
(upper whisker in a boxplot) in kilobits per second.</li>
+</ul>
+
</div>
<div class="container">
diff --git a/src/main/sql/onionperf/init-onionperf.sql
b/src/main/sql/onionperf/init-onionperf.sql
index 536cd45..b7c41d6 100644
--- a/src/main/sql/onionperf/init-onionperf.sql
+++ b/src/main/sql/onionperf/init-onionperf.sql
@@ -164,3 +164,48 @@ FROM filtered_measurements NATURAL JOIN quartiles
GROUP BY 1, 2, 3
ORDER BY date, source, server;
+-- Explanation of the number 4194304 below for computing kbps: From the
FILESIZE
+-- and DATAPERC* fields we can compute the number of milliseconds that have
+-- elapsed between receiving bytes 524,288 and 1,048,576, which is a total
+-- amount of 524,288 bytes or 4,194,304 bits. If we divide that number of
+-- milliseconds by 4,194,304 we obtain the time in milliseconds needed for
+-- downloading 1 bit, which happens to be the same value as the number of
+-- seconds for downloading 1 kilobit. We want the reciprocal of that value
which
+-- has the unit kilobits per second.
+CREATE OR REPLACE VIEW throughput_stats AS
+-- Step 1: derive one kbps value per measurement (NULL if not computable).
+WITH filtered_measurements AS (
+  SELECT DATE(start) AS date,
+    source,
+    CASE WHEN endpointremote LIKE '%.onion:%' THEN 'onion'
+      ELSE 'public' END AS server,
+    CASE WHEN filesize = 1048576 AND dataperc100 > dataperc50
+      THEN 4194304 / (dataperc100 - dataperc50)
+      WHEN filesize = 5242880 AND dataperc20 > dataperc10
+      THEN 4194304 / (dataperc20 - dataperc10) END AS kbps
+  FROM measurements
+  WHERE endpointremote NOT SIMILAR TO '_{56}.onion%'
+  AND DATE(start) < current_date - 1
+-- Step 2: compute kbps quartiles per date, source, and server.
+), quartiles AS (
+  SELECT date,
+    source,
+    server,
+    PERCENTILE_CONT(ARRAY[0.25,0.5,0.75])
+      WITHIN GROUP(ORDER BY kbps) AS q
+  FROM filtered_measurements
+  GROUP BY 1, 2, 3
+)
+-- Step 3: report quartiles plus whiskers within 1.5 IQR of q1 and q3.
+SELECT date,
+  source,
+  server,
+  MIN(kbps) FILTER (WHERE kbps >= q[1] - ((q[3] - q[1]) * 1.5)) AS low,
+  TRUNC(AVG(q[1])) AS q1,
+  TRUNC(AVG(q[2])) AS md,
+  TRUNC(AVG(q[3])) AS q3,
+  MAX(kbps) FILTER (WHERE kbps <= q[3] + ((q[3] - q[1]) * 1.5)) AS high
+FROM filtered_measurements JOIN quartiles USING (date, source, server)
+GROUP BY 1, 2, 3
+ORDER BY date, source, server;
+
diff --git a/src/submods/metrics-lib b/src/submods/metrics-lib
index 3693e10..e723c06 160000
--- a/src/submods/metrics-lib
+++ b/src/submods/metrics-lib
@@ -1 +1 @@
-Subproject commit 3693e107a3aff7473200ece3ba3889dc9462c7b3
+Subproject commit e723c065b764ecfbb3bb96d4c491e67398b7f21b
_______________________________________________
tor-commits mailing list
[email protected]
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits