commit 0e6936003d0b9b4cc8d9388ea8a56eadcfdce1f9
Author: Karsten Loesing <karsten.loes...@gmx.net>
Date:   Thu Aug 9 16:08:17 2018 +0200

    Make suggested changes to per-graph CSV files.
    
    Implements #26998.
---
 src/main/R/clients/split-clients.R    |   6 +-
 src/main/R/rserver/graphs.R           |  70 ++++++----------
 src/main/resources/web/jsps/stats.jsp | 153 ++++++++--------------------------
 3 files changed, 64 insertions(+), 165 deletions(-)

diff --git a/src/main/R/clients/split-clients.R b/src/main/R/clients/split-clients.R
index 50b03d4..9f80902 100644
--- a/src/main/R/clients/split-clients.R
+++ b/src/main/R/clients/split-clients.R
@@ -1,12 +1,12 @@
 dir.create("RData", showWarnings = FALSE)
 
 c <- read.csv("clients.csv", stringsAsFactors = FALSE)
-data <- c[c$node == 'relay', !(names(c) %in% c("node", "frac"))]
+data <- c[c$node == 'relay', !(names(c) %in% c("node"))]
 save(data, file = "RData/clients-relay.RData")
-data <- c[c$node == 'bridge', !(names(c) %in% c("node", "frac"))]
+data <- c[c$node == 'bridge', !(names(c) %in% c("node"))]
 save(data, file = "RData/clients-bridge.RData")
 
 u <- read.csv("userstats-combined.csv", stringsAsFactors = FALSE)
-data <- u[, !(names(u) %in% c("node", "version", "frac"))]
+data <- u[, !(names(u) %in% c("node", "version"))]
 save(data, file = "RData/userstats-bridge-combined.RData")
 
diff --git a/src/main/R/rserver/graphs.R b/src/main/R/rserver/graphs.R
index 12a80e9..ab37a32 100644
--- a/src/main/R/rserver/graphs.R
+++ b/src/main/R/rserver/graphs.R
@@ -423,7 +423,6 @@ plot_versions <- function(start_p, end_p, path_p) {
 
 write_versions <- function(start_p = NULL, end_p = NULL, path_p) {
   prepare_versions(start_p, end_p) %>%
-    spread(key = "version", value = "relays", fill = 0) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
 
@@ -459,6 +458,7 @@ plot_platforms <- function(start_p, end_p, path_p) {
 
 write_platforms <- function(start_p = NULL, end_p = NULL, path_p) {
   prepare_platforms(start_p, end_p) %>%
+    mutate(platform = tolower(platform)) %>%
     spread(platform, relays) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
@@ -607,8 +607,6 @@ plot_relayflags <- function(start_p, end_p, flag_p, path_p) {
 write_relayflags <- function(start_p = NULL, end_p = NULL, flag_p = NULL,
     path_p) {
   prepare_relayflags(start_p, end_p, flag_p) %>%
-    mutate(flag = tolower(flag)) %>%
-    spread(flag, relays) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
 
@@ -832,9 +830,6 @@ plot_connbidirect <- function(start_p, end_p, path_p) {
 write_connbidirect <- function(start_p = NULL, end_p = NULL, path_p) {
   prepare_connbidirect(start_p, end_p) %>%
     rename(q1 = X0.25, md = X0.5, q3 = X0.75) %>%
-    gather(variable, value, -(date:direction)) %>%
-    unite(temp, direction, variable) %>%
-    spread(temp, value) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
 
@@ -900,7 +895,8 @@ plot_userstats <- function(start_p, end_p, node_p, variable_p, value_p,
     events_p, path_p) {
   load(paste(rdata_dir, "clients-", node_p, ".RData", sep = ""))
   c <- data
-  u <- c[c$date >= start_p & c$date <= end_p, ]
+  u <- c[c$date >= start_p & c$date <= end_p, c("date", "country", "transport",
+      "version", "lower", "upper", "clients")]
   u <- rbind(u, data.frame(date = start_p,
       country = ifelse(variable_p == "country" & value_p != "all", value_p, ""),
       transport = ifelse(variable_p == "transport", value_p, ""),
@@ -1053,8 +1049,7 @@ write_userstats_relay_country <- function(start_p = NULL, end_p = NULL,
       country == ifelse(country_p == "all", "", country_p) else TRUE) %>%
     filter(transport == "") %>%
     filter(version == "") %>%
-    mutate(downturns = clients < lower, upturns = clients > upper) %>%
-    select(date, country, clients, downturns, upturns, lower, upper) %>%
+    select(date, country, clients, lower, upper, frac) %>%
     rename(users = clients) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
@@ -1069,7 +1064,7 @@ write_userstats_bridge_country <- function(start_p = NULL, end_p = NULL,
       country == ifelse(country_p == "all", "", country_p) else TRUE) %>%
     filter(transport == "") %>%
     filter(version == "") %>%
-    select(date, country, clients) %>%
+    select(date, country, clients, frac) %>%
     rename(users = clients) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
@@ -1083,24 +1078,21 @@ write_userstats_bridge_transport <- function(start_p = NULL, end_p = NULL,
     filter(country == "") %>%
     filter(version == "") %>%
     filter(transport != "") %>%
-    select(date, transport, clients)
+    select(date, transport, clients, frac)
   if (is.null(transport_p) || "!<OR>" %in% transport_p) {
     n <- u %>%
       filter(transport != "<OR>") %>%
-      group_by(date) %>%
+      group_by(date, frac) %>%
       summarize(clients = sum(clients))
     u <- rbind(u, data.frame(date = n$date, transport = "!<OR>",
-                             clients = n$clients))
+                             clients = n$clients, frac = n$frac))
   }
   u %>%
     filter(if (!is.null(transport_p)) transport %in% transport_p else TRUE) %>%
-    mutate(transport = ifelse(transport == "<OR>", "default_or_protocol",
-      ifelse(transport == "!<OR>", "any_pt",
-      ifelse(transport == "<??>", "unknown_pluggable_transports",
-      transport)))) %>%
     group_by(date, transport) %>%
-    select(date, transport, clients) %>%
-    spread(transport, clients) %>%
+    select(date, transport, clients, frac) %>%
+    rename(users = clients) %>%
+    arrange(date, transport) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
 
@@ -1113,7 +1105,7 @@ write_userstats_bridge_version <- function(start_p = NULL, end_p = NULL,
     filter(country == "") %>%
     filter(transport == "") %>%
     filter(if (!is.null(version_p)) version == version_p else TRUE) %>%
-    select(date, version, clients) %>%
+    select(date, version, clients, frac) %>%
     rename(users = clients) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
@@ -1159,12 +1151,8 @@ write_userstats_bridge_combined <- function(start_p = NULL, end_p = NULL,
     write_userstats_bridge_country(start_p, end_p, country_p, path_p)
   } else {
     prepare_userstats_bridge_combined(start_p, end_p, country_p) %>%
-      select(date, country, transport, low, high) %>%
-      mutate(transport = ifelse(transport == "<OR>", "default_or_protocol",
-        ifelse(transport == "<??>", "unknown_transport", transport))) %>%
-      gather(variable, value, -(date:transport)) %>%
-      unite(temp, transport, variable) %>%
-      spread(temp, value) %>%
+      select(date, country, transport, low, high, frac) %>%
+      arrange(date, country, transport) %>%
       write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
   }
 }
@@ -1201,8 +1189,8 @@ plot_advbwdist_perc <- function(start_p, end_p, p_p, path_p) {
 write_advbwdist_perc <- function(start_p = NULL, end_p = NULL, p_p = NULL,
     path_p) {
   prepare_advbwdist_perc(start_p, end_p, p_p) %>%
-    unite(temp, variable, percentile) %>%
-    spread(temp, advbw) %>%
+    spread(variable, advbw) %>%
+    rename(p = percentile) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
 
@@ -1238,8 +1226,8 @@ plot_advbwdist_relay <- function(start_p, end_p, n_p, path_p) {
 write_advbwdist_relay <- function(start_p = NULL, end_p = NULL, n_p = NULL,
     path_p) {
   prepare_advbwdist_relay(start_p, end_p, n_p) %>%
-    unite(temp, variable, relay) %>%
-    spread(temp, advbw) %>%
+    spread(variable, advbw) %>%
+    rename(n = relay) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
 
@@ -1249,7 +1237,7 @@ prepare_hidserv_dir_onions_seen <- function(start_p, end_p) {
     filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
     filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
     filter(type == "dir-onions-seen") %>%
-    transmute(date = date, onions = ifelse(frac >= 0.01, wiqm, NA))
+    transmute(date, onions = ifelse(frac >= 0.01, wiqm, NA), frac)
 }
 
 plot_hidserv_dir_onions_seen <- function(start_p, end_p, path_p) {
@@ -1277,7 +1265,7 @@ prepare_hidserv_rend_relayed_cells <- function(start_p, end_p) {
     filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
     filter(type == "rend-relayed-cells") %>%
     transmute(date,
-      relayed = ifelse(frac >= 0.01, wiqm * 8 * 512 / (86400 * 1e9), NA))
+      relayed = ifelse(frac >= 0.01, wiqm * 8 * 512 / (86400 * 1e9), NA), frac)
 }
 
 plot_hidserv_rend_relayed_cells <- function(start_p, end_p, path_p) {
@@ -1440,22 +1428,14 @@ plot_webstats_tb_locale <- function(start_p, end_p, path_p) {
 # plot_webstats_tb_locale needs the preliminary data frame e for its
 # breaks and labels. Left as future work.
 write_webstats_tb_locale <- function(start_p = NULL, end_p = NULL, path_p) {
-  d <- read.csv(paste(stats_dir, "webstats.csv", sep = ""),
-    colClasses = c("log_date" = "Date", "locale" = "character"))
-  d <- d %>%
+  read.csv(paste(stats_dir, "webstats.csv", sep = ""),
+    colClasses = c("log_date" = "Date", "locale" = "character")) %>%
     filter(if (!is.null(start_p)) log_date >= as.Date(start_p) else TRUE) %>%
     filter(if (!is.null(end_p)) log_date <= as.Date(end_p) else TRUE) %>%
-    filter(request_type == "tbid")
-  e <- d
-  e <- aggregate(list(count = e$count), by = list(locale = e$locale), FUN = sum)
-  e <- e[order(e$count, decreasing = TRUE), ]
-  e <- e[1:5, ]
-  d <- aggregate(list(count = d$count), by = list(log_date = d$log_date,
-    locale = ifelse(d$locale %in% e$locale, d$locale, "other")), FUN = sum)
-  d %>%
-    mutate(locale = tolower(locale)) %>%
+    filter(request_type == "tbid") %>%
+    group_by(log_date, locale) %>%
+    summarize(initial_downloads = sum(count)) %>%
     rename(date = log_date) %>%
-    spread(locale, count) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
 
diff --git a/src/main/resources/web/jsps/stats.jsp b/src/main/resources/web/jsps/stats.jsp
index 1c18921..719176c 100644
--- a/src/main/resources/web/jsps/stats.jsp
+++ b/src/main/resources/web/jsps/stats.jsp
@@ -45,7 +45,7 @@ https://metrics.torproject.org/identifier.csv
 <li><b>February 28, 2018:</b> Added per-graph CSV files to eventually replace 
pre-aggregated CSV files.</li>
 <li><b>May 29, 2018:</b> Made all parameters of per-graph CSV files optional 
to support providing both pre-filtered and complete data sets.</li>
 <li><b>July 31, 2018:</b> Announced pending changes to per-graph CSV files to 
become effective on August 15 and pre-aggregated CSV files to be removed by 
September 15.</li>
-<li><b>August 15, 2018 (scheduled):</b> Make the first batch of changes to 
per-graph CSV files (marked as "Suggested change" below).</li>
+<li><b>August 15, 2018:</b> Made the first batch of changes to per-graph CSV 
files.</li>
 <li><b>September 15, 2018 (scheduled):</b> Remove all pre-aggregated CSV 
files.</li>
 </ul>
 
@@ -75,22 +75,11 @@ Users <a href="#users" name="users" 
class="anchor">#</a></h2>
 <li><b>date:</b> UTC date (YYYY-MM-DD) for which user numbers are 
estimated.</li>
 <li><b>country:</b> Two-letter lower-case country code as found in a GeoIP 
database by resolving clients' IP addresses, or <b>"??"</b> if client IP 
addresses could not be resolved. If this column contains the empty string, all 
clients are included, regardless of their country code.</li>
 <li><b>users:</b> Estimated number of clients.</li>
-<li><b>downturns:</b> Whether the estimated number of clients is below the 
lower number of expected clients, indicating a possible censorship-related 
event. If this column contains the empty string, there are no expectations on 
the number of clients.</li>
-<li><b>upturns:</b> Whether the estimated number of clients is above the upper 
number of expected clients, indicating a possible censorship-related event. If 
this column contains the empty string, there are no expectations on the number 
of clients.</li>
-<li><b>lower:</b> Lower number of expected clients under the assumption that 
there has been no censorship event. If this column contains the empty string, 
there are no expectations on the number of clients.</li>
-<li><b>upper:</b> Upper number of expected clients under the assumption that 
there has been no release of censorship. If this column contains the empty 
string, there are no expectations on the number of clients.</li>
+<li><b>lower:</b> Lower number of expected clients under the assumption that 
there has been no censorship event. If <b>users &lt; lower</b>, a 
censorship-related event might have happened in this country on the given day. 
If this column contains the empty string, there are no expectations on the 
number of clients.</li>
+<li><b>upper:</b> Upper number of expected clients under the assumption that 
there has been no release of censorship. If <b>users &gt; upper</b>, a 
censorship-related event might have happened in this country on the given day. 
If this column contains the empty string, there are no expectations on the 
number of clients.</li>
+<li><b>frac:</b> Fraction of relays in percent that the estimate is based 
on.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Remove the <b>downturns</b> and <b>upturns</b> columns which are trivial to 
compute as <b>users &lt; lower</b> and <b>users &gt; upper</b>, and which don't 
necessarily make the CSV file easier to handle. There could even be a gentle 
hint on computing the dots in the graph from two columns.</p>
-</div>
-
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Add <b>frac</b> column ("Fraction of relays (as value between 0 and 1) that the estimate is based on.") which might be relevant for pro users. Related to the discussion on <a href="https://bugs.torproject.org/26950">#26950</a>.</p>
-</div>
-
 <h3>Bridge users by country
 <a href="/userstats-bridge-country.html" class="btn btn-primary btn-xs"><i 
class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/userstats-bridge-country.csv" class="btn btn-primary btn-xs"><i 
class="fa fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -110,13 +99,9 @@ Users <a href="#users" name="users" 
class="anchor">#</a></h2>
 <li><b>date:</b> UTC date (YYYY-MM-DD) for which user numbers are 
estimated.</li>
 <li><b>country:</b> Two-letter lower-case country code as found in a GeoIP 
database by resolving clients' IP addresses, or <b>"??"</b> if client IP 
addresses could not be resolved. If this column contains the empty string, all 
clients are included, regardless of their country code.</li>
 <li><b>users:</b> Estimated number of clients.</li>
+<li><b>frac:</b> Fraction of bridges in percent that the estimate is based 
on.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Add <b>frac</b> column ("Fraction of bridges (as value between 0 and 1) that the estimate is based on.") which might be relevant for pro users. Related to the discussion on <a href="https://bugs.torproject.org/26950">#26950</a>.</p>
-</div>
-
 <h3>Bridge users by transport
 <a href="/userstats-bridge-transport.html" class="btn btn-primary btn-xs"><i 
class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/userstats-bridge-transport.csv" class="btn btn-primary btn-xs"><i 
class="fa fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -134,19 +119,11 @@ Users <a href="#users" name="users" 
class="anchor">#</a></h2>
 
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) for which user numbers are 
estimated.</li>
-<li><b>$transport:</b> One or more columns with the estimated number of 
clients using transport with lower-case name <b>$transport</b> to connect to 
the Tor network using bridges. Examples for transport names are <b>"obfs4"</b>, 
<b>"websocket"</b> for Flash proxy/websocket, <b>"fte"</b> for FTE, 
<b>"any_pt"</b> for any pluggable transport, 
<b>"unknown_pluggable_transports"</b> for unknown pluggable transport(s), or 
<b>"default_or_protocol"</b> for the default OR protocol.</li>
+<li><b>transport:</b> Transport name used by clients to connect to the Tor 
network using bridges. Examples are <b>"obfs4"</b>, <b>"websocket"</b> for 
Flash proxy/websocket, <b>"fte"</b> for FTE, <b>"!&lt;OR&gt;"</b> for any 
pluggable transport, <b>"&lt;??&gt;"</b> for unknown pluggable transport(s), or 
<b>"&lt;OR&gt;"</b> for the default OR protocol.</li>
+<li><b>users:</b> Estimated number of clients.</li>
+<li><b>frac:</b> Fraction of bridges in percent that the estimate is based 
on.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Replace <b>$transport</b> by a <b>transport</b> column for the transport 
name (written as non-percent-encoded !&lt;OR&gt;, &lt;??&gt;, and &lt;OR&gt; 
for consistency with the <b>transport</b> parameter) and a <b>users</b> column 
for the estimated number of clients, similar to the bridge users by country 
graph.</p>
-</div>
-
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Add <b>frac</b> column ("Fraction of bridges (as value between 0 and 1) that the estimate is based on.") which might be relevant for pro users. Related to the discussion on <a href="https://bugs.torproject.org/26950">#26950</a>.</p>
-</div>
-
 <h3>Bridge users by country and transport
 <a href="/userstats-bridge-combined.html" class="btn btn-primary btn-xs"><i 
class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/userstats-bridge-combined.csv" class="btn btn-primary btn-xs"><i 
class="fa fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -165,20 +142,12 @@ Users <a href="#users" name="users" 
class="anchor">#</a></h2>
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) for which user numbers are 
estimated.</li>
 <li><b>country:</b> Two-letter lower-case country code as found in a GeoIP 
database by resolving clients' IP addresses, or <b>"??"</b> if client IP 
addresses could not be resolved.</li>
-<li><b>$transport_high:</b> Upper bound of estimated users from the given 
country and transport. Transport names are written in lower case, and the 
default OR protocol is written as <b>default_or_protocol</b>.</li>
-<li><b>$transport_low:</b> Lower bound of estimated users from the given 
country and transport. Transport names are written in lower case, and the 
default OR protocol is written as <b>default_or_protocol</b>.</li>
+<li><b>transport:</b> Transport name used by clients to connect to the Tor 
network using bridges. Examples are <b>"obfs4"</b>, <b>"websocket"</b> for 
Flash proxy/websocket, <b>"fte"</b> for FTE, <b>"&lt;??&gt;"</b> for unknown 
pluggable transport(s), or <b>"&lt;OR&gt;"</b> for the default OR protocol.</li>
+<li><b>high:</b> Upper bound of estimated users from the given country and 
transport.</li>
+<li><b>low:</b> Lower bound of estimated users from the given country and 
transport.</li>
+<li><b>frac:</b> Fraction of bridges in percent that the estimate is based 
on.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Replace <b>$transport_high</b> and <b>$transport_low</b> by a 
<b>transport</b> column for the transport name (written as non-percent-encoded 
&lt;OR&gt; for consistency with the previous graph) and a <b>high</b> and a 
<b>low</b> column for the upper and lower bound.</p>
-</div>
-
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Add <b>frac</b> column ("Fraction of bridges (as value between 0 and 1) that the estimate is based on.") which might be relevant for pro users. Related to the discussion on <a href="https://bugs.torproject.org/26950">#26950</a>.</p>
-</div>
-
 <h3>Bridge users by IP version
 <a href="/userstats-bridge-version.html" class="btn btn-primary btn-xs"><i 
class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/userstats-bridge-version.csv" class="btn btn-primary btn-xs"><i 
class="fa fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -199,15 +168,9 @@ using bridges, which can be either <b>"v4"</b> or 
<b>"v6"</b>.</li>
 <li><b>date:</b> UTC date (YYYY-MM-DD) for which user numbers are 
estimated.</li>
 <li><b>version:</b> IP version used by clients to connect to the Tor network 
using bridges, which can be either <b>"v4"</b> or <b>"v6"</b>. If this column 
contains the empty string, all clients are included, regardless of their IP 
version.</li>
 <li><b>users:</b> Estimated number of clients.</li>
+<li><b>frac:</b> Fraction of bridges in percent that the estimate is based 
on.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Add <b>frac</b> column ("Fraction of bridges (as value between 0 and 1) that the estimate is based on.") which might be relevant for pro users. Related to the discussion on <a href="https://bugs.torproject.org/26950">#26950</a>.</p>
-</div>
-
-</div>
-
 <div class="container">
 <h2><i class="fa fa-server fa-fw" aria-hidden="true"></i>
 Servers <a href="#servers" name="servers" class="anchor">#</a></h2>
@@ -249,14 +212,10 @@ Servers <a href="#servers" name="servers" 
class="anchor">#</a></h2>
 
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) when relays have been listed as 
running.</li>
-<li><b>$flag:</b> Average number of relays with the given relay flag in lower 
case, which can be <b>"exit"</b>, <b>"fast"</b>, <b>"guard"</b>, 
<b>"hsdir"</b>, <b>"fast"</b>, <b>"running"</b>, and <b>"stable"</b>.</li>
+<li><b>flag:</b> Relay flag, which can be <b>"Exit"</b>, <b>"Fast"</b>, <b>"Guard"</b>, <b>"HSDir"</b>, <b>"Running"</b>, or <b>"Stable"</b>.</li>
+<li><b>relays:</b> Average number of relays.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Replace <b>$flag</b> columns by a <b>flag</b> and a <b>relays</b> column, 
and include the relay flag name in their original capitalization, rather than 
lower-cased.</p>
-</div>
-
 <h3>Relays by tor version
 <a href="/versions.html" class="btn btn-primary btn-xs"><i class="fa 
fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/versions.csv" class="btn btn-primary btn-xs"><i class="fa 
fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -273,14 +232,10 @@ Servers <a href="#servers" name="servers" 
class="anchor">#</a></h2>
 
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) when relays have been listed as 
running.</li>
-<li><b>$version:</b> Average number of relays with the given first three 
dotted numbers of the Tor software version as reported by the relay. An example 
is <b>"0.3.4"</b>.
+<li><b>version:</b> First three dotted numbers of the Tor software version as 
reported by the relay. An example is <b>"0.3.4"</b>.</li>
+<li><b>relays:</b> Average number of relays.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Replace <b>$version</b> columns by a <b>version</b> and a <b>relays</b> 
column.</p>
-</div>
-
 <h3>Relays by platform
 <a href="/platforms.html" class="btn btn-primary btn-xs"><i class="fa 
fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/platforms.csv" class="btn btn-primary btn-xs"><i class="fa 
fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -297,18 +252,13 @@ Servers <a href="#servers" name="servers" 
class="anchor">#</a></h2>
 
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) when relays have been listed as 
running.</li>
-<li><b>BSD:</b> Average number of relays on *BSD.</li>
-<li><b>Linux:</b> Average number of relays on Linux.</li>
-<li><b>Other:</b> Average number of relays on another platform than Linux, 
*BSD, Windows, or macOS.</li>
-<li><b>Windows:</b> Average number of relays on Windows.</li>
-<li><b>macOS:</b> Average number of relays on macOS.</li>
+<li><b>bsd:</b> Average number of relays on *BSD.</li>
+<li><b>linux:</b> Average number of relays on Linux.</li>
+<li><b>macos:</b> Average number of relays on macOS.</li>
+<li><b>other:</b> Average number of relays on another platform than Linux, 
*BSD, Windows, or macOS.</li>
+<li><b>windows:</b> Average number of relays on Windows.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Change platform-specific columns to be all lower-case as a good practice to 
only use lower-cased column names everywhere.</p>
-</div>
-
 <h3>Relays by IP version
 <a href="/relays-ipv6.html" class="btn btn-primary btn-xs"><i class="fa 
fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/relays-ipv6.csv" class="btn btn-primary btn-xs"><i class="fa 
fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -440,14 +390,9 @@ Traffic <a href="#traffic" name="traffic" 
class="anchor">#</a></h2>
 
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) that relays reported bandwidth data 
for.</li>
-<li><b>all_$p:</b> Advertised bandwidth in Gbit/s of the p-th percentile of 
all relays.</li>
-<li><b>exits_$p:</b> Advertised bandwidth in Gbit/s of the p-th percentile of 
relays with the <b>"Exit"</b> relay flag.</li>
-</ul>
-
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Replace <b>all_p$</b> and <b>exits_$p</b> columns by three columns 
<b>p</b>, <b>all</b>, and <b>exit</b>.</p>
-</div>
+<li><b>p:</b> Percentile as value between 0 and 100.</li>
+<li><b>all:</b> Advertised bandwidth in Gbit/s of the p-th percentile of all 
relays.</li>
+<li><b>exits:</b> Advertised bandwidth in Gbit/s of the p-th percentile of 
relays with the <b>"Exit"</b> relay flag.</li>
 
 <h3>Advertised bandwidth of n-th fastest relays
 <a href="/advbwdist-relay.html" class="btn btn-primary btn-xs"><i class="fa 
fa-chevron-right" aria-hidden="true"></i> graph</a>
@@ -466,15 +411,11 @@ Traffic <a href="#traffic" name="traffic" 
class="anchor">#</a></h2>
 
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) that relays reported bandwidth data 
for.</li>
-<li><b>all_$n:</b> Advertised bandwidth in Gbit/s of n-th fastest relay.</li>
-<li><b>exits_$n:</b> Advertised bandwidth in Gbit/s of n-th fastest relay with 
the <b>"Exit"</b> relay flag.</li>
+<li><b>n:</b> Position of the relay in an ordered list of all advertised 
bandwidths, starting at 1 for the fastest relay in the network.</li>
+<li><b>all:</b> Advertised bandwidth in Gbit/s of n-th fastest relay.</li>
+<li><b>exits:</b> Advertised bandwidth in Gbit/s of n-th fastest relay with 
the <b>"Exit"</b> relay flag.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Replace <b>all_n$</b> and <b>exits_$n</b> columns by three columns 
<b>n</b>, <b>all</b>, and <b>exit</b>.</p>
-</div>
-
 <h3>Consumed bandwidth by Exit/Guard flag combination
 <a href="/bwhist-flags.html" class="btn btn-primary btn-xs"><i class="fa 
fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/bwhist-flags.csv" class="btn btn-primary btn-xs"><i class="fa 
fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -533,22 +474,12 @@ Traffic <a href="#traffic" name="traffic" 
class="anchor">#</a></h2>
 
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) for which statistics on 
uni-/bidirectional connection usage were reported.</li>
-<li><b>both_md:</b> Median of fraction of connections classified as both 
reading and writing.</li>
-<li><b>both_q1:</b> First quartile of fraction of connections classified as 
both reading and writing.</li>
-<li><b>both_q3:</b> Third quartile of fraction of connections classified as 
both reading and writing.</li>
-<li><b>read_md:</b> Median of fraction of connections classified as mostly 
reading.</li>
-<li><b>read_q1:</b> First quartile of fraction of connections classified as 
mostly reading.</li>
-<li><b>read_q3:</b> Third quartile of fraction of connections classified as 
mostly reading.</li>
-<li><b>write_md:</b> Median of fraction of connections classified as mostly 
writing.</li>
-<li><b>write_q1:</b> First quartile of fraction of connections classified as 
mostly writing.</li>
-<li><b>write_q3:</b> Third quartile of fraction of connections classified as 
mostly writing.</li>
+<li><b>direction:</b> Direction of reported fraction, which can be 
<b>"read"</b>, <b>"write"</b>, or <b>"both"</b> for connections classified as 
"mostly reading", "mostly writing", or "both reading and writing". Connections 
below the threshold have been removed from this statistics file entirely.</li>
+<li><b>q1:</b> First quartile of fraction of connections.</li>
+<li><b>md:</b> Median of fraction of connections.</li>
+<li><b>q3:</b> Third quartile of fraction of connections.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Replace columns except <b>date</b> by four columns <b>direction</b>, 
<b>q1</b>, <b>md</b>, and <b>q3</b>.</p>
-</div>
-
 </div>
 
 <div class="container">
@@ -679,13 +610,9 @@ Onion Services <a href="#onion-services" name="servers" 
class="anchor">#</a></h2
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) when relays have been listed as 
running.</li>
 <li><b>onions:</b> Estimated number of unique .onion addresses observed by 
onion-service directories.</li>
+<li><b>frac:</b> Total network fraction of statistics reported by 
onion-service directories.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Add <b>frac</b> column as suggested on <a href="https://bugs.torproject.org/26950">#26950</a>.</p>
-</div>
-
 <h3>Onion-service traffic (versions 2 and 3)
 <a href="/hidserv-rend-relayed-cells.html" class="btn btn-primary btn-xs"><i 
class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/hidserv-rend-relayed-cells.csv" class="btn btn-primary btn-xs"><i 
class="fa fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -703,13 +630,9 @@ Onion Services <a href="#onion-services" name="servers" 
class="anchor">#</a></h2
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) when relays have been listed as 
running.</li>
 <li><b>relayed:</b> Estimated bandwidth in Gbit/s relayed on rendezvous 
circuits as observed by rendezvous points.</li>
+<li><b>frac:</b> Total network fraction of statistics reported by rendezvous 
points.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Add <b>frac</b> column as suggested on <a href="https://bugs.torproject.org/26950">#26950</a>.</p>
-</div>
-
 <h3>Fraction of relays reporting onion-service statistics
 <a href="/hidserv-frac-reporting.html" class="btn btn-primary btn-xs"><i 
class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/hidserv-frac-reporting.csv" class="btn btn-primary btn-xs"><i 
class="fa fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -800,14 +723,10 @@ Applications <a href="#applications" name="applications" 
class="anchor">#</a></h
 
 </ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) when requests to 
<code>torproject.org</code> web servers have been logged.</li>
-<li><b>$locale:</b> Number of Tor Browser initial downloads for the given 
locale; limited to the top-5 locales in the requested time period.</li>
+<li><b>locale:</b> Locale, like "en-US" for English (United States), "de" for 
German, etc., and "??" for unrecognized locales.</li>
+<li><b>initial_downloads:</b> Number of Tor Browser initial downloads.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Replace all locale-specific columns by two columns <b>locale</b> and 
<b>count</b> to avoid dynamically changing columns. Maybe also take out the 
limitation to top-5 locales in the file (not the graph), similar to how the 
"Bridge users by country and transport" file contains all transports, not just 
the top-3 ones.</p>
-</div>
-
 <h3>Tor Messenger downloads and updates
 <a href="/webstats-tm.html" class="btn btn-primary btn-xs"><i class="fa 
fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/webstats-tm.csv" class="btn btn-primary btn-xs"><i class="fa 
fa-chevron-right" aria-hidden="true"></i> data</a>



_______________________________________________
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits

Reply via email to