Bearloga has submitted this change and it was merged.

Change subject: Add PaulScore documentation & relative display, fix formatting
......................................................................


Add PaulScore documentation & relative display, fix formatting

- Keeps only pow_1, pow_5, and pow_9, renames them to
  "F = 0.1", "F = 0.5", and "F = 0.9", respectively
- Adds documentation to the PaulScore page
- Adds the option to view relative PaulScores
- Changes formatting on all dygraphs that display percentages

Bug: T144424
Change-Id: I8960aa442f86357d9fa6fb6c68c81724bc3a5dd2
---
M server.R
A tab_documentation/paulscore_approx.html
D tab_documentation/paulscore_approx.md
M ui.R
M utils.R
5 files changed, 90 insertions(+), 29 deletions(-)

Approvals:
  Bearloga: Verified; Looks good to me, approved



diff --git a/server.R b/server.R
index 5ac100c..339306b 100644
--- a/server.R
+++ b/server.R
@@ -78,19 +78,39 @@
   })
 
   output$paulscore_approx_plot_fulltext <- renderDygraph({
-    paulscore_fulltext %>%
+    temp <- paulscore_fulltext
+    if (input$paulscore_relative) {
+      temp$`F = 0.1` <- temp$`F = 0.1` / (1/(1-0.1))
+      temp$`F = 0.5` <- temp$`F = 0.5` / (1/(1-0.5))
+      temp$`F = 0.9` <- temp$`F = 0.9` / (1/(1-0.9))
+    }
+    dyOut <- temp %>%
       polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_paulscore_approx)) %>%
       polloi::make_dygraph(xlab = "Date", ylab = "PaulScore", title = 
"PaulScore for fulltext searches, by day", use_si = FALSE, group = 
"paulscore_approx") %>%
       dyRangeSelector %>%
       dyLegend(labelsDiv = "paulscore_approx_legend", show = "always")
+    if (input$paulscore_relative) {
+      dyOut <- dyAxis(dyOut, "y", axisLabelFormatter = "function(x) { return 
Math.round(100*x, 2) + '%'; }", valueFormatter = "function(x) { return 
Math.round(100*x, 2) + '%'; }")
+    }
+    return(dyOut)
   })
 
   output$paulscore_approx_plot_autocomplete <- renderDygraph({
-    paulscore_autocomplete %>%
+    temp <- paulscore_autocomplete
+    if (input$paulscore_relative) {
+      temp$`F = 0.1` <- temp$`F = 0.1` / (1/(1-0.1))
+      temp$`F = 0.5` <- temp$`F = 0.5` / (1/(1-0.5))
+      temp$`F = 0.9` <- temp$`F = 0.9` / (1/(1-0.9))
+    }
+    dyOut <- temp %>%
       polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_paulscore_approx)) %>%
       polloi::make_dygraph(xlab = "Date", ylab = "PaulScore", title = 
"PaulScore for autocomplete searches, by day", use_si = FALSE, group = 
"paulscore_approx") %>%
       dyRangeSelector %>%
       dyLegend(labelsDiv = "paulscore_approx_legend", show = "always")
+    if (input$paulscore_relative) {
+      dyOut <- dyAxis(dyOut, "y", axisLabelFormatter = "function(x) { return 
Math.round(100*x, 2) + '%'; }", valueFormatter = "function(x) { return 
Math.round(100*x, 2) + '%'; }")
+    }
+    return(dyOut)
   })
 
   ## Mobile value boxes
@@ -197,6 +217,7 @@
     position_prop %>%
       polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_app_click_position)) %>%
       polloi::make_dygraph(xlab = "", ylab = "Proportion of Clicks (%)", title 
= "Proportion of Clicks on Nth Result") %>%
+      dyAxis("y", axisLabelFormatter = "function(x) { return x + '%'; }", 
valueFormatter = "function(x) { return x + '%'; }") %>%
       dyAxis("x", ticker = "Dygraph.dateTicker", axisLabelFormatter = 
polloi::custom_axis_formatter,
              axisLabelWidth = 100, pixelsPerLabel = 80) %>%
       dyLegend(labelsDiv = "app_click_position_legend") %>%
@@ -207,6 +228,7 @@
     source_prop %>%
       polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_app_invoke_source)) %>%
       polloi::make_dygraph(xlab = "", ylab = "Proportion of Search Sessions 
(%)", title = "Proportion of Search Sessions, by Invoke Source") %>%
+      dyAxis("y", axisLabelFormatter = "function(x) { return x + '%'; }", 
valueFormatter = "function(x) { return x + '%'; }") %>%
       dyAxis("x", ticker = "Dygraph.dateTicker", axisLabelFormatter = 
polloi::custom_axis_formatter,
              axisLabelWidth = 100, pixelsPerLabel = 80) %>%
       dyLegend(labelsDiv = "app_invoke_source_legend") %>%
@@ -256,6 +278,7 @@
       polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_failure_rate)) %>%
       polloi::make_dygraph(xlab = "Date", ylab = "Zero Results Rate (%)", 
title = "Zero Results Rate, by day",
                            legend_name = "ZRR") %>%
+      dyAxis("y", axisLabelFormatter = "function(x) { return x + '%'; }", 
valueFormatter = "function(x) { return x + '%'; }") %>%
       dyRangeSelector(fillColor = "") %>%
       dyEvent(as.Date("2016-02-01"), "A (format switch)", labelLoc = "bottom") 
%>%
       dyEvent(as.Date("2016-03-16"), "Completion Suggester Deployed", labelLoc 
= "bottom")
@@ -264,7 +287,8 @@
   output$failure_rate_change_plot <- renderDygraph({
     polloi::data_select(input$failure_rate_automata, 
failure_roc_with_automata, failure_roc_no_automata) %>%
       polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_failure_rate)) %>%
-      polloi::make_dygraph(xlab = "Date", ylab = "Change (%)", title = "Zero 
Results rate change, by day", legend_name = "Change") %>%
+      polloi::make_dygraph(xlab = "Date", ylab = "Change", title = "Zero 
Results rate change, by day", legend_name = "Change") %>%
+      dyAxis("y", axisLabelFormatter = "function(x) { return x + '%'; }", 
valueFormatter = "function(x) { return Math.round(x, 3) + '%'; }") %>%
       dyRangeSelector(fillColor = "", strokeColor = "") %>%
       dyEvent(as.Date("2016-03-16"), "Completion Suggester Deployed", labelLoc 
= "bottom")
   })
@@ -275,11 +299,12 @@
       polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_failure_breakdown)) %>%
 
       { xts(.[, -1], order.by = .$date) }
-    xts_data %>% dygraph(xlab = "Date", ylab = "Zero Results Rate (%)",
+    xts_data %>% dygraph(xlab = "Date", ylab = "Zero Results Rate",
                          main = "Zero result rate by search type") %>%
       dyLegend(width = 600, show = "always", labelsDiv = 
"failure_breakdown_plot_legend") %>%
       dyOptions(strokeWidth = 2, drawPoints = FALSE, pointSize = 3, labelsKMB 
= TRUE, includeZero = TRUE) %>%
       dyCSS(css = system.file("custom.css", package = "polloi")) %>%
+      dyAxis("y", axisLabelFormatter = "function(x) { return x + '%'; }", 
valueFormatter = "function(x) { return x + '%'; }") %>%
       # We use grep(colnames(xts_data), value = TRUE) because smoothing 
appends "(... median)" to colnames.
       # Customize the full_text and prefix series colors so they match 
"Full-Text Search" and "Prefix Search":
       dySeries(grep("Full-Text Search", colnames(xts_data), value = TRUE, 
fixed = TRUE),
@@ -302,7 +327,8 @@
   output$suggestion_dygraph_plot <- renderDygraph({
     polloi::data_select(input$failure_suggestions_automata, 
suggestion_with_automata, suggestion_no_automata) %>%
       polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_failure_suggestions)) %>%
-      polloi::make_dygraph(xlab = "Date", ylab = "Zero Results Rate (%)", 
title = "Zero Result Rates with Search Suggestions") %>%
+      polloi::make_dygraph(xlab = "Date", ylab = "Zero Results Rate", title = 
"Zero Result Rates with Search Suggestions") %>%
+      dyAxis("y", axisLabelFormatter = "function(x) { return x + '%'; }", 
valueFormatter = "function(x) { return x + '%'; }") %>%
       dyRangeSelector(fillColor = "") %>%
       dyEvent(as.Date("2016-02-01"), "A (format switch)", labelLoc = "bottom") 
%>%
       dyEvent(as.Date("2016-03-16"), "Completion Suggester Deployed", labelLoc 
= "bottom")
@@ -358,7 +384,8 @@
     polloi::data_select(input$failure_langproj_automata, 
langproj_with_automata, langproj_no_automata) %>%
       aggregate_wikis(input$language_selector, input$project_selector) %>%
       polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_failure_langproj)) %>%
-      polloi::make_dygraph(xlab = "", ylab = "Zero Results Rate (%)", title = 
"Zero result rate by language and project") %>%
+      polloi::make_dygraph(xlab = "", ylab = "Zero Results Rate", title = 
"Zero result rate by language and project") %>%
+      dyAxis("y", axisLabelFormatter = "function(x) { return x + '%'; }", 
valueFormatter = "function(x) { return x + '%'; }") %>%
       dyLegend(show = "always", width = 400, labelsDiv = 
"failure_langproj_legend") %>%
       dyAxis("x", axisLabelFormatter = polloi::custom_axis_formatter) %>%
       dyRangeSelector(fillColor = "")
@@ -572,10 +599,14 @@
                     valueRange = c(-1, 1) * 
max(max(abs(as.numeric(zrr$Change)), na.rm = TRUE), 10),
                     axisLineColor = RColorBrewer::brewer.pal(3, "Set2")[2],
                     axisLabelColor = RColorBrewer::brewer.pal(3, "Set2")[2],
-                    independentTicks = TRUE, drawGrid = FALSE) %>%
+                    independentTicks = TRUE, drawGrid = FALSE,
+                    axisLabelFormatter = "function(x) { return x + '%'; }",
+                    valueFormatter = "function(x) { return Math.round(x, 3) + 
'%'; }") %>%
              dyAxis("y", drawGrid = FALSE,
                     axisLineColor = RColorBrewer::brewer.pal(3, "Set2")[1],
-                    axisLabelColor = RColorBrewer::brewer.pal(3, "Set2")[1]) 
%>%
+                    axisLabelColor = RColorBrewer::brewer.pal(3, "Set2")[1],
+                    axisLabelFormatter = "function(x) { return x + '%'; }",
+                    valueFormatter = "function(x) { return x + '%'; }") %>%
              dyLimit(limit = 0, color = RColorBrewer::brewer.pal(3, 
"Set2")[2], strokePattern = "dashed") %>%
              dyLegend(width = 400, show = "always") %>%
              dyOptions(strokeWidth = 3, colors = RColorBrewer::brewer.pal(3, 
"Set2"),
@@ -733,7 +764,7 @@
       cols_to_keep <- base::setdiff(cols_to_keep, 5)
     }
     temp[, cols_to_keep]
-  })
+  }, rownames = TRUE, striped = TRUE)
 
   # Check datasets for missing data and notify user which datasets are missing 
data (if any)
   output$message_menu <- renderMenu({
diff --git a/tab_documentation/paulscore_approx.html 
b/tab_documentation/paulscore_approx.html
new file mode 100644
index 0000000..0661123
--- /dev/null
+++ b/tab_documentation/paulscore_approx.html
@@ -0,0 +1,34 @@
+<h1>PaulScore Approximations</h1>
+
+<div>
+<div style = "float: left; width: 47%; padding: 0 3% 0 1%;">
+<p>"PaulScore" is the name we've given to a metric proposed by Paul Nelson in 
a talk he gave at <a href = 
"https://www.youtube.com/watch?v=YJ_amC9gZmk&t=16m38s" title = "Paul Nelson's 
talk at Elasticon">Elasticon</a>. We use PaulScore to evaluate the quality of 
results provided by CirrusSearch or proposed modifications to CirrusSearch, 
based on historical click data. A big advantage of the PaulScore is that it 
relies on user click history to award points, so it is easy to compute.</p>
+<p>This dashboard shows the PaulScore approximation for 3 values of $F$: 0.1, 
0.5, and 0.9. The maximum score possible for each value of $F$ is $1/(1-F)$, so 
the dashboard has the option of looking at relative PaulScores, i.e. the 
computed value divided by the maximum possible value for the given $F$.</p>
+<p>For auto-completion suggestions, we expect a much lower score since most 
queries get no clicks -- i.e., while typing many results are shown and ignored 
-- and most users will only click on one result, whereas full-text searchers 
can more easily go back to the results page or open multiple results in other 
windows.</p>
+<p>For more details, please see <a href = 
"https://www.mediawiki.org/wiki/Wikimedia_Discovery/Search/Glossary#PaulScore" 
title = "Definition of PaulScore">Discovery's Search glossary</a>.</p>
+</div>
+<div style = "float: left; width: 46%; padding: 0 1% 0 3%;">
+<p>PaulScore is computed via the following steps:</p>
+<ol>
+<li>Pick scoring factor $0 < F < 1$.</li>
+<li>For $i$-th search session $S_i$ $(i = 1, \ldots, n)$ containing $m$ 
queries $Q_1, \ldots, Q_m$ and search result sets $\mathbf{R}_1, \ldots, 
\mathbf{R}_m$: <ol>
+    <li>For each $j$-th search query $Q_j$ with result set $\mathbf{R}_j$, let 
$\nu_j$ be the query score: $$\nu_j = \sum_{k~\in~\{\text{0-based positions of 
clicked results in}~\mathbf{R}_j\}} F^k.$$</li>
+    <li>Let user's average query score $\bar{\nu}_{(i)}$ be $$\bar{\nu}_{(i)} 
= \frac{1}{m} \sum_{j = 1}^m \nu_j.$$</li>
+  </ol></li>
+<li>Then the PaulScore is the average of all users' average query scores: 
$$\text{PaulScore}~=~\frac{1}{n} \sum_{i = 1}^n \bar{\nu}_{(i)}.$$</li>
+</ol>
+</div>
+</div>
+<div style = "clear: both;"></div>
+
+<h2>Questions, bug reports, and feature suggestions</h2>
+
+<p>For technical, non-bug questions, <a href = 
"mailto:mpo...@wikimedia.org?subject=Dashboard%20Question">email Mikhail</a>. 
If you experience a bug or notice something wrong or have a suggestion, <a href 
= 
"https://phabricator.wikimedia.org/maniphest/task/create/?projects=Discovery">open
 a ticket in Phabricator</a> in the Discovery board or <a href = 
"mailto:d...@wikimedia.org?subject=Dashboard%20Question">email Deb</a>.</p>
+
+<hr style="border-color: gray;">
+<p style="font-size: small; color: gray;">
+  <strong>Link to this dashboard:</strong>
+  <a href="http://discovery.wmflabs.org/metrics/#paulscore_approx">
+    http://discovery.wmflabs.org/metrics/#paulscore_approx
+  </a>
+</p>
diff --git a/tab_documentation/paulscore_approx.md 
b/tab_documentation/paulscore_approx.md
deleted file mode 100644
index 4163a00..0000000
--- a/tab_documentation/paulscore_approx.md
+++ /dev/null
@@ -1,15 +0,0 @@
-PaulScore Approximations
-=======
-...
-
-Questions, bug reports, and feature suggestions
-------
-For technical, non-bug questions, [email 
Mikhail](mailto:mpo...@wikimedia.org?subject=Dashboard%20Question). If you 
experience a bug or notice something wrong or have a suggestion, [open a ticket 
in 
Phabricator](https://phabricator.wikimedia.org/maniphest/task/create/?projects=Discovery)
 in the Discovery board or [email 
Deb](mailto:d...@wikimedia.org?subject=Dashboard%20Question).
-
-<hr style="border-color: gray;">
-<p style="font-size: small; color: gray;">
-  <strong>Link to this dashboard:</strong>
-  <a href="http://discovery.wmflabs.org/metrics/#paulscore_approx">
-    http://discovery.wmflabs.org/metrics/#paulscore_approx
-  </a>
-</p>
diff --git a/ui.R b/ui.R
index cce737a..75d69b0 100644
--- a/ui.R
+++ b/ui.R
@@ -140,11 +140,22 @@
                 dygraphOutput("desktop_load_plot"),
                 includeMarkdown("./tab_documentation/desktop_load.md")),
         tabItem(tabName = "paulscore_approx",
-                polloi::smooth_select("smoothing_paulscore_approx"),
+                fluidRow(
+                  column(polloi::smooth_select("smoothing_paulscore_approx"), 
width = 6),
+                  column(checkboxInput("paulscore_relative", "Use relative 
PaulScores", FALSE),
+                         helpText("Divides PaulScore by the maximum possible 
score for each F"), width = 6)
+                ),
                 dygraphOutput("paulscore_approx_plot_fulltext"),
                 div(id = "paulscore_approx_legend", style = "text-align: 
center;"),
                 dygraphOutput("paulscore_approx_plot_autocomplete"),
-                includeMarkdown("./tab_documentation/paulscore_approx.md")),
+                withMathJax(),
+                tags$div(HTML("<!-- Additional MathJax configuration -->
+                  <script type='text/x-mathjax-config'>
+                    MathJax.Hub.Config({
+                      tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']]}
+                    });
+                  </script>")),
+                includeHTML("./tab_documentation/paulscore_approx.html")),
         tabItem(tabName = "mobile_events",
                 fluidRow(
                   valueBoxOutput("mobile_event_searches"),
diff --git a/utils.R b/utils.R
index afbb776..68567f7 100644
--- a/utils.R
+++ b/utils.R
@@ -189,9 +189,9 @@
 }
 
 read_paul_score <- function() {
-  data <- polloi::read_dataset("search/paulscore_approximations.tsv")
-  paulscore_autocomplete <<- data[data$event_source == "autocomplete", -2]
-  paulscore_fulltext <<- data[data$event_source == "fulltext", -2]
+  data <- polloi::read_dataset("search/paulscore_approximations.tsv")[, 
c("date", "event_source", "pow_1", "pow_5", "pow_9")]
+  paulscore_autocomplete <<- data[data$event_source == "autocomplete", -2] %>% 
set_names(c("date", "F = 0.1", "F = 0.5", "F = 0.9"))
+  paulscore_fulltext <<- data[data$event_source == "fulltext", -2] %>% 
set_names(c("date", "F = 0.1", "F = 0.5", "F = 0.9"))
 }
 
 aggregate_wikis <- function(data, languages, projects) {

-- 
To view, visit https://gerrit.wikimedia.org/r/311598
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I8960aa442f86357d9fa6fb6c68c81724bc3a5dd2
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/rainbow
Gerrit-Branch: master
Gerrit-Owner: Bearloga <mpo...@wikimedia.org>
Gerrit-Reviewer: Bearloga <mpo...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to