jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/391008 )

Change subject: Initial
......................................................................


Initial

Change-Id: I3cb15f8dc8fed47e571dc9884541af48683bb29a
---
A server.R
A ui.R
A www/Wikidata-logo-en.png
3 files changed, 774 insertions(+), 0 deletions(-)

Approvals:
  GoranSMilovanovic: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/server.R b/server.R
new file mode 100644
index 0000000..86149ee
--- /dev/null
+++ b/server.R
@@ -0,0 +1,409 @@
+### ---------------------------------------------------------------------------
+### --- WDCM Dashboard Module, v. Beta 0.1
+### --- Script: server.R, v. Beta 0.1
+### ---------------------------------------------------------------------------
+
+### --- Setup
+rm(list = ls())
+### --------------------------------
+### --- general
+library(shiny)
+library(RMySQL)
+library(data.table)
+library(DT)
+library(stringr)
+library(tidyr)
+library(dplyr)
+library(reshape2)
+### --- compute
+library(parallelDist)
+library(smacof)
+### --- visualization
+library(wordcloud)
+library(RColorBrewer)
+library(visNetwork)
+library(rbokeh)
+library(networkD3)
+library(ggplot2)
+library(ggrepel)
+library(scales)
+library(ggvis)
+
+### --- Server (Session) Scope
+### --------------------------------
+
+### --- Credentials
+# setwd('/home/goransm/WMDE/WDCM/WDCM_RScripts/WDCM_Dashboard/aux')
+setwd('/srv/shiny-server/aux')
+
+### --- mySQLCreds.csv is read relative to the directory set above;
+### --- its first column is dropped, and the `user` and `password`
+### --- columns are used for the connection below.
+mySQLCreds <- fread("mySQLCreds.csv", 
+                    header = TRUE,
+                    drop = 1)
+
+### -- Connect
+### --- NOTE: the MySQL option file used to be passed under a misspelled
+### --- argument name ("defult.file"), which is not an argument of the
+### --- RMySQL connection method. It is now passed as `default.file`,
+### --- but only when the file is readable by this process; otherwise
+### --- NULL is passed and the connection relies on the explicit
+### --- user/password from mySQLCreds.csv.
+mySQLDefaultFile <- "/home/goransm/mySQL_Credentials/replica.my.cnf"
+if (file.access(mySQLDefaultFile, mode = 4) != 0) {
+  mySQLDefaultFile <- NULL
+}
+con <- dbConnect(MySQL(), 
+                 host = "tools.labsdb", 
+                 default.file = mySQLDefaultFile,
+                 dbname = "u16664__wdcm_p",
+                 user = mySQLCreds$user,
+                 password = mySQLCreds$password)
+
+### --- list existing tables
+### --- (kept in `st`; its column is renamed to "tables")
+res <- dbSendQuery(con, "SHOW TABLES;")
+st <- fetch(res, n = -1)
+dbClearResult(res)
+names(st) <- "tables"
+
+### --- SET CHARACTER SET utf8
+res <- dbSendQuery(con, "SET CHARACTER SET utf8;")
+dbClearResult(res)
+
+### --- fetch wdcm2_project
+### --- (all rows of the table are pulled into wdcmProject)
+res <- dbSendQuery(con, "SELECT * FROM wdcm2_project;")
+wdcmProject <- fetch(res, n = -1)
+dbClearResult(res)
+
+### --- determine how many project types are present
+### --- and assign Brewer colors:
+### --- one "Set1" color is requested per distinct value of `projectype`
+lengthProjectColor <- length(unique(wdcmProject$projectype))
+projectTypeColor <- brewer.pal(n = lengthProjectColor, name = "Set1")
+
+### --- fetch wdcm2_project_category_2dmap
+res <- dbSendQuery(con, "SELECT * FROM wdcm2_project_category_2dmap;")
+wdcm2_project_category_2dmap <- fetch(res, n = -1)
+dbClearResult(res)
+### --- the third column is renamed so that it can serve as the join key
+### --- against wdcmProject
+colnames(wdcm2_project_category_2dmap)[3] <- "eu_project"
+wdcm2_project_category_2dmap <- wdcm2_project_category_2dmap %>%
+  left_join(wdcmProject, by = "eu_project")
+
+### --- label sets: for every project type, the `nTop` projects with the
+### --- highest eu_count (rows are taken as 1:nTop after sorting, exactly
+### --- as in the three separate computations this helper replaces)
+topProjectsPerType <- function(nTop) {
+  projectTypes <- unique(wdcm2_project_category_2dmap$projecttype)
+  unlist(lapply(projectTypes, function(x) {
+    w <- which(wdcm2_project_category_2dmap$projecttype %in% x)
+    ranked <- arrange(wdcm2_project_category_2dmap[w, ], desc(eu_count))
+    ranked[1:nTop, ]$eu_project
+  }))
+}
+labelSet <- topProjectsPerType(5)
+labelSetSmall1 <- topProjectsPerType(1)
+labelSetSmall3 <- topProjectsPerType(3)
+
+### --- copy of the map in which every project outside labelSet
+### --- gets an empty label
+wdcm2_project_category_2dmapReduceLabels <- wdcm2_project_category_2dmap
+unlabeledProjects <- 
+  !(wdcm2_project_category_2dmapReduceLabels$eu_project %in% labelSet)
+wdcm2_project_category_2dmapReduceLabels$eu_project[unlabeledProjects] <- ""
+
+### --- display names for columns 3, 5 and 6 of the full map
+colnames(wdcm2_project_category_2dmap)[c(3, 5, 6)] <- 
+  c('Project', 'Usage', 'Project Type')
+
+### --- per-row color: position of the row's project type among the
+### --- sorted distinct project types indexes into projectTypeColor
+sortedProjectTypes <- 
+  sort(unique(wdcm2_project_category_2dmap$`Project Type`))
+wdcm2_project_category_2dmap$projectTypeColor <- 
+  sapply(wdcm2_project_category_2dmap$`Project Type`, function(x) {
+    projectTypeColor[which(sortedProjectTypes %in% x)]
+  })
+
+### --- fetch wdcm2_category
+res <- dbSendQuery(con, "SELECT * FROM wdcm2_category;")
+wdcmCategory <- fetch(res, n = -1)
+dbClearResult(res)
+
+### --- fetch wdcm2_category_project_2dmap
+### --- and join it with wdcmCategory on `category`;
+### --- columns 3 and 4 of the joined table get display names
+res <- dbSendQuery(con, "SELECT * FROM wdcm2_category_project_2dmap;")
+wdcm2_category_project_2dmap <- fetch(res, n = -1)
+dbClearResult(res)
+wdcm2_category_project_2dmap <- wdcm2_category_project_2dmap %>%
+  left_join(wdcmCategory, by = "category")
+colnames(wdcm2_category_project_2dmap)[3:4] <- c('Category', 'Usage')
+
+### --- fetch wdcm2_project_category
+### --- (all columns are renamed to their display names)
+res <- dbSendQuery(con, "SELECT * FROM wdcm2_project_category;")
+wdcmProjectCategory <- fetch(res, n = -1)
+dbClearResult(res)
+colnames(wdcmProjectCategory) <- 
+  c('Project', 'Category', 'Usage', 'Project Type')
+
+### --- Disconnect
+### --- (everything the session functions need is in memory by now)
+dbDisconnect(con)
+
+### --- shinyServer
+shinyServer(function(input, output, session) {
+  
+  ### ----------------------------------
+  ### --- TAB: Overview
+  ### ----------------------------------
+  
+  ### --- output$overviewPlot
+  ### --- Static bubble chart of the reduced-label 2D map:
+  ### --- position = (D1, D2), bubble size = eu_count,
+  ### --- color = projecttype, text label = eu_project.
+  output$overviewPlot <- renderPlot({
+    ggplot(wdcm2_project_category_2dmapReduceLabels,
+           aes(x = D1, y = D2, color = projecttype, label = eu_project)) +
+      geom_point(aes(size = eu_count), shape = 21) +
+      scale_size(name = "Usage",
+                 breaks = waiver(),
+                 labels = comma,
+                 limits = NULL,
+                 range = c(.5, 30),
+                 trans = "identity",
+                 guide = "legend") +
+      scale_colour_manual(values = projectTypeColor, name = "Project Type") +
+      geom_text_repel(size = 5,
+                      fontface = 'bold',
+                      segment.size = .25,
+                      show.legend = FALSE) +
+      theme_bw() +
+      # - all axis decoration, the panel border and the grid are removed
+      theme(axis.text.x = element_blank(),
+            axis.text.y = element_blank(),
+            axis.title.x = element_blank(),
+            axis.title.y = element_blank(),
+            axis.ticks.x = element_blank(),
+            axis.ticks.y = element_blank(),
+            panel.background = element_rect(color = "white", fill = "white"),
+            panel.border = element_blank(),
+            panel.grid = element_blank(),
+            legend.text = element_text(size = 14),
+            legend.title = element_text(size = 15))
+  }) %>% withProgress(message = 'Generating plot',
+                      min = 0,
+                      max = 1,
+                      value = 1, {incProgress(amount = 1)})
+  
+  ### --- output$overviewPlotDynamic
+  ### --- Interactive (rbokeh) version of the project map:
+  ### --- point size = log(Usage), color by 'Project Type',
+  ### --- hover shows Project and Usage; axes are hidden.
+  output$overviewPlotDynamic <- renderRbokeh({
+    figure(width = 1400, height = 900, logo = NULL) %>%
+      ly_points(D1, D2,
+                data = wdcm2_project_category_2dmap,
+                size = log(Usage),
+                color = 'Project Type',
+                hover = list(Project, Usage)) %>%
+      x_axis(visible = FALSE) %>%
+      y_axis(visible = FALSE) %>%
+      theme_grid(which = c("x", "y"), grid_line_color = "white") %>%
+      theme_plot(outline_line_alpha = 0) %>%
+      set_palette(discrete_color = pal_color(projectTypeColor))
+  }) %>% withProgress(message = 'Generating plot',
+                      min = 0,
+                      max = 1,
+                      value = 1, {incProgress(amount = 1)})
+  
+  ### --- output$usageTendencyPlot
+  ### --- Bubble chart of wdcm2_category_project_2dmap:
+  ### --- position = (D1, D2), bubble size = Usage, text label = Category.
+  ### --- NOTE: no color aesthetic is mapped in this plot (point color is
+  ### --- the constant "darkblue"), so the former
+  ### --- scale_color_discrete(guide=FALSE) had nothing to act on; it was
+  ### --- dropped because `guide = FALSE` is deprecated in current ggplot2.
+  output$usageTendencyPlot <- renderPlot({
+    ggplot(wdcm2_category_project_2dmap, aes(x = D1, 
+                                             y = D2, 
+                                             label = Category)) +
+      geom_point(aes(size = Usage), fill = "white", color = "darkblue", shape = 21) +
+      scale_size(name = "Usage", 
+                 breaks = waiver(), 
+                 labels = comma,
+                 limits = NULL, 
+                 range = c(2, 20), 
+                 trans = "identity", 
+                 guide = "legend") + 
+      theme_bw() +
+      geom_text(size = 4, show.legend = FALSE) +
+      theme(axis.text.x = element_blank()) +
+      theme(axis.text.y = element_blank()) +
+      theme(axis.title.x = element_blank()) +
+      theme(axis.title.y = element_blank()) +
+      theme(axis.ticks.x = element_blank()) +
+      theme(axis.ticks.y = element_blank()) +
+      theme(panel.border = element_blank()) +
+      theme(panel.grid = element_blank()) + 
+      theme(legend.text = element_text(size = 10)) +
+      theme(legend.title = element_text(size = 12))
+  }) %>% withProgress(message = 'Generating plot',
+                      min = 0,
+                      max = 1,
+                      value = 1, {incProgress(amount = 1)})
+  
+  ### --- output$projectRankFrequencyPlot
+  ### --- Rank-frequency plot of project usage: x = usage rank
+  ### --- (1 = highest eu_count), y = eu_count. Only the projects in
+  ### --- labelSetSmall1 keep their text label.
+  ### --- NOTE: the rank used to be taken from order(eu_count), which is
+  ### --- the sorting permutation and equals the descending rank only if
+  ### --- wdcmProject already arrives sorted by descending eu_count.
+  ### --- rank(-eu_count, ties.method = "first") yields the same values in
+  ### --- that case and stays correct for any other row order.
+  output$projectRankFrequencyPlot <- renderPlot({
+    dataSet <- data.frame(Rank = rank(-wdcmProject$eu_count, 
+                                      ties.method = "first"),
+                          Frequency = wdcmProject$eu_count, 
+                          Project = wdcmProject$eu_project,
+                          stringsAsFactors = FALSE)
+    dataSet$Project[which(!(dataSet$Project %in% labelSetSmall1))] <- ""
+    # - rows in rank order, so that geom_path connects neighbouring ranks
+    dataSet <- dataSet[order(dataSet$Rank), ]
+    ggplot(dataSet, aes(x = Rank, 
+                        y = Frequency,
+                        label = Project)) +
+      geom_path(size = .25, color = "darkblue") + 
+      geom_point(size = 1, color = "darkblue") + 
+      geom_point(size = .65, color = "white") + 
+      scale_y_continuous(labels = comma) +
+      xlab("Project Usage Rank") + ylab("Project Usage") + 
+      geom_text_repel(size = 3, segment.size = .15, show.legend = FALSE) +
+      theme_bw() +
+      theme(axis.text.x = element_text(size = 9)) +
+      theme(axis.text.y = element_text(size = 9)) +
+      theme(axis.title.x = element_text(size = 12)) +
+      theme(axis.title.y = element_text(size = 12)) +
+      theme(axis.ticks.x = element_blank()) +
+      theme(axis.ticks.y = element_blank()) +
+      theme(panel.border = element_blank()) +
+      theme(panel.grid = element_blank()) + 
+      theme(legend.text = element_text(size = 10)) +
+      theme(legend.title = element_text(size = 12))
+  }) %>% withProgress(message = 'Generating plot',
+                      min = 0,
+                      max = 1,
+                      value = 1, {incProgress(amount = 1)})
+  
+  ### --- output$projectLogRankLogFrequencyPlot
+  ### --- log(rank) vs. log(frequency) plot of project usage with a
+  ### --- linear (lm) smoother. Only the projects in labelSetSmall3 keep
+  ### --- their text label.
+  ### --- NOTE: the rank used to be taken from order(eu_count), which is
+  ### --- the sorting permutation and equals the descending rank only if
+  ### --- wdcmProject already arrives sorted by descending eu_count.
+  ### --- rank(-eu_count, ties.method = "first") yields the same values in
+  ### --- that case and stays correct for any other row order.
+  output$projectLogRankLogFrequencyPlot <- renderPlot({
+    usageRank <- rank(-wdcmProject$eu_count, ties.method = "first")
+    dataSet <- data.frame(Rank = log(usageRank),
+                          Frequency = log(wdcmProject$eu_count), 
+                          Project = wdcmProject$eu_project,
+                          stringsAsFactors = FALSE)
+    dataSet$Project[which(!(dataSet$Project %in% labelSetSmall3))] <- ""
+    # - rows in rank order, so that geom_path connects neighbouring ranks
+    dataSet <- dataSet[order(usageRank), ]
+    ggplot(dataSet, aes(x = Rank, 
+                        y = Frequency,
+                        label = Project)) +
+      geom_path(size = .25, color = "red") + 
+      geom_smooth(size = .25, method = "lm", color = "red") + 
+      geom_point(size = 1, color = "red") + 
+      geom_point(size = .65, color = "white") + 
+      scale_y_continuous(labels = comma) +
+      xlab("log(Project Usage Rank)") + ylab("log(Project Usage)") + 
+      geom_text_repel(size = 3, segment.size = .15, show.legend = FALSE) +
+      theme_bw() +
+      theme(axis.text.x = element_text(size = 9)) +
+      theme(axis.text.y = element_text(size = 9)) +
+      theme(axis.title.x = element_text(size = 12)) +
+      theme(axis.title.y = element_text(size = 12)) +
+      theme(axis.ticks.x = element_blank()) +
+      theme(axis.ticks.y = element_blank()) +
+      theme(panel.border = element_blank()) +
+      theme(panel.grid = element_blank()) + 
+      theme(legend.text = element_text(size = 10)) +
+      theme(legend.title = element_text(size = 12))
+  }) %>% withProgress(message = 'Generating plot',
+                      min = 0,
+                      max = 1,
+                      value = 1, {incProgress(amount = 1)})
+  
+  ### --- output$projectCategoryCross
+  ### --- Bar charts of summed Usage per Category, one facet row per
+  ### --- 'Project Type' (free y scale per row).
+  output$projectCategoryCross <- renderPlot({
+    # - drop the Project column, then sum Usage per (Project Type, Category)
+    keptColumns <- setdiff(names(wdcmProjectCategory), "Project")
+    dataSet <- as.data.frame(
+      summarise(group_by(wdcmProjectCategory[keptColumns], 
+                         `Project Type`, Category),
+                Usage = sum(Usage))
+    )
+    # - factor levels follow the order of first appearance
+    dataSet[["Project Type"]] <- factor(dataSet[["Project Type"]],
+                                        levels = unique(dataSet[["Project Type"]]))
+    dataSet[["Category"]] <- factor(dataSet[["Category"]],
+                                    levels = unique(dataSet[["Category"]]))
+    ggplot(dataSet, aes(x = Category,
+                        y = Usage,
+                        fill = `Project Type`,
+                        color = `Project Type`)) +
+      geom_bar(stat = "identity", width = .35) +
+      scale_fill_manual(values = projectTypeColor, name = "Project Type") +
+      scale_color_manual(values = projectTypeColor) +
+      scale_y_continuous(labels = comma) +
+      xlab("Category") + ylab("Usage") +
+      facet_grid(`Project Type` ~ ., scales = "free_y") +
+      theme_bw() +
+      theme(axis.text.x = element_text(angle = 90, size = 11, hjust = 1),
+            axis.text.y = element_text(size = 9),
+            axis.title.x = element_text(size = 12),
+            axis.title.y = element_text(size = 12),
+            axis.ticks.x = element_blank(),
+            axis.ticks.y = element_blank(),
+            panel.background = element_rect(fill = "snow2"),
+            panel.border = element_blank(),
+            panel.grid = element_blank(),
+            legend.position = "none",
+            strip.background = element_blank(),
+            strip.text = element_text(size = 11))
+  }) %>% withProgress(message = 'Generating plot',
+                      min = 0,
+                      max = 1,
+                      value = 1, {incProgress(amount = 1)})
+  
+  ### --- output$projectVolume
+  ### --- Bar chart of Usage for the projects whose eu_count lies between
+  ### --- the two quantiles selected with input$volumeSlider (given in
+  ### --- percent); at most the 30 projects with the highest usage are shown.
+  output$projectVolume <- renderPlot({
+    sliderQuantiles <- input$volumeSlider / 100
+    lowerBound <- quantile(wdcmProject$eu_count, sliderQuantiles[1])
+    upperBound <- quantile(wdcmProject$eu_count, sliderQuantiles[2])
+    wSel <- which(wdcmProject$eu_count <= upperBound &
+                    wdcmProject$eu_count >= lowerBound)
+    dataSet <- as.data.frame(arrange(wdcmProject[wSel, ], desc(eu_count)))
+    colnames(dataSet) <- c('Project', 'Usage', 'Project Type')
+    # - keep the top 30 rows (all rows if there are fewer)
+    dataSet <- head(dataSet, 30)
+    dataSet$Project <- factor(dataSet$Project)
+    dataSet <- dataSet[order(dataSet$Usage), ]
+    # - colors of the project types that are present in the selection,
+    # - picked by their position among the sorted distinct project types
+    presentTypeColors <- projectTypeColor[which(sort(unique(wdcmProject$projectype)) %in% dataSet$`Project Type`)]
+    ggplot(dataSet, aes(x = reorder(Project, Usage),
+                        y = Usage,
+                        fill = `Project Type`)) +
+      geom_bar(stat = "identity", width = .2) +
+      xlab("Project") + ylab("Usage") +
+      scale_fill_manual(values = presentTypeColors,
+                        name = "Project Type") +
+      scale_y_continuous(labels = comma) +
+      theme_bw() +
+      theme(axis.text.x = element_text(angle = 90, size = 11, hjust = 1),
+            axis.text.y = element_text(size = 9),
+            axis.title.x = element_text(size = 12),
+            axis.title.y = element_text(size = 12),
+            axis.ticks.x = element_blank(),
+            axis.ticks.y = element_blank(),
+            panel.border = element_blank(),
+            panel.grid = element_blank())
+  }) %>% withProgress(message = 'Generating plot',
+                      min = 0,
+                      max = 1,
+                      value = 1, {incProgress(amount = 1)})
+  
+  ### --- output$projectCategoryDataTable
+  ### --- wdcmProjectCategory as a DT table: 20 rows per page,
+  ### --- all columns centered, no row names.
+  output$projectCategoryDataTable <- DT::renderDataTable({
+    tableOptions <- list(
+      pageLength = 20,
+      width = '100%',
+      columnDefs = list(list(className = 'dt-center', targets = "_all"))
+    )
+    datatable(wdcmProjectCategory,
+              options = tableOptions,
+              rownames = FALSE)
+  }) %>% withProgress(message = 'Generating data',
+                      min = 0,
+                      max = 1,
+                      value = 1, {incProgress(amount = 1)})
+  
+  ### --- output$projectDataTable
+  ### --- wdcmProject (columns renamed for display) as a DT table:
+  ### --- 20 rows per page, all columns centered, no row names.
+  output$projectDataTable <- DT::renderDataTable({
+    dataSet <- wdcmProject
+    names(dataSet) <- c('Project', 'Usage', 'Project Type')
+    tableOptions <- list(
+      pageLength = 20,
+      width = '100%',
+      columnDefs = list(list(className = 'dt-center', targets = "_all"))
+    )
+    datatable(dataSet,
+              options = tableOptions,
+              rownames = FALSE)
+  }) %>% withProgress(message = 'Generating data',
+                      min = 0,
+                      max = 1,
+                      value = 1, {incProgress(amount = 1)})
+  
+  
+})
+### --- END shinyServer
+
+
+
+
diff --git a/ui.R b/ui.R
new file mode 100644
index 0000000..33bbfc8
--- /dev/null
+++ b/ui.R
@@ -0,0 +1,365 @@
+### ---------------------------------------------------------------------------
+### --- WDCM Dashboard Module, v. Beta 0.1
+### --- Script: ui.R, v. Beta 0.1
+### ---------------------------------------------------------------------------
+
+### --- Setup
+rm(list = ls())
+### --- general
+library(shiny)
+library(shinydashboard)
+library(shinycssloaders)
+### --- outputs
+library(visNetwork)
+library(rbokeh)
+library(networkD3)
+library(ggvis)
+library(DT)
+
+# - options
+options(warn = -1)
+
+shinyUI(
+  
+  fluidPage(title = 'WDCM Projects', 
+            theme = NULL,
+            
+            # - fluidRow Title
+            fluidRow(
+              column(width = 12,
+                     h2('WDCM Overview Dashboard'),
+                     HTML('<font size="3"><b>Wikidata Concepts 
Monitor</b></font>')
+                     )
+            ), # - fluidRow Title END
+            
+            # - fluidRow Logo
+            fluidRow(
+              column(width = 12,
+                     img(src='Wikidata-logo-en.png', 
+                         align = "left")
+              )
+            ), # - fluidRow END
+            
+            # - hr()
+            fluidRow(
+              column(width = 12,
+                     hr()
+              )
+            ),
+            
+            fluidRow(
+              column(width = 12,
+                     tabBox(id = 'MainBox', 
+                            selected = 'Overview', 
+                            title = '', 
+                            width = 12,
+                            height = NULL, 
+                            side = 'left',
+                            
+                            # - tabPanel Overview
+                            tabPanel("Overview", 
+                                     fluidRow(
+                                       column(width = 12,
+                                              hr(),
+                                              tabBox(width = 12,
+                                                     title = "Wikidata Usage 
Overview",
+                                                     id = "projectoverview",
+                                                     selected = "Explore",
+                                                     tabPanel(title = 
"Explore",
+                                                             id = 
"projexplore",
+                                                             fluidRow(
+                                                               column(width = 
12,
+                                                                      
h3('Explore Wikidata Usage'),
+                                                                      
HTML('Each bubble represents a client project. 
+                                                                            
The size of the bubble reflects the volume of Wikidata usage in the respective 
project; a logarithmic scale is used in this plot.<br> 
+                                                                            
Projects similar in respect to the semantics of Wikidata usage are grouped 
together. 
+                                                                            
Use the tools next to the plot legend to explore the plot and hover over 
bubbles for details.'),
+                                                                      hr(),
+                                                                      
withSpinner(rbokeh::rbokehOutput('overviewPlotDynamic',
+                                                                               
              width = "1400px",
+                                                                               
              height = "900px")
+                                                                               
   )
+                                                                      )
+                                                               )
+                                                             ),
+                                                     tabPanel(title = 
"Highlights",
+                                                              id = 
"projoverview",
+                                                              fluidRow(
+                                                                column(width = 
12,
+                                                                       
h3('Wikidata Usage Highlights'),
+                                                                       
HTML('Each bubble represents a client project. 
+                                                                            
The size of the bubble reflects the volume of Wikidata usage in the respective 
project.<br> 
+                                                                            
Projects similar in respect to the semantics of Wikidata usage are grouped 
together. 
+                                                                            
Only top five projects (of each project type) in respect to Wikidata usage 
volume are labeled.'),
+                                                                       hr(),
+                                                                       
withSpinner(plotOutput('overviewPlot',
+                                                                               
               width = "1400px",
+                                                                               
               height = "900px")
+                                                                               
    )
+                                                                       )
+                                                                )
+                                                              )
+                                                     )
+                                              )
+                                     ),
+                                     hr(),
+                                     fluidRow(
+                                       column(width = 6,
+                                              h4('Wikidata Usage Tendency'),
+                                              HTML('<font size="2">Each bubble 
represents a Wikidata semantic category. These categories represent one 
possible way of categorizing the Wikidata items.
+                                                   The size of the bubble 
reflects the volume of Wikidata usage from the respective category. 
+                                                   If two categories are found 
in proximity, that means that the projects that tend to use the one 
+                                                   also tend to use the 
another, and vice versa.</font>'),
+                                              hr(),
+                                              
withSpinner(plotOutput('usageTendencyPlot',
+                                                                     width = 
"900px",
+                                                                     height = 
"700px")
+                                              )
+                                              ),
+                                       column(width = 6,
+                                              tabBox(width = 12,
+                                                    title = "Wikidata Usage 
Distribution",
+                                                    id = "usagedist",
+                                                    selected = "Project Usage 
Rank-Frequency",
+                                                    tabPanel(title = "Project 
Usage Rank-Frequency",
+                                                             id = "RF",
+                                                             fluidRow(
+                                                               column(width = 
12,
+                                                                      br(),
+                                                                      
HTML('<font size="2">Each point represents a client project. 
+                                                                           
Wikidata usage is represented on the vertical and the project 
+                                                                           
usage rank on the horizontal axis. Only top projects per project type 
+                                                                           are 
labeled.</font>'),
+                                                                      hr(),
+                                                                      
withSpinner(plotOutput('projectRankFrequencyPlot',
+                                                                               
              width = "900px",
+                                                                               
              height = "700px")
+                                                                               
   )
+                                                                      )
+                                                               )
+                                                             ),
+                                                    tabPanel(title = "Project 
Usage log(Rank)-log(Frequency)",
+                                                             id = "lRlF",
+                                                             fluidRow(
+                                                               column(width = 
12,
+                                                                      br(),
+                                                                      
HTML('<font size="2">Each point represents a client project. 
+                                                                           The 
logarithms of Wikidata usage and project 
+                                                                           
usage rank are represented on on the vertical and horizontal axis, 
respectively. 
+                                                                           Top 
three projects per project type are labeled.</font>'),
+                                                                      hr(),
+                                                                      
withSpinner(plotOutput('projectLogRankLogFrequencyPlot', 
+                                                                               
              width = "900px",
+                                                                               
              height = "700px")
+                                                                               
   )
+                                                                      )
+                                                               )
+                                                             )
+                                                    )
+                                              )
+                                     ),
+                                     hr(),
+                                     fluidRow(
+                                       column(width = 6,
+                                              h4('Client Project Types'),
+                                              HTML('<font size="2">Wikidata 
usage breakdown across the client project types. Each row represents one client 
project type. 
+                                                   Semantic categories of 
Wikidata items are placed on the horizontal axis, while the respective usage 
counts are given on the vertical axis.</font>'),
+                                              hr(),
+                                              
withSpinner(plotOutput('projectCategoryCross',
+                                                                     width = 
"900px",
+                                                                     height = 
"700px")
+                                              )
+                                              ),
+                                       column(width = 6,
+                                              h4('Client Projects Usage 
Volume'),
+                                              HTML('<font size="2">Wikidata 
usage across the client projects. 
+                                                    Use slider (below the 
chart) to select the range of client projects by percentile ranks*. 
+                                                    <br><b>Note:</b> The chart 
present at most 30 top projects (in terms of Wikidata usage volume) from the 
selection.</font>'),
+                                              hr(),
+                                              
withSpinner(plotOutput('projectVolume',
+                                                                     width = 
"900px",
+                                                                     height = 
"700px")
+                                              ),
+                                              sliderInput('volumeSlider', 
+                                                          'Percentile Rank 
(select lower and upper limit):', 
+                                                          min = 1, 
+                                                          max = 100, 
+                                                          value = c(95,100), 
+                                                          step = 1, 
+                                                          round = TRUE,
+                                                          ticks = TRUE, 
+                                                          animate = FALSE,
+                                                          width = '100%'),
+                                              HTML('<font size="2">*The <a 
href="https://en.wikipedia.org/wiki/Percentile_rank"; target="_blank">percentile 
rank</a> 
+                                                    of a score is the 
percentage of scores in its frequency distribution that are equal to or lower 
than it. 
+                                                   For example, a client 
project that has a Wikidata usage volume greater than or equal to 75% of all 
client projects under
+                                                   consideration is said to be 
at the 75th percentile, where 75 is the percentile rank.</font>')
+                                              )
+                                       ),
+                                     hr(),
+                                     fluidRow(
+                                       column(width = 6,
+                                              h3('Client Project + Semantic 
Category Usage Cross-Tabulation'),
+                                              HTML(' Wikidata usage breakdown 
across the client projects, project types, and semantic categories. 
+                                                   Sort the table by any of 
its columns or enter a search term to find a specific project, project type, or 
+                                                   Wikidata semantic 
category.'),
+                                              hr(),
+                                              
withSpinner(DT::dataTableOutput('projectCategoryDataTable', width = "100%"))
+                                              ),
+                                       column(width = 6,
+                                              h3('Client Project Usage 
Tabulation'),
+                                              HTML(' Wikidata usage per client 
project. 
+                                                    Sort the table by any of 
its columns or enter a search term to find a specific project or project 
type.'),
+                                              br(), br(),
+                                              hr(),
+                                              
withSpinner(DT::dataTableOutput('projectDataTable', width = "100%"))
+                                              )
+                                       )
+                                     ), # - tabPanel Overview END
+                            
+                            # - tabPanel Description
+                            tabPanel("Description",
+                                     fluidRow(
+                                       column(width = 8,
+                                              HTML('<h2>WDCM Overview 
Dashboard</h2>
+<h4>Description</h4>
+                                                   <hr>
+                                                   <h4>Introduction</h4>
+                                                   <br>
+                                                   <p><font size = 2>This 
Dashboard is a part of the <b>Wikidata Concepts Monitor (WDCM)</b>. The WDCM 
system provides analytics on Wikidata usage
+                                                   across the client projects. 
The WDCM Overview Dashboard presents the big picture of Wikidata usage; other 
WDCM dashboards go
+                                                   into more detail. The 
Overview Dashboard provides insights into <b>(1)</b> the similarities between 
the client projects in respect to their use 
+                                                   of Wikidata, as well as 
<b>(2)</b> the volume of Wikidata usage in every client project, <b>(3)</b> 
Wikidata usage tendencies, described by the volume of 
+                                                   Wikidata usage in each of 
the semantic categories of items that are encompassed by the current WDCM 
edition, <b>(4)</b> the similarities between the 
+                                                   Wikidata semantic 
categories of items in respect to their usage across the client projects, 
<b>(5)</b> ranking of client projects in respect to their 
+                                                   Wikidata usage volume, 
<b>(6)</b> the Wikidata usage breakdown across the types of client projects and 
Wikidata semantic categories.</font></p>
+                                                   <hr>
+                                                   <h4>Definitions</h4>
+                                                   <br>
+                                                   <p><font size = 
2><b>N.B.</b> The current <b>Wikidata item usage statistic</b> definition is 
<i>the count of the number of pages in a particular client project
+                                                   where the respective 
Wikidata item is used</i>. Thus, the current definition ignores the usage 
aspects completely. This definition is motivated by the currently 
+                                                   present constraints in 
Wikidata usage tracking across the client projects 
+                                                   (see <a href = 
"https://www.mediawiki.org/wiki/Wikibase/Schema/wbc_entity_usage" target = 
"_blank">Wikibase/Schema/wbc entity usage</a>). 
+                                                   With more mature Wikidata 
usage tracking systems, the definition will become a subject 
+                                                   of change. The term 
<b>Wikidata usage volume</b> is reserved for total Wikidata usage (i.e. the sum 
of usage statistics) in a particular 
+                                                   client project, group of 
client projects, or semantic categories. By a <b>Wikidata semantic category</b> 
we mean a selection of Wikidata items that is 
+                                                   operationally 
defined by a respective SPARQL query returning a selection of items that 
intuitively match a human, natural semantic category. 
+                                                   The structure of Wikidata 
does not necessarily match any intuitive human semantics. In WDCM, an effort is 
made to select the semantic categories so to match 
+                                                   the intuitive, everyday 
semantics as much as possible, in order to assist anyone involved in analytical 
work with this system. However, the choice of semantic 
+                                                   categories in WDCM is not 
necessarily exhaustive (i.e. they do not necessarily cover all Wikidata items), 
nor are the categories necessarily 
+                                                   mutually exclusive. The 
Wikidata ontology is very complex and a product of work of many people, so 
there is an optimization price to be paid in every attempt to 
+                                                   adapt or simplify its 
present structure to the needs of a statistical analytical system such as WDCM. 
The current set of WDCM semantic categories is thus not 
+                                                   normative in any sense and 
can become a subject of change in any moment, depending upon the analytical 
needs of the community.</font></p>
+                                                   <p><font size = 2>The 
currently used <b>WDCM Taxonomy</b> of Wikidata items encompasses the following 
14 semantic categories: <i>Geographical Object</i>, <i>Organization</i>, 
<i>Architectural Structure</i>, 
+                                                   <i>Human</i>, 
<i>Wikimedia</i>, <i>Work of Art</i>, <i>Book</i>, <i>Gene</i>, <i>Scientific 
Article</i>, <i>Chemical Entities</i>, <i>Astronomical Object</i>, 
<i>Thoroughfare</i>, <i>Event</i>, 
+                                                   and <i>Taxon</i>.</font></p>
+                                                   <hr>
+                                                   <h4>Wikidata Usage 
Overview</h4>
+                                                   <br>
+                                                   <p><font size = 2>The 
similarity structure in Wikidata usage <i>across the client projects</i> is 
presented. Each bubble represents a client project.
+                                                   The size of the bubble 
reflects the volume of Wikidata usage in the respective project. Projects 
similar in respect to the semantics of Wikidata
+                                                   usage are grouped 
together.<br>
+                                                   The bubble chart is 
produced by performing a <a 
href="https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding"
 target="_blank">t-SNE dimensionality reduction</a> 
+                                                   of the client project 
pairwise Euclidean distances derived from the Projects x Categories contingency 
table. Given that the original higher-dimensional space 
+                                                   from which the 2D map is 
derived is rather constrained by the choice of a small number of semantic 
categories, the similarity mapping is somewhat 
+                                                   imprecise and should be 
taken as an attempt at an approximate big picture of the client projects 
similarity structure only. More precise 2D maps of 
+                                                   the similarity structures 
in client projects are found on the <a href = 
"http://wdcm.wmflabs.org/WDCM_SemanticsDashboard/" target = "_blank">WDCM 
Semantics Dashboard</a>, where each semantic category first receives an 
+                                                   <a href = 
"https://en.wikipedia.org/wiki/Topic_model" target = "_blank">LDA Topic 
Model</a>, 
+                                                   and the similarity 
structure between the client projects is then derived from project topical 
distributions.<br>
+                                                   While the <i>Explore</i> 
tab presents a dynamic <a href = "http://hafen.github.io/rbokeh/" 
target="_blank">{Rbokeh}</a> visualization alongside 
+                                                   the tools to explore it in 
detail, the <i>Highlights</i> tab shows a static <a href = 
"http://ggplot2.org/" target="_blank">{ggplot2}</a> plot with the most 
important client projects 
+                                                   marked (<b>NOTE.</b> Only 
top five projects (of each project type) in respect to Wikidata usage volume 
are labeled).</font></p>
+                                                   <hr>
+                                                   <h4>Wikidata Usage 
Tendency</h4>
+                                                   <br>
+                                                   <p><font size = 2>The 
similarity structure in Wikidata usage <i>across the semantic categories</i> is 
presented. Each bubble represents a Wikidata semantic
+                                                   category. The size of the 
bubble reflects the volume of Wikidata usage from the respective category. If 
two categories are found in proximity,
+                                                   that means that the 
projects that tend to use the one also tend to use the other, and vice versa. 
Similarly to the Usage Overview, the 2D mapping is obtained by performing 
+                                                   a <a 
href="https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding"
 target="_blank">t-SNE dimensionality reduction</a> 
+                                                   of the pairwise category 
Euclidean distances derived from the Projects x Categories contingency table. 
</font></p>
+                                                   <hr>
+                                                   <h4>Wikidata Usage 
Distribution</h4>
+                                                   <br>
+                                                   <p><font size = 2>The plots 
are helpful to build an understanding of the relative range of Wikidata usage 
across the client projects.
+                                                   In the <i>Project Usage 
Rank-Frequency</i> plot, each point represents a client project; Wikidata usage 
is represented on the vertical and
+                                                   the project usage rank on 
the horizontal axis, while only the top projects (per project type) are labeled. The 
highly-skewed, asymmetrical
+                                                   distribution reveals that a 
small fraction of client projects only accounts for a huge proportion of 
Wikidata usage.<br> In the
+                                                   <i>Project Usage 
log(Rank)-log(Frequency)</i> plot, the logarithms of both variables are 
represented. 
+                                                   A <a href = 
"https://en.wikipedia.org/wiki/Power_law" target="_blank">power-law</a> 
relationship holds true if this
+                                                   plot is linear. The plot 
includes the best linear fit, however, no attempts to estimate the underlying 
probability distribution were made. </font></p>
+                                                   <hr>
+                                                   <h4>Client Project 
Types</h4>
+                                                   <br>
+                                                   <p><font size = 2>Project 
types are provided in the rows of this chart, while the semantic categories are 
given on the horizontal axis.
+                                                   The height of the 
respective bar indicates Wikidata usage volume from the respective semantic 
category in a particular client project.</font></p>
+                                                   <hr>
+                                                   <h4>Client Projects Usage 
Volume</h4>
+                                                   <br>
+                                                   <p><font size = 2>Use the 
slider to select the percentile rank range of the Wikidata usage volume 
distribution across the client projects to show. The
+                                                   chart will automatically 
adjust to present the selected projects in increasing order of Wikidata usage, 
and presenting at most 30 top projects
+                                                   from the selection. 
<b>NOTE.</b> The <a href="https://en.wikipedia.org/wiki/Percentile_rank" 
target="_blank">percentile rank</a> 
+                                                   of a score is the 
percentage of scores in its frequency distribution that are equal to or lower 
than it. 
+                                                   For example, a client 
project that has a Wikidata usage volume greater than or equal to 75% of all 
client projects under
+                                                   consideration is said to be 
at the 75th percentile, where 75 is the percentile rank.<br> In effect, you can 
browse the whole 
+                                                   distribution of Wikidata 
usage across the client projects by selecting the lower and upper limits in 
terms of usage percentile rank.</font></p>
+                                                   <hr>
+                                                   <h4>Wikidata Usage 
Browser</h4>
+                                                   <br>
+                                                   <p><font size = 2>A 
breakdown of Wikidata usage statistics across client projects and semantic 
categories. To the left, 
+                                                   a table that presents a 
Client Project vs. Semantic Category cross-tabulation. The Usage column in this 
table is the Wikidata 
+                                                  usage statistic for a 
particular Semantic Category x Client Project combination (e.g. The Wikidata 
usage in the category "Human" in 
+                                                  the dewiki project). To the 
right, the total Wikidata usage per client project is presented (i.e. the sum 
of Wikidata usage across 
+                                                  all semantic categories for 
a particular client project; e.g. the total Wikidata usage volume of 
enwiki).</font></p>
+                                                   ')
+                                       )
+                                     )
+                                     ), # - tabPanel Description END
+                            
+                            # - tabPanel Navigate
+                            tabPanel("Navigate WDCM", 
+                                     fluidRow(
+                                       column(width = 8,
+                                              HTML('<h2>WDCM Navigate</h2>
+                                                   <h4>Your orientation in the 
WDCM Dashboards System</h4>
+                                                   <hr>
+                                                   <ul>
+                                                   <li><b><a href = 
"http://wdcm.wmflabs.org/">WDCM Portal</a></b>.<br>
+                                                   <font size = "2">The entry 
point to WDCM Dashboards.</font></li><br>
+                                                   <li><b><a href = 
"http://wdcm.wmflabs.org/WDCM_OverviewDashboard/">WDCM Overview</a> (current 
dashboard)</b><br>
+                                                   <font size = "2">The big 
picture. Fundamental insights in how Wikidata is used across the client 
projects.</font></li><br>
+                                                   <li><b><a href = 
"http://wdcm.wmflabs.org/WDCM_SemanticsDashboard/">WDCM Semantics</a></b><br>
+                                                   <font size = "2">Detailed 
insights into the WDCM Taxonomy (a selection of semantic categories from 
Wikidata), its distributional
+                                                   semantics, and the way it 
is used across the client projects. If you are looking for Topic Models - 
that&#8217;s where
+                                                   they live.</font></li><br>
+                                                   <li><b><a href = 
"http://wdcm.wmflabs.org/WDCM_UsageDashboard/">WDCM Usage</a></b><br>
+                                                   <font size = 
"2">Fine-grained information on Wikidata usage across client projects and 
project types. Cross-tabulations and similar.</font></li><br>
+                                                   <li><b>WDCM Items</b><br>
+                                                   <font size = 
"2">Fine-grained information on particular Wikidata item usage across the 
client projects.<b> (Under development)</b></font></li><br>
+                                                   <li><b><a href = 
"https://wikitech.wikimedia.org/wiki/Wikidata_Concepts_Monitor">WDCM System 
Technical Documentation</a></b><br>
+                                                   <font size = "2">The WDCM 
Wikitech Page.</font></li>
+                                                   </ul>'
+                                              )
+                                       )
+                                     )
+                                     ) # - tabPanel Navigate END
+                            
+                            ) # - MainBox END
+                     ) # - Main column End
+              
+            ), #- Main fluidRow END
+            
+            # - fluidRow Footer
+            fluidRow(
+              column(width = 12,
+                     hr(),
+                     HTML('<b>Wikidata Concepts Monitor :: WMDE 
2017</b><br>Diffusion: <a 
href="https://phabricator.wikimedia.org/diffusion/AWCM/" target = 
"_blank">WDCM</a><br>'),
+                     HTML('Contact: Goran S. Milovanovic, Data Scientist, 
WMDE<br>e-mail: [email protected]
+                          <br>IRC: goransm'),
+                     br(),
+                     br()
+              )
+            ) # - fluidRow Footer END
+            
+            ) # - fluidPage END
+  
+) # - ShinyUI END
diff --git a/www/Wikidata-logo-en.png b/www/Wikidata-logo-en.png
new file mode 100644
index 0000000..380ea29
--- /dev/null
+++ b/www/Wikidata-logo-en.png
Binary files differ

-- 
To view, visit https://gerrit.wikimedia.org/r/391008
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I3cb15f8dc8fed47e571dc9884541af48683bb29a
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wmde/WDCM-Overview-Dashboard
Gerrit-Branch: master
Gerrit-Owner: GoranSMilovanovic <[email protected]>
Gerrit-Reviewer: GoranSMilovanovic <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to