Christopher Johnson (WMDE) has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/251826

Change subject: adds property usage count functions and view
......................................................................

adds property usage count functions and view

Change-Id: I30882895c84ec2c65702fb9c5337639d438a9da7
---
A assets/property-query.xml
M server.R
M src/config.R
M src/model.R
A src/output/server-properties.R
M src/utils.R
M ui.R
7 files changed, 103 insertions(+), 6 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/wikidata/analytics/dashboard 
refs/changes/26/251826/1

diff --git a/assets/property-query.xml b/assets/property-query.xml
new file mode 100644
index 0000000..3a3ae5f
--- /dev/null
+++ b/assets/property-query.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+
+<!DOCTYPE rdf:RDF [
+    <!ENTITY owl "http://www.w3.org/2002/07/owl#" >
+    <!ENTITY xsd "http://www.w3.org/2001/XMLSchema#" >
+    <!ENTITY rdfs "http://www.w3.org/2000/01/rdf-schema#" >
+    <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#" >
+]>
+
+
+<RDF xmlns="http://wikiba.se/rdfq#"
+     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+     xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
+  <!-- URL-encoded SPARQL PREFIX declarations -->
+  <prefixes>
+    <prefix>PREFIX%20wikibase%3A%20%3Chttp%3A%2F%2Fwikiba.se%2Fontology%23%3E</prefix>
+  </prefixes>
+  <!-- Get All Properties -->
+  <rdf-query>
+    <select>SELECT ?s WHERE {?s ?p wikibase:Property}</select>
+    <rdfs:comment>All Properties</rdfs:comment>
+  </rdf-query>
+</RDF>
\ No newline at end of file
diff --git a/server.R b/server.R
index 8835f6c..3137bc8 100644
--- a/server.R
+++ b/server.R
@@ -77,4 +77,6 @@
     source('./src/output/server-content.R', local=TRUE)
     # KPI
     source('./src/output/server-KPI.R', local=TRUE)
+    # Property Usage
+    source('./src/output/server-properties.R', local=TRUE)
 }
diff --git a/src/config.R b/src/config.R
index d328cbd..25ae16c 100644
--- a/src/config.R
+++ b/src/config.R
@@ -14,6 +14,7 @@
 library(lubridate)
 library(magrittr)
 library(curl)
+library(RCurl)
 library(rrdf)
 library(data.table)
 library(DT)
@@ -29,4 +30,6 @@
 source_data_uri <- "http://wdm-data.wmflabs.org/data/";
 agg_data_uri <- "http://datasets.wikimedia.org/aggregate-datasets/wikidata/";
 wdqs_uri <- "https://query.wikidata.org/bigdata/namespace/wdq/sparql?query=";
+wdmrdf_uri <- "https://wdm-rdf.wmflabs.org/bigdata/namespace/wdq/sparql?query=";
+estcard.uri <- 
"http://wdm-rdf.wmflabs.org/bigdata/namespace/wdq/sparql?ESTCARD";
 
diff --git a/src/model.R b/src/model.R
index 8c9e60d..f3b699c 100644
--- a/src/model.R
+++ b/src/model.R
@@ -23,6 +23,7 @@
   sparql2 <<- get_local_set("spql2.tsv", sparql_data_uri)
   sparql3 <<- get_local_set("spql3.tsv", sparql_data_uri)
   sparql13 <<- get_local_set("spql13.tsv", sparql_data_uri)
+  property_usage_counts <<- get_local_set("prop_usage.tsv", sparql_data_uri)
   return(invisible())
 }
 
diff --git a/src/output/server-properties.R b/src/output/server-properties.R
new file mode 100644
index 0000000..fa3afe2
--- /dev/null
+++ b/src/output/server-properties.R
@@ -0,0 +1,27 @@
+#RDF Property Use Counts
+
+# http://wikiba.se/metrics#Property_Use
+# Definition box for the Property Usage tab. The output id has to match the
+# uiOutput("metric_meta_property_usage_count") placeholder declared for the
+# "wikidata_property_usage_count" tab in ui.R, otherwise the box is never shown.
+output$metric_meta_property_usage_count <- renderUI({
+  metric_desc <- "Property Use Counts"
+  box(title = "Definition", width = 6, status = "info", metric_desc)
+})
+output$wikidata_property_usage_count_table <- DT::renderDataTable({
+  datatable(property_usage_counts,   class = "display compact", colnames = 
c("Property", "Count"),
+            options = list(
+              order = list(2, 'desc'),
+              pageLength = 100,
+              columnDefs = list(
+                list(className = 'dt-left', targets = c(0,1,2)
+                ),
+                list(targets = c(1), render = JS(
+                  "function(data, type, row, meta) {",
+                  "return '<a 
href=\"https://www.wikidata.org/wiki/Property:'+data+'\" 
target=\"_blank\">'+data+'</a>'",
+                  "}")
+                )
+              )
+            ),
+  caption = "Property Usage Counts") %>%
+  formatCurrency(c("Count"), currency = "", interval = 3, mark = ",")
+})
+
+
diff --git a/src/utils.R b/src/utils.R
index e1a13de..ce61144 100644
--- a/src/utils.R
+++ b/src/utils.R
@@ -1,14 +1,13 @@
 download_set <- function(file, uri = data_uri){
-      location <- paste0(uri, file,
-                         "?ts=", gsub(x = Sys.time(), pattern = "(-| )", 
replacement = ""))
-      con <- url(location);
+      location <- paste0(uri, file)
+      con <- url(location)
       set <- readr::read_delim(con, delim = "\t")
       return(set)
 }
 
 get_csv_from_api <- function(params, uri = graphite_api_uri){
   location <- paste0(uri, params)
-  con <- url(location);
+  con <- url(location)
   set <- readr::read_csv(con, col_names = c("desc", "date", "value"), 
col_types = list(col_character(), col_character(), col_double()))
   return(set)
 }
@@ -230,10 +229,48 @@
   return(prefixes)
 }
 
+get_property_list_query <- function(){
+  query = curl_escape("SELECT ?s WHERE {?s ?p wikibase:Property}")
+  return(query)
+}
+
 get_sparql_result <- function(uri = wdqs_uri, prefix, query) {
-  # escape_query <- curl_escape(query)
   xml_result <- readLines(curl(paste0(uri, prefix, query)))
   doc = xmlParse(xml_result)
   result = xmlToDataFrame(nodes = getNodeSet(doc, "//sq:literal", c(sq = 
"http://www.w3.org/2005/sparql-results#";)))
   return(result)
-}
\ No newline at end of file
+}
+
+get_sparql_result_from_uri <- function(uri = wdmrdf_uri, prefix, query) {
+  xml_result <- readLines(curl(paste0(uri, prefix, query)))
+  doc = xmlParse(xml_result)
+  result = xmlToDataFrame(nodes = getNodeSet(doc, "//sq:uri", c(sq = 
"http://www.w3.org/2005/sparql-results#";)))
+  return(result)
+}
+
+get_estimated_card_from_prop_predicate <- function(uri = estcard.uri, 
predicate) {
+  xml_result <- getForm(uri, 
p=paste0("<http://www.wikidata.org/prop/statement/";, predicate, ">"))
+  doc = xmlParse(xml_result)
+  result = xpathApply(doc, "//data[@rangeCount]", xmlGetAttr, "rangeCount")
+  return(result)
+
+}
+
+# Rebuilds the property usage TSV: fetches the property URIs from the
+# wdm-rdf SPARQL endpoint, asks the ESTCARD endpoint for each property's
+# rangeCount, and writes the property/count pairs to prop_usage.tsv.
+# Takes no arguments; called for its file-writing side effect.
+write_prop_usage_counts <- function() {
+  query <- get_property_list_query()
+  prefix <- get_property_label_prefixes()
+  plist <- get_sparql_result_from_uri(wdmrdf_uri, prefix, query)
+  # Strip the entity namespace so only the trailing identifier remains.
+  props <- lapply(plist, function(x) gsub("http://www.wikidata.org/entity/",
+                                          "", x))
+  values <- lapply(props$text, function(x)
+    get_estimated_card_from_prop_predicate(estcard.uri, x))
+  vals <- do.call(c, unlist(values, recursive=FALSE))
+  prop_counts <- data.table(vals)
+  props <- data.table(props$text)
+  # Pair identifiers with counts by position via a shared row id key.
+  props$id <- seq_len(nrow(props))
+  prop_counts$id <- seq_len(nrow(prop_counts))
+  setkey(props, id)
+  setkey(prop_counts, id)
+  dt_join_prop_usage <- props[prop_counts]
+  # Keep columns 1 and 3 of the join, dropping the helper id column.
+  dt_join_prop_usage <- dt_join_prop_usage[,.SD,.SDcols=c(1,3)]
+  # Persist the joined table as a headerless, tab-separated file.
+  write.table(dt_join_prop_usage,
+              "/srv/dashboards/shiny-server/wdm/data/sparql/prop_usage.tsv",
+              sep = "\t", row.names = FALSE, col.names = FALSE)
+}
+
diff --git a/ui.R b/ui.R
index 9988f46..a1f8643 100644
--- a/ui.R
+++ b/ui.R
@@ -25,6 +25,8 @@
     menuItem(text = "API Usage", icon = icon("gears"),
              menuSubItem(text = "wbgetclaims", tabName = 
"wikidata_daily_getclaims_property_use"),
              menuSubItem(text = "Graphs", tabName = 
"wikidata_getclaims_property_graphs")),
+    menuItem(text = "Property Usage", icon = icon("exchange"),
+             menuSubItem(text = "List", tabName = 
"wikidata_property_usage_count")),
     menuItem(text = "", badgeLabel = "Graphite", badgeColor = "black"),
     menuItem(text = "Usages", icon = icon("question"),
              menuSubItem(text = "addUsagesForPage", tabName = 
"wikidata_addUsagesForPage")),
@@ -110,6 +112,11 @@
             
DT::dataTableOutput("wikidata_daily_getclaims_property_use_table")),
     tabItem(tabName = "wikidata_getclaims_property_graphs",
             dygraphOutput("param_property_graph")),
+    tabItem(tabName = "wikidata_property_usage_count",
+            fluidRow(
+              uiOutput("metric_meta_property_usage_count")
+            ),
+            DT::dataTableOutput("wikidata_property_usage_count_table")),
     tabItem(tabName = "wikidata_addUsagesForPage",
             dygraphOutput("wikidata_addUsagesForPage_plot")),
     tabItem(tabName = "wikidata_rdf_queries",

-- 
To view, visit https://gerrit.wikimedia.org/r/251826
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I30882895c84ec2c65702fb9c5337639d438a9da7
Gerrit-PatchSet: 1
Gerrit-Project: wikidata/analytics/dashboard
Gerrit-Branch: master
Gerrit-Owner: Christopher Johnson (WMDE) <christopher.john...@wikimedia.de>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to