Christopher Johnson (WMDE) has uploaded a new change for review. https://gerrit.wikimedia.org/r/251826
Change subject: adds property usage count functions and view ...................................................................... adds property usage count functions and view Change-Id: I30882895c84ec2c65702fb9c5337639d438a9da7 --- A assets/property-query.xml M server.R M src/config.R M src/model.R A src/output/server-properties.R M src/utils.R M ui.R 7 files changed, 103 insertions(+), 6 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/wikidata/analytics/dashboard refs/changes/26/251826/1 diff --git a/assets/property-query.xml b/assets/property-query.xml new file mode 100644 index 0000000..3a3ae5f --- /dev/null +++ b/assets/property-query.xml @@ -0,0 +1,20 @@ +<?xml version="1.0"?> + +<!DOCTYPE rdf:RDF [ + <!ENTITY owl "http://www.w3.org/2002/07/owl#" > + <!ENTITY xsd "http://www.w3.org/2001/XMLSchema#" > + <!ENTITY rdfs "http://www.w3.org/2000/01/rdf-schema#" > + <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#" > +]> + + +<RDF xmlns="http://wikiba.se/rdfq#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"> + <prefixes> + <prefix>PREFIX%20wikibase%3A%20%3Chttp%3A%2F%2Fwikiba.se%2Fontology%23%3E</prefix> + <!-- Get All Properties --> + <rdf-query> + <select>SELECT ?s WHERE {?s ?p wikibase:Property}</select> + <rdfs:comment>All Properties</rdfs:comment> + </rdf-query> +</RDF> \ No newline at end of file diff --git a/server.R b/server.R index 8835f6c..3137bc8 100644 --- a/server.R +++ b/server.R @@ -77,4 +77,6 @@ source('./src/output/server-content.R', local=TRUE) # KPI source('./src/output/server-KPI.R', local=TRUE) + # Property Usage + source('./src/output/server-properties.R', local=TRUE) } diff --git a/src/config.R b/src/config.R index d328cbd..25ae16c 100644 --- a/src/config.R +++ b/src/config.R @@ -14,6 +14,7 @@ library(lubridate) library(magrittr) library(curl) +library(RCurl) library(rrdf) library(data.table) library(DT) @@ -29,4 +30,6 @@ source_data_uri <- 
"http://wdm-data.wmflabs.org/data/" agg_data_uri <- "http://datasets.wikimedia.org/aggregate-datasets/wikidata/" wdqs_uri <- "https://query.wikidata.org/bigdata/namespace/wdq/sparql?query=" +wdmrdf_uri <- "https://wdm-rdf.wmflabs.org/bigdata/namespace/wdq/sparql?query=" +estcard.uri <- "http://wdm-rdf.wmflabs.org/bigdata/namespace/wdq/sparql?ESTCARD" diff --git a/src/model.R b/src/model.R index 8c9e60d..f3b699c 100644 --- a/src/model.R +++ b/src/model.R @@ -23,6 +23,7 @@ sparql2 <<- get_local_set("spql2.tsv", sparql_data_uri) sparql3 <<- get_local_set("spql3.tsv", sparql_data_uri) sparql13 <<- get_local_set("spql13.tsv", sparql_data_uri) + property_usage_counts <<- get_local_set("prop_usage.tsv", sparql_data_uri) return(invisible()) } diff --git a/src/output/server-properties.R b/src/output/server-properties.R new file mode 100644 index 0000000..fa3afe2 --- /dev/null +++ b/src/output/server-properties.R @@ -0,0 +1,27 @@ +#RDF Property Use Counts + +# http://wikiba.se/metrics#Property_Use +output$metric_meta_rdf_queries <- renderUI({ + metric_desc <- "Property Use Counts" + box(title = "Definition", width = 6, status = "info", metric_desc) +}) +output$wikidata_property_usage_count_table <- DT::renderDataTable({ + datatable(property_usage_counts, class = "display compact", colnames = c("Property", "Count"), + options = list( + order = list(2, 'desc'), + pageLength = 100, + columnDefs = list( + list(className = 'dt-left', targets = c(0,1,2) + ), + list(targets = c(1), render = JS( + "function(data, type, row, meta) {", + "return '<a href=\"https://www.wikidata.org/wiki/Property:'+data+'\" target=\"_blank\">'+data+'</a>'", + "}") + ) + ) + ), + caption = "Property Usage Counts") %>% + formatCurrency(c("Count"), currency = "", interval = 3, mark = ",") +}) + + diff --git a/src/utils.R b/src/utils.R index e1a13de..ce61144 100644 --- a/src/utils.R +++ b/src/utils.R @@ -1,14 +1,13 @@ download_set <- function(file, uri = data_uri){ - location <- paste0(uri, file, - "?ts=", 
gsub(x = Sys.time(), pattern = "(-| )", replacement = "")) - con <- url(location); + location <- paste0(uri, file) + con <- url(location) set <- readr::read_delim(con, delim = "\t") return(set) } get_csv_from_api <- function(params, uri = graphite_api_uri){ location <- paste0(uri, params) - con <- url(location); + con <- url(location) set <- readr::read_csv(con, col_names = c("desc", "date", "value"), col_types = list(col_character(), col_character(), col_double())) return(set) } @@ -230,10 +229,48 @@ return(prefixes) } +get_property_list_query <- function(){ + query = curl_escape("SELECT ?s WHERE {?s ?p wikibase:Property}") + return(query) +} + get_sparql_result <- function(uri = wdqs_uri, prefix, query) { - # escape_query <- curl_escape(query) xml_result <- readLines(curl(paste0(uri, prefix, query))) doc = xmlParse(xml_result) result = xmlToDataFrame(nodes = getNodeSet(doc, "//sq:literal", c(sq = "http://www.w3.org/2005/sparql-results#"))) return(result) -} \ No newline at end of file +} + +get_sparql_result_from_uri <- function(uri = wdmrdf_uri, prefix, query) { + xml_result <- readLines(curl(paste0(uri, prefix, query))) + doc = xmlParse(xml_result) + result = xmlToDataFrame(nodes = getNodeSet(doc, "//sq:uri", c(sq = "http://www.w3.org/2005/sparql-results#"))) + return(result) +} + +get_estimated_card_from_prop_predicate <- function(uri = estcard.uri, predicate) { + xml_result <- getForm(uri, p=paste0("<http://www.wikidata.org/prop/statement/", predicate, ">")) + doc = xmlParse(xml_result) + result = xpathApply(doc, "//data[@rangeCount]", xmlGetAttr, "rangeCount") + return(result) + +} + +write_prop_usage_counts <- function() { + query <- get_property_list_query() + prefix <- get_property_label_prefixes() + plist <- get_sparql_result_from_uri(wdmrdf_uri, prefix, query) + props <- lapply(plist, function(x) gsub("http://www.wikidata.org/entity/", "", x)) + values <- lapply(props$text, function(x) get_estimated_card_from_prop_predicate(estcard.uri, x)) + vals <- 
do.call(c, unlist(values, recursive=FALSE)) + prop_counts <- data.table(vals) + props <- data.table(props$text) + props$id <- seq_len(nrow(props)) + prop_counts$id <- seq_len(nrow(prop_counts)) + setkey(props, id) + setkey(prop_counts, id) + dt_join_prop_usage <- props[prop_counts] + dt_join_prop_usage <- dt_join_prop_usage[,.SD,.SDcols=c(1,3)] + write.table(dtjoin_prop_usage, "/srv/dashboards/shiny-server/wdm/data/sparql/prop_usage.tsv", sep = "\t", row.names = FALSE, col.names = FALSE) +} + diff --git a/ui.R b/ui.R index 9988f46..a1f8643 100644 --- a/ui.R +++ b/ui.R @@ -25,6 +25,8 @@ menuItem(text = "API Usage", icon = icon("gears"), menuSubItem(text = "wbgetclaims", tabName = "wikidata_daily_getclaims_property_use"), menuSubItem(text = "Graphs", tabName = "wikidata_getclaims_property_graphs")), + menuItem(text = "Property Usage", icon = icon("exchange"), + menuSubItem(text = "List", tabName = "wikidata_property_usage_count")), menuItem(text = "", badgeLabel = "Graphite", badgeColor = "black"), menuItem(text = "Usages", icon = icon("question"), menuSubItem(text = "addUsagesForPage", tabName = "wikidata_addUsagesForPage")), @@ -110,6 +112,11 @@ DT::dataTableOutput("wikidata_daily_getclaims_property_use_table")), tabItem(tabName = "wikidata_getclaims_property_graphs", dygraphOutput("param_property_graph")), + tabItem(tabName = "wikidata_property_usage_count", + fluidRow( + uiOutput("metric_meta_property_usage_count") + ), + DT::dataTableOutput("wikidata_property_usage_count_table")), tabItem(tabName = "wikidata_addUsagesForPage", dygraphOutput("wikidata_addUsagesForPage_plot")), tabItem(tabName = "wikidata_rdf_queries", -- To view, visit https://gerrit.wikimedia.org/r/251826 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I30882895c84ec2c65702fb9c5337639d438a9da7 Gerrit-PatchSet: 1 Gerrit-Project: wikidata/analytics/dashboard Gerrit-Branch: master Gerrit-Owner: Christopher Johnson (WMDE) 
<christopher.john...@wikimedia.de> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits