Christopher Johnson (WMDE) has uploaded a new change for review.
https://gerrit.wikimedia.org/r/247051
Change subject: adds sparql bulk query and output write script adds daily
social metrics
..
adds sparql bulk query and output write script
adds daily social metrics
Change-Id: I5662bbab52e26d39faa34968c2f33412fdd63c20
---
M assets/metrics.owl
M assets/rdfq.xml
A bulk_sparql.R
A bulk_sparql.Rout
A data/sparql/spql1.tsv
A data/sparql/spql10.tsv
A data/sparql/spql11.tsv
A data/sparql/spql12.tsv
A data/sparql/spql13.tsv
A data/sparql/spql14.tsv
A data/sparql/spql15.tsv
A data/sparql/spql16.tsv
A data/sparql/spql17.tsv
A data/sparql/spql18.tsv
A data/sparql/spql19.tsv
A data/sparql/spql2.tsv
A data/sparql/spql20.tsv
A data/sparql/spql21.tsv
A data/sparql/spql3.tsv
A data/sparql/spql4.tsv
A data/sparql/spql5.tsv
A data/sparql/spql6.tsv
A data/sparql/spql7.tsv
A data/sparql/spql8.tsv
A data/sparql/spql9.tsv
M model.R
M output/server-RDFQ.R
M output/server-engagement.R
M output/server-recent.R
M utils.R
30 files changed, 219 insertions(+), 49 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/wikidata/analytics/dashboard
refs/changes/51/247051/1
diff --git a/assets/metrics.owl b/assets/metrics.owl
index 3428270..264bb94 100644
--- a/assets/metrics.owl
+++ b/assets/metrics.owl
@@ -526,4 +526,3 @@
-
diff --git a/assets/rdfq.xml b/assets/rdfq.xml
index 7cd825c..96487b2 100644
--- a/assets/rdfq.xml
+++ b/assets/rdfq.xml
@@ -10,15 +10,14 @@
http://wikiba.se/rdfq#;
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
-
-
-
-
-
-
-
-
-
+
+
PREFIX%20wikibase%3A%20%3Chttp%3A%2F%2Fwikiba.se%2Fontology%23%3E
+
PREFIX%20wdt%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Fprop%2Fdirect%2F%3E
+
PREFIX%20wd%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Fentity%2F%3E
+
PREFIX%20rdfs%3A%20%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E
+
PREFIX%20prov%3A%20%3Chttp%3A%2F%2Fwww.w3.org%2Fns%2Fprov%23%3E
+
PREFIX%20wdref%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Freference%2F%3E
+
SELECT (count(distinct(?s)) AS ?scount) WHERE {?s ?p
wikibase:GlobecoordinateValue}
@@ -149,4 +148,5 @@
SELECT (count(distinct(?s)) AS ?scount) WHERE {?s
prov:wasDerivedFrom wdref:004ec6fbee857649acdbdbad4f97b2c8571df97b}
Statements Referenced to Wikipedia (with GUID
x)
-
\ No newline at end of file
+
+
diff --git a/bulk_sparql.R b/bulk_sparql.R
new file mode 100644
index 000..9c92d00
--- /dev/null
+++ b/bulk_sparql.R
@@ -0,0 +1,37 @@
+# Bulk query of WDQS: read the SPARQL queries stored in assets/rdfq.xml and
+# write each query's result to a TSV file.
+# NOTE(review): read_file, xmlParse, xmlToDataFrame, getNodeSet and wdqs_uri
+# are not defined in this script; presumably they come from the sourced
+# files or packages those files load -- confirm.
+source("config.R")
+source("model.R")
+source("utils.R")
+
+# Directory the TSV files go to. The trailing slash matters: write_tsv()
+# joins this to the file name with paste0().
+output_path <- "/srv/dashboards/shiny-server/wdm/data/sparql/"
+
+# Parse the query catalogue once, then pull out the node sets by XPath.
+qlist <- read_file("./assets/rdfq.xml")
+rdfq <- xmlParse(qlist)
+
+# One row per <rdfq:select> element (the query bodies).
+queries <- xmlToDataFrame(
+  nodes = getNodeSet(rdfq, "//rdfq:select", c(rdfq = "http://wikiba.se/rdfq#"))
+)
+# One row per <rdfq:prefix> element (the PREFIX declarations).
+prefixes <- xmlToDataFrame(
+  nodes = getNodeSet(rdfq, "//rdfq:prefix", c(rdfq = "http://wikiba.se/rdfq#"))
+)
+# One row per <rdfs:comment> element; not referenced again in this script.
+comments <- xmlToDataFrame(
+  nodes = getNodeSet(
+    rdfq, "//rdfs:comment", c(rdfs = "http://www.w3.org/2000/01/rdf-schema#")
+  )
+)
+
+# Run one SPARQL query and return the literal values of its result.
+#
+# uri:    endpoint URL that prefix and query are appended to. Defaults to
+#         wdqs_uri, which is not defined in this script (presumably set by
+#         config.R -- confirm).
+# prefix: text placed between uri and query in the request URL.
+# query:  query text appended last.
+#
+# No escaping is done here: the three pieces are concatenated as-is, so
+# callers must pass text that is already URL-encoded.
+#
+# Returns the data frame xmlToDataFrame() builds from every sq:literal node
+# of the response.
+get_sparql_result <- function(uri = wdqs_uri, prefix, query) {
+  con <- curl(paste0(uri, prefix, query))
+  # readLines() opens and closes the connection for the read, but the
+  # connection object itself stays allocated until close() destroys it.
+  on.exit(close(con), add = TRUE)
+  xml_result <- readLines(con)
+  doc <- xmlParse(xml_result)
+  xmlToDataFrame(
+    nodes = getNodeSet(
+      doc, "//sq:literal", c(sq = "http://www.w3.org/2005/sparql-results#")
+    )
+  )
+}
+
+# Append a result to a tab-separated file under output_path.
+#
+# x:        data to write; it is combined with today's date (Sys.Date()) as
+#           the first column via data.frame().
+# filename: file name appended to the global output_path with paste0().
+#
+# The header row is written only when the file does not exist yet or is
+# empty, so repeated runs add data rows without repeating the header.
+write_tsv <- function(x, filename) {
+  file_uri <- paste0(output_path, filename)
+  out <- data.frame(Sys.Date(), x)
+  needs_header <- !file.exists(file_uri) || file.info(file_uri)$size == 0
+  write.table(
+    out,
+    file = file_uri,
+    append = TRUE,
+    sep = "\t",
+    row.names = FALSE,
+    col.names = needs_header
+  )
+}
+
+# Run every query in esc_queries and write each result to its own TSV file.
+#
+# esc_queries: the queries to run, passed unchanged to get_sparql_result().
+#
+# The i-th query is written to "spql<i>.tsv". The position is taken from the
+# loop index rather than looked up by value, so two identical queries still
+# get separate files. Reads the globals wdqs_uri and pfx.
+bulk_sparql_query <- function(esc_queries) {
+  for (i in seq_along(esc_queries)) {
+    x <- get_sparql_result(wdqs_uri, pfx, esc_queries[[i]])
+    tsv_file <- paste0("spql", i, ".tsv")
+    write_tsv(x, tsv_file)
+  }
+}
+
+# Join all prefix declarations into one string, escape each query with
+# curl_escape(), then run the whole batch.
+pfx <- paste0(prefixes$text, collapse = "")
+esc_queries <- lapply(queries$text, curl_escape)
+bulk_sparql_query(esc_queries)
\ No newline at end of file
diff --git a/bulk_sparql.Rout b/bulk_sparql.Rout
new file mode 100644
index 000..3bb27d7
--- /dev/null
+++ b/bulk_sparql.Rout
@@ -0,0 +1,113 @@
+
+R version 3.2.2 (2015-08-14) -- "Fire Safety"
+Copyright (C) 2015 The R Foundation for Statistical Computing
+Platform: x86_64-pc-linux-gnu (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+ Natural language support but running in an English locale
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+