Addshore has submitted this change and it was merged.
Change subject: Remove old unused bulk sparql stuff
......................................................................
Remove old unused bulk sparql stuff
Change-Id: I621e2cf9c292c214ad98cfa93ed8a573b63fc88c
---
D src/sparql/Rcron.sh
D src/sparql/bulk_sparql.R
D src/sparql/config.R
D src/sparql/rdfq.xml
4 files changed, 0 insertions(+), 197 deletions(-)
Approvals:
Addshore: Verified; Looks good to me, approved
diff --git a/src/sparql/Rcron.sh b/src/sparql/Rcron.sh
deleted file mode 100644
index 5e210c1..0000000
--- a/src/sparql/Rcron.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#! /bin/bash
-R CMD BATCH /srv/dashboards/shiny-server/wdm/src/scripts/bulk_sparql.R
diff --git a/src/sparql/bulk_sparql.R b/src/sparql/bulk_sparql.R
deleted file mode 100644
index d9d2190..0000000
--- a/src/sparql/bulk_sparql.R
+++ /dev/null
@@ -1,37 +0,0 @@
-#Bulk Query of WDQS and write to TSV
-source("config.R")
-#TODO: create output path for analytics instance
-#output_path = "/a/aggregate-datasets/wikidata/sparql/"
-output_path = "/tmp/sparql/"
-qlist <- read_file("rdfq.xml")
-
-rdfq <- xmlParse(qlist)
-queries <- xmlToDataFrame(nodes = getNodeSet(rdfq, "//rdfq:select", c(rdfq = "http://wikiba.se/rdfq#")))
-prefixes <- xmlToDataFrame(nodes = getNodeSet(rdfq, "//rdfq:prefix", c(rdfq = "http://wikiba.se/rdfq#")))
-comments <- xmlToDataFrame(nodes = getNodeSet(rdfq, "//rdfs:comment", c(rdfs = "http://www.w3.org/2000/01/rdf-schema#")))
-
-get_sparql_result <- function(uri = wdqs_uri, prefix, query) {
- xml_result <- readLines(curl(paste0(uri, prefix, query)))
- doc = xmlParse(xml_result)
- result = xmlToDataFrame(nodes = getNodeSet(doc, "//sq:literal", c(sq = "http://www.w3.org/2005/sparql-results#")))
- return(result)
-}
-
-write_tsv <- function(result, filename){
- date = Sys.Date()
- file_uri <- paste0(output_path, filename)
- out = data.frame(date, result)
- write.table(out, file=file_uri, append = TRUE, sep = "\t", row.names = FALSE, col.names = FALSE)
-}
-
-bulk_sparql_query <- function(esc_queries) {
- for(q in esc_queries) {
- result <- get_sparql_result(wdqs_uri, pfx, q)
- tsv_filename <- paste0("spql", match(q, esc_queries), ".tsv")
- write_tsv(result, tsv_filename)
- }
-}
-
-esc_queries <- lapply(queries$text, curl_escape)
-pfx <- paste(prefixes$text, collapse="")
-bulk_sparql_query (esc_queries)
diff --git a/src/sparql/config.R b/src/sparql/config.R
deleted file mode 100644
index a4d6d75..0000000
--- a/src/sparql/config.R
+++ /dev/null
@@ -1,6 +0,0 @@
-library(readr)
-library(curl)
-library(XML)
-
-wdqs_uri <- "https://query.wikidata.org/bigdata/namespace/wdq/sparql?query="
-
diff --git a/src/sparql/rdfq.xml b/src/sparql/rdfq.xml
deleted file mode 100644
index 96487b2..0000000
--- a/src/sparql/rdfq.xml
+++ /dev/null
@@ -1,152 +0,0 @@
-<?xml version="1.0"?>
-
-<!DOCTYPE rdf:RDF [
- <!ENTITY owl "http://www.w3.org/2002/07/owl#" >
- <!ENTITY xsd "http://www.w3.org/2001/XMLSchema#" >
- <!ENTITY rdfs "http://www.w3.org/2000/01/rdf-schema#" >
- <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#" >
-]>
-
-
-<RDF xmlns="http://wikiba.se/rdfq#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
- <prefixes>
-
<prefix>PREFIX%20wikibase%3A%20%3Chttp%3A%2F%2Fwikiba.se%2Fontology%23%3E</prefix>
-
<prefix>PREFIX%20wdt%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Fprop%2Fdirect%2F%3E</prefix>
-
<prefix>PREFIX%20wd%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Fentity%2F%3E</prefix>
-
<prefix>PREFIX%20rdfs%3A%20%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E</prefix>
-
<prefix>PREFIX%20prov%3A%20%3Chttp%3A%2F%2Fwww.w3.org%2Fns%2Fprov%23%3E</prefix>
-
<prefix>PREFIX%20wdref%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Freference%2F%3E</prefix>
- </prefixes>
- <!-- Globe Coordinate Values -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s ?p
wikibase:GlobecoordinateValue}</select>
- <rdfs:comment>count Globecoordinate Value</rdfs:comment>
- </rdf-query>
-
- <!-- Time Values -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s a
wikibase:TimeValue}</select>
- <rdfs:comment>count TimeValue</rdfs:comment>
- </rdf-query>
-
- <!-- Quantity Values -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s a
wikibase:QuantityValue}</select>
- <rdfs:comment>count QuantityValue</rdfs:comment>
- </rdf-query>
-
- <!-- Preferred Ranks -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s wikibase:rank
wikibase:PreferredRank}</select>
- <rdfs:comment>count PreferredRank</rdfs:comment>
- </rdf-query>
-
- <!-- Qualifiers -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s
wikibase:qualifier ?o}</select>
- <rdfs:comment>count qualifier</rdfs:comment>
- </rdf-query>
-
- <!-- PropertyType is Wikibase Item -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s
wikibase:propertyType wikibase:WikibaseItem}</select>
- <rdfs:comment>count WikibaseItem</rdfs:comment>
- </rdf-query>
-
- <!-- PropertyType is CommonsMedia -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s
wikibase:propertyType wikibase:CommonsMedia}</select>
- <rdfs:comment>count Property Type = CommonsMedia</rdfs:comment>
- </rdf-query>
-
- <!-- PropertyType is Monolingualtext -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s
wikibase:propertyType wikibase:Monolingualtext}</select>
- <rdfs:comment>count Property Type = Monolingualtext</rdfs:comment>
- </rdf-query>
-
- <!-- PropertyType is Quantity -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s
wikibase:propertyType wikibase:Quantity}</select>
- <rdfs:comment>count Property Type = Quantity</rdfs:comment>
- </rdf-query>
-
- <!-- PropertyType is String -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s
wikibase:propertyType wikibase:String}</select>
- <rdfs:comment>count Property Type = String</rdfs:comment>
- </rdf-query>
-
- <!-- PropertyType is Time -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s
wikibase:propertyType wikibase:Time}</select>
- <rdfs:comment>count Property Type = Time</rdfs:comment>
- </rdf-query>
-
- <!-- PropertyType is URL -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s
wikibase:propertyType wikibase:Url}</select>
- <rdfs:comment>count Property Type = Url</rdfs:comment>
- </rdf-query>
-
- <!-- Wikimedia Categories -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s wdt:P31
wd:Q4167836}</select>
- <rdfs:comment>count instance of Wikimedia Categories</rdfs:comment>
- </rdf-query>
-
- <!-- Commons Categories -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s wdt:P373
?o}</select>
- <rdfs:comment>count has property Commons Category</rdfs:comment>
- </rdf-query>
-
- <!-- Country of Citizenship -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s wdt:P27
?o}</select>
- <rdfs:comment>count has property Country of Citizenship</rdfs:comment>
- </rdf-query>
-
- <!-- Given Name -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s wdt:P735
?o}</select>
- <rdfs:comment>count has property Given Name</rdfs:comment>
- </rdf-query>
-
- <!-- Humans -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s wdt:P31
wd:Q5}</select>
- <rdfs:comment>count instance of Human</rdfs:comment>
- </rdf-query>
-
- <!-- Entities with VIAF -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s wdt:P214
?o}</select>
- <rdfs:comment>count has property VIAF</rdfs:comment>
- </rdf-query>
-
- <!-- Entities with OCLC -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s wdt:P243
?o}</select>
- <rdfs:comment>count has property OCLC</rdfs:comment>
- </rdf-query>
-
- <!-- Get Property Label -->
- <rdf-query>
- <select>SELECT distinct ?o WHERE {wd:P735 ?p ?o
- SERVICE wikibase:label {
- bd:serviceParam wikibase:language "en" .
- wd:P735 rdfs:label ?o
- }
- }</select>
- <rdfs:comment>get Property Label e.g P735</rdfs:comment>
- </rdf-query>
-
- <!-- Statements Referenced to Wikipedia
(wdref:004ec6fbee857649acdbdbad4f97b2c8571df97b) -->
- <rdf-query>
- <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s
prov:wasDerivedFrom wdref:004ec6fbee857649acdbdbad4f97b2c8571df97b}</select>
- <rdfs:comment>Statements Referenced to Wikipedia (with GUID
x)</rdfs:comment>
- </rdf-query>
-</RDF>
-
--
To view, visit https://gerrit.wikimedia.org/r/260551
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I621e2cf9c292c214ad98cfa93ed8a573b63fc88c
Gerrit-PatchSet: 1
Gerrit-Project: analytics/limn-wikidata-data
Gerrit-Branch: master
Gerrit-Owner: Addshore <[email protected]>
Gerrit-Reviewer: Addshore <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits