Addshore has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/260551

Change subject: Remove old unused bulk sparql stuff
......................................................................

Remove old unused bulk sparql stuff

Change-Id: I621e2cf9c292c214ad98cfa93ed8a573b63fc88c
---
D src/sparql/Rcron.sh
D src/sparql/bulk_sparql.R
D src/sparql/config.R
D src/sparql/rdfq.xml
4 files changed, 0 insertions(+), 197 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/limn-wikidata-data 
refs/changes/51/260551/1

diff --git a/src/sparql/Rcron.sh b/src/sparql/Rcron.sh
deleted file mode 100644
index 5e210c1..0000000
--- a/src/sparql/Rcron.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#! /bin/bash
-R CMD BATCH /srv/dashboards/shiny-server/wdm/src/scripts/bulk_sparql.R
diff --git a/src/sparql/bulk_sparql.R b/src/sparql/bulk_sparql.R
deleted file mode 100644
index d9d2190..0000000
--- a/src/sparql/bulk_sparql.R
+++ /dev/null
@@ -1,37 +0,0 @@
-#Bulk Query of WDQS and write to TSV
-source("config.R")
-#TODO: create output path for analytics instance
-#output_path = "/a/aggregate-datasets/wikidata/sparql/"
-output_path = "/tmp/sparql/"
-qlist <- read_file("rdfq.xml")
-
-rdfq <- xmlParse(qlist)
-queries <- xmlToDataFrame(nodes = getNodeSet(rdfq, "//rdfq:select", c(rdfq = "http://wikiba.se/rdfq#")))
-prefixes <- xmlToDataFrame(nodes = getNodeSet(rdfq, "//rdfq:prefix", c(rdfq = "http://wikiba.se/rdfq#")))
-comments <- xmlToDataFrame(nodes = getNodeSet(rdfq, "//rdfs:comment", c(rdfs = "http://www.w3.org/2000/01/rdf-schema#")))
-
-get_sparql_result <- function(uri = wdqs_uri, prefix, query) {
-  xml_result <- readLines(curl(paste0(uri, prefix, query)))
-  doc = xmlParse(xml_result)
-  result = xmlToDataFrame(nodes = getNodeSet(doc, "//sq:literal", c(sq = "http://www.w3.org/2005/sparql-results#")))
-  return(result)
-}
-
-write_tsv <- function(result, filename){
-  date = Sys.Date()
-  file_uri <- paste0(output_path, filename)
-  out = data.frame(date, result)
-  write.table(out, file=file_uri, append = TRUE, sep = "\t", row.names = 
FALSE, col.names = FALSE)
-}
-
-bulk_sparql_query <- function(esc_queries) {
-  for(q in esc_queries) {
-    result <- get_sparql_result(wdqs_uri, pfx, q)
-    tsv_filename <- paste0("spql", match(q, esc_queries), ".tsv")
-    write_tsv(result, tsv_filename)
-  }
-}
-
-esc_queries <- lapply(queries$text, curl_escape)
-pfx <- paste(prefixes$text, collapse="")
-bulk_sparql_query (esc_queries)
diff --git a/src/sparql/config.R b/src/sparql/config.R
deleted file mode 100644
index a4d6d75..0000000
--- a/src/sparql/config.R
+++ /dev/null
@@ -1,6 +0,0 @@
-library(readr)
-library(curl)
-library(XML)
-
-wdqs_uri <- "https://query.wikidata.org/bigdata/namespace/wdq/sparql?query="
-
diff --git a/src/sparql/rdfq.xml b/src/sparql/rdfq.xml
deleted file mode 100644
index 96487b2..0000000
--- a/src/sparql/rdfq.xml
+++ /dev/null
@@ -1,152 +0,0 @@
-<?xml version="1.0"?>
-
-<!DOCTYPE rdf:RDF [
-    <!ENTITY owl "http://www.w3.org/2002/07/owl#" >
-    <!ENTITY xsd "http://www.w3.org/2001/XMLSchema#" >
-    <!ENTITY rdfs "http://www.w3.org/2000/01/rdf-schema#" >
-    <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#" >
-]>
-
-
-<RDF xmlns="http://wikiba.se/rdfq#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-     xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
-  <prefixes>
-    
<prefix>PREFIX%20wikibase%3A%20%3Chttp%3A%2F%2Fwikiba.se%2Fontology%23%3E</prefix>
-    
<prefix>PREFIX%20wdt%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Fprop%2Fdirect%2F%3E</prefix>
-    
<prefix>PREFIX%20wd%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Fentity%2F%3E</prefix>
-    
<prefix>PREFIX%20rdfs%3A%20%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E</prefix>
-    
<prefix>PREFIX%20prov%3A%20%3Chttp%3A%2F%2Fwww.w3.org%2Fns%2Fprov%23%3E</prefix>
-    
<prefix>PREFIX%20wdref%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Freference%2F%3E</prefix>
-  </prefixes>
-  <!-- Globe Coordinate Values -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s ?p 
wikibase:GlobecoordinateValue}</select>
-    <rdfs:comment>count Globecoordinate Value</rdfs:comment>
-  </rdf-query>
-
-  <!-- Time Values -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s a 
wikibase:TimeValue}</select>
-    <rdfs:comment>count TimeValue</rdfs:comment>
-  </rdf-query>
-
-  <!-- Quantity Values -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s a 
wikibase:QuantityValue}</select>
-    <rdfs:comment>count QuantityValue</rdfs:comment>
-  </rdf-query>
-
-  <!-- Preferred Ranks -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s wikibase:rank 
wikibase:PreferredRank}</select>
-    <rdfs:comment>count PreferredRank</rdfs:comment>
-  </rdf-query>
-
-  <!-- Qualifiers -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s 
wikibase:qualifier ?o}</select>
-    <rdfs:comment>count qualifier</rdfs:comment>
-  </rdf-query>
-
-  <!-- PropertyType is Wikibase Item -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s 
wikibase:propertyType wikibase:WikibaseItem}</select>
-    <rdfs:comment>count WikibaseItem</rdfs:comment>
-  </rdf-query>
-
-  <!-- PropertyType is CommonsMedia -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s 
wikibase:propertyType wikibase:CommonsMedia}</select>
-    <rdfs:comment>count Property Type = CommonsMedia</rdfs:comment>
-  </rdf-query>
-
-  <!-- PropertyType is Monolingualtext -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s 
wikibase:propertyType wikibase:Monolingualtext}</select>
-    <rdfs:comment>count Property Type = Monolingualtext</rdfs:comment>
-  </rdf-query>
-
-  <!-- PropertyType is Quantity -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s 
wikibase:propertyType wikibase:Quantity}</select>
-    <rdfs:comment>count Property Type = Quantity</rdfs:comment>
-  </rdf-query>
-
-  <!-- PropertyType is String -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s 
wikibase:propertyType wikibase:String}</select>
-    <rdfs:comment>count Property Type = String</rdfs:comment>
-  </rdf-query>
-
-  <!-- PropertyType is Time -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s 
wikibase:propertyType wikibase:Time}</select>
-    <rdfs:comment>count Property Type = Time</rdfs:comment>
-  </rdf-query>
-
-  <!-- PropertyType is URL -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s 
wikibase:propertyType wikibase:Url}</select>
-    <rdfs:comment>count Property Type = Url</rdfs:comment>
-  </rdf-query>
-
-  <!-- Wikimedia Categories -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s wdt:P31 
wd:Q4167836}</select>
-    <rdfs:comment>count instance of Wikimedia Categories</rdfs:comment>
-  </rdf-query>
-
-  <!-- Commons Categories -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s wdt:P373 
?o}</select>
-    <rdfs:comment>count has property Commons Category</rdfs:comment>
-  </rdf-query>
-
-  <!-- Country of Citizenship -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s wdt:P27 
?o}</select>
-    <rdfs:comment>count has property Country of Citizenship</rdfs:comment>
-  </rdf-query>
-
-  <!-- Given Name -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s wdt:P735 
?o}</select>
-    <rdfs:comment>count has property Given Name</rdfs:comment>
-  </rdf-query>
-
-  <!-- Humans -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s wdt:P31 
wd:Q5}</select>
-    <rdfs:comment>count instance of Human</rdfs:comment>
-  </rdf-query>
-
-  <!-- Entities with VIAF -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s wdt:P214 
?o}</select>
-    <rdfs:comment>count has property VIAF</rdfs:comment>
-  </rdf-query>
-
-  <!-- Entities with OCLC -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s wdt:P243 
?o}</select>
-    <rdfs:comment>count has property OCLC</rdfs:comment>
-  </rdf-query>
-
-  <!-- Get Property Label -->
-  <rdf-query>
-    <select>SELECT distinct ?o WHERE {wd:P735 ?p ?o
-    SERVICE wikibase:label {
-      bd:serviceParam wikibase:language "en" .
-      wd:P735 rdfs:label ?o
-    }
-  }</select>
-    <rdfs:comment>get Property Label e.g P735</rdfs:comment>
-  </rdf-query>
-
-  <!-- Statements Referenced to Wikipedia 
(wdref:004ec6fbee857649acdbdbad4f97b2c8571df97b) -->
-  <rdf-query>
-    <select>SELECT (count(distinct(?s)) AS ?scount) WHERE {?s 
prov:wasDerivedFrom wdref:004ec6fbee857649acdbdbad4f97b2c8571df97b}</select>
-    <rdfs:comment>Statements Referenced to Wikipedia (with GUID 
x)</rdfs:comment>
-  </rdf-query>
-</RDF>
-

-- 
To view, visit https://gerrit.wikimedia.org/r/260551
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I621e2cf9c292c214ad98cfa93ed8a573b63fc88c
Gerrit-PatchSet: 1
Gerrit-Project: analytics/limn-wikidata-data
Gerrit-Branch: master
Gerrit-Owner: Addshore <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to