Christopher Johnson (WMDE) has uploaded a new change for review. https://gerrit.wikimedia.org/r/251830
Change subject: adds utils.R to property_usage.R script ...................................................................... adds utils.R to property_usage.R script Change-Id: I5d2c84b8f9ef3e51c76852b3baca03c64f2e81f1 --- M src/scripts/property_usage.R M src/utils.R 2 files changed, 4 insertions(+), 36 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/wikidata/analytics/dashboard refs/changes/30/251830/1 diff --git a/src/scripts/property_usage.R b/src/scripts/property_usage.R index 61d1e93..ac87f36 100644 --- a/src/scripts/property_usage.R +++ b/src/scripts/property_usage.R @@ -1,20 +1,6 @@ #Bulk Query of WDQS for Property Use Counts and write to TSV source("/srv/dashboards/shiny-server/wdm/src/config.R") - -get_sparql_result_from_uri <- function(uri = wdmrdf_uri, prefix, query) { - xml_result <- readLines(curl(paste0(uri, prefix, query))) - doc = xmlParse(xml_result) - result = xmlToDataFrame(nodes = getNodeSet(doc, "//sq:uri", c(sq = "http://www.w3.org/2005/sparql-results#"))) - return(result) -} - -get_estimated_card_from_prop_predicate <- function(uri = estcard.uri, predicate) { - xml_result <- getForm(uri, p=paste0("<http://www.wikidata.org/prop/statement/", predicate, ">")) - doc = xmlParse(xml_result) - result = xpathApply(doc, "//data[@rangeCount]", xmlGetAttr, "rangeCount") - return(result) - -} +source("/srv/dashboards/shiny-server/wdm/src/utils.R") write_prop_usage_counts <- function() { query <- get_property_list_query() @@ -31,7 +17,8 @@ setkey(prop_counts, id) dt_join_prop_usage <- props[prop_counts] dt_join_prop_usage <- dt_join_prop_usage[,.SD,.SDcols=c(1,3)] - write.table(dtjoin_prop_usage, "/srv/dashboards/shiny-server/wdm/data/sparql/prop_usage.tsv", sep = "\t", row.names = FALSE, col.names = FALSE) + dt_join_prop_usage <- setnames(dt_join_prop_usage, c("Property", "Count")) + write.table(dt_join_prop_usage, "/srv/dashboards/shiny-server/wdm/data/sparql/prop_usage.tsv", sep = "\t", row.names = FALSE) } write_prop_usage_counts() \ No newline at end of file diff --git a/src/utils.R b/src/utils.R index ce61144..b858894 100644 --- a/src/utils.R +++ b/src/utils.R @@ -254,23 +254,4 @@ result = xpathApply(doc, "//data[@rangeCount]", xmlGetAttr, "rangeCount") return(result) -} - -write_prop_usage_counts <- function() { - query <- get_property_list_query() - prefix <- get_property_label_prefixes() - plist <- get_sparql_result_from_uri(wdmrdf_uri, prefix, query) - props <- lapply(plist, function(x) gsub("http://www.wikidata.org/entity/", "", x)) - values <- lapply(props$text, function(x) get_estimated_card_from_prop_predicate(estcard.uri, x)) - vals <- do.call(c, unlist(values, recursive=FALSE)) - prop_counts <- data.table(vals) - props <- data.table(props$text) - props$id <- seq_len(nrow(props)) - prop_counts$id <- seq_len(nrow(prop_counts)) - setkey(props, id) - setkey(prop_counts, id) - dt_join_prop_usage <- props[prop_counts] - dt_join_prop_usage <- dt_join_prop_usage[,.SD,.SDcols=c(1,3)] - write.table(dtjoin_prop_usage, "/srv/dashboards/shiny-server/wdm/data/sparql/prop_usage.tsv", sep = "\t", row.names = FALSE, col.names = FALSE) -} - +} \ No newline at end of file -- To view, visit https://gerrit.wikimedia.org/r/251830 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I5d2c84b8f9ef3e51c76852b3baca03c64f2e81f1 Gerrit-PatchSet: 1 Gerrit-Project: wikidata/analytics/dashboard Gerrit-Branch: master Gerrit-Owner: Christopher Johnson (WMDE) <christopher.john...@wikimedia.de> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits