Nuria has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/327845 )
Change subject: [WIP] POC of loading tile data into pivot
......................................................................
[WIP] POC of loading tile data into pivot
Code and config likely to be much improved
Bug: T151832
Change-Id: Ie3ee2a33aeef68c970cd0284529a20803bfc7700
---
A oozie/maps/druid/load_map_tiles.template.json
A oozie/maps/druid/tiles_table.hql
2 files changed, 105 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery
refs/changes/45/327845/1
diff --git a/oozie/maps/druid/load_map_tiles.template.json
b/oozie/maps/druid/load_map_tiles.template.json
new file mode 100644
index 0000000..442ee84
--- /dev/null
+++ b/oozie/maps/druid/load_map_tiles.template.json
@@ -0,0 +1,73 @@
+{
+ "type" : "index_hadoop",
+ "spec" : {
+ "ioConfig" : {
+ "type" : "hadoop",
+ "inputSpec" : {
+ "type" : "static",
+ "paths" : "*INPUT_PATH*"
+ }
+ },
+ "dataSchema" : {
+ "dataSource" : "tiles-poc",
+ "granularitySpec" : {
+ "type" : "uniform",
+ "segmentGranularity" : "day",
+ "queryGranularity" : "hour",
+ "intervals" : *INTERVALS_ARRAY*
+ },
+ "parser" : {
+ "type" : "string",
+ "parseSpec" : {
+ "format" : "json",
+ "dimensionsSpec" : {
+ "dimensions" : [
+ "project",
+ "agent_type",
+ "language",
+ "continent",
+ "country_code",
+ "country",
+ "ua_browser_family",
+ "ua_browser_major",
+ "ua_os_family",
+ "ua_os_major",
+ "ua_os_minor",
+ "host",
+ "used_on",
+ "style",
+ "zoom",
+ "scale",
+ "format"
+ ]
+ },
+ "timestampSpec" : {
+ "format" : "auto",
+ "column" : "ts"
+ }
+ }
+ },
+ "metricsSpec" : [
+ {
+ "name" : "tiles",
+ "type" : "longSum",
+ "fieldName": "tiles"
+ }
+ ]
+ },
+ "tuningConfig" : {
+ "type" : "hadoop",
+ "ignoreInvalidRows" : false,
+ "partitionsSpec" : {
+ "type" : "hashed",
+ "numShards" : 8
+ },
+ "jobProperties" : {
+ "mapreduce.reduce.memory.mb" : "8192",
+ "mapreduce.output.fileoutputformat.compress":
"org.apache.hadoop.io.compress.GzipCodec",
+ "mapreduce.job.queuename": "*HADOOP_QUEUE*"
+ }
+ }
+ }
+}
+
diff --git a/oozie/maps/druid/tiles_table.hql b/oozie/maps/druid/tiles_table.hql
new file mode 100644
index 0000000..d94d9cc
--- /dev/null
+++ b/oozie/maps/druid/tiles_table.hql
@@ -0,0 +1,32 @@
+-- Creates the destination table of Kartotherian tile counts, partitioned by hour, stored as SEQUENCEFILE.
+
+CREATE TABLE IF NOT EXISTS ${destination_table} (
+ ts STRING COMMENT 'Timestamp, formatted as YYYY-MM-DDTHH:00:00Z',
+ agent_type STRING COMMENT 'user or bot/tool',
+ continent STRING COMMENT 'Continent of the accessing agents (computed using
maxmind GeoIP database)',
+ country_code STRING COMMENT 'Country ISO code of the accessing agents
(computed using maxmind GeoIP database)',
+ country STRING COMMENT 'Country (text) of the accessing agents (computed
using maxmind GeoIP database)',
+ ua_browser_family STRING COMMENT 'Browser extracted from UA (e.g. Firefox)',
+ ua_browser_major STRING,
+ ua_device_family STRING COMMENT 'Device extracted from User Agent',
+ ua_os_family STRING COMMENT 'Operating system extracted from User Agent',
+ ua_os_major STRING,
+ ua_os_minor STRING,
+ host STRING COMMENT 'Host of request (wikimedia or other)',
+ project STRING COMMENT 'Project name from request referer if it is a WMF
site (e.g. wikivoyage)',
+ language STRING COMMENT 'Language prefix from request referer if it is a WMF
site (e.g. ru)',
+ used_on STRING COMMENT 'A specific wiki page, Not a specific wiki page,
Wikimedia Labs, GeoHack, or -',
+ style STRING COMMENT 'e.g. osm-intl',
+ zoom STRING COMMENT 'zoom level of the tiles (1-18)',
+ scale STRING COMMENT 'e.g. 1.5',
+ format STRING COMMENT 'e.g. png',
+ tiles BIGINT COMMENT 'Number of tiles successfully requested'
+)
+COMMENT 'This is a table of Kartotherian usage (counts of tiles successfully
served).'
+PARTITIONED BY (
+ year INT COMMENT 'Unpadded year',
+ month INT COMMENT 'Unpadded month',
+ day INT COMMENT 'Unpadded day',
+ hour INT COMMENT 'Unpadded hour'
+)
+STORED AS SEQUENCEFILE;
--
To view, visit https://gerrit.wikimedia.org/r/327845
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie3ee2a33aeef68c970cd0284529a20803bfc7700
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: Nuria <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits