Joal has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/405899 )

Change subject: Add ISP data to webrequest table
......................................................................

Add ISP data to webrequest table

Update script for table creation and modify oozie loading job.

Bug: T167907
Change-Id: Id2f73e702b020459e2348a31e429df0ca0b30a2d
---
M hive/webrequest/create_webrequest_table.hql
M oozie/webrequest/load/bundle.properties
M oozie/webrequest/load/refine_webrequest.hql
3 files changed, 7 insertions(+), 4 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery 
refs/changes/99/405899/1

diff --git a/hive/webrequest/create_webrequest_table.hql 
b/hive/webrequest/create_webrequest_table.hql
index 9837680..749d1a6 100644
--- a/hive/webrequest/create_webrequest_table.hql
+++ b/hive/webrequest/create_webrequest_table.hql
@@ -55,7 +55,8 @@
     `pageview_info`     map<string, string>  COMMENT 'map containing project, 
language_variant and page_title values only when is_pageview = TRUE.',
     `page_id`           bigint  COMMENT 'MediaWiki page_id for this page 
title. For redirects this could be the page_id of the redirect or the page_id 
of the target. This may not always be set, even if the page is actually a 
pageview.',
     `namespace_id`      int     COMMENT 'MediaWiki namespace_id for this page 
title. This may not always be set, even if the page is actually a pageview.',
-    `tags`              array<string> COMMENT 'List containing tags qualifying 
the request, ex: [portal, wikidata]. Will be used to split webrequest into 
smaller subsets.'
+    `tags`              array<string> COMMENT 'List containing tags qualifying 
the request, ex: [portal, wikidata]. Will be used to split webrequest into 
smaller subsets.',
+    `isp_data`          map<string, string>  COMMENT 'Internet Service 
Provider data in a map with keys isp, organization, 
autonomous_system_organization and autonomous_system_number'
 
 )
 PARTITIONED BY (
diff --git a/oozie/webrequest/load/bundle.properties 
b/oozie/webrequest/load/bundle.properties
index ef592c8..58760ca 100644
--- a/oozie/webrequest/load/bundle.properties
+++ b/oozie/webrequest/load/bundle.properties
@@ -57,10 +57,10 @@
 webrequest_table                  = wmf.webrequest
 
 # Version of Hive UDF jar to import
-refinery_jar_version              = 0.0.53
+refinery_jar_version              = 0.0.58
 
 # Record version to keep track of changes
-record_version                    = 0.0.19
+record_version                    = 0.0.20
 
 # Hive table name.
 statistics_table                  = wmf_raw.webrequest_sequence_stats
diff --git a/oozie/webrequest/load/refine_webrequest.hql 
b/oozie/webrequest/load/refine_webrequest.hql
index 591f83c..a6e7676 100644
--- a/oozie/webrequest/load/refine_webrequest.hql
+++ b/oozie/webrequest/load/refine_webrequest.hql
@@ -57,6 +57,7 @@
 CREATE TEMPORARY FUNCTION get_pageview_info AS 
'org.wikimedia.analytics.refinery.hive.GetPageviewInfoUDF';
 CREATE TEMPORARY FUNCTION normalize_host AS 
'org.wikimedia.analytics.refinery.hive.HostNormalizerUDF';
 CREATE TEMPORARY FUNCTION get_tags AS 
'org.wikimedia.analytics.refinery.hive.GetWebrequestTagsUDF';
+CREATE TEMPORARY FUNCTION isp_data as 
'org.wikimedia.analytics.refinery.hive.GetISPDataUDF';
 
 
 INSERT OVERWRITE TABLE ${destination_table}
@@ -113,7 +114,8 @@
           WHEN '-' THEN NULL
           ELSE str_to_map(x_analytics, '\;', '=')['ns']
         END as namespace_id,
-        get_tags(uri_host, uri_path, uri_query, http_status, content_type, 
user_agent, x_analytics) as tags
+        get_tags(uri_host, uri_path, uri_query, http_status, content_type, 
user_agent, x_analytics) as tags,
+        isp_data(ip) as isp_data
     FROM
         ${source_table}
     WHERE

-- 
To view, visit https://gerrit.wikimedia.org/r/405899
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id2f73e702b020459e2348a31e429df0ca0b30a2d
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: Joal <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to