DCausse has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/384989 )

Change subject: Fetch inner hits and only the first page
......................................................................

Fetch inner hits and only the first page

Change-Id: Ifc2dcb24111bfececa5c448f886f2db3a2b39aff
---
M oozie/query_clicks/hourly/query_clicks_hourly.hql
1 file changed, 5 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/analytics refs/changes/89/384989/1

diff --git a/oozie/query_clicks/hourly/query_clicks_hourly.hql b/oozie/query_clicks/hourly/query_clicks_hourly.hql
index 8b825b5..a9099e5 100644
--- a/oozie/query_clicks/hourly/query_clicks_hourly.hql
+++ b/oozie/query_clicks/hourly/query_clicks_hourly.hql
@@ -56,6 +56,7 @@
 ADD JAR hdfs://analytics-hadoop/user/ebernhardson/refinery-hive-0.0.39-SNAPSHOT.jar;
 CREATE TEMPORARY FUNCTION get_pageview_info AS 'org.wikimedia.analytics.refinery.hive.GetPageviewInfoUDF';
 CREATE TEMPORARY FUNCTION get_main_search_request AS 'org.wikimedia.analytics.refinery.hive.GetMainSearchRequestUDF';
+CREATE TEMPORARY FUNCTION get_main_search_request_index AS 'org.wikimedia.analytics.refinery.hive.GetMainSearchRequestIndexUDF';
 
 -- Generate row_timestamp, start_timestamp and end_timestamp variables to allow
 -- requesting the specified year/month/day/hour, and the following hour, from the
@@ -156,9 +157,11 @@
         -- Make sure we only extract from content index
         AND SIZE(get_main_search_request(csrs.wikiid, csrs.requests).indices) == 1
         AND get_main_search_request(csrs.wikiid, csrs.requests).indices[0] LIKE '%_content'
+        -- Only fetch first page for simplicity
+        AND get_main_search_request(csrs.wikiid, csrs.requests).hitsoffset = 0
         -- We only want 'normal' requests here. if the user requested more than
         -- the default 20 results filter them out
-        AND SIZE(csrs.hits) <= 20
+        AND SIZE(get_main_search_request(csrs.wikiid, csrs.requests).hits) <= 20
 )
 
 INSERT OVERWRITE TABLE
@@ -174,7 +177,7 @@
     search_req.timestamp,
     search_req.wikiid,
     search_req.project,
-    search_req.hits,
+    get_main_search_request(search_req.requests).hits,
     web_req.clicks
 FROM
     search_req

-- 
To view, visit https://gerrit.wikimedia.org/r/384989
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ifc2dcb24111bfececa5c448f886f2db3a2b39aff
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/analytics
Gerrit-Branch: master
Gerrit-Owner: DCausse <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to