Bearloga has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/374569 )
Change subject: Use the nice Hadoop queue ...................................................................... Use the nice Hadoop queue This patch makes the Hive queries use the recently created "nice" queue (T156841) that is lower priority, so that any users running fast queries get precedence. Change-Id: I1bd9c126ec42207afb443532405c0a4c5d52622f --- M CHANGELOG.md M modules/metrics/external_traffic/referer_data M modules/metrics/external_traffic/referer_nonbot_data M modules/metrics/maps/tile_aggregates.R M modules/metrics/maps/users_by_country M modules/metrics/portal/pageviews.R M modules/metrics/portal/referer_data M modules/metrics/search/cirrus_aggregates.R M modules/metrics/search/search_api_usage M modules/metrics/search/sister_search_traffic M modules/metrics/wdqs/basic_usage 11 files changed, 19 insertions(+), 10 deletions(-) Approvals: Joal: Looks good to me, but someone else must approve Chelsyx: Verified; Looks good to me, approved diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a3cece..8610ba9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ # Change Log (Patch Notes) All notable changes to this project will be documented in this file. +## 2017/08/29 +- Switched Hive queries to use the "nice" queue ([T156841](https://phabricator.wikimedia.org/T156841)). See [this section](https://wikitech.wikimedia.org/wiki/Analytics/Systems/Cluster/Hive/Queries#Run_long_queries_in_a_screen_session_and_in_the_nice_queue) for additional details. + +## 2017/08/28 +- Added search results page dwell time ([T170468](https://phabricator.wikimedia.org/T170468)) + ## 2017/08/01 - Added maplink and mapframe prevalence tracking across wikis ([T170022](https://phabricator.wikimedia.org/T170022)) diff --git a/modules/metrics/external_traffic/referer_data b/modules/metrics/external_traffic/referer_data index bc67f86..da2610a 100755 --- a/modules/metrics/external_traffic/referer_data +++ b/modules/metrics/external_traffic/referer_data @@ -1,6 +1,6 @@ #!/bin/bash -hive -S -e "ADD JAR hdfs:///wmf/refinery/current/artifacts/refinery-hive.jar; +hive -S --hiveconf mapred.job.queue.name=nice -e "ADD JAR hdfs:///wmf/refinery/current/artifacts/refinery-hive.jar; CREATE TEMPORARY FUNCTION is_external_search AS 'org.wikimedia.analytics.refinery.hive.IsExternalSearchUDF'; CREATE TEMPORARY FUNCTION get_engine AS 'org.wikimedia.analytics.refinery.hive.IdentifySearchEngineUDF'; USE wmf; diff --git a/modules/metrics/external_traffic/referer_nonbot_data b/modules/metrics/external_traffic/referer_nonbot_data index 98ad9ff..335afd3 100755 --- a/modules/metrics/external_traffic/referer_nonbot_data +++ b/modules/metrics/external_traffic/referer_nonbot_data @@ -1,6 +1,6 @@ #!/bin/bash -hive -S -e "ADD JAR hdfs:///wmf/refinery/current/artifacts/refinery-hive.jar; +hive -S --hiveconf mapred.job.queue.name=nice -e "ADD JAR hdfs:///wmf/refinery/current/artifacts/refinery-hive.jar; CREATE TEMPORARY FUNCTION is_external_search AS 'org.wikimedia.analytics.refinery.hive.IsExternalSearchUDF'; CREATE TEMPORARY FUNCTION get_engine AS 'org.wikimedia.analytics.refinery.hive.IdentifySearchEngineUDF'; USE wmf; diff --git a/modules/metrics/maps/tile_aggregates.R b/modules/metrics/maps/tile_aggregates.R index 74ead22..401fb20 100644 --- a/modules/metrics/maps/tile_aggregates.R +++ b/modules/metrics/maps/tile_aggregates.R @@ -28,7 +28,8 @@ # - tile requests per style per zoom, e.g. "osm-z10", "osm-z11", ... # Get the per-user tile usage: -query <- paste0("SELECT +query <- paste0("SET mapred.job.queue.name=nice; +SELECT date, style, zoom, scale, format, cache, user_id, is_automata, COUNT(1) AS n FROM ( SELECT diff --git a/modules/metrics/maps/users_by_country b/modules/metrics/maps/users_by_country index b01b6ad..f377c74 100755 --- a/modules/metrics/maps/users_by_country +++ b/modules/metrics/maps/users_by_country @@ -1,6 +1,6 @@ #!/bin/bash -export HADOOP_HEAPSIZE=1024 && hive -e "USE wmf; +export HADOOP_HEAPSIZE=1024 && hive -S --hiveconf mapred.job.queue.name=nice -e "USE wmf; WITH maps_users AS ( SELECT DISTINCT '$1' AS date, diff --git a/modules/metrics/portal/pageviews.R b/modules/metrics/portal/pageviews.R index 952fe1a..2d87eca 100644 --- a/modules/metrics/portal/pageviews.R +++ b/modules/metrics/portal/pageviews.R @@ -17,7 +17,8 @@ # Build query: date_clause <- as.character(as.Date(opt$date), format = "year = %Y AND month = %m AND day = %d") -query <- paste0("USE wmf; +query <- paste0("SET mapred.job.queue.name=nice; +USE wmf; SELECT client_ip, COUNT(1) AS pageviews diff --git a/modules/metrics/portal/referer_data b/modules/metrics/portal/referer_data index 2d54380..b71d0ac 100755 --- a/modules/metrics/portal/referer_data +++ b/modules/metrics/portal/referer_data @@ -1,6 +1,6 @@ #!/bin/bash -hive -S -e "ADD JAR hdfs:///wmf/refinery/current/artifacts/refinery-hive.jar; +hive -S --hiveconf mapred.job.queue.name=nice -e "ADD JAR hdfs:///wmf/refinery/current/artifacts/refinery-hive.jar; CREATE TEMPORARY FUNCTION is_external_search AS 'org.wikimedia.analytics.refinery.hive.IsExternalSearchUDF'; CREATE TEMPORARY FUNCTION get_engine AS 'org.wikimedia.analytics.refinery.hive.IdentifySearchEngineUDF'; USE wmf; diff --git a/modules/metrics/search/cirrus_aggregates.R b/modules/metrics/search/cirrus_aggregates.R index ced83e0..dc36172 100644 --- a/modules/metrics/search/cirrus_aggregates.R +++ b/modules/metrics/search/cirrus_aggregates.R @@ -23,7 +23,8 @@ # Build query: date_clause <- as.character(as.Date(opt$date), format = "year = %Y AND month = %m AND day = %d") -query <- paste0("ADD JAR hdfs:///wmf/refinery/current/artifacts/refinery-hive.jar; +query <- paste0("SET mapred.job.queue.name=nice; +ADD JAR hdfs:///wmf/refinery/current/artifacts/refinery-hive.jar; CREATE TEMPORARY FUNCTION array_sum AS 'org.wikimedia.analytics.refinery.hive.ArraySumUDF'; CREATE TEMPORARY FUNCTION is_spider as 'org.wikimedia.analytics.refinery.hive.IsSpiderUDF'; CREATE TEMPORARY FUNCTION ua_parser as 'org.wikimedia.analytics.refinery.hive.UAParserUDF'; diff --git a/modules/metrics/search/search_api_usage b/modules/metrics/search/search_api_usage index f9de476..6f1a4f8 100755 --- a/modules/metrics/search/search_api_usage +++ b/modules/metrics/search/search_api_usage @@ -1,6 +1,6 @@ #!/bin/bash -hive -S -e "ADD JAR hdfs:///wmf/refinery/current/artifacts/refinery-hive.jar; +hive -S --hiveconf mapred.job.queue.name=nice -e "ADD JAR hdfs:///wmf/refinery/current/artifacts/refinery-hive.jar; CREATE TEMPORARY FUNCTION search_classify AS 'org.wikimedia.analytics.refinery.hive.GetSearchRequestTypeUDF'; USE wmf; SELECT diff --git a/modules/metrics/search/sister_search_traffic b/modules/metrics/search/sister_search_traffic index 76c091c..0e5b7c6 100755 --- a/modules/metrics/search/sister_search_traffic +++ b/modules/metrics/search/sister_search_traffic @@ -1,6 +1,6 @@ #!/bin/bash -hive -S -e "USE wmf; +hive -S --hiveconf mapred.job.queue.name=nice -e "USE wmf; ADD JAR hdfs:///wmf/refinery/current/artifacts/refinery-hive.jar; CREATE TEMPORARY FUNCTION normalize_host AS 'org.wikimedia.analytics.refinery.hive.GetHostPropertiesUDF'; WITH sister_search_pvs AS ( diff --git a/modules/metrics/wdqs/basic_usage b/modules/metrics/wdqs/basic_usage index 2c6f954..531ead0 100755 --- a/modules/metrics/wdqs/basic_usage +++ b/modules/metrics/wdqs/basic_usage @@ -1,6 +1,6 @@ #!/bin/bash -hive -S -e "USE wmf; +hive -S --hiveconf mapred.job.queue.name=nice -e "USE wmf; SELECT '$1' AS date, IF(uri_path = '/sparql', '/bigdata/namespace/wdq/sparql', uri_path) AS path, -- To view, visit https://gerrit.wikimedia.org/r/374569 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I1bd9c126ec42207afb443532405c0a4c5d52622f Gerrit-PatchSet: 1 Gerrit-Project: wikimedia/discovery/golden Gerrit-Branch: master Gerrit-Owner: Bearloga <[email protected]> Gerrit-Reviewer: Bearloga <[email protected]> Gerrit-Reviewer: Chelsyx <[email protected]> Gerrit-Reviewer: Joal <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
