Ottomata has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/403206 )
Change subject: Use hadoop cluster name variable in camus templates
......................................................................

Use hadoop cluster name variable in camus templates

This lets camus be puppetized in labs

Bug: T166248
Change-Id: I164c84408110a1ffebc169ff0800720ed2b192fa
---
M modules/camus/templates/eventbus.erb
M modules/camus/templates/eventlogging.erb
M modules/camus/templates/mediawiki.erb
M modules/camus/templates/mediawiki_job.erb
M modules/camus/templates/webrequest.erb
M modules/profile/manifests/analytics/refinery/job/camus.pp
6 files changed, 18 insertions(+), 15 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/06/403206/1

diff --git a/modules/camus/templates/eventbus.erb b/modules/camus/templates/eventbus.erb
index cf294bb..3070cb9 100644
--- a/modules/camus/templates/eventbus.erb
+++ b/modules/camus/templates/eventbus.erb
@@ -10,17 +10,17 @@

 # final top-level data output directory, sub-directory will be dynamically
 # created for each topic pulled
-etl.destination.path=hdfs://analytics-hadoop/wmf/data/raw/event
+etl.destination.path=hdfs://<%= @hadoop_cluster_name %>/wmf/data/raw/event

 # Allow overwrites of previously imported files in etl.destination.path
 etl.destination.overwrite=true

 # HDFS location where you want to keep execution files, i.e. offsets,
 # error logs, and count files
-etl.execution.base.path=hdfs://analytics-hadoop/wmf/camus/eventbus
+etl.execution.base.path=hdfs://<%= @hadoop_cluster_name %>/wmf/camus/eventbus

 # where completed Camus job output directories are kept, usually a sub-dir in the base.path
-etl.execution.history.path=hdfs://analytics-hadoop/wmf/camus/eventbus/history
+etl.execution.history.path=hdfs://<%= @hadoop_cluster_name %>/wmf/camus/eventbus/history

 # Our
 # Our timestamps look like 2013-09-20T15:40:17+00:00

diff --git a/modules/camus/templates/eventlogging.erb b/modules/camus/templates/eventlogging.erb
index 9397226..009a9d9 100644
--- a/modules/camus/templates/eventlogging.erb
+++ b/modules/camus/templates/eventlogging.erb
@@ -8,12 +8,12 @@
 mapreduce.job.queuename=default

 # final top-level data output directory, sub-directory will be dynamically created for each topic pulled
-etl.destination.path=hdfs://analytics-hadoop/wmf/data/raw/eventlogging
+etl.destination.path=hdfs://<%= @hadoop_cluster_name %>/wmf/data/raw/eventlogging
 etl.destination.overwrite=true

 # HDFS location where you want to keep execution files, i.e. offsets, error logs, and count files
-etl.execution.base.path=hdfs://analytics-hadoop/wmf/camus/eventlogging
+etl.execution.base.path=hdfs://<%= @hadoop_cluster_name %>/wmf/camus/eventlogging
 # where completed Camus job output directories are kept, usually a sub-dir in the base.path
-etl.execution.history.path=hdfs://analytics-hadoop/wmf/camus/eventlogging/history
+etl.execution.history.path=hdfs://<%= @hadoop_cluster_name %>/wmf/camus/eventlogging/history
 # Our timestamps look like 2013-09-20T15:40:17
 camus.message.timestamp.format=yyyy-MM-dd'T'HH:mm:ss

diff --git a/modules/camus/templates/mediawiki.erb b/modules/camus/templates/mediawiki.erb
index 2fa12d4..1293b12 100644
--- a/modules/camus/templates/mediawiki.erb
+++ b/modules/camus/templates/mediawiki.erb
@@ -8,12 +8,12 @@
 mapreduce.job.queuename=essential

 # final top-level data output directory, sub-directory will be dynamically created for each topic pulled
-etl.destination.path=hdfs://analytics-hadoop/wmf/data/raw/mediawiki
+etl.destination.path=hdfs://<%= @hadoop_cluster_name %>/wmf/data/raw/mediawiki
 etl.destination.overwrite=true

 # HDFS location where you want to keep execution files, i.e. offsets, error logs, and count files
-etl.execution.base.path=hdfs://analytics-hadoop/wmf/camus/mediawiki
+etl.execution.base.path=hdfs://<%= @hadoop_cluster_name %>/wmf/camus/mediawiki
 # where completed Camus job output directories are kept, usually a sub-dir in the base.path
-etl.execution.history.path=hdfs://analytics-hadoop/wmf/camus/mediawiki/history
+etl.execution.history.path=hdfs://<%= @hadoop_cluster_name %>/wmf/camus/mediawiki/history
 # Concrete implementation of the Decoder class to use.
 camus.message.decoder.class=org.wikimedia.analytics.refinery.camus.coders.AvroBinaryMessageDecoder

diff --git a/modules/camus/templates/mediawiki_job.erb b/modules/camus/templates/mediawiki_job.erb
index 528ecbf..2babc94 100644
--- a/modules/camus/templates/mediawiki_job.erb
+++ b/modules/camus/templates/mediawiki_job.erb
@@ -10,17 +10,17 @@

 # final top-level data output directory, sub-directory will be dynamically
 # created for each topic pulled
-etl.destination.path=hdfs://analytics-hadoop/wmf/data/raw/mediawiki_job
+etl.destination.path=hdfs://<%= @hadoop_cluster_name %>/wmf/data/raw/mediawiki_job

 # Allow overwrites of previously imported files in etl.destination.path
 etl.destination.overwrite=true

 # HDFS location where you want to keep execution files, i.e. offsets,
 # error logs, and count files
-etl.execution.base.path=hdfs://analytics-hadoop/wmf/camus/mediawiki_job
+etl.execution.base.path=hdfs://<%= @hadoop_cluster_name %>/wmf/camus/mediawiki_job

 # where completed Camus job output directories are kept, usually a sub-dir in the base.path
-etl.execution.history.path=hdfs://analytics-hadoop/wmf/camus/mediawiki_job/history
+etl.execution.history.path=hdfs://<%= @hadoop_cluster_name %>/wmf/camus/mediawiki_job/history

 # Our timestamps look like 2013-09-20T15:40:17+00:00
 camus.message.timestamp.format=yyyy-MM-dd'T'HH:mm:ssXXX

diff --git a/modules/camus/templates/webrequest.erb b/modules/camus/templates/webrequest.erb
index dc3c501..187ee7f 100644
--- a/modules/camus/templates/webrequest.erb
+++ b/modules/camus/templates/webrequest.erb
@@ -13,12 +13,12 @@
 fs.permissions.umask-mode=027

 # final top-level data output directory, sub-directory will be dynamically created for each topic pulled
-etl.destination.path=hdfs://analytics-hadoop/wmf/data/raw/webrequest
+etl.destination.path=hdfs://<%= @hadoop_cluster_name %>/wmf/data/raw/webrequest
 etl.destination.overwrite=true

 # HDFS location where you want to keep execution files, i.e. offsets, error logs, and count files
-etl.execution.base.path=hdfs://analytics-hadoop/wmf/camus/webrequest
+etl.execution.base.path=hdfs://<%= @hadoop_cluster_name %>/wmf/camus/webrequest
 # where completed Camus job output directories are kept, usually a sub-dir in the base.path
-etl.execution.history.path=hdfs://analytics-hadoop/wmf/camus/webrequest/history
+etl.execution.history.path=hdfs://<%= @hadoop_cluster_name %>/wmf/camus/webrequest/history
 # Our timestamps look like 2013-09-20T15:40:17
 camus.message.timestamp.format=yyyy-MM-dd'T'HH:mm:ss

diff --git a/modules/profile/manifests/analytics/refinery/job/camus.pp b/modules/profile/manifests/analytics/refinery/job/camus.pp
index c471806..69cea54 100644
--- a/modules/profile/manifests/analytics/refinery/job/camus.pp
+++ b/modules/profile/manifests/analytics/refinery/job/camus.pp
@@ -14,6 +14,9 @@

     $kafka_config = kafka_config($kafka_cluster_name)

+    # Used when configuring hdfs paths in camus templates.
+    $hadoop_cluster_name = $::profile::analytics::hadoop::common::cluster_name
+
     # Make all uses of camus::job set default kafka_brokers and camus_jar.
     # If you build a new camus or refinery, and you want to use it, you'll
     # need to change these. You can also override these defaults

--
To view, visit https://gerrit.wikimedia.org/r/403206
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I164c84408110a1ffebc169ff0800720ed2b192fa
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <ao...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
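
[Editor's note: a minimal sketch of how the new variable flows from the Puppet
profile into a rendered Camus property file. All names are taken from the patch
above, except 'hadoop-labs', which is hypothetical and used only to illustrate
the labs case this change enables.]

    # Puppet (modules/profile/manifests/analytics/refinery/job/camus.pp):
    # the profile reads the cluster name from the hadoop profile.
    $hadoop_cluster_name = $::profile::analytics::hadoop::common::cluster_name

    # ERB (e.g. modules/camus/templates/eventbus.erb): the template
    # interpolates the variable into each HDFS URI.
    etl.destination.path=hdfs://<%= @hadoop_cluster_name %>/wmf/data/raw/event

    # In production, where the cluster name is 'analytics-hadoop', this
    # renders exactly the value the templates previously hard-coded:
    etl.destination.path=hdfs://analytics-hadoop/wmf/data/raw/event

    # In labs, a cluster name such as 'hadoop-labs' (hypothetical) would
    # render a labs-local path instead, with no further template changes:
    etl.destination.path=hdfs://hadoop-labs/wmf/data/raw/event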