BIGTOP-2325. Deployment recipes for HAWQ
Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/10435d6e Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/10435d6e Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/10435d6e Branch: refs/heads/BIGTOP-2320 Commit: 10435d6e7b1db00e1a443894b33c18cab8001486 Parents: bb700d7 Author: Konstantin Boudnik <[email protected]> Authored: Tue Feb 16 21:27:27 2016 +0300 Committer: Konstantin Boudnik <[email protected]> Committed: Fri Mar 18 21:08:01 2016 -0700 ---------------------------------------------------------------------- .../puppet/hieradata/bigtop/cluster.yaml | 6 + bigtop-deploy/puppet/manifests/cluster.pp | 4 + .../puppet/modules/hawq/templates/gpcheck.cnf | 58 ++++ .../puppet/modules/hawq/templates/hawq-site.xml | 158 +++++++++ .../puppet/modules/hawq/templates/hawq.default | 40 +++ .../modules/hawq/templates/hdfs-client.xml | 331 +++++++++++++++++++ .../puppet/modules/hawq/templates/sysctl.conf | 24 ++ .../modules/hawq/templates/yarn-client.xml | 123 +++++++ bigtop-deploy/puppet/modules/hawq/tests/init.pp | 16 + .../src/common/hadoop/init-hcfs.json | 1 + bigtop-packages/src/common/hawq/hawq-master.svc | 4 +- bigtop-packages/src/common/hawq/hawq.default | 10 + bigtop-packages/src/deb/hawq/rules | 4 +- bigtop.bom | 2 +- bigtop_toolchain/manifests/libhdfs.pp | 39 ++- bigtop_toolchain/manifests/packages.pp | 2 +- 16 files changed, 809 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/bigtop/blob/10435d6e/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml b/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml index de98502..cf8ffcb 100644 --- a/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml +++ b/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml @@ -174,3 
+174,9 @@ zeppelin::server::spark_master_url: "yarn-client" zeppelin::server::hiveserver2_url: "jdbc:hive2://%{hiera('hadoop-hive::common::hiveserver2_host')}:%{hiera('hadoop-hive::common::hiveserver2_port')}" zeppelin::server::hiveserver2_user: "%{hiera('bigtop::hiveserver2_user')}" zeppelin::server::hiveserver2_password: "%{hiera('bigtop::hiveserver2_password')}" + +# hawq +bigtop::hawq_master_node: "%{hiera('bigtop::hawq_master_node')}" +bigtop::hawq_master_port: "5432" +bigtop::hawq_master_datadir: "/var/run/hawq/work/masterdd" +bigtop::hawq_segment_datadir: "/var/run/hawq/work/segmendd" http://git-wip-us.apache.org/repos/asf/bigtop/blob/10435d6e/bigtop-deploy/puppet/manifests/cluster.pp ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/manifests/cluster.pp b/bigtop-deploy/puppet/manifests/cluster.pp index a0be567..f80ef5a 100644 --- a/bigtop-deploy/puppet/manifests/cluster.pp +++ b/bigtop-deploy/puppet/manifests/cluster.pp @@ -105,6 +105,9 @@ $roles_map = { zeppelin => { master => ["zeppelin-server"], }, + hawq => { + master => ["hawq"], + }, } class hadoop_cluster_node ( @@ -159,6 +162,7 @@ class node_with_roles ($roles = hiera("bigtop::roles")) inherits hadoop_cluster_ "hadoop_hive", "hadoop_oozie", "hadoop_pig", + "hawq", "sqoop2", "hadoop_zookeeper", "hcatalog", http://git-wip-us.apache.org/repos/asf/bigtop/blob/10435d6e/bigtop-deploy/puppet/modules/hawq/templates/gpcheck.cnf ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/modules/hawq/templates/gpcheck.cnf b/bigtop-deploy/puppet/modules/hawq/templates/gpcheck.cnf new file mode 100644 index 0000000..11ae02f --- /dev/null +++ b/bigtop-deploy/puppet/modules/hawq/templates/gpcheck.cnf @@ -0,0 +1,58 @@ +[global] +configfile_version = 4 + +[linux.mount] +mount.points = / + +[linux.sysctl] +sysctl.kernel.shmmax = 500000000 +sysctl.kernel.shmmni = 4096 +sysctl.kernel.shmall = 4000000000 
+sysctl.kernel.sem = 250 512000 100 2048 +sysctl.kernel.sysrq = 1 +sysctl.kernel.core_uses_pid = 1 +sysctl.kernel.msgmnb = 65536 +sysctl.kernel.msgmax = 65536 +sysctl.kernel.msgmni = 2048 +sysctl.net.ipv4.tcp_syncookies = 0 +sysctl.net.ipv4.ip_forward = 0 +sysctl.net.ipv4.conf.default.accept_source_route = 0 +sysctl.net.ipv4.tcp_tw_recycle = 1 +sysctl.net.ipv4.tcp_max_syn_backlog = 200000 +sysctl.net.ipv4.conf.all.arp_filter = 1 +sysctl.net.ipv4.ip_local_port_range = 1281 65535 +sysctl.net.core.netdev_max_backlog = 200000 +sysctl.vm.overcommit_memory = 2 +sysctl.fs.nr_open = 3000000 +sysctl.kernel.threads-max = 798720 +sysctl.kernel.pid_max = 798720 +# increase network +sysctl.net.core.rmem_max = 2097152 +sysctl.net.core.wmem_max = 2097152 + +[linux.limits] +soft.nofile = 2900000 +hard.nofile = 2900000 +soft.nproc = 131072 +hard.nproc = 131072 + +[linux.diskusage] +diskusage.monitor.mounts = / +diskusage.monitor.usagemax = 90% + +[hdfs] +dfs.mem.namenode.heap = 40960 +dfs.mem.datanode.heap = 6144 +# in hdfs-site.xml +dfs.support.append = true +dfs.client.enable.read.from.local = true +dfs.block.local-path-access.user = gpadmin +dfs.datanode.max.transfer.threads = 40960 +dfs.client.socket-timeout = 300000000 +dfs.datanode.socket.write.timeout = 7200000 +dfs.namenode.handler.count = 60 +ipc.server.handler.queue.size = 3300 +dfs.datanode.handler.count = 60 +ipc.client.connection.maxidletime = 3600000 +dfs.namenode.accesstime.precision = -1 + http://git-wip-us.apache.org/repos/asf/bigtop/blob/10435d6e/bigtop-deploy/puppet/modules/hawq/templates/hawq-site.xml ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/modules/hawq/templates/hawq-site.xml b/bigtop-deploy/puppet/modules/hawq/templates/hawq-site.xml new file mode 100644 index 0000000..713fa40 --- /dev/null +++ b/bigtop-deploy/puppet/modules/hawq/templates/hawq-site.xml @@ -0,0 +1,158 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!-- +Licensed to the Apache 
Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +<configuration> + <property> + <name>hawq_master_address_host</name> + <value><%= @hawq_head %></value> + <description>The host name of hawq master.</description> + </property> + + <property> + <name>hawq_master_address_port</name> + <value><%= @hawq_head_port %></value> + <description>The port of hawq master.</description> + </property> + + <property> + <name>hawq_standby_address_host</name> + <value>none</value> + <description>The host name of hawq standby master.</description> + </property> + + <property> + <name>hawq_segment_address_port</name> + <value>40000</value> + <description>The port of hawq segment.</description> + </property> + + <property> + <name>hawq_dfs_url</name> + <value><%= @hadoop_head_node %>:<%= @hadoop_namenode_port %>/hawq_default</value> + <description>URL for accessing HDFS.</description> + </property> + + <property> + <name>hawq_master_directory</name> + <value><%= @hawq_masterdata_dir %></value> + <description>The directory of hawq master.</description> + </property> + + <property> + <name>hawq_segment_directory</name> + <value><%= @hawq_segmentdata_dir %></value> + <description>The directory of hawq segment.</description> + </property> + + <property> + <name>hawq_master_temp_directory</name> +
<value>/tmp</value> + <description>The temporary directory reserved for hawq master.</description> + </property> + + <property> + <name>hawq_segment_temp_directory</name> + <value>/tmp</value> + <description>The temporary directory reserved for hawq segment.</description> + </property> + + <!-- HAWQ resource manager parameters --> + <property> + <name>hawq_global_rm_type</name> + <value>none</value> + <description>The resource manager type to start for allocating resource. + 'none' means hawq resource manager exclusively uses whole + cluster; 'yarn' means hawq resource manager contacts YARN + resource manager to negotiate resource. + </description> + </property> + + <property> + <name>hawq_rm_memory_limit_perseg</name> + <value>64GB</value> + <description>The limit of memory usage in a hawq segment when + hawq_global_rm_type is set 'none'. + </description> + </property> + + <property> + <name>hawq_rm_nvcore_limit_perseg</name> + <value>16</value> + <description>The limit of virtual core usage in a hawq segment when + hawq_global_rm_type is set 'none'. + </description> + </property> + + <property> + <name>hawq_rm_yarn_address</name> + <value><%= @hawq_yarn_rm_host %>:<%= @hawq_yarn_rm_port %></value> + <description>The address of YARN resource manager server.</description> + </property> + + <property> + <name>hawq_rm_yarn_scheduler_address</name> + <value>localhost:8030</value> + <description>The address of YARN scheduler server.</description> + </property> + + <property> + <name>hawq_rm_yarn_queue_name</name> + <value>default</value> + <description>The YARN queue name to register hawq resource manager.</description> + </property> + + <property> + <name>hawq_rm_yarn_app_name</name> + <value>hawq</value> + <description>The application name to register hawq resource manager in YARN.</description> + </property> + <!-- HAWQ resource manager parameters end here. 
--> + + <!-- HAWQ resource enforcement parameters --> + <property> + <name>hawq_re_cpu_enable</name> + <value>false</value> + <description>The control to enable/disable CPU resource enforcement.</description> + </property> + + <property> + <name>hawq_re_cgroup_mount_point</name> + <value>/sys/fs/cgroup</value> + <description>The mount point of CGroup file system for resource enforcement. + For example, /sys/fs/cgroup/cpu/hawq for CPU sub-system. + </description> + </property> + + <property> + <name>hawq_re_cgroup_hierarchy_name</name> + <value>hawq</value> + <description>The name of the hierarchy to accommodate CGroup directories/files for resource enforcement. + For example, /sys/fs/cgroup/cpu/hawq for CPU sub-system. + </description> + </property> + + <property> + <name>hawq_rm_nvseg_perquery_perseg_limit</name> + <value>8</value> + <description>This is something that init tries to push in</description> + </property> + <!-- HAWQ resource enforcement parameters end here. --> +</configuration> http://git-wip-us.apache.org/repos/asf/bigtop/blob/10435d6e/bigtop-deploy/puppet/modules/hawq/templates/hawq.default ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/modules/hawq/templates/hawq.default b/bigtop-deploy/puppet/modules/hawq/templates/hawq.default new file mode 100644 index 0000000..8190260 --- /dev/null +++ b/bigtop-deploy/puppet/modules/hawq/templates/hawq.default @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +export HAWQ_HOME="/usr/lib/hawq" +## Due to some weird scripting, hawq doesn't work without GPHOME +export GPHOME=$HAWQ_HOME +export HAWQ_CONF_DIR="/etc/hawq/conf" + +export HAWQ_PID_DIR="/var/run/hawq" +export HAWQ_LOG_DIR="/var/log/hawq" +export HAWQ_WORK_DIR="/var/run/hawq/work" +export HAWQ_IDENT_STRING=hawq + +export PATH=$HAWQ_HOME/bin:$HAWQ_HOME/ext/python/bin:$PATH +export LD_LIBRARY_PATH=$HAWQ_HOME/lib:$HAWQ_HOME/ext/python/lib:$LD_LIBRARY_PATH +export PYTHONPATH=$HAWQ_HOME/lib/python:$HAWQ_HOME/lib/python/pygresql:$PYTHONPATH +export OPENSSL_CONF=$HAWQ_CONF_DIR/openssl.cnf +export LIBHDFS3_CONF=$HAWQ_CONF_DIR/hdfs-client.xml +export LIBYARN_CONF=$HAWQ_CONF_DIR/yarn-client.xml +export HAWQSITE_CONF=$HAWQ_CONF_DIR/hawq-site.xml + +export HAWQ_MASTER_PORT=5432 +export HAWQ_SEGMENT_PORT=40000 +export HAWQ_MASTERDATA_DIR=<%= @hawq_masterdata_dir %> +export HAWQ_SEGMENTDATA_DIR=<%= @hawq_segmentdata_dir %> + +export HAWQ_TIMEOUT=30 +export HAWQ_SHUTDOWN_MODE=smart http://git-wip-us.apache.org/repos/asf/bigtop/blob/10435d6e/bigtop-deploy/puppet/modules/hawq/templates/hdfs-client.xml ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/modules/hawq/templates/hdfs-client.xml b/bigtop-deploy/puppet/modules/hawq/templates/hdfs-client.xml new file mode 100644 index 0000000..3f08696 --- /dev/null +++ b/bigtop-deploy/puppet/modules/hawq/templates/hdfs-client.xml @@ -0,0 +1,331 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor 
license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + + +<configuration> + + <!-- KDC + <property> + <name>hadoop.security.authentication</name> + <value>kerberos</value> + </property> + KDC --> + + <!-- HA + <property> + <name>dfs.nameservices</name> + <value>phdcluster</value> + </property> + + <property> + <name>dfs.ha.namenodes.phdcluster</name> + <value>nn1,nn2</value> + </property> + + <property> + <name>dfs.namenode.rpc-address.phdcluster.nn1</name> + <value>mdw:9000</value> + </property> + + <property> + <name>dfs.namenode.rpc-address.phdcluster.nn2</name> + <value>smdw:9000</value> + </property> + +<property> +<name>dfs.namenode.http-address.phdcluster.nn1</name> +<value>mdw:50070</value> +</property> + +<property> +<name>dfs.namenode.http-address.phdcluster.nn2</name> +<value>smdw:50070</value> +</property> + +HA --> + + <!-- RPC client configuration --> + <property> + <name>rpc.client.timeout</name> + <value>3600000</value> + <description> + timeout interval of a RPC invocation in millisecond. default is 3600000. + </description> + </property> + <property> + <name>rpc.client.connect.tcpnodelay</name> + <value>true</value> + <description> + whether set socket TCP_NODELAY to true when connect to RPC server. default is true. 
+ </description> + </property> + + <property> + <name>rpc.client.max.idle</name> + <value>10000</value> + <description> + the max idle time of a RPC connection in millisecond. default is 10000. + </description> + </property> + + <property> + <name>rpc.client.ping.interval</name> + <value>10000</value> + <description> + the interval which the RPC client send a heart beat to server. 0 means disable, default is 10000. + </description> + </property> + + <property> + <name>rpc.client.connect.timeout</name> + <value>600000</value> + <description> + the timeout interval in millisecond when the RPC client is trying to setup the connection. default is 600000. + </description> + </property> + + <property> + <name>rpc.client.connect.retry</name> + <value>10</value> + <description> + the max retry times if the RPC client fail to setup the connection to server. default is 10. + </description> + </property> + + <property> + <name>rpc.client.read.timeout</name> + <value>3600000</value> + <description> + the timeout interval in millisecond when the RPC client is trying to read from server. default is 3600000. + </description> + </property> + + <property> + <name>rpc.client.write.timeout</name> + <value>3600000</value> + <description> + the timeout interval in millisecond when the RPC client is trying to write to server. default is 3600000. + </description> + </property> + + <property> + <name>rpc.client.socket.linger.timeout</name> + <value>-1</value> + <description> + set value to socket SO_LINGER when connect to RPC server. -1 means default OS value. default is -1. + </description> + </property> + + <!-- dfs client configuration --> + <property> + <name>dfs.client.read.shortcircuit</name> + <value>true</value> + <description> + whether reading block file bypass datanode if the block and the client are on the same node. default is true. 
+ </description> + </property> + + <property> + <name>dfs.default.replica</name> + <value>3</value> + <description> + the default number of replica. default is 3. + </description> + </property> + + <property> + <name>dfs.prefetchsize</name> + <value>10</value> + <description> + the default number of blocks which information will be prefetched. default is 10. + </description> + </property> + + <property> + <name>dfs.client.failover.max.attempts</name> + <value>15</value> + <description> + if multiple namenodes are configured, it is the max retry times when the dfs client try to issue a RPC call. default is 15. + </description> + </property> + + <property> + <name>dfs.default.blocksize</name> + <value>134217728</value> + <description> + default block size. default is 134217728. + </description> + </property> + + <property> + <name>dfs.client.log.severity</name> + <value>INFO</value> + <description> + the minimal log severity level, valid values include FATAL, ERROR, INFO, DEBUG1, DEBUG2, DEBUG3. default is INFO. + </description> + </property> + + <!-- input client configuration --> + <property> + <name>input.connect.timeout</name> + <value>600000</value> + <description> + the timeout interval in millisecond when the input stream is trying to setup the connection to datanode. default is 600000. + </description> + </property> + + <property> + <name>input.read.timeout</name> + <value>3600000</value> + <description> + the timeout interval in millisecond when the input stream is trying to read from datanode. default is 3600000. + </description> + </property> + + <property> + <name>input.write.timeout</name> + <value>3600000</value> + <description> + the timeout interval in millisecond when the input stream is trying to write to datanode. default is 3600000.
+ </description> + </property> + + <property> + <name>input.localread.default.buffersize</name> + <value>2097152</value> + <description> + number of bytes of the buffer which is used to hold the data from block file and verify checksum. + it is only used when "dfs.client.read.shortcircuit" is set to true. default is 1048576. + </description> + </property> + + <property> + <name>input.localread.blockinfo.cachesize</name> + <value>1000</value> + <description> + the size of block file path information cache. default is 1000. + </description> + </property> + + <property> + <name>input.read.getblockinfo.retry</name> + <value>3</value> + <description> + the max retry times when the client fail to get block information from namenode. default is 3. + </description> + </property> + + <!-- output client configuration --> + <property> + <name>output.replace-datanode-on-failure</name> + <value>false</value> + <description> + whether the client add new datanode into pipeline if the number of nodes in pipeline is less than the specified number of replicas. default is true. + </description> + </property> + + <property> + <name>output.default.chunksize</name> + <value>512</value> + <description> + the number of bytes of a chunk in pipeline. default is 512. + </description> + </property> + + <property> + <name>output.default.packetsize</name> + <value>65536</value> + <description> + the number of bytes of a packet in pipeline. default is 65536. + </description> + </property> + + <property> + <name>output.default.write.retry</name> + <value>10</value> + <description> + the max retry times when the client fail to setup the pipeline. default is 10. + </description> + </property> + + <property> + <name>output.connect.timeout</name> + <value>600000</value> + <description> + the timeout interval in millisecond when the output stream is trying to setup the connection to datanode. default is 600000.
+ </description> + </property> + + <property> + <name>output.read.timeout</name> + <value>3600000</value> + <description> + the timeout interval in millisecond when the output stream is trying to read from datanode. default is 3600000. + </description> + </property> + + <property> + <name>output.write.timeout</name> + <value>3600000</value> + <description> + the timeout interval in millisecond when the output stream is trying to write to datanode. default is 3600000. + </description> + </property> + + <property> + <name>output.packetpool.size</name> + <value>1024</value> + <description> + the max number of packets in a file's packet pool. default is 1024. + </description> + </property> + + <property> + <name>output.close.timeout</name> + <value>900000</value> + <description> + the timeout interval in millisecond when close an output stream. default is 900000. + </description> + </property> + + <property> + <name>dfs.domain.socket.path</name> + <value>/var/lib/hadoop-hdfs/dn_socket</value> + <description> + Optional. This is a path to a UNIX domain socket that will be used for + communication between the DataNode and local HDFS clients. + If the string "_PORT" is present in this path, it will be replaced by the + TCP port of the DataNode. + </description> + </property> + + <property> + <name>dfs.client.use.legacy.blockreader.local</name> + <value>false</value> + <description> + Legacy short-circuit reader implementation based on HDFS-2246 is used + if this configuration parameter is true. + This is for the platforms other than Linux + where the new implementation based on HDFS-347 is not available. 
+ </description> + </property> + +</configuration> http://git-wip-us.apache.org/repos/asf/bigtop/blob/10435d6e/bigtop-deploy/puppet/modules/hawq/templates/sysctl.conf ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/modules/hawq/templates/sysctl.conf b/bigtop-deploy/puppet/modules/hawq/templates/sysctl.conf new file mode 100644 index 0000000..2ac1598 --- /dev/null +++ b/bigtop-deploy/puppet/modules/hawq/templates/sysctl.conf @@ -0,0 +1,24 @@ +kernel.shmmax = 1000000000 +kernel.shmmni = 4096 +kernel.shmall = 4000000000 +kernel.sem = 250 512000 100 2048 +kernel.sysrq = 1 +kernel.core_uses_pid = 1 +kernel.msgmnb = 65536 +kernel.msgmax = 65536 +kernel.msgmni = 2048 +net.ipv4.tcp_syncookies = 0 +net.ipv4.ip_forward = 0 +net.ipv4.conf.default.accept_source_route = 0 +net.ipv4.tcp_tw_recycle = 1 +net.ipv4.tcp_max_syn_backlog = 200000 +net.ipv4.conf.all.arp_filter = 1 +net.ipv4.ip_local_port_range = 1281 65535 +net.core.netdev_max_backlog = 200000 +vm.overcommit_memory = 2 +fs.nr_open = 3000000 +kernel.threads-max = 798720 +kernel.pid_max = 798720 +# increase network +net.core.rmem_max=2097152 +net.core.wmem_max=2097152 http://git-wip-us.apache.org/repos/asf/bigtop/blob/10435d6e/bigtop-deploy/puppet/modules/hawq/templates/yarn-client.xml ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/modules/hawq/templates/yarn-client.xml b/bigtop-deploy/puppet/modules/hawq/templates/yarn-client.xml new file mode 100644 index 0000000..a5f9df7 --- /dev/null +++ b/bigtop-deploy/puppet/modules/hawq/templates/yarn-client.xml @@ -0,0 +1,123 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. 
The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + + +<configuration> + + <!-- KDC + <property> + <name>hadoop.security.authentication</name> + <value>kerberos</value> + </property> + KDC --> + + <!-- HA + <property> + <name>yarn.resourcemanager.ha</name> + <value>%RESOURCEMANAGER%:8032,%RESOURCEMANAGER2%:8032</value> + </property> + <property> + <name>yarn.resourcemanager.scheduler.ha</name> + <value>%RESOURCEMANAGER%:8030,%RESOURCEMANAGER2%:8030</value> + </property> + HA --> + + <!-- RPC client configuration --> + <property> + <name>rpc.client.timeout</name> + <value>3600000</value> + <description> + timeout interval of a RPC invocation in millisecond. default is 3600000. + </description> + </property> + + <property> + <name>rpc.client.connect.tcpnodelay</name> + <value>true</value> + <description> + whether set socket TCP_NODELAY to true when connect to RPC server. default is true. + </description> + </property> + + <property> + <name>rpc.client.max.idle</name> + <value>10000</value> + <description> + the max idle time of a RPC connection in millisecond. default is 10000. + </description> + </property> + + <property> + <name>rpc.client.ping.interval</name> + <value>10000</value> + <description> + the interval which the RPC client send a heart beat to server. 0 means disable, default is 10000. 
+ </description> + </property> + + <property> + <name>rpc.client.connect.timeout</name> + <value>600000</value> + <description> + the timeout interval in millisecond when the RPC client is trying to setup the connection. default is 600000. + </description> + </property> + + <property> + <name>rpc.client.connect.retry</name> + <value>10</value> + <description> + the max retry times if the RPC client fail to setup the connection to server. default is 10. + </description> + </property> + + <property> + <name>rpc.client.read.timeout</name> + <value>3600000</value> + <description> + the timeout interval in millisecond when the RPC client is trying to read from server. default is 3600000. + </description> + </property> + + <property> + <name>rpc.client.write.timeout</name> + <value>3600000</value> + <description> + the timeout interval in millisecond when the RPC client is trying to write to server. default is 3600000. + </description> + </property> + + <property> + <name>rpc.client.socket.linger.timeout</name> + <value>-1</value> + <description> + set value to socket SO_LINGER when connect to RPC server. -1 means default OS value. default is -1. + </description> + </property> + + <property> + <name>yarn.client.failover.max.attempts</name> + <value>15</value> + <description> + if multiple resource managers are configured, it is the max retry times when the yarn client try to issue a RPC call. default is 15. + </description> + </property> +</configuration> http://git-wip-us.apache.org/repos/asf/bigtop/blob/10435d6e/bigtop-deploy/puppet/modules/hawq/tests/init.pp ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/modules/hawq/tests/init.pp b/bigtop-deploy/puppet/modules/hawq/tests/init.pp new file mode 100644 index 0000000..37a5a08 --- /dev/null +++ b/bigtop-deploy/puppet/modules/hawq/tests/init.pp @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements.
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +hawq::cluster_node { "test-hawq-node": } http://git-wip-us.apache.org/repos/asf/bigtop/blob/10435d6e/bigtop-packages/src/common/hadoop/init-hcfs.json ---------------------------------------------------------------------- diff --git a/bigtop-packages/src/common/hadoop/init-hcfs.json b/bigtop-packages/src/common/hadoop/init-hcfs.json index f71f385..dea5f90 100644 --- a/bigtop-packages/src/common/hadoop/init-hcfs.json +++ b/bigtop-packages/src/common/hadoop/init-hcfs.json @@ -64,6 +64,7 @@ ["/tmp/hadoop-yarn","777","mapred","mapred"], ["/var/log/hadoop-yarn/apps","1777","yarn","mapred"], ["/hbase",null,"hbase","hbase"], + ["/hawq_default","755","hawq","hawq"], ["/solr",null,"solr","solr"], ["/benchmarks","777",null,null], ["/user","755","HCFS_SUPER_USER",null], http://git-wip-us.apache.org/repos/asf/bigtop/blob/10435d6e/bigtop-packages/src/common/hawq/hawq-master.svc ---------------------------------------------------------------------- diff --git a/bigtop-packages/src/common/hawq/hawq-master.svc b/bigtop-packages/src/common/hawq/hawq-master.svc index 4fa5097..e5aca0d 100644 --- a/bigtop-packages/src/common/hawq/hawq-master.svc +++ b/bigtop-packages/src/common/hawq/hawq-master.svc @@ -34,7 +34,7 @@ start() { exit 0 fi - su -s /bin/bash ${SVC_USER} -c "${EXEC_PATH} start $SVC_OPTS" + su -s 
/bin/bash ${SVC_USER} -c "${EXEC_PATH} start master $SVC_OPTS" checkstatusofproc RETVAL=$? @@ -51,7 +51,7 @@ cat <<'__EOT__' stop() { log_success_msg "Stopping $DESC (${DAEMON}): " - su -s /bin/bash ${SVC_USER} -c "${EXEC_PATH} stop $SVC_OPTS" + su -s /bin/bash ${SVC_USER} -c "${EXEC_PATH} stop master $SVC_OPTS" sleep 3 RETVAL=$? http://git-wip-us.apache.org/repos/asf/bigtop/blob/10435d6e/bigtop-packages/src/common/hawq/hawq.default ---------------------------------------------------------------------- diff --git a/bigtop-packages/src/common/hawq/hawq.default b/bigtop-packages/src/common/hawq/hawq.default index f8cf6e1..574e845 100644 --- a/bigtop-packages/src/common/hawq/hawq.default +++ b/bigtop-packages/src/common/hawq/hawq.default @@ -21,6 +21,8 @@ export HAWQ_LOG_DIR="/var/log/hawq" export HAWQ_WORK_DIR="/var/run/hawq/work" export HAWQ_IDENT_STRING=hawq +export HAWQ_DATA_DIR=$HAWQ_WORK_DIR/hawq-data-directory + export PATH=$HAWQ_HOME/bin:$HAWQ_HOME/ext/python/bin:$PATH export LD_LIBRARY_PATH=$HAWQ_HOME/lib:$HAWQ_HOME/ext/python/lib:$LD_LIBRARY_PATH export PYTHONPATH=$HAWQ_HOME/lib/python:$HAWQ_HOME/lib/python/pygresql:$PYTHONPATH @@ -28,3 +30,11 @@ export OPENSSL_CONF=$HAWQ_CONF_DIR/openssl.cnf export LIBHDFS3_CONF=$HAWQ_CONF_DIR/hdfs-client.xml export LIBYARN_CONF=$HAWQ_CONF_DIR/yarn-client.xml export HAWQSITE_CONF=$HAWQ_CONF_DIR/hawq-site.xml + +export HAWQ_MASTER_PORT=5432 +export HAWQ_SEGMENT_PORT=40000 +export HAWQ_MASTERDATA_DIR=$HAWQ_DATA_DIR/masterdd +export HAWQ_SEGMENTDATA_DIR=$HAWQ_DATA_DIR/segmentdd + +export HAWQ_TIMEOUT=30 +export HAWQ_SHUTDOWN_MODE=smart http://git-wip-us.apache.org/repos/asf/bigtop/blob/10435d6e/bigtop-packages/src/deb/hawq/rules ---------------------------------------------------------------------- diff --git a/bigtop-packages/src/deb/hawq/rules b/bigtop-packages/src/deb/hawq/rules index b4a3c9a..1e256a1 100755 --- a/bigtop-packages/src/deb/hawq/rules +++ b/bigtop-packages/src/deb/hawq/rules @@ -30,7 +30,7 @@ 
hawq_pkg_name=hawq dh $@ override_dh_auto_build: - bash debian/do-component-build + bash debian/do-component-build override_dh_auto_install: @@ -46,7 +46,7 @@ override_dh_auto_install: bash debian/init.d.tmpl debian/hawq-master.svc deb debian/${hawq_pkg_name}-master-service.init bash debian/init.d.tmpl debian/hawq-segment.svc deb debian/${hawq_pkg_name}-segment-service.init -## Let's override the auto_configure and auto_clean to make sure existing +## Let's override the auto_configure and auto_clean to make sure existing ## top-level Makefile doesn't interfere with the package creation override_dh_auto_configure: http://git-wip-us.apache.org/repos/asf/bigtop/blob/10435d6e/bigtop.bom ---------------------------------------------------------------------- diff --git a/bigtop.bom b/bigtop.bom index f8a6878..5670546 100644 --- a/bigtop.bom +++ b/bigtop.bom @@ -90,7 +90,7 @@ bigtop { zookeeper:['hadoop', 'hbase'], hadoop:['ignite-hadoop', 'hbase', 'crunch', 'pig', 'hive', 'tez', 'sqoop', 'sqoop2', 'oozie', 'mahout', 'flume', 'giraph', 'solr', 'crunch', 'spark', - 'phoenix', 'tachyon', 'kafka', 'ycsb', 'kite', 'hama', 'zeppelin', + 'phoenix', 'tachyon', 'kafka', 'ycsb', 'kite', 'hama', 'zeppelin', 'hawq', 'tajo', 'apex' ], hbase:['phoenix','giraph','ycsb'], http://git-wip-us.apache.org/repos/asf/bigtop/blob/10435d6e/bigtop_toolchain/manifests/libhdfs.pp ---------------------------------------------------------------------- diff --git a/bigtop_toolchain/manifests/libhdfs.pp b/bigtop_toolchain/manifests/libhdfs.pp index 4698108..eef5dd9 100644 --- a/bigtop_toolchain/manifests/libhdfs.pp +++ b/bigtop_toolchain/manifests/libhdfs.pp @@ -18,8 +18,32 @@ class bigtop_toolchain::libhdfs { include bigtop_toolchain::packages - case $operatingsystem { + /Ubuntu|Debian/: { + $hackrepourl = "https://bintray.com/artifact/download/wangzw/deb/dists/trusty/contrib/binary-amd64" + $libhdfs = "libhdfs3_2.2.31-1_amd64.deb" + $libhdfs_dev = "libhdfs3-dev_2.2.31-1_amd64.deb" + + $bisonurl_dev = 
"http://launchpadlibrarian.net/140087283" + $bisonurl = "http://launchpadlibrarian.net/140087282" + $bison = "bison_2.7.1.dfsg-1_amd64.deb" + $bison_dev = "libbison-dev_2.7.1.dfsg-1_amd64.deb" + + exec {"install": + path => "/usr/bin:/bin:/usr/sbin:/sbin", + cwd => "/usr/src", + command => "dpkg --install $libhdfs $libhdfs_dev $bison $bison_dev", + require => [ Exec[ 'download', 'download-bison' ], Package[ $packages::pkgs ] ], + } + + exec {"download-bison": + path => "/usr/bin", + cwd => "/usr/src", + command => "curl -L $bisonurl/$bison -o $bison ; curl -L $bisonurl_dev/$bison_dev -o $bison_dev", + creates => "/usr/src/$bison, /usr/src/$bison_dev", + } + } + /(?i:(centos|fedora|amazon))/: { $apache_prefix = nearest_apache_mirror() @@ -27,12 +51,6 @@ class bigtop_toolchain::libhdfs { $libhdfs = "libhdfs3-2.2.31-1.el7.centos.x86_64.rpm" $libhdfs_dev = "libhdfs3-devel-2.2.31-1.el7.centos.x86_64.rpm" - exec {"download": - path => "/usr/bin", - cwd => "/usr/src", - command => "curl -L $hackrepourl/$libhdfs -o $libhdfs ; curl -L $hackrepourl/$libhdfs_dev -o $libhdfs_dev", - creates => "/usr/src/$libhdfs, /usr/src/$libhdfs_dev", - } exec {"install": path => "/usr/bin", @@ -42,4 +60,11 @@ class bigtop_toolchain::libhdfs { } } } + + exec {"download": + path => "/usr/bin", + cwd => "/usr/src", + command => "curl -L $hackrepourl/$libhdfs -o $libhdfs ; curl -L $hackrepourl/$libhdfs_dev -o $libhdfs_dev", + creates => "/usr/src/$libhdfs, /usr/src/$libhdfs_dev", + } } http://git-wip-us.apache.org/repos/asf/bigtop/blob/10435d6e/bigtop_toolchain/manifests/packages.pp ---------------------------------------------------------------------- diff --git a/bigtop_toolchain/manifests/packages.pp b/bigtop_toolchain/manifests/packages.pp index d80a3ed..817c89d 100644 --- a/bigtop_toolchain/manifests/packages.pp +++ b/bigtop_toolchain/manifests/packages.pp @@ -14,7 +14,7 @@ # limitations under the License. 
class bigtop_toolchain::packages { - case $operatingsystem{ + case $operatingsystem { /(?i:(centos|fedora))/: { # Fedora 20 and CentOS 7 or above are using mariadb, while CentOS 6 is still mysql if ($operatingsystem == "CentOS") and ($operatingsystemmajrelease <=6) {
