Repository: trafodion Updated Branches: refs/heads/master dd301dc47 -> 290570e72
[TRAFODION-2958] Consolidate the master and backup files in dcs conf folder to a single file (masters) Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/2f48d073 Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/2f48d073 Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/2f48d073 Branch: refs/heads/master Commit: 2f48d0734fcdd112b6bcca56a1fef27333b1d004 Parents: 88e4752 Author: Anuradha Hegde <[email protected]> Authored: Tue Mar 13 02:23:30 2018 +0000 Committer: Anuradha Hegde <[email protected]> Committed: Tue Mar 13 02:23:30 2018 +0000 ---------------------------------------------------------------------- RAT_README | 3 +- core/sqf/sql/scripts/dcscheck | 19 +- core/sqf/sql/scripts/dcsstart | 8 +- core/sqf/sql/scripts/dcsstop | 9 +- core/sqf/sql/scripts/install_local_hadoop | 5 + core/sqf/sql/scripts/install_traf_components | 2 +- core/sqf/sql/scripts/sqcheck | 6 +- dcs/bin/dcs-config.sh | 14 +- dcs/bin/dcs-daemon.sh | 48 +++- dcs/bin/getActiveMaster.sh | 80 +++++++ dcs/bin/master-backup.sh | 36 +-- dcs/bin/scripts/dcsbind.sh | 64 ++++-- dcs/bin/scripts/dcsunbind.sh | 43 ++-- dcs/bin/scripts/parse_dcs_site.py | 18 +- dcs/bin/start-dcs.sh | 9 +- dcs/bin/stop-dcs.sh | 16 +- dcs/conf/backup-masters | 0 dcs/conf/master | 0 dcs/conf/masters | 0 .../main/asciidoc/_chapters/configuration.adoc | 220 ++++++++----------- .../TRAFODION/2.1/configuration/dcs-env.xml | 7 +- .../2.1/package/scripts/trafodionnode.py | 13 +- install/python-installer/scripts/dcs_setup.py | 9 +- 23 files changed, 353 insertions(+), 276 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/RAT_README ---------------------------------------------------------------------- diff --git a/RAT_README b/RAT_README index 66a448f..1799dd1 100644 --- a/RAT_README +++ b/RAT_README @@ -59,8 +59,7 @@ 
$TRAF_HOME/../sql/bin/SqlciErrors.txt -> this file does not handle comments dcs/conf/servers -dcs/conf/backup-masters -dcs/conf/master +dcs/conf/masters : Apache Trafodion DCS default configuration file -> configuration file do not handle comments http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/core/sqf/sql/scripts/dcscheck ---------------------------------------------------------------------- diff --git a/core/sqf/sql/scripts/dcscheck b/core/sqf/sql/scripts/dcscheck index 4a30716..6ac373b 100755 --- a/core/sqf/sql/scripts/dcscheck +++ b/core/sqf/sql/scripts/dcscheck @@ -23,7 +23,6 @@ jpscmd=$JAVA_HOME/bin/jps cfg_mxo_cnt=0 act_mxo_cnt=0 down_mxo_cnt=0 -backup_dcsmaster_cnt=0 cfg_dcsmaster_cnt=0 actual_dcsmaster_cnt=0 down_dcsmaster_cnt=0 @@ -73,14 +72,14 @@ if ( [ $sq_stat == 0 ] || [ $sq_stat == 1 ] ); then ### Get the configured primary node for DcsMaster if [ -s ${DCS_INSTALL_DIR}/conf/master ]; then - primary_dcsmaster=`cat ${DCS_INSTALL_DIR}/conf/master | /bin/egrep -v '^#|^$'` + #primary_dcsmaster=`cat ${DCS_INSTALL_DIR}/conf/masters | /bin/egrep -v '^#|^$'` + primary_dcsmaster=`head -n 1 ${DCS_INSTALL_DIR}/conf/masters` fi ### Get the configured number of DcsMaster's - if [ -s ${DCS_INSTALL_DIR}/conf/backup-masters ]; then - let backup_dcsmaster_cnt=`/bin/egrep -cv '#|^$' ${DCS_INSTALL_DIR}/conf/backup-masters` - let cfg_dcsmaster_cnt=$backup_dcsmaster_cnt+1 - list_of_backups=`cat ${DCS_INSTALL_DIR}/conf/backup-masters | /bin/egrep -v '^#|^$'|paste -sd ' ' -` + if [ -s ${DCS_INSTALL_DIR}/conf/masters ]; then + let cfg_dcsmaster_cnt=`/bin/egrep -cv '#|^$' ${DCS_INSTALL_DIR}/conf/masters` + list_of_masters=`cat ${DCS_INSTALL_DIR}/conf/masters | /bin/egrep -v '^#|^$'|paste -sd ' ' -` else let cfg_dcsmaster_cnt=1 fi @@ -162,13 +161,13 @@ if ( [ $sq_stat == 0 ] || [ $sq_stat == 1 ] ); then if [[ ! -z "$primary_dcsmaster" ]]; then echo "Configured Primary DcsMaster: \"$primary_dcsmaster\"" fi - if [[ ! 
-z "$list_of_backups" ]]; then - echo "Configured Backup DcsMasters: \"$list_of_backups\"" + if [[ ! -z "$list_of_masters" ]]; then + echo "Configured DcsMasters: \"$list_of_masters\"" fi if ( [ ! -z "$activeMaster" ] && [ ! -z "$activeDcsPid" ] ); then - echo "Active DcsMaster : \"$activeMaster\", pid $activeDcsPid" + echo "Active DcsMaster : \"$activeMaster\", pid $activeDcsPid" else - echo "Active DcsMaster : \"$activeMaster\"" + echo "Active DcsMaster : \"$activeMaster\"" fi echo fi http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/core/sqf/sql/scripts/dcsstart ---------------------------------------------------------------------- diff --git a/core/sqf/sql/scripts/dcsstart b/core/sqf/sql/scripts/dcsstart index 86d7d3a..a14da20 100755 --- a/core/sqf/sql/scripts/dcsstart +++ b/core/sqf/sql/scripts/dcsstart @@ -23,7 +23,7 @@ # #Check if Trafodion is up and operational -sqcheck -f +sqcheck -f > /dev/null sq_stat=$? if ( [ $sq_stat == 0 ] || [ $sq_stat == 1 ] ); then DCS_START_CMD=${DCS_INSTALL_DIR}/bin/start-dcs.sh @@ -33,13 +33,7 @@ if ( [ $sq_stat == 0 ] || [ $sq_stat == 1 ] ); then exit 1; fi - if [ ! -e ${DCS_START_CMD} ]; then - echo "${DCS_START_CMD} not found." - exit 1; - fi - echo "Starting the DCS environment now" - cd ${DCS_INSTALL_DIR} ${DCS_START_CMD} cds else http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/core/sqf/sql/scripts/dcsstop ---------------------------------------------------------------------- diff --git a/core/sqf/sql/scripts/dcsstop b/core/sqf/sql/scripts/dcsstop index 5b37e91..9eca963 100755 --- a/core/sqf/sql/scripts/dcsstop +++ b/core/sqf/sql/scripts/dcsstop @@ -29,16 +29,11 @@ if [ -z ${DCS_INSTALL_DIR} ]; then exit 1; fi -if [ ! -e ${DCS_STOP_CMD} ]; then - echo "${DCS_STOP_CMD} not found." 
- exit 1; -fi +echo "Shutting down the DCS environment now" +${DCS_STOP_CMD} if [[ $ENABLE_HA == "true" ]]; then ${DCS_INSTALL_DIR}/bin/scripts/dcsunbind.sh fi -echo "Shutting down the DCS environment now" -cd ${DCS_INSTALL_DIR} -${DCS_STOP_CMD} cds http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/core/sqf/sql/scripts/install_local_hadoop ---------------------------------------------------------------------- diff --git a/core/sqf/sql/scripts/install_local_hadoop b/core/sqf/sql/scripts/install_local_hadoop index 7c2b062..497d917 100755 --- a/core/sqf/sql/scripts/install_local_hadoop +++ b/core/sqf/sql/scripts/install_local_hadoop @@ -1641,6 +1641,11 @@ EOF fi # end of HBase setup +if [[ ! -z $FAST_LOCAL_HADOOP ]]; then + echo "FAST_LOCAL_HADOOP is set. Exiting..." + exit 0 +fi + cd $MY_SW_ROOT if [ $INSTALL_TPCDS -eq 1 ]; then http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/core/sqf/sql/scripts/install_traf_components ---------------------------------------------------------------------- diff --git a/core/sqf/sql/scripts/install_traf_components b/core/sqf/sql/scripts/install_traf_components index 35cd7c8..0bf6b40 100755 --- a/core/sqf/sql/scripts/install_traf_components +++ b/core/sqf/sql/scripts/install_traf_components @@ -290,7 +290,7 @@ then echo "Adding swtrafci script..." 
| tee -a ${MY_LOG_FILE} cat <<EOF >$TRAF_HOME/sql/scripts/swtrafci #!/bin/sh -$TRAF_HOME/trafci/bin/trafci +$TRAF_HOME/trafci/bin/trafci.sh -h localhost:$MY_DCS_MASTER_PORT -u db__root -p zz EOF chmod +x $TRAF_HOME/sql/scripts/swtrafci else http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/core/sqf/sql/scripts/sqcheck ---------------------------------------------------------------------- diff --git a/core/sqf/sql/scripts/sqcheck b/core/sqf/sql/scripts/sqcheck index c5825a8..6449ec7 100755 --- a/core/sqf/sql/scripts/sqcheck +++ b/core/sqf/sql/scripts/sqcheck @@ -112,14 +112,12 @@ function fillArray { } function getDcsInfo { - dcsznode=/$USER/dcs/master if [ -d $DCS_INSTALL_DIR ];then ### Get the configured number of DcsMaster's - if [ -s ${DCS_INSTALL_DIR}/conf/backup-masters ]; then - let backup_dcsmaster_cnt=`/bin/egrep -cv '#|^$' ${DCS_INSTALL_DIR}/conf/backup-masters` - let cfg_dcsmaster_cnt=$backup_dcsmaster_cnt+1 + if [ -s ${DCS_INSTALL_DIR}/conf/masters ]; then + let cfg_dcsmaster_cnt=`/bin/egrep -cv '#|^$' ${DCS_INSTALL_DIR}/conf/masters` else let cfg_dcsmaster_cnt=1 fi http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/dcs/bin/dcs-config.sh ---------------------------------------------------------------------- diff --git a/dcs/bin/dcs-config.sh b/dcs/bin/dcs-config.sh index b823807..236cd95 100755 --- a/dcs/bin/dcs-config.sh +++ b/dcs/bin/dcs-config.sh @@ -48,6 +48,7 @@ if [ -z "$DCS_HOME" ]; then export DCS_HOME=`dirname "$this"`/.. fi +foreground="false" #check to see if the conf dir or dcs home are given as an optional arguments while [ $# -gt 1 ] do @@ -63,6 +64,10 @@ do hosts=$1 shift DCS_SERVERS=$hosts + elif [ "--foreground" = "$1" ] + then + shift + foreground="true" else # Presume we are at end of options and break break @@ -73,10 +78,8 @@ done DCS_CONF_DIR="${DCS_CONF_DIR:-$DCS_HOME/conf}" # List of DCS servers. DCS_SERVERS="${DCS_SERVERS:-$DCS_CONF_DIR/servers}" -# DCS primary master. 
-DCS_PRIMARY_MASTER="${DCS_PRIMARY_MASTER:-$DCS_CONF_DIR/master}" -# List of DCS secondary masters. -DCS_BACKUP_MASTERS="${DCS_BACKUP_MASTERS:-$DCS_CONF_DIR/backup-masters}" +#List of DCS masters +DCS_MASTERS="${DCS_MASTERS:-$DCS_CONF_DIR/masters}" # Source the dcs-env.sh. Will have JAVA_HOME defined. if [ -f "${DCS_CONF_DIR}/dcs-env.sh" ]; then @@ -87,6 +90,9 @@ fi if [ -f "${TRAF_HOME}/sqenv.sh" ]; then savedir=`pwd` cd $TRAF_HOME + if [[ -f /etc/trafodion/trafodion_config ]]; then + . /etc/trafodion/trafodion_config + fi . ./sqenv.sh cd $savedir fi http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/dcs/bin/dcs-daemon.sh ---------------------------------------------------------------------- diff --git a/dcs/bin/dcs-daemon.sh b/dcs/bin/dcs-daemon.sh index a331c3e..36f9650 100755 --- a/dcs/bin/dcs-daemon.sh +++ b/dcs/bin/dcs-daemon.sh @@ -32,7 +32,7 @@ # DCS_NICENESS The scheduling priority for daemons. Defaults to 0. # -usage="Usage: dcs-daemon.sh [--config <conf-dir>]\ +usage="Usage: dcs-daemon.sh [--foreground] [--config <conf-dir>]\ (start|stop|restart) <dcs-command> \ <args...>" @@ -105,14 +105,16 @@ if [ "$DCS_PID_DIR" = "" ]; then DCS_PID_DIR="$DCS_HOME/tmp" fi -#if [ "$DCS_IDENT_STRING" = "" ]; then +#DCS_IDENT_STRING can be set in environment to uniquely identify dcs instances +if [ $command == "master" ] || [ $command == "master-backup" ]; then + export DCS_IDENT_STRING="$USER" +else export DCS_IDENT_STRING="$USER-$instance" -#fi +fi # Some variables # Work out java location so can print version into log. 
if [ "$JAVA_HOME" != "" ]; then - #echo "run java in $JAVA_HOME" JAVA_HOME=$JAVA_HOME fi if [ "$JAVA_HOME" = "" ]; then @@ -128,6 +130,7 @@ logout=$DCS_LOG_DIR/$DCS_LOG_PREFIX.out loggc=$DCS_LOG_DIR/$DCS_LOG_PREFIX.gc loglog="${DCS_LOG_DIR}/${DCS_LOGFILE}" pid=$DCS_PID_DIR/dcs-$DCS_IDENT_STRING-$command.pid +stopmode=$DCS_PID_DIR/dcs-server-stop if [ "$DCS_USE_GC_LOGFILE" = "true" ]; then export DCS_GC_OPTS=" -Xloggc:${loggc}" @@ -138,13 +141,25 @@ if [ "$DCS_NICENESS" = "" ]; then export DCS_NICENESS=0 fi +if [[ $startStop == 'conditional-start' ]] +then + if [[ -f $stopmode ]] + then + echo "Server stopped intentionally, no restart" + exit 5 + else + startStop=start + fi +fi + case $startStop in (start) + rm -f $stopmode # leaving stop-mode mkdir -p "$DCS_PID_DIR" if [ -f $pid ]; then if kill -0 `cat $pid` > /dev/null 2>&1; then - echo $command `cat $pid`. Stop it first. + echo $command running as process `cat $pid`. Stop it first. exit -2 fi fi @@ -155,14 +170,27 @@ case $startStop in # Add to the command log file vital stats on our environment. # echo "`date` Starting $command on `hostname`" >> $loglog # echo "`ulimit -a`" >> $loglog 2>&1 - nohup nice -n $DCS_NICENESS "$DCS_HOME"/bin/dcs \ - --config "${DCS_CONF_DIR}" \ - $command "$@" $startStop > "$logout" 2>&1 < /dev/null & - echo $! > $pid - sleep 1; head "$logout" + if [[ $foreground == "true" ]] + then + renice -n $DCS_NICENESS $$ + echo $$ > $pid + exec > "$logout" 2>&1 < /dev/null + exec "$DCS_HOME"/bin/dcs \ + --config "${DCS_CONF_DIR}" \ + $command "$@" $startStop + echo "Error: exec failed" + exit 1 + else + nohup nice -n $DCS_NICENESS "$DCS_HOME"/bin/dcs \ + --config "${DCS_CONF_DIR}" \ + $command "$@" $startStop > "$logout" 2>&1 < /dev/null & + echo $! 
> $pid + sleep 1; head "$logout" + fi ;; (stop) + touch $stopmode # entering stop-mode if [ -f $pid ]; then # kill -0 == see if the PID exists if kill -0 `cat $pid` > /dev/null 2>&1; then http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/dcs/bin/getActiveMaster.sh ---------------------------------------------------------------------- diff --git a/dcs/bin/getActiveMaster.sh b/dcs/bin/getActiveMaster.sh new file mode 100755 index 0000000..af969d4 --- /dev/null +++ b/dcs/bin/getActiveMaster.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash +#/** +# @@@ START COPYRIGHT @@@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# @@@ END COPYRIGHT @@@ +# */ + +# Get the activeMaster hostname + +setup_sqpdsh +A_PDSH=$SQPDSHA + +function getActiveMaster { + + tmpdcsconfig=`mktemp -t` + if [[ $? != 0 ]]; then + echo "Error while getting a temporary file for tmpdcsconfig. Exiting." + exit 3 + fi + + python $DCS_INSTALL_DIR/bin/scripts/parse_dcs_site.py > $tmpdcsconfig + masterport=`cat $tmpdcsconfig |grep "^dcs.master.port:"| cut -f2 -d":"` + + if [[ ! 
-z $CLUSTERNAME ]]; then + if [[ $ENABLE_HA == "true" ]]; then + + floatip_interface=`cat $tmpdcsconfig |grep "^dcs.master.floating.ip.external.interface:"| cut -f2 -d":"` + keepalived=`cat $tmpdcsconfig |grep "^dcs.master.keepalived:"| cut -f2 -d":"` + + if [[ $floatip_interface == "default" ]]; then + floatip_interface=`/sbin/route |grep "0.0.0.0" |awk '{print $8}'` + fi + + if ! grep -q ^ec2 /sys/hypervisor/uuid 2>/dev/null ; then + # Non-AWS system + interface_to_use=$floatip_interface":"$masterport + else + interface_to_use=$floatip_interface + fi + + if [[ ${keepalived} != "true" ]]; then + activeMaster=`$A_PDSH /sbin/ip addr show |grep $interface_to_use$ |cut -d':' -f1` + else + activeMaster=`$A_PDSH /sbin/ifconfig |grep $interface_to_use |cut -d':' -f1` + fi + else + tmpnetstat=`$A_PDSH /bin/netstat -antp 2>/dev/null |grep -w :$masterport` + tmpcurrentMaster=`echo $tmpnetstat |cut -f1 -d":" |awk '{print $1}'` + if [[ ${tmpcurrentMaster} == "tcp" ]]; then + activeMaster=`hostname -f` + else + activeMaster=$tmpcurrentMaster + fi + fi + else + activeMaster=localhost + fi + + rm -f $tmpdcsconfig + echo $activeMaster +} + +getActiveMaster http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/dcs/bin/master-backup.sh ---------------------------------------------------------------------- diff --git a/dcs/bin/master-backup.sh b/dcs/bin/master-backup.sh index ee396d5..dc46be3 100755 --- a/dcs/bin/master-backup.sh +++ b/dcs/bin/master-backup.sh @@ -25,8 +25,8 @@ # # Environment Variables # -# DCS_BACKUP_MASTERS File naming remote hosts. -# Default is ${DCS_CONF_DIR}/backup-masters +# DCS_MASTERS File for specifying all DcsMaster hosts +# Default is ${DCS_CONF_DIR}/masters # DCS_CONF_DIR Alternate Dcs conf dir. Default is ${DCS_HOME}/conf. # DCS_SLAVE_SLEEP Seconds to sleep between spawning remote commands. # DCS_SSH_OPTS Options passed to ssh when running remote commands. @@ -46,47 +46,35 @@ bin=`cd "$bin">/dev/null; pwd` . 
"$bin"/dcs-config.sh -# If the master backup file is specified in the command line, -# then it takes precedence over the definition in -# dcs-env.sh. Save it here. -HOSTLIST=$DCS_BACKUP_MASTERS - -if [ "$HOSTLIST" = "" ]; then - if [ "$DCS_BACKUP_MASTERS" = "" ]; then - export HOSTLIST="${DCS_CONF_DIR}/backup-masters" - else - export HOSTLIST="${DCS_BACKUP_MASTERS}" - fi -fi - +activeMaster=$($DCS_INSTALL_DIR/bin/getActiveMaster.sh) args=${@// /\\ } args=${args/master-backup/master} instance=2 -if [ -f $HOSTLIST ]; then +if [ -f ${DCS_INSTALL_DIR}/conf/masters ]; then while read master do - if [ "$master" == "localhost" ] || [ "$master" == "$HOSTNAME" ] ; then - eval $"$args $instance" 2>&1 | sed "s/^/$master: /" & + if [[ ! -z $activeMaster && "$master" =~ $activeMaster ]]; then + echo "$activeMaster is the current active DcsMaster" else + L_PDSH="ssh -q -n $DCS_SSH_OPTS" if ${DCS_SLAVE_PARALLEL:-true}; then - ssh -q -n $DCS_SSH_OPTS $master $"$args $instance"\ + ${L_PDSH} $master $"$args $instance"\ 2>&1 | sed "s/^/$master: /" & else # run each command serially - ssh -q -n $DCS_SSH_OPTS $master $"$args $instance" \ - 2>&1 | sed "s/^/$master: /" & + ${L_PDSH} $master $"$args $instance" \ + 2>&1 | sed "s/^/$master: /" fi - fi - + fi if [ "$DCS_SLAVE_SLEEP" != "" ]; then sleep $DCS_SLAVE_SLEEP fi let instance++ - done < "$HOSTLIST" + done < "${DCS_INSTALL_DIR}/conf/masters" fi wait http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/dcs/bin/scripts/dcsbind.sh ---------------------------------------------------------------------- diff --git a/dcs/bin/scripts/dcsbind.sh b/dcs/bin/scripts/dcsbind.sh index d791034..fb0eeb4 100755 --- a/dcs/bin/scripts/dcsbind.sh +++ b/dcs/bin/scripts/dcsbind.sh @@ -95,10 +95,10 @@ fi function check_node { dcsEcho "checking node $1" - for myinterface in `pdsh -N -w $1 /sbin/ip link show|awk -F': ' '/^[0-9]+:.*/ {print $2;}'`; do - ip_output=$(pdsh -N -w $1 /sbin/ip addr show $myinterface) + for myinterface in `$L_PDSH -w $1 
/sbin/ip link show|cut -d: -f2- | cut -c2- | awk -F': ' '/^[0-9]+:.*/ {print $2;}'`; do + ip_output=$($L_PDSH -w $1 /sbin/ip addr show $myinterface | cut -d: -f2-) if [ $gv_externalip_set -eq 1 -a $external_only -eq 1 ]; then - myifport=`echo "$ip_output" | grep $gv_float_external_ip` + myifport=`echo "$ip_output" | grep $gv_float_external_ip/` status=$? if [ $status -eq 0 ]; then tempinterface=`echo $gv_float_external_interface:$gv_port` @@ -109,8 +109,8 @@ function check_node { unbindip=`echo "$myifport"|awk '{print $2}'` unbindlb=`echo "$myifport"|awk '{print $NF}'` dcsEcho "external ip $gv_float_external_ip is already in use on node $1 bound to interface $myinterface($unbindlb) - unbind..." - dcsEcho "pdsh -S -w $1 sudo /sbin/ip addr del $unbindip dev $myinterface label $unbindlb" - pdsh -S -w $1 sudo /sbin/ip addr del $unbindip dev $myinterface label $unbindlb + dcsEcho "$L_PDSH -w $1 sudo /sbin/ip addr del $unbindip dev $myinterface" + $L_PDSH -w $1 sudo /sbin/ip addr del $unbindip dev $myinterface status=$? if [ $status -ne 0 ]; then @@ -132,7 +132,7 @@ function Check_VirtualIP_InUse_Unbind { #check if external ip is in use dcsEcho "check all nodes $allMyNodes" - externalNodes=`pdsh $allMyNodes /sbin/ip addr show | grep $gv_float_external_ip | awk -F' ' '/^.+:[[:space:]]+.*/ {print $1;}' | cut -d':' -f1 | sed '/^$/d'` + externalNodes=`$L_PDSH $allMyNodes /sbin/ip addr show | grep -w $gv_float_external_ip | awk -F' ' '/^.+:[[:space:]]+.*/ {print $1;}' | cut -d':' -f1 | sed '/^$/d'` if [ ! 
-z "$externalNodes" ]; then dcsEcho "find possible node `echo $externalNodes`" external_only=1 @@ -153,7 +153,7 @@ if [ $gv_externalip_set -eq 1 ]; then bcast=`/sbin/ip addr show $gv_float_external_interface | grep "inet .*$gv_float_external_interface\$" | awk '{print $4}'` mask=`/sbin/ip addr show $gv_float_external_interface | grep "inet .*$gv_float_external_interface\$" | awk '{print $2}' | cut -d'/' -f2` - /sbin/ip addr show| grep 'inet [^[:space:]]\+ '| awk '{print $2}'| sed -e 's/\/.*//'|grep $gv_float_external_ip > /dev/null + /sbin/ip addr show| grep 'inet [^[:space:]]\+ '| awk '{print $2}'| sed -e 's/\/.*//'|grep -w $gv_float_external_ip > /dev/null status=$? if [ $status -eq 0 ]; then dcsEcho "external ip is already bound on node $gv_myhostname - skip bind step" @@ -231,7 +231,7 @@ fi } function configure_route_tables { - gv_default_interface=eth0 + gv_default_interface=$(/sbin/route | grep default | awk '{print $(NF)}') bcast=`/sbin/ip addr show $gv_default_interface | grep "inet .*$gv_default_interface\$" | awk '{print $4}'` status=$? if [ $status -ne 0 ]; then @@ -356,35 +356,65 @@ gv_float_internal_ip=`echo $gv_float_external_ip` dcsEcho "gv_float_external_ip :" $gv_float_external_ip dcsEcho "gv_float_internal_ip :" $gv_float_internal_ip -#Check if AWS_CLOUD environment variable defined -if [[ $AWS_CLOUD != "true" ]]; then +if ! grep -q ^ec2 /sys/hypervisor/uuid 2>/dev/null ; then + # Non-AWS system + L_PDSH="pdsh -S" Check_VirtualIP_InUse_Unbind BindFloatIp else - awscmd="/usr/local/bin/aws ec2 --output text " - device_index_to_use=`echo $gv_float_external_interface | sed -e "s@eth\([0-9][0-9]*\)@\1@"` + # AWS system + awscmd="/usr/bin/aws ec2 --output text " + device_index_to_use=`echo $gv_float_external_interface | sed 's/[^0-9]//g'` dcsEcho "Using device index $device_index_to_use for $gv_float_external_interface" + # Test if .aws file exists on this node + awstmp=`mktemp -t` + if [[ $? 
!= 0 ]]; then + dcsEcho "Error while getting a temporary file for $awstmp. Exiting." + exit $gv_error + fi + + $awscmd describe-instances --query 'Reservations[*].Instances[*].[InstanceId,PrivateDnsName]' >$awstmp 2>/dev/null + if [[ $? != 0 ]]; then + dcsEcho "Missing .aws config files on node $gv_myhostname" + rm -f $awstmp + exit $gv_error + fi + + rm -f $awstmp # Get instance Id of the instance - INSTANCEID=`$awscmd describe-instances |grep -i instances |grep -i $gv_myhostname |cut -f8` + INSTANCEID=`$awscmd describe-instances --query 'Reservations[*].Instances[*].[InstanceId,PrivateDnsName]' |grep -i -w $gv_myhostname |cut -f1` dcsEcho "Using Instance id $INSTANCEID" # Get the network interface configured for the vpc - NETWORKINTERFACE=`$awscmd describe-network-interfaces| grep -i networkinterfaces| grep -i $gv_float_internal_ip|cut -f5` + NETWORKINTERFACE=`$awscmd describe-network-interfaces --query 'NetworkInterfaces[*].[NetworkInterfaceId,PrivateIpAddress]' |grep -i -w $gv_float_internal_ip |cut -f1` dcsEcho "Using network interface $NETWORKINTERFACE" # Get the attachment id for the network interface - ATTACH_ID=`$awscmd describe-network-interfaces --network-interface-ids $NETWORKINTERFACE |grep -i attachment |cut -f3` + ATTACH_ID=`$awscmd describe-network-interfaces --network-interface-ids $NETWORKINTERFACE --filters Name=attachment.device-index,Values=$device_index_to_use --query 'NetworkInterfaces[*].[Attachment.AttachmentId]'` if [ ! -z "$ATTACH_ID" ]; then dcsEcho "Detaching attachment Id:" $ATTACH_ID $awscmd detach-network-interface --attachment-id $ATTACH_ID + network_interface_status=`$awscmd describe-network-interfaces --filters Name=attachment.attachment-id,Values=$ATTACH_ID --query NetworkInterfaces[*].[Status]` + while [[ "$network_interface_status" = "in-use" ]] + do + dcsEcho "Attachment Status ... 
" $network_interface_status + sleep 10 + network_interface_status=`$awscmd describe-network-interfaces --filters Name=attachment.attachment-id,Values=$ATTACH_ID --query NetworkInterfaces[*].[Status]` + done fi dcsEcho "Going to attach network interface $NETWORKINTERFACE to the another instance" - sleep 10 NEWATTACH_ID=`$awscmd attach-network-interface --network-interface-id $NETWORKINTERFACE --instance-id $INSTANCEID --device-index $device_index_to_use` dcsEcho "New attachment Id " $NEWATTACH_ID - sleep 10 + newattachment_status=`$awscmd describe-network-interfaces --filters Name=attachment.attachment-id,Values=$NEWATTACH_ID --query NetworkInterfaces[*].[Attachment.Status]` + while [[ "$newattachment_status" != "attached" ]] + do + dcsEcho "New Attachment Status ... " $newattachment_status + sleep 10 + newattachment_status=`$awscmd describe-network-interfaces --filters Name=attachment.attachment-id,Values=$NEWATTACH_ID --query NetworkInterfaces[*].[Attachment.Status]` + done + configure_route_tables fi http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/dcs/bin/scripts/dcsunbind.sh ---------------------------------------------------------------------- diff --git a/dcs/bin/scripts/dcsunbind.sh b/dcs/bin/scripts/dcsunbind.sh index e58f4b3..6f2111e 100755 --- a/dcs/bin/scripts/dcsunbind.sh +++ b/dcs/bin/scripts/dcsunbind.sh @@ -24,26 +24,24 @@ # function check_node { - for myinterface in `$SQ_PDSH -N -w $1 /sbin/ip link show|awk -F': ' '/^[0-9]+:.*/ {print $2;}'`; do - ip_output=$($SQ_PDSH -N -w $1 /sbin/ip addr show $myinterface) + for myinterface in `$L_PDSH -w $1 /sbin/ip link show|cut -d: -f2- | cut -c2- | awk -F': ' '/^[0-9]+:.*/ {print $2;}'`; do + ip_output=$($L_PDSH -w $1 /sbin/ip addr show $myinterface | cut -d: -f2- | cut -c2-) if [ $gv_externalip_set -eq 1 -a $external_only -eq 1 ]; then - myifport=`echo "$ip_output" | grep $gv_float_external_ip` + myifport=`echo "$ip_output" | grep -w $gv_float_external_ip` status=$? 
if [ $status -eq 0 ]; then - tempinterface=`echo $gv_float_external_interface:$gv_port` + tempinterface=`echo $gv_float_interface:$gv_port` # check if another interface is bound to this virtual ip address echo "$myifport" | grep "$tempinterface" > /dev/null if [ $? -eq 1 -o "$1" != "$gv_myhostname" ]; then unbindip=`echo "$myifport" | awk '{print $2}'` unbindlb=`echo "$myifport"|awk '{print $NF}'` - echo "External ip $gv_float_external_ip is in use on node $1 bound to interface $myinterface($unbindlb) - unbind..." - $SQ_PDSH -S -w $1 sudo /sbin/ip addr del $unbindip dev $myinterface label $unbindlb + echo "Virtual ip $gv_float_external_ip is in use on node $1 bound to interface $myinterface($unbindlb) - unbinding..." + $L_PDSH -w $1 sudo /sbin/ip addr del $unbindip dev $myinterface status=$? if [ $status -ne 0 ]; then echo "Failed to unbind - status is $status" exit -1 - else - echo "Unbind successful" fi fi # endif node+name match fi # endif looking for external ip @@ -52,9 +50,8 @@ function check_node { } function Check_VirtualIP_InUse_And_Unbind { - echo "check all nodes to see if external virtual ip address is in use and unbind if necessary" mynode="" - externalNodes=`$SQ_PDSH $MY_NODES /sbin/ip addr show | grep $gv_float_external_ip | awk -F' ' '/^.+:[[:space:]]+.*/ {print $1;}' | cut -d':' -f1 | sed '/^$/d'` + externalNodes=`$L_PDSH $MY_NODES /sbin/ip addr show | grep -w $gv_float_external_ip | awk -F' ' '/^.+:[[:space:]]+.*/ {print $1;}' | cut -d':' -f1 | sed '/^$/d'` if [ ! 
-z "$externalNodes" ]; then external_only=1 internal_only=0 @@ -69,24 +66,28 @@ function Check_VirtualIP_InUse_And_Unbind { if [[ $ENABLE_HA == "false" ]]; then exit 0 fi - -gv_float_internal_ip=`python $DCS_INSTALL_DIR/bin/scripts/parse_dcs_site.py|cut -d$'\n' -f2` -gv_float_external_ip=`python $DCS_INSTALL_DIR/bin/scripts/parse_dcs_site.py|cut -d$'\n' -f2` -gv_float_interface=`python $DCS_INSTALL_DIR/bin/scripts/parse_dcs_site.py|cut -d$'\n' -f1` -gv_port=`python $DCS_INSTALL_DIR/bin/scripts/parse_dcs_site.py|cut -d$'\n' -f3` +dcsunbindtmp=`mktemp -t` +python $DCS_INSTALL_DIR/bin/scripts/parse_dcs_site.py > $dcsunbindtmp +gv_float_internal_ip=`cat $dcsunbindtmp |grep "^dcs.master.floating.ip.external.ip.address:"| cut -f2 -d":"` +gv_float_external_ip=$gv_float_internal_ip +gv_float_interface=`cat $dcsunbindtmp |grep "^dcs.master.floating.ip.external.interface:"| cut -f2 -d":"` +device_index_to_use=`echo $gv_float_interface | sed 's/[^0-9]//g'` +gv_port=`cat $dcsunbindtmp |grep "^dcs.master.port:"| cut -f2 -d":"` if [[ -z $gv_port ]]; then gv_port=23400 fi gv_externalip_set=1 gv_internalip_set=1 -if [[ $AWS_CLOUD == "true" ]]; then - awscmd="/usr/local/bin/aws ec2 --output text " +if grep -q ^ec2 /sys/hypervisor/uuid 2>/dev/null ; then + # AWS system + awscmd="/usr/bin/aws ec2 --output text " + #Get the network interface - NETWORKINTERFACE=`$awscmd describe-network-interfaces| grep -i networkinterfaces| grep -i $gv_float_internal_ip|cut -f5` + NETWORKINTERFACE=`$awscmd describe-network-interfaces --query 'NetworkInterfaces[*].[NetworkInterfaceId,PrivateIpAddress]' |grep -i -w $gv_float_internal_ip |cut -f1` # Get the attachment id for the network interface - ATTACH_ID=`$awscmd describe-network-interfaces --network-interface-ids $NETWORKINTERFACE |grep -i attachment |cut -f3` + ATTACH_ID=`$awscmd describe-network-interfaces --network-interface-ids $NETWORKINTERFACE --filters Name=attachment.device-index,Values=$device_index_to_use --query 
'NetworkInterfaces[*].[Attachment.AttachmentId]'` echo "Detaching attachment Id:" $ATTACH_ID if [ ! -z "$ATTACH_ID" ]; then @@ -94,6 +95,10 @@ if [[ $AWS_CLOUD == "true" ]]; then echo "Detached interface :" $NETWORKINTERFACE fi else + # non-AWS + L_PDSH="pdsh -S" + Check_VirtualIP_InUse_And_Unbind fi +rm -f $dcsunbindtmp exit 0 http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/dcs/bin/scripts/parse_dcs_site.py ---------------------------------------------------------------------- diff --git a/dcs/bin/scripts/parse_dcs_site.py b/dcs/bin/scripts/parse_dcs_site.py index c5947a4..aab5a00 100755 --- a/dcs/bin/scripts/parse_dcs_site.py +++ b/dcs/bin/scripts/parse_dcs_site.py @@ -31,16 +31,8 @@ if not dcsconfig_dir: doc = minidom.parse(dcsconfig_dir+"/dcs-site.xml") props = doc.getElementsByTagName("property") for prop in props: - pname = prop.getElementsByTagName("name")[0] - if (pname.firstChild.data == "dcs.master.port"): - pvalue = prop.getElementsByTagName("value")[0] - dcsPort=pvalue.firstChild.data - print("%s" % (dcsPort)) - if (pname.firstChild.data == "dcs.master.floating.ip.external.ip.address"): - pvalue = prop.getElementsByTagName("value")[0] - float_ipaddress=pvalue.firstChild.data - print("%s" % (float_ipaddress)) - if (pname.firstChild.data == "dcs.master.floating.ip.external.interface"): - pvalue = prop.getElementsByTagName("value")[0] - float_interface=pvalue.firstChild.data - print("%s" % (float_interface)) + tagName = prop.getElementsByTagName ("name")[0] + pname=tagName.childNodes[0].data + tagValue = prop.getElementsByTagName("value")[0] + pvalue=tagValue.childNodes[0].data + print("%s:%s" % (pname,pvalue)) http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/dcs/bin/start-dcs.sh ---------------------------------------------------------------------- diff --git a/dcs/bin/start-dcs.sh b/dcs/bin/start-dcs.sh index 8894453..fcd4677 100755 --- a/dcs/bin/start-dcs.sh +++ b/dcs/bin/start-dcs.sh @@ -47,8 +47,8 @@ then fi if [ -z 
"$master" ] ; then - if [ ! -z "${DCS_PRIMARY_MASTER}" ] && [ -s ${DCS_PRIMARY_MASTER} ] ; then - master_node=`cat ${DCS_PRIMARY_MASTER}| egrep -v '^#|^$'` + if [ ! -z "${DCS_MASTERS}" ] && [ -s ${DCS_MASTERS} ] ; then + master_node=`head -n 1 ${DCS_MASTERS}` if [ ! -z "$master_node" ] ; then master=`echo $master_node | awk '{print $1}'` fi @@ -59,8 +59,9 @@ if [ "$master" == "" ] || [ "$master" == "localhost" ] || [ "$master" == "$(host "$bin"/dcs-daemon.sh --config "${DCS_CONF_DIR}" start master else remote_cmd="cd ${DCS_HOME}; $bin/dcs-daemon.sh --config ${DCS_CONF_DIR} start master" - ssh -q -n $DCS_SSH_OPTS $master $remote_cmd 2>&1 | sed "s/^/$master: /" + L_PDSH="ssh -q -n $DCS_SSH_OPTS" + ${L_PDSH} $master $remote_cmd 2>&1 | sed "s/^/$master: /" fi "$bin"/dcs-daemons.sh --config "${DCS_CONF_DIR}" --hosts "${DCS_SERVERS}" start server -"$bin"/dcs-daemons.sh --config "${DCS_CONF_DIR}" --hosts "${DCS_BACKUP_MASTERS}" start master-backup +"$bin"/dcs-daemons.sh --config "${DCS_CONF_DIR}" --hosts "${DCS_MASTERS}" start master-backup http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/dcs/bin/stop-dcs.sh ---------------------------------------------------------------------- diff --git a/dcs/bin/stop-dcs.sh b/dcs/bin/stop-dcs.sh index fbb1818..cdd40ad 100755 --- a/dcs/bin/stop-dcs.sh +++ b/dcs/bin/stop-dcs.sh @@ -36,7 +36,7 @@ then exit $errCode fi -"$bin"/dcs-daemons.sh --config "${DCS_CONF_DIR}" --hosts "${DCS_BACKUP_MASTERS}" stop master-backup +"$bin"/dcs-daemons.sh --config "${DCS_CONF_DIR}" --hosts "${DCS_MASTERS}" stop master-backup master=`$bin/dcs --config "${DCS_CONF_DIR}" org.trafodion.dcs.zookeeper.ZkUtil /$USER/dcs/master|tail -n 1` errCode=$? 
@@ -49,12 +49,16 @@ then exit $errCode fi -if [ "$master" == "" ] || [ "$master" == "$(hostname -f)" ] ; then - "$bin"/dcs-daemon.sh --config "${DCS_CONF_DIR}" stop master -else + activeMaster=$($DCS_INSTALL_DIR/bin/getActiveMaster.sh) + remote_cmd="cd ${DCS_HOME}; $bin/dcs-daemon.sh --config ${DCS_CONF_DIR} stop master" - ssh -q -n $DCS_SSH_OPTS $master $remote_cmd 2>&1 | sed "s/^/$master: /" -fi + L_PDSH="ssh -q -n $DCS_SSH_OPTS" + + if [[ ! -z $activeMaster ]]; then + ${L_PDSH} $activeMaster $remote_cmd 2>&1 | sed "s/^/$activeMaster: /" + else + ${L_PDSH} $master $remote_cmd 2>&1 | sed "s/^/$master: /" + fi "$bin"/dcs-daemons.sh --config "${DCS_CONF_DIR}" --hosts "${DCS_SERVERS}" stop server "$bin"/dcs-daemons.sh --config "${DCS_CONF_DIR}" stop zookeeper http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/dcs/conf/backup-masters ---------------------------------------------------------------------- diff --git a/dcs/conf/backup-masters b/dcs/conf/backup-masters deleted file mode 100644 index e69de29..0000000 http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/dcs/conf/master ---------------------------------------------------------------------- diff --git a/dcs/conf/master b/dcs/conf/master deleted file mode 100644 index e69de29..0000000 http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/dcs/conf/masters ---------------------------------------------------------------------- diff --git a/dcs/conf/masters b/dcs/conf/masters new file mode 100644 index 0000000..e69de29 http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/dcs/src/main/asciidoc/_chapters/configuration.adoc ---------------------------------------------------------------------- diff --git a/dcs/src/main/asciidoc/_chapters/configuration.adoc b/dcs/src/main/asciidoc/_chapters/configuration.adoc index 39180c6..b4a4c3d 100644 --- a/dcs/src/main/asciidoc/_chapters/configuration.adoc +++ b/dcs/src/main/asciidoc/_chapters/configuration.adoc @@ -28,7 +28,7 @@ 
:experimental: This chapter is the Not-So-Quick start guide to DCS configuration. -Please read this chapter carefully and ensure that all requirements have +Please read this chapter carefully and ensure that all requirements have been satisfied. Failure to do so will cause you (and us) grief debugging strange errors. DCS uses the same configuration mechanism as Apache Hadoop. @@ -36,10 +36,10 @@ All configuration files are located in the _conf/_ directory. [TIP] ==== -Be careful editing XML. Make sure you close all elements. Run your file through +xmllint+ or similar to +Be careful editing XML. Make sure you close all elements. Run your file through +xmllint+ or similar to ensure well-formedness of your document after an edit session. ==== - + .Keep Configuration In Sync Across the Cluster [WARNING] ==== @@ -48,101 +48,71 @@ DCS will not do this for you. Use +rsync+, +scp+, or another secure mechanism fo A restart is needed for servers to pick up changes. ==== -This section lists required services and some required system configuration. - -== Java -.Java -[cols="1,1,1,4", options="header"] -|=== -|DCS Version -|JDK 6 -|JDK 7 -|JDK 8 +This section lists required services and some required system configuration. -|1.1 -|Not Supported -|yes -|Running with JDK 8 has not been tested. +== Java +DCS is configured to use the default version of the JDK as defined by the Trafodion configuration. -|1.0 -|yes -|Not Supported -|Not Supported +[[os]] +== Operating System -|=== +=== ssh -[[os]] -== Operating System - -=== ssh - _ssh_ must be installed and _sshd_ must be running to use DCS's' scripts to manage remote DCS daemons. You must be able to ssh to all nodes, including your local node, using passwordless login (Google "ssh passwordless login"). -=== DNS +=== DNS Both forward and reverse DNS resolving should work. If your machine has multiple interfaces, DCS will use the interface that the primary hostname resolves to.
-=== Loopback IP +=== Loopback IP DCS expects the loopback IP address to be 127.0.0.1. Ubuntu and some other distributions, for example, will default to 127.0.1.1 and this will cause problems for you. _/etc/hosts_ should look something like this: ---- 127.0.0.1 localhost 127.0.0.1 ubuntu.ubuntu-domain ubuntu ---- - -=== NTP +=== NTP The clocks on cluster members should be in basic alignments. Some skew is tolerable but wild skew could generate odd behaviors. Run link:http://en.wikipedia.org/wiki/Network_Time_Protocol[NTP] on your cluster, or an equivalent. -=== Windows -DCS is not supported on Windows. +== Run modes -== Run modes - -=== Single Node +=== Single Node This is the default mode. Single node is what is described in the <<quickstart,quickstart>> section. In single node, it runs all DCS daemons and a local ZooKeeper all on the same node. Zookeeper binds to a well known port. -=== Multi Node +=== Multi Node Multi node is where the daemons are spread across all nodes in the cluster. Before proceeding, ensure you have a working Trafodion instance. -Below we describe the different setups. Starting, verification and exploration of your install. Configuration is described in a -section that follows, <<running.and.confirming.your.installation,Running and Confirming Your Installation>>. +The sections below describe the various configuration files that need to be set up for starting DCS processes. Configuration is described in a +section that follows, <<running.and.confirming.your.installation,Running and Confirming Your Installation>>. To set up a multi-node deploy, you will need to configure DCS by editing files in the DCS _conf_ directory. - + You may need to edit _conf/dcs-env.sh_ to tell DCS which _java_ to use. In this file you set DCS environment variables such as the heap size and other options for the _JVM_, the preferred location for log files, etc. Set `JAVA_HOME` to point at the root of your _java_ install.
-==== _servers_ +==== _servers_ -In addition, a multi-node deploy requires that you -modify _conf/servers_. The _servers_ file lists all hosts that you would have running -DcsServers, one host per line or the host name followed by the number of master executor servers. +A multi-node deploy requires that you modify _conf/servers_. The _servers_ file lists all hosts that you would have running +DcsServers, one host per line or the host name followed by the number of master executor servers. All servers listed in this file will be started and stopped when DCS start or stop is run. -==== _backup-masters_ - -The _backup-masters_ file lists all hosts that you would have running -backup DcsMaster processes, one host per line. All servers listed in this file will be started -and stopped when DCS start or stop is run. +==== _masters_ -==== _master_ +The _masters_ file lists the host of the primary and backup DcsMaster processes, one host per line. All servers listed +in this file will be started and stopped when DCS stop and start is run. -The _master_ file lists the host of the primary DcsMaster process. Only one host is allowed to -be the primary master. The server listed in this file will be started -and stopped when DCS start or stop is run. - -==== ZooKeeper and DCS +==== ZooKeeper and DCS See section <<zookeeper,Zookeeper>> for ZooKeeper setup for DCS. [[running.and.confirming.your.installation]] -=== Running and Confirming Your Installation +=== Running and Confirming Your Installation -Make sure Trafodion is running first. Start and stop the Trafodion instance by running _sqstart.sh_ over in the -`TRAF_HOME/sql/scripts` directory. You can ensure it started properly by testing with _sqcheck_. +Before you start DCS make sure Trafodion is up and running first. Start and stop the Trafodion instance by running _sqstart.sh_ over in the +`$TRAF_HOME/sql/scripts` directory. You can ensure it started properly by testing with _sqcheck_. 
If you are managing your own ZooKeeper, start it and confirm its running else, DCS will start up ZooKeeper for you as part of its start process. @@ -151,17 +121,17 @@ Start DCS with the following command: bin/start-dcs.sh ---- -Run the above from the `DCS_HOME` directory. +Run the above from the `DCS_HOME` directory. You should now have a running DCS instance. DCS logs can be found in the _logs_ subdirectory. Check them out especially if DCS had trouble starting. -DCS also puts up a UI listing vital attributes and metrics. By default its deployed on the DcsMaster -host at port 24410 (DcsServers put up an informational http server at 24430+their instance number). +DCS also puts up a UI listing vital attributes and metrics. By default its deployed on the DcsMaster +host at port 24410 (DcsServers put up an informational http server at 24430+their instance number). If the DcsMaster were running on a host named `master.example.org` on the default port, to see the DcsMaster's homepage you'd point your browser at _http://master.example.org:24410_. - + To stop DCS after exiting the DCS shell enter ---- ./bin/stop-dcs.sh @@ -170,12 +140,12 @@ stopping dcs............... Shutdown can take a moment to complete. It can take longer if your cluster is comprised of many machines. [[zookeeper]] -== ZooKeeper +== ZooKeeper DCS depends on a running ZooKeeper cluster.All participating nodes and clients need to be able to access the running ZooKeeper ensemble. DCS by default manages a ZooKeeper "cluster" for you. It will start and stop the ZooKeeper ensemble as part of the DCS start/stop process. You can also manage the ZooKeeper ensemble independent of DCS and just point DCS at -the cluster it should use. To toggle DCS management of ZooKeeper, use the `DCS_MANAGES_ZK` variable in +the cluster it should use. To toggle DCS management of ZooKeeper, use the `DCS_MANAGES_ZK` variable in _conf/dcs-env.sh_. 
This variable, which defaults to `true`, tells DCS whether to start/stop the ZooKeeper ensemble servers as part of DCS start/stop. @@ -185,22 +155,22 @@ _conf/dcs-site.xml_. A ZooKeeper configuration option can be set as a property i _dcs-site.xml_ XML configuration file by prefacing the ZooKeeper option name with `dcs.zookeeper.property`. For example, the `clientPort` setting in ZooKeeper can be changed by setting the `dcs.zookeeper.property.clientPort` property. For all default values used by DCS, including ZooKeeper -configuration, see section <<dcs_default_configurations,DCS Default Configuration>>. Look for the `dcs.zookeeper.property` prefix +configuration, see section <<dcs_default_configurations,DCS Default Configuration>>. Look for the `dcs.zookeeper.property` prefix For the full list of ZooKeeper configurations, see ZooKeeper's _zoo.cfg_. DCS does not ship with a _zoo.cfg_ so you will need to browse the _conf_ directory in an appropriate ZooKeeper download. You must at least list the ensemble servers in _dcs-site.xml_ using the `dcs.zookeeper.quorum` property. This property -defaults to a single ensemble member at `localhost` which is not suitable for a fully distributed DCS. +defaults to a single ensemble member at `localhost` which is not suitable for a fully distributed DCS. (It binds to the local machine only and remote clients will not be able to connect). How many ZooKeepers should I run? You can run a ZooKeeper ensemble that comprises 1 node only but in production it is recommended that you run a ZooKeeper ensemble of 3, 5 or 7 machines; the more members a nensemble has, the more tolerant the ensemble is of host -failures. Also, run an odd number of machines. In ZooKeeper, an even number of peers is supported, but it is normally not used -because an even sized ensemble requires, proportionally, more peers to form a quorum than an odd sized ensemble requires. 
For example, an -ensemble with 4 peers requires 3 to form a quorum, while an ensemble with 5 also requires 3 to form a quorum. Thus, an ensemble of 5 allows 2 peers to -fail, and thus is more fault tolerant than the ensemble of 4, which allows only 1 down peer. +failures. Also, run an odd number of machines. In ZooKeeper, an even number of peers is supported, but it is normally not used +because an even sized ensemble requires, proportionally, more peers to form a quorum than an odd sized ensemble requires. For example, an +ensemble with 4 peers requires 3 to form a quorum, while an ensemble with 5 also requires 3 to form a quorum. Thus, an ensemble of 5 allows 2 peers to +fail, and thus is more fault tolerant than the ensemble of 4, which allows only 1 down peer. Give each ZooKeeper server around 1GB of RAM, and if possible, its own dedicated disk (A dedicated disk is the best thing you can do to ensure a performant ZooKeeper ensemble). For very heavily loaded clusters, run ZooKeeper servers on separate machines @@ -211,8 +181,8 @@ port 2222 (the default is 2181) ensure `DCS_MANAGE_ZK` is commented out or set t and then edit _conf/dcs-site.xml_ and set `dcs.zookeeper.property.clientPort` and `dcs.zookeeper.quorum`. You should also set `dcs.zookeeper.property.dataDir` to other than the default as the default has ZooKeeper persist data under _/tmp_ which is often cleared on system -restart. In the example below we have ZooKeeper persist to _/user/local/zookeeper_. - +restart. In the example below we have ZooKeeper persist to _/user/local/zookeeper_. + [source,xml] ---- <configuration> @@ -250,16 +220,16 @@ restart. 
In the example below we have ZooKeeper persist to _/user/local/zookeepe === Using existing ZooKeeper ensemble To point DCS at an existing ZooKeeper cluster, one that is not managed by DCS, uncomment and set `DCS_MANAGES_ZK` -in _conf/dcs-env.sh_ to `false` +in _conf/dcs-env.sh_ to `false` -[source,console] +[source,console] ---- # Tell DCS whether it should manage it's own instance of Zookeeper or not. export DCS_MANAGES_ZK=false ---- - + Next set ensemble locations and client port, if non-standard, in -_dcs-site.xml_, or add a suitably configured _zoo.cfg_ to DCS's _CLASSPATH_. +_dcs-site.xml_, or add a suitably configured _zoo.cfg_ to DCS's _CLASSPATH_. DCS will prefer the configuration found in _zoo.cfg_ over any settings in _dcs-site.xml_. When DCS manages ZooKeeper, it will start/stop the @@ -278,28 +248,28 @@ ZooKeeper cluster, unrelated to DCS. Just make sure to uncomment and set DCS shuts down, it doesn't take ZooKeeper down with it. For more information about running a distinct ZooKeeper -cluster, see the link:http://hadoop.apache.org/zookeeper/docs/current/zookeeperStarted.html[ZooKeeper Getting Started Guide]. -Additionally, see the link:http://wiki.apache.org/hadoop/ZooKeeper/FAQ#A7[ZooKeeper Wiki] or the -link:http://zookeeper.apache.org/doc/r3.3.3/zookeeperAdmin.html#sc_zkMulitServerSetup[ZooKeeper documentation] +cluster, see the link:http://hadoop.apache.org/zookeeper/docs/current/zookeeperStarted.html[ZooKeeper Getting Started Guide]. +Additionally, see the link:http://wiki.apache.org/hadoop/ZooKeeper/FAQ#A7[ZooKeeper Wiki] or the +link:http://zookeeper.apache.org/doc/r3.3.3/zookeeperAdmin.html#sc_zkMulitServerSetup[ZooKeeper documentation] for more information on ZooKeeper sizing. - + == Configuration Files - + === _dcs-site.xml_ and _dcs-default.xml_ You add site-specific configuration to the _dcs-site.xml_ file, for DCS, site specific customizations go into the file _conf/dcs-site.xml_. 
For the list of configurable properties, see <<dcs_default_configurations,DCS Default Configuration>> below or view the raw _dcs-default.xml_ source file in the DCS source code at _src/main/resources_. Not all configuration options make it out to _dcs-default.xml_. Configuration -that it is thought rare anyone would change can exist only in code; the only way +that it is thought rare anyone would change can exist only in code; the only way to turn up such configurations is via a reading of the source code itself. - + Currently, changes here will require a cluster restart for DCS to notice the change. - + //The file dcs-default.xml is generated as part of the build of the dcs site. See the dcs pom.xml. //The generated file is an asciidoc file. // dcs/src/main/asciidoc -// +// include::../../../../target/asciidoc/dcs-default.adoc[] === _dcs-env.sh_ @@ -307,21 +277,19 @@ Set DCS environment variables in this file. Examples include options to pass the an DCS daemon such as heap size and garbage collector configs. You can also set configurations for DCS configuration, log directories, niceness, ssh options, where to locate process pid files, etc. Open the file at _conf/dcs-env.sh_ and peruse its content. Each option is fairly well documented. Add your own environment variables here if you want them read by DCS daemons on startup. Changes done to this file requires restart of DCS. - + === _log4j.properties_ Edit this file to change rate at which DCS files are rolled over and, to change the level at which DCS logs messages. Changes done to this file will require restart of DCS. -=== _master_ -A plain-text file which lists hostname or host IP address on which the primary master process should be started. Only one host is allowed to be the primary master - -=== _backup-masters_ -A plain-text file which lists hosts on which the backup master process should be started. 
Only one host per line is allowed +=== _masters_ +A plain-text file which lists the hostnames or host IP addresses on which the primary and backup master processes should be started. The first entry will be the primary DcsMaster and the remaining lines +will be the backup DcsMaster nodes. Only one host per line is allowed === _servers_ A plain-text file which lists hosts on which the DcsServer server process should be started. Only one host per line or the host name followed by the count or number of master executor servers. All servers listed in this file will be started and stopped when DCS start or stop is run. - -== Example Configurations + +== Example Configurations === Basic Distributed DCS Install @@ -357,9 +325,9 @@ _servers_, and _dcs-env.sh_, found in the DCS _conf_ directory might look like. ==== _servers_ -In this file, you list the nodes that will run DcsServers. In this case, +In this file, you list the nodes that will run DcsServers. In this case, there are two DcsServrs per node each starting a single mxosrvr: -[source,console] +[source,console] ---- example1 example2 @@ -380,21 +348,17 @@ Alternatively, you can list the nodes followed by the number of mxosrvrs: example4 2 ---- -==== _master_ - -In this file, you list the node that will run primary DcsMasters. -[source,console] ----- - example4 ----- +==== _masters_ -==== _backup-masters_ - -In this file, you list the nodes that will run backup DcsMasters. In this case, -there is a backup master running on the second node: -[source,console] +In this file, you list all the nodes that will run DcsMasters. The first entry +will be the primary DcsMaster and the remaining nodes will be the backup DcsMasters.
+In the below example, host4 will be the primary DcsMaster node and host5 and host6 are +the backup DcsMaster nodes +[source,console] ---- - example2 + host4 + host5 + host6 ---- ==== _dcs-env.sh_ @@ -403,7 +367,7 @@ Below we use a _diff_ to show the differences from default in the _dcs-env.sh_ f are setting the DCS heap to be 4G instead of the default 128M. [source,console] ----- +---- $ git diff dcs-env.sh diff --git a/conf/dcs-env.sh b/conf/dcs-env.sh index e70ebc6..96f8c27 100644 @@ -411,58 +375,58 @@ index e70ebc6..96f8c27 100644 +++ b/conf/dcs-env.sh @@ -31,7 +31,7 @@ export JAVA_HOME=/usr/java/jdk1.7.0/ # export DCS_CLASSPATH= - + # The maximum amount of heap to use, in MB. Default is 128. -# export DCS_HEAPSIZE=128 +export DCS_HEAPSIZE=4096 - + # Extra Java runtime options. # Below are what we set by default. May only work with SUN JVM. ---- - + Use _rsync_ to copy the content of the _conf_ directory to all nodes of the cluster. - + [[ha.configurations]] -== High Availability(HA) Configuration -The master configuration file for DcsMaster may be configured by adding the host name to the _conf/master_ file. If the master is -configured to start on the remote node then, during start of dcs the primary master will be started on the remote -node. If the _conf/master_ file is empty then the primary master will be started on the host where the dcs start script was run. -Similarly, DcsMaster backup servers may be configured by adding host names to the _conf/backup-masters_ file. They are -started and stopped automatically by the _bin/master-backup.sh_ script whenever DCS is started or stopped. Every backup +== High Availability(HA) Configuration +The master configuration file for DcsMaster may be configured by adding the host name to the _conf/masters_ file. If the master is +configured to start on the remote node then, during start of dcs the primary master will be started on the remote +node. 
If the _conf/masters_ file is empty then the primary master will be started on the host where the dcs start script was run. +Similarly, DcsMaster backup servers may be configured by adding additional host names to the _conf/masters_ file. They are +started and stopped automatically by the _bin/master-backup.sh_ script whenever DCS is started or stopped. Every backup DcsMaster follows the current leader DcsMaster watching for it to fail. If failure of the leader occurs, first backup DcsMaster in line for succession checks to see if floating IP is enabled. If enabled it executes the _bin/scripts/dcsbind.sh_ script to add a floating IP address to an interface on its node. It then continues with normal initialization and eventually starts listening for new client connections. It may take several seconds for the take over to complete. When a failed node is restored a new DcsMaster backup may -be started manually by executing the _bin/dcs-daemon.sh_ script on the restored node. +be started manually by executing the _dcsstart_ script from any node. ---- ->bin/dcs-daemon.sh start master ----- +>$TRAF_HOME/sql/scripts/dcsstart +---- The newly created DcsMaster backup process will take its place at the back of the line waiting for the current DcsMaster leader to fail. - + === `dcs.master.port` The default value is 23400. This is the port the DcsMaster listener binds to waiting for JDBC/ODBC T4 client connections. The value may need to be changed if this port number conflicts with other ports in use on your cluster. - + To change this configuration, edit _conf/dcs-site.xml_, copy the changed file around the cluster and restart. === `dcs.master.port.range` The default value is 100. This is the total number of ports that MXOSRVRs will scan trying to find an available port to use. You must ensure the value is large enough to support the -number of MXOSRVRs configured in _conf/servers_. +number of MXOSRVRs configured in _conf/servers_.
=== `dcs.master.floating.ip` -The default value is false. When set to true the floating IP feature in the DcsMaster is enabled via the _bin/dcsbind.sh_ script. +The default value is false. When set to true the floating IP feature in the DcsMaster is enabled via the _bin/dcsbind.sh_ script. This allows backup DcsMaster to take over and set the floating IP address. === `dcs.master.floating.ip.external.interface` -There is no default value. You must ensure the value contains the correct interface for your network configuration. +There is no default value. You must ensure the value contains the correct interface for your network configuration. === `dcs.master.floating.ip.external.ip.address` There is no default value. It is important that you set this to the dotted IP address appropriate for your network. - + To change this configuration, edit _dcs-site.xml_, copy the changed file to all nodes in the cluster and restart dcs. http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/install/ambari-installer/traf-mpack/common-services/TRAFODION/2.1/configuration/dcs-env.xml ---------------------------------------------------------------------- diff --git a/install/ambari-installer/traf-mpack/common-services/TRAFODION/2.1/configuration/dcs-env.xml b/install/ambari-installer/traf-mpack/common-services/TRAFODION/2.1/configuration/dcs-env.xml index d44366d..b168db1 100644 --- a/install/ambari-installer/traf-mpack/common-services/TRAFODION/2.1/configuration/dcs-env.xml +++ b/install/ambari-installer/traf-mpack/common-services/TRAFODION/2.1/configuration/dcs-env.xml @@ -101,11 +101,8 @@ export DCS_OPTS="-XX:+UseConcMarkSweepGC" # export DCS_REST_OPTS="$DCS_REST_OPTS $DCS_JMX_BASE -Dcom.sun.management.jmxremote.port=10103" # export DCS_ZOOKEEPER_OPTS="$DCS_ZOOKEEPER_OPTS $DCS_JMX_BASE -Dcom.sun.management.jmxremote.port=10104" -# File naming host on which DCS Primary Master is configured to run. $DCS_HOME/conf/master by default. 
-# export DCS_PRIMARY_MASTER=${DCS_HOME}/conf/master - -# File naming hosts on which DCS Backup Masters is configured to run. $DCS_HOME/conf/backup-masters by default. -# export DCS_BACKUP_MASTERS=${DCS_HOME}/conf/backup-masters +# File naming hosts on which DCS Masters is configured to run. $DCS_HOME/conf/masters by default. +# export DCS_MASTERS=${DCS_HOME}/conf/masters # File naming hosts on which DCS Servers will run. $DCS_HOME/conf/servers by default. # export DCS_SERVERS=${DCS_HOME}/conf/servers http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/install/ambari-installer/traf-mpack/common-services/TRAFODION/2.1/package/scripts/trafodionnode.py ---------------------------------------------------------------------- diff --git a/install/ambari-installer/traf-mpack/common-services/TRAFODION/2.1/package/scripts/trafodionnode.py b/install/ambari-installer/traf-mpack/common-services/TRAFODION/2.1/package/scripts/trafodionnode.py index 584d25d..642851e 100755 --- a/install/ambari-installer/traf-mpack/common-services/TRAFODION/2.1/package/scripts/trafodionnode.py +++ b/install/ambari-installer/traf-mpack/common-services/TRAFODION/2.1/package/scripts/trafodionnode.py @@ -138,15 +138,8 @@ class Node(Script): content = InlineTemplate(params.dcs_log4j_template), mode=0644) - serverlist = params.dcs_mast_node_list[0] + '\n' - File(os.path.join(trafhome,"master"), - owner = params.traf_user, - group = params.traf_group, - content = serverlist, - mode=0644) - - serverlist = '\n'.join(params.dcs_back_node_list) + '\n' - File(os.path.join(trafhome,"backup-masters"), + serverlist = '\n'.join(params.dcs_mast_node_list[1:len(params.dcs_mast_node_list)]) + '\n' + File(os.path.join(trafhome,"masters"), owner = params.traf_user, group = params.traf_group, content = serverlist, @@ -172,7 +165,7 @@ class Node(Script): owner=params.traf_user, mode=0644) # install DCS conf files - cmd = "source ~/.bashrc ; mv -f ~/dcs-env.sh ~/log4j.properties ~/dcs-site.xml ~/master 
~/backup-masters ~/servers $DCS_INSTALL_DIR/conf/" + cmd = "source ~/.bashrc ; mv -f ~/dcs-env.sh ~/log4j.properties ~/dcs-site.xml ~/masters ~/servers $DCS_INSTALL_DIR/conf/" Execute(cmd,user=params.traf_user) XmlConfig("rest-site.xml", http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f48d073/install/python-installer/scripts/dcs_setup.py ---------------------------------------------------------------------- diff --git a/install/python-installer/scripts/dcs_setup.py b/install/python-installer/scripts/dcs_setup.py index 0d94da0..239d953 100755 --- a/install/python-installer/scripts/dcs_setup.py +++ b/install/python-installer/scripts/dcs_setup.py @@ -39,8 +39,7 @@ def run(): dcs_conf_dir = '%s/dcs-%s/conf' % (traf_home, traf_ver) dcs_srv_file = dcs_conf_dir + '/servers' - dcs_master_file = dcs_conf_dir + '/master' - dcs_bkmaster_file = dcs_conf_dir + '/backup-masters' + dcs_master_file = dcs_conf_dir + '/masters' dcs_site_file = dcs_conf_dir + '/dcs-site.xml' rest_site_file = '%s/rest-%s/conf/rest-site.xml' % (traf_home, traf_ver) @@ -57,7 +56,7 @@ def run(): ### modify dcs config files ### # modify master dcs_master = nodes[0] - append_file(dcs_master_file, dcs_master) + append_file(dcs_master_file, dcs_master+'\n') # modify dcs-site.xml net_interface = run_cmd('ip route |grep default|awk \'{print $5}\'') @@ -82,9 +81,9 @@ def run(): dcs_floating_ip_cfg = 'export DCS_MASTER_FLOATING_IP=%s' % dcs_floating_ip append_file(TRAF_CFG_FILE, dcs_floating_ip_cfg) - # modify backup_master + # modify master with backup master host for dcs_backup_node in dcs_backup_nodes.split(','): - append_file(dcs_bkmaster_file, dcs_backup_node) + append_file(dcs_master_file, dcs_backup_node) p.write_xml()
