dlmarion commented on code in PR #5439:
URL: https://github.com/apache/accumulo/pull/5439#discussion_r2060155927


##########
server/base/src/main/java/org/apache/accumulo/server/util/ZooZap.java:
##########
@@ -80,8 +89,21 @@ static class Opts extends Help {
     boolean zapCompactors = false;
     @Parameter(names = "-sservers", description = "remove scan server locks")
     boolean zapScanServers = false;
+    @Parameter(names = "--gc", description = "remove gc server locks")
+    boolean zapGc = false;
+    @Parameter(names = "--monitor", description = "remove monitor server 
locks")
+    boolean zapMonitor = false;

Review Comment:
   `--gc` should probably be added to 
[MiniAccumuloClusterImpl](https://github.com/apache/accumulo/blob/2.1/minicluster/src/main/java/org/apache/accumulo/miniclusterImpl/MiniAccumuloClusterImpl.java#L840).
 Both should be added to 
[UpgradeUtil](https://github.com/apache/accumulo/blob/2.1/server/base/src/main/java/org/apache/accumulo/server/util/UpgradeUtil.java#L135)



##########
assemble/bin/accumulo-cluster:
##########
@@ -646,10 +655,170 @@ function control_services() {
   if [[ $ARG_LOCAL == 0 && $ARG_ALL == 1 && ($operation == "stop" || 
$operation == "kill") ]]; then
     if ! isDebug; then
       echo "Cleaning all server entries in ZooKeeper"
-      "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap -manager 
-tservers -compaction-coordinators -compactors -sservers
+      "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap -manager 
-tservers -compaction-coordinators -compactors -sservers --gc --monitor
+    fi
+  fi
+
+}
+
+function prune_group() {
+  local service_type=$1
+  local group=$2
+  local expectedCount=$3
+  declare -a hosts
+  read -r -a hosts <<<"$4"
+
+  if isDebug; then
+    echo "$(blue DEBUG) starting prune for service:$service_type group:$group 
expected:$expectedCount"
+  fi
+
+  if [ -z ${AC_TMP_DIR+x} ]; then
+    echo "$(red ERROR): AC_TMP_DIR is not set"
+    exit 1
+  fi
+  local 
exclude_file="$AC_TMP_DIR/accumulo-zoozap-exclude-$service_type-$group.txt"
+  touch "$exclude_file"
+
+  # Determine the host:ports known by the accumulo cluster script, these 
should be kept
+  for host in "${hosts[@]}"; do
+    "${SSH[@]}" "$host" bash -c "'$bin/accumulo-service $service_type list'" | 
grep -E "^[a-zA-Z0-9]+_${group}_[0-9]+" | head -n "$expectedCount" | awk 
'{print $3}' | tr ',' '\n' | awk '{print "'"$host"':" $1}' >>"$exclude_file"
+  done
+
+  local lockTypeOpt
+  case $service_type in
+    manager)
+      lockTypeOpt="-manager"
+      ;;
+    compaction-coordinator)
+      lockTypeOpt="-compaction-coordinators"
+      ;;
+    compactor)
+      lockTypeOpt="-compactors"
+      ;;
+    tserver)
+      lockTypeOpt="-tservers"
+      ;;
+    sserver)
+      lockTypeOpt="-sservers"
+      ;;
+    gc)
+      lockTypeOpt="--gc"
+      ;;
+    monitor)
+      lockTypeOpt="--monitor"
+      ;;
+    *)
+      echo "Prune does not support $service_type"
+      exit 1
+      ;;
+  esac
+
+  if isDebug; then
+    "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap "$lockTypeOpt" 
-verbose --include-groups "$group" --exclude-host-ports "$exclude_file" 
--dry-run
+  else
+    "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap "$lockTypeOpt" 
-verbose --include-groups "$group" --exclude-host-ports "$exclude_file"
+  fi
+}
+
+# Kills extra server processes that are not needed according to the
+# cluster.yaml file.  Conceptually this code is trying to reconcile the
+# following three sets of servers.
+#
+#  1. The notional goal set of servers specified by cluster.yaml
+#  2. The set of servers processes seen in zookeeper
+#  3. The set of server processes known to the accumulo-cluster script.  This
+#     is derived from pid files on hosts in set 1.
+#
+# This function attempts to find extra servers in set 2 that are not specified
+# by set 1.  When it does find extra servers it removes their zookeeper locks
+# avoiding removing locks of servers in set 3. The following are different
+# situations the code will see and handle.
+#
+#  * When a host is not cluster.yaml but has some processes listed in
+#    zookeeper.  For this case all of the process with that host can be killed.
+#  * When a resource group is not in cluster.yaml but has some processes listed
+#    in zookeeper.  For this case all of the processes with that resource group
+#    can be killed.
+#  * When a host is in cluster.yaml with a target of 3 processes but has 6
+#    processes listed in zookeeper.  For this case want to kill 3 processes 
that
+#    do not have pid files on the host.
+#
+function prune() {
+  if [[ $ARG_LOCAL == 1 ]]; then
+    # Currently the code is structured to remove all extra servers in a single 
resource group.  Finer granularity is not supported.
+    echo "$(red ERROR): Prune does not support running locally"
+    exit 1
+  fi
+
+  if [[ -z ${AC_TMP_DIR+x} ]]; then
+    echo "AC_TMP_DIR is not set"
+    exit 1
+  fi
+  local service_json="$AC_TMP_DIR/accumulo-service.json"
+  "$accumulo_cmd" admin serviceStatus --json >"$service_json" 2>/dev/null || 
exit 1
+
+  local var_name
+  local hosts
+  declare -a groups
+
+  local manager
+  if [[ $ARG_ALL == 1 || $ARG_MANAGER == 1 ]]; then
+    prune_group "manager" "default" "1" "$MANAGER_HOSTS"
+  fi
+
+  if [[ $ARG_ALL == 1 || $ARG_GC == 1 ]]; then
+    prune_group "gc" "default" "1" "$GC_HOSTS"
+  fi
+
+  if [[ $ARG_ALL == 1 || $ARG_MONITOR == 1 ]]; then
+    prune_group "monitor" "default" "1" "$MONITOR_HOSTS"
+  fi
+
+  if [[ $ARG_ALL == 1 || $ARG_COORDINATOR == 1 ]]; then
+    prune_group "compaction-coordinator" "default" "1" "$COORDINATOR_HOSTS"
+  fi
+
+  if [[ $ARG_ALL == 1 || $ARG_TSERVER == 1 ]]; then
+    #TODO in main need to adapt to having RGs for tservers
+    prune_group "tserver" "default" "$TSERVERS_PER_HOST_default" 
"$TSERVER_HOSTS_default"
+  fi
+
+  if [[ $ARG_ALL == 1 || $ARG_SSERVER == 1 ]]; then
+    groups=()
+    if [[ -n $ARG_SSERVER_GROUP ]]; then
+      read -r -a groups <<<"$ARG_SSERVER_GROUP"
+    else
+      # find all groups known in zookeeper, this will allow pruning entire 
groups that do not even exists in cluster.yaml

Review Comment:
   ```suggestion
         # find all groups known in zookeeper, this will allow pruning entire 
groups that do not even exist in cluster.yaml
   ```



##########
assemble/bin/accumulo-cluster:
##########
@@ -646,10 +655,170 @@ function control_services() {
   if [[ $ARG_LOCAL == 0 && $ARG_ALL == 1 && ($operation == "stop" || 
$operation == "kill") ]]; then
     if ! isDebug; then
       echo "Cleaning all server entries in ZooKeeper"
-      "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap -manager 
-tservers -compaction-coordinators -compactors -sservers
+      "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap -manager 
-tservers -compaction-coordinators -compactors -sservers --gc --monitor
+    fi
+  fi
+
+}
+
+function prune_group() {
+  local service_type=$1
+  local group=$2
+  local expectedCount=$3
+  declare -a hosts
+  read -r -a hosts <<<"$4"
+
+  if isDebug; then
+    echo "$(blue DEBUG) starting prune for service:$service_type group:$group 
expected:$expectedCount"
+  fi
+
+  if [ -z ${AC_TMP_DIR+x} ]; then
+    echo "$(red ERROR): AC_TMP_DIR is not set"
+    exit 1
+  fi
+  local 
exclude_file="$AC_TMP_DIR/accumulo-zoozap-exclude-$service_type-$group.txt"
+  touch "$exclude_file"
+
+  # Determine the host:ports known by the accumulo cluster script, these 
should be kept
+  for host in "${hosts[@]}"; do
+    "${SSH[@]}" "$host" bash -c "'$bin/accumulo-service $service_type list'" | 
grep -E "^[a-zA-Z0-9]+_${group}_[0-9]+" | head -n "$expectedCount" | awk 
'{print $3}' | tr ',' '\n' | awk '{print "'"$host"':" $1}' >>"$exclude_file"
+  done
+
+  local lockTypeOpt
+  case $service_type in
+    manager)
+      lockTypeOpt="-manager"
+      ;;
+    compaction-coordinator)
+      lockTypeOpt="-compaction-coordinators"
+      ;;
+    compactor)
+      lockTypeOpt="-compactors"
+      ;;
+    tserver)
+      lockTypeOpt="-tservers"
+      ;;
+    sserver)
+      lockTypeOpt="-sservers"
+      ;;
+    gc)
+      lockTypeOpt="--gc"
+      ;;
+    monitor)
+      lockTypeOpt="--monitor"
+      ;;
+    *)
+      echo "Prune does not support $service_type"
+      exit 1
+      ;;
+  esac
+
+  if isDebug; then
+    "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap "$lockTypeOpt" 
-verbose --include-groups "$group" --exclude-host-ports "$exclude_file" 
--dry-run
+  else
+    "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap "$lockTypeOpt" 
-verbose --include-groups "$group" --exclude-host-ports "$exclude_file"
+  fi
+}
+
+# Kills extra server processes that are not needed according to the
+# cluster.yaml file.  Conceptually this code is trying to reconcile the
+# following three sets of servers.
+#
+#  1. The notional goal set of servers specified by cluster.yaml
+#  2. The set of servers processes seen in zookeeper
+#  3. The set of server processes known to the accumulo-cluster script.  This
+#     is derived from pid files on hosts in set 1.
+#
+# This function attempts to find extra servers in set 2 that are not specified
+# by set 1.  When it does find extra servers it removes their zookeeper locks
+# avoiding removing locks of servers in set 3. The following are different
+# situations the code will see and handle.
+#
+#  * When a host is not cluster.yaml but has some processes listed in
+#    zookeeper.  For this case all of the process with that host can be killed.
+#  * When a resource group is not in cluster.yaml but has some processes listed
+#    in zookeeper.  For this case all of the processes with that resource group
+#    can be killed.
+#  * When a host is in cluster.yaml with a target of 3 processes but has 6
+#    processes listed in zookeeper.  For this case want to kill 3 processes 
that
+#    do not have pid files on the host.
+#
+function prune() {
+  if [[ $ARG_LOCAL == 1 ]]; then
+    # Currently the code is structured to remove all extra servers in a single 
resource group.  Finer granularity is not supported.
+    echo "$(red ERROR): Prune does not support running locally"
+    exit 1
+  fi
+
+  if [[ -z ${AC_TMP_DIR+x} ]]; then
+    echo "AC_TMP_DIR is not set"
+    exit 1
+  fi
+  local service_json="$AC_TMP_DIR/accumulo-service.json"
+  "$accumulo_cmd" admin serviceStatus --json >"$service_json" 2>/dev/null || 
exit 1
+
+  local var_name
+  local hosts
+  declare -a groups
+
+  local manager
+  if [[ $ARG_ALL == 1 || $ARG_MANAGER == 1 ]]; then
+    prune_group "manager" "default" "1" "$MANAGER_HOSTS"
+  fi
+
+  if [[ $ARG_ALL == 1 || $ARG_GC == 1 ]]; then
+    prune_group "gc" "default" "1" "$GC_HOSTS"
+  fi
+
+  if [[ $ARG_ALL == 1 || $ARG_MONITOR == 1 ]]; then
+    prune_group "monitor" "default" "1" "$MONITOR_HOSTS"
+  fi
+
+  if [[ $ARG_ALL == 1 || $ARG_COORDINATOR == 1 ]]; then
+    prune_group "compaction-coordinator" "default" "1" "$COORDINATOR_HOSTS"
+  fi
+
+  if [[ $ARG_ALL == 1 || $ARG_TSERVER == 1 ]]; then
+    #TODO in main need to adapt to having RGs for tservers
+    prune_group "tserver" "default" "$TSERVERS_PER_HOST_default" 
"$TSERVER_HOSTS_default"
+  fi
+
+  if [[ $ARG_ALL == 1 || $ARG_SSERVER == 1 ]]; then
+    groups=()
+    if [[ -n $ARG_SSERVER_GROUP ]]; then
+      read -r -a groups <<<"$ARG_SSERVER_GROUP"
+    else
+      # find all groups known in zookeeper, this will allow pruning entire 
groups that do not even exists in cluster.yaml
+      readarray -t groups < <(jq -r ".summaries.S_SERVER.resourceGroups | .[] 
" "$service_json")

Review Comment:
   In `parse_args` we check that `getopt` exists to fail-fast. We might want to 
do the same thing here in `prune`, do a check at the start of the method that 
`jq` exists and fail with a helpful message if not.



##########
assemble/bin/accumulo-cluster:
##########
@@ -646,10 +655,170 @@ function control_services() {
   if [[ $ARG_LOCAL == 0 && $ARG_ALL == 1 && ($operation == "stop" || 
$operation == "kill") ]]; then
     if ! isDebug; then
       echo "Cleaning all server entries in ZooKeeper"
-      "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap -manager 
-tservers -compaction-coordinators -compactors -sservers
+      "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap -manager 
-tservers -compaction-coordinators -compactors -sservers --gc --monitor
+    fi
+  fi
+
+}
+
+function prune_group() {
+  local service_type=$1
+  local group=$2
+  local expectedCount=$3
+  declare -a hosts
+  read -r -a hosts <<<"$4"
+
+  if isDebug; then
+    echo "$(blue DEBUG) starting prune for service:$service_type group:$group 
expected:$expectedCount"
+  fi
+
+  if [ -z ${AC_TMP_DIR+x} ]; then
+    echo "$(red ERROR): AC_TMP_DIR is not set"
+    exit 1
+  fi
+  local 
exclude_file="$AC_TMP_DIR/accumulo-zoozap-exclude-$service_type-$group.txt"
+  touch "$exclude_file"
+
+  # Determine the host:ports known by the accumulo cluster script, these 
should be kept
+  for host in "${hosts[@]}"; do
+    "${SSH[@]}" "$host" bash -c "'$bin/accumulo-service $service_type list'" | 
grep -E "^[a-zA-Z0-9]+_${group}_[0-9]+" | head -n "$expectedCount" | awk 
'{print $3}' | tr ',' '\n' | awk '{print "'"$host"':" $1}' >>"$exclude_file"
+  done
+
+  local lockTypeOpt
+  case $service_type in
+    manager)
+      lockTypeOpt="-manager"
+      ;;
+    compaction-coordinator)
+      lockTypeOpt="-compaction-coordinators"
+      ;;
+    compactor)
+      lockTypeOpt="-compactors"
+      ;;
+    tserver)
+      lockTypeOpt="-tservers"
+      ;;
+    sserver)
+      lockTypeOpt="-sservers"
+      ;;
+    gc)
+      lockTypeOpt="--gc"
+      ;;
+    monitor)
+      lockTypeOpt="--monitor"
+      ;;
+    *)
+      echo "Prune does not support $service_type"
+      exit 1
+      ;;
+  esac
+
+  if isDebug; then
+    "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap "$lockTypeOpt" 
-verbose --include-groups "$group" --exclude-host-ports "$exclude_file" 
--dry-run
+  else
+    "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap "$lockTypeOpt" 
-verbose --include-groups "$group" --exclude-host-ports "$exclude_file"
+  fi
+}
+
+# Kills extra server processes that are not needed according to the
+# cluster.yaml file.  Conceptually this code is trying to reconcile the
+# following three sets of servers.
+#
+#  1. The notional goal set of servers specified by cluster.yaml
+#  2. The set of servers processes seen in zookeeper
+#  3. The set of server processes known to the accumulo-cluster script.  This
+#     is derived from pid files on hosts in set 1.
+#
+# This function attempts to find extra servers in set 2 that are not specified
+# by set 1.  When it does find extra servers it removes their zookeeper locks
+# avoiding removing locks of servers in set 3. The following are different
+# situations the code will see and handle.
+#
+#  * When a host is not cluster.yaml but has some processes listed in
+#    zookeeper.  For this case all of the process with that host can be killed.
+#  * When a resource group is not in cluster.yaml but has some processes listed
+#    in zookeeper.  For this case all of the processes with that resource group
+#    can be killed.
+#  * When a host is in cluster.yaml with a target of 3 processes but has 6
+#    processes listed in zookeeper.  For this case want to kill 3 processes 
that
+#    do not have pid files on the host.
+#
+function prune() {
+  if [[ $ARG_LOCAL == 1 ]]; then
+    # Currently the code is structured to remove all extra servers in a single 
resource group.  Finer granularity is not supported.
+    echo "$(red ERROR): Prune does not support running locally"
+    exit 1
+  fi
+
+  if [[ -z ${AC_TMP_DIR+x} ]]; then
+    echo "AC_TMP_DIR is not set"
+    exit 1
+  fi
+  local service_json="$AC_TMP_DIR/accumulo-service.json"
+  "$accumulo_cmd" admin serviceStatus --json >"$service_json" 2>/dev/null || 
exit 1
+
+  local var_name
+  local hosts
+  declare -a groups
+
+  local manager
+  if [[ $ARG_ALL == 1 || $ARG_MANAGER == 1 ]]; then
+    prune_group "manager" "default" "1" "$MANAGER_HOSTS"
+  fi
+
+  if [[ $ARG_ALL == 1 || $ARG_GC == 1 ]]; then
+    prune_group "gc" "default" "1" "$GC_HOSTS"
+  fi
+
+  if [[ $ARG_ALL == 1 || $ARG_MONITOR == 1 ]]; then
+    prune_group "monitor" "default" "1" "$MONITOR_HOSTS"
+  fi
+
+  if [[ $ARG_ALL == 1 || $ARG_COORDINATOR == 1 ]]; then
+    prune_group "compaction-coordinator" "default" "1" "$COORDINATOR_HOSTS"
+  fi
+
+  if [[ $ARG_ALL == 1 || $ARG_TSERVER == 1 ]]; then
+    #TODO in main need to adapt to having RGs for tservers
+    prune_group "tserver" "default" "$TSERVERS_PER_HOST_default" 
"$TSERVER_HOSTS_default"
+  fi
+
+  if [[ $ARG_ALL == 1 || $ARG_SSERVER == 1 ]]; then
+    groups=()
+    if [[ -n $ARG_SSERVER_GROUP ]]; then
+      read -r -a groups <<<"$ARG_SSERVER_GROUP"
+    else
+      # find all groups known in zookeeper, this will allow pruning entire 
groups that do not even exists in cluster.yaml
+      readarray -t groups < <(jq -r ".summaries.S_SERVER.resourceGroups | .[] 
" "$service_json")
     fi
+
+    for group in "${groups[@]}"; do
+      var_name="SSERVERS_PER_HOST_$group"
+      local expected=${!var_name:-0}
+
+      hosts="SSERVER_HOSTS_$group"
+      prune_group "sserver" "$group" "$expected" "${!hosts}"
+    done
+
   fi
 
+  if [[ $ARG_ALL == 1 || $ARG_COMPACTOR == 1 ]]; then
+    groups=()
+    if [[ -n $ARG_COMPACTOR_GROUP ]]; then
+      read -r -a groups <<<"$ARG_COMPACTOR_GROUP"
+    else
+      # find all groups known in zookeeper, this will allow pruning entire 
groups that do not even exists in cluster.yaml

Review Comment:
   ```suggestion
         # find all groups known in zookeeper, this will allow pruning entire 
groups that do not even exist in cluster.yaml
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscr...@accumulo.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to