Github user paul-rogers commented on a diff in the pull request:

    https://github.com/apache/drill/pull/1082#discussion_r164324919
  
    --- Diff: distribution/src/resources/auto-setup.sh ---
    @@ -0,0 +1,222 @@
    +#!/usr/bin/env bash
    +# Licensed to the Apache Software Foundation (ASF) under one or more
    +# contributor license agreements.  See the NOTICE file distributed with
    +# this work for additional information regarding copyright ownership.
    +# The ASF licenses this file to You under the Apache License, Version 2.0
    +# (the "License"); you may not use this file except in compliance with
    +# the License.  You may obtain a copy of the License at
    +#
    +#     http://www.apache.org/licenses/LICENSE-2.0
    +#
    +# Unless required by applicable law or agreed to in writing, software
    +# distributed under the License is distributed on an "AS IS" BASIS,
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    +# See the License for the specific language governing permissions and
    +# limitations under the License.
    +
    +# This file is invoked by drill-config.sh during a Drillbit startup and 
provides
    +# default checks and autoconfiguration.
    +# Distributions should not put anything in this file. Checks can be
    +# specified in ${DRILL_HOME}/conf/distrib-setup.sh
    +# Users should not put anything in this file. Additional checks can be 
defined
    +# and put in ${DRILL_CONF_DIR}/drill-setup.sh instead.
    +# To FAIL any check, return with a non-zero return code
    +# e.g.
    +# if [ $status == "FAILED" ]; return 1; fi
    +
    
+###==========================================================================
    +# FEATURES
    +# 1. Provides checks and auto-configuration for memory settings
    
+###==========================================================================
    +
    +# Convert Java memory value to MB
    +function valueInMB() {
    +  if [ -z "$1" ]; then echo ""; return; fi
    +  local inputTxt=`echo $1| tr '[A-Z]' '[a-z]'`
    +  local inputValue=`echo ${inputTxt:0:${#inputTxt}-1}`;
    +  # Extracting Numeric Value
    +  if [[ "$inputTxt" == *g ]]; then
    +    let valueInMB=$inputValue*1024
    +  elif [[ "$DbitMaxProcMem" == *k ]]; then
    +    let valueInMB=$inputValue/1024
    +  elif [[ "$inputTxt" == *m ]]; then
    +    let valueInMB=$inputValue
    +  elif [[ "$inputTxt" == *% ]]; then
    +    #TotalRAM_inMB*percentage [Works on Linux]
    +    let valueInMB=$inputValue*$totalRAM_inMB/100;
    +  else
    +    echo error;
    +    return 1;
    +  fi
    +  echo "$valueInMB"
    +  return
    +}
    +
    +# Convert Java memory value to GB
    +function valueInGB() {
    +  if [ -z "$1" ]; then echo ""; return; fi
    +  local inputTxt=`echo $1| tr '[A-Z]' '[a-z]'`
    +  local inputValue=`echo ${inputTxt:0:${#inputTxt}-1}`;
    +  # Extracting Numeric Value
    +  if [[ "$inputTxt" == *g ]]; then
    +    let valueInGB=$inputValue
    +  elif [[ "$DbitMaxProcMem" == *k ]]; then
    +    let valueInGB=$inputValue/1024/1024
    +  elif [[ "$inputTxt" == *m ]]; then
    +    let valueInGB=$inputValue/1024
    +  elif [[ "$inputTxt" == *% ]]; then
    +    #TotalRAM_inMB*percentage [Works on Linux]
    +    let valueInGB=$inputValue*`cat /proc/meminfo | grep MemTotal | tr ' ' 
'\n'| grep '[0-9]'`/1024/1024/100;
    +  else
    +    echo error;
    +    return 1;
    +  fi
    +  echo "$valueInGB"
    +  return
    +}
    +
    +# Estimates code cache based on total heap and direct
    +function estCodeCacheInMB() {
    +  local totalHeapAndDirect=$1
    +  if [ $totalHeapAndDirect -le 4096 ]; then echo 512;
    +  elif [ $totalHeapAndDirect -le 10240 ]; then echo 768;
    +  else echo 1024;
    +  fi
    +}
    +
    +#Print Current Allocation
    +function printCurrAllocation()
    +{
    +  if [ -n "$DRILLBIT_MAX_PROC_MEM" ]; then echo -e 
"\tDRILLBIT_MAX_PROC_MEM=$DRILLBIT_MAX_PROC_MEM"; fi
    +  if [ -n "$DRILL_HEAP" ]; then echo -e "\tDRILL_HEAP=$DRILL_HEAP"; fi
    +  if [ -n "$DRILL_MAX_DIRECT_MEMORY" ]; then echo -e 
"\tDRILL_MAX_DIRECT_MEMORY=$DRILL_MAX_DIRECT_MEMORY"; fi
    +  if [ -n "$DRILLBIT_CODE_CACHE_SIZE" ]; then
    +    echo -e "\tDRILLBIT_CODE_CACHE_SIZE=$DRILLBIT_CODE_CACHE_SIZE "
    +    echo -e "\t*NOTE: It is recommended not to specify 
DRILLBIT_CODE_CACHE_SIZE as this will be auto-computed based on the HeapSize 
and would not exceed 1GB"
    +  fi
    +}
    +
    
+#============================================================================
    +# Check and auto-configuration for memory settings
    
+#----------------------------------------------------------------------------
    +#Default (Track status of this check: "" => Continue checking ; "PASSED" 
=> no more check required)
    +AutoMemConfigStatus=""
    +
    +#Computing existing system information
    +# Tested on Linux (CentOS/RHEL/Ubuntu); Cygwin (Win10Pro-64bit)
    +if [[ "$OSTYPE" == *linux* ]] || [[ "$OSTYPE" == cygwin* ]]; then
    +  let totalRAM_inMB=`cat /proc/meminfo | grep MemTotal | tr ' ' '\n'| grep 
'[0-9]'`/1024
    +  let freeRAM_inMB=`cat /proc/meminfo | grep MemFree | tr ' ' '\n'| grep 
'[0-9]'`/1024
    +elif [[ "$OSTYPE" == darwin* ]]; then
    +  # Mac OSX
    +  #Refer for math: https://apple.stackexchange.com/a/196925
    +  #Page Size
    +  let macOSPageSize=`vm_stat | grep 'page size' | grep -o -E '[0-9]+'`
    +  #MemoryUsage on MacOS
    +  let freePg=`vm_stat | grep free | awk '{ print $NF }' | sed 's/\.//'`
    +  let activePg=`vm_stat | grep -w 'active:' | awk '{ print $NF }' | sed 
's/\.//'`
    +  let speculativePg=`vm_stat | grep speculative | awk '{ print $NF }' | 
sed 's/\.//'`
    +  let fileCachePg=`vm_stat | grep File-backed | awk '{ print $NF }' | sed 
's/\.//'`
    +  let wiredMemPg=`vm_stat | grep 'wired down' | awk '{ print $NF }' | sed 
's/\.//'`
    +  let compressedPg=`vm_stat | grep 'occupied by compressor' | awk '{ print 
$NF }' | sed 's/\.//'`
    +  #Total
    +  let 
totalRAM_inPages=$freePg+$activePg+$speculativePg+$fileCachePg+$wiredMemPg+$compressedPg
    +  let totalRAM_inMB=$totalRAM_inPages*$macOSPageSize/1048576
    +  let freeRAM_inMB=$freePg*$macOSPageSize/1048576
    +elif [[ "$OSTYPE" == "msys" ]]; then
    +  # Msys env on MinGW (TODO: Pending verification)
    +  let totalRAM_inMB=`cat /proc/meminfo | grep MemTotal | tr ' ' '\n'| grep 
'[0-9]'`/1024
    +  let freeRAM_inMB=`cat /proc/meminfo | grep MemFree | tr ' ' '\n'| grep 
'[0-9]'`/1024
    +else
    +  # Unknown OS
    +  echo `date +%Y-%m-%d" "%H:%M:%S`"  [WARN] Unknown OS ("$OSTYPE"). Will 
not attempt to auto-configure memory"
    +  AutoMemConfigStatus="PASSED"
    +fi
    +
    +#Read current values
    +DbitMaxProcMem=$(valueInMB $DRILLBIT_MAX_PROC_MEM)
    +DbitMaxDirectMem=$(valueInMB $DRILL_MAX_DIRECT_MEMORY)
    +DbitMaxHeapMem=$(valueInMB $DRILL_HEAP)
    +DbitMaxCodeCacheMem=$(valueInMB $DRILLBIT_CODE_CACHE_SIZE)
    +
    +# Alert for %age usage
    +if [[ "$DRILLBIT_MAX_PROC_MEM" == *% ]] && [ -z "$AutoMemConfigStatus" ]; 
then
    +  echo `date +%Y-%m-%d" "%H:%M:%S`"  [WARN] "$DRILLBIT_MAX_PROC_MEM" of 
System Memory ("$(valueInGB $totalRAM_inMB'm')" GB) translates to "$(valueInGB 
$DbitMaxProcMem'm')" GB"
    +fi
    +
    +### Performing Auto-Configuration
    +if [ -z "$DbitMaxProcMem" ] && [ -z "$AutoMemConfigStatus" ]; then
    +  if [ -n "$DbitMaxDirectMem" ] && [ -n "$DbitMaxHeapMem" ]; then
    +    ## [SCENARIO 1]: TotalCap is NOT Defined, but Heap&Direct ARE Defined 
(i.e. no limit)
    +    let currTotal=$DbitMaxDirectMem+$DbitMaxHeapMem
    +    #Estimating CodeCache size of current total
    +    if [ -z "$DbitMaxCodeCacheMem" ]; then export 
DRILLBIT_CODE_CACHE_SIZE=$(estCodeCacheInMB $currTotal)'m'; fi
    +  fi
    +  # Default values will be loaded for unspecified memory parameters
    +  AutoMemConfigStatus="PASSED"
    +elif [ -z "$AutoMemConfigStatus" ]; then
    +  ## Scenario: Total IS Defined
    +  if [ -z "$DbitMaxCodeCacheMem" ]; then
    +    let DbitMaxCodeCacheMem=$(estCodeCacheInMB $DbitMaxProcMem)
    +    export DRILLBIT_CODE_CACHE_SIZE=$DbitMaxCodeCacheMem'm'
    +  fi
    +  if [ -n "$DbitMaxHeapMem" ] && [ -n "$DbitMaxDirectMem" ]; then
    +    ## [SCENARIO 2]: Heap &Direct ARE Defined
    +    let 
calcTotalInMB=$DbitMaxDirectMem+$DbitMaxHeapMem+$DbitMaxCodeCacheMem
    +    # Fail if exceeding process limit
    +    if [ $calcTotalInMB -gt $DbitMaxProcMem ]; then
    +      echo `date +%Y-%m-%d" "%H:%M:%S`"  [ERROR]    Unable to start 
Drillbit due to memory constraint violations"
    +      echo "  Total Memory Requested : "$(valueInGB $calcTotalInMB'm')" GB"
    +      echo "  Check the following settings to possibly modify (or increase 
the Max Memory Permitted):"
    +      printCurrAllocation
    +      exit 127
    +    else
    +      #All numbers align
    +      let deltaInGB=($DbitMaxProcMem-$calcTotalInMB)/1024
    +      if [ $deltaInGB -gt 1 ]; then
    +        echo `date +%Y-%m-%d" "%H:%M:%S`"  [WARN] You have an allocation 
of "$deltaInGB" GB that is currently unused from a total of "$(valueInGB 
$DbitMaxProcMem'm')" GB. You can increase your existing memory configuration to 
use this extra memory";
    +        printCurrAllocation
    +      fi
    +    fi
    +  elif [ -n "$DbitMaxHeapMem" ] && [ -z "$DbitMaxDirectMem" ]; then
    +    ## [SCENARIO 3]: Total and only Heap is defined
    +    let 
DbitMaxDirectMem=$DbitMaxProcMem-$DbitMaxHeapMem-$DbitMaxCodeCacheMem
    +  elif [ -z "$DbitMaxHeapMem" ] && [ -n "$DbitMaxDirectMem" ]; then
    +    ## [SCENARIO 4]: Total and only Direct is defined
    +    let 
DbitMaxHeapMem=$DbitMaxProcMem-$DbitMaxDirectMem-$DbitMaxCodeCacheMem
    +  elif [ -z "$DbitMaxDirectMem" ] && [ -z "$DbitMaxHeapMem" ]; then
    +    ## [SCENARIO 5]: Only Total is defined
    +    ## Compute Direct & Heap
    +    let DbitMaxProcMemInGB=$(valueInGB $DbitMaxProcMem'm')
    +    let DbitMaxHeapMemInGB=`echo $DbitMaxProcMemInGB | awk 
'{heap=-13.2+6.12*log($1); if (heap<1) {heap=1}; printf "%0.0f\n", heap }'`
    +    let DbitMaxHeapMem=$(valueInMB $DbitMaxHeapMemInGB'g')
    +    let 
DbitMaxDirectMem=$DbitMaxProcMem-$DbitMaxHeapMem-$DbitMaxCodeCacheMem
    +  fi
    +  ## Export computed values
    +  export DRILL_HEAP=$(valueInGB $DbitMaxHeapMem'm')"G"
    +  export DRILL_MAX_DIRECT_MEMORY=$(valueInGB $DbitMaxDirectMem'm')"G"
    +  export DRILLBIT_CODE_CACHE_SIZE=$DbitMaxCodeCacheMem'm'
    +fi
    +
    +### Broad check for System Level capacity
    +if [ -z "$AutoMemConfigStatus" ]; then
    +  # Rereading for recently exported env var
    +  DbitMaxDirectMem=$(valueInMB $DRILL_MAX_DIRECT_MEMORY)
    +  DbitMaxHeapMem=$(valueInMB $DRILL_HEAP)
    +  DbitMaxCodeCacheMem=$(valueInMB $DRILLBIT_CODE_CACHE_SIZE)
    +  let 
totalDBitMem_inMB=$DbitMaxDirectMem+$DbitMaxHeapMem+$DbitMaxCodeCacheMem
    +  if [ $totalDBitMem_inMB -gt $totalRAM_inMB ]; then
    +    echo `date +%Y-%m-%d" "%H:%M:%S`"  [ERROR] Total Memory Allocation for 
Drillbit ("$(valueInGB $totalDBitMem_inMB'm')"GB) exceeds total system memory 
("$(valueInGB $totalRAM_inMB'm')"GB)"
    +    echo `date +%Y-%m-%d" "%H:%M:%S`"  [WARN] Drillbit not will start up"
    +    exit 127
    +  elif [ $totalDBitMem_inMB -gt $freeRAM_inMB ]; then
    +    echo `date +%Y-%m-%d" "%H:%M:%S`"  [WARN] Total Memory Allocation for 
Drillbit ("$(valueInGB $totalDBitMem_inMB'm')"GB) exceeds available free memory 
("$(valueInGB $freeRAM_inMB'm')"GB)"
    +    echo `date +%Y-%m-%d" "%H:%M:%S`"  [WARN] Drillbit will start up, but 
can potentially crash due to oversubscribing of system memory."
    +  fi
    +fi
    +
    +#Implicit that checks have passed
    +AutoMemConfigStatus="PASSED"
    --- End diff --
    
    In Drill 1.8, we added a "debug" option to `drillbit.sh`:
    
    ```
    drillbit.sh --debug
    ```
    
    This does nothing other than dump the environment and config settings. That 
feature has saved our bacon on several occasions because we can easily see 
exactly what Drill will use when launched.
    
    Should we provide something like that here? In debug mode, emit the values 
that this script thinks it is getting as input, then emit the auto-configured 
values that it is setting as output. This will be handy if, say, a community 
user seems to have a problem and we want to see what the script is doing.
    
    If you feel that the `--debug` output already shows this info (because it 
dumps the environment), then all is good. Else, consider if we need something 
more here.


---

Reply via email to