On Mon, Apr 26, 2010 at 7:42 PM, Michael Hanselmann <[email protected]> wrote:
> Until now, exactly one attempt was made to establish a connection.
> If it failed, the whole import/export for the disk in question
> aborted. Retrying will make it more reliable.
>
> Unfortunately the listening part can't be made completely
> resiliant against DoS attacks without larger and complex changes
> to the daemon and using socat's EXEC: address combined with
> forking.
>
> Signed-off-by: Michael Hanselmann <[email protected]>
> ---
>  daemons/import-export            |   82 +++++++++++++++++++++++++++++--------
>  test/import-export_unittest.bash |    2 +-
>  2 files changed, 65 insertions(+), 19 deletions(-)
>
> diff --git a/daemons/import-export b/daemons/import-export
> index 08674e7..0b1a8dc 100755
> --- a/daemons/import-export
> +++ b/daemons/import-export
> @@ -83,10 +83,12 @@ MIN_UPDATE_INTERVAL = 5.0
>  #: Give child process up to 5 seconds to exit after sending a signal
>  CHILD_LINGER_TIMEOUT = 5.0
>
> +#: How long to wait for a connection to be established
> +DEFAULT_CONNECT_TIMEOUT = 60
> +
>  # Common options for socat
>  SOCAT_TCP_OPTS = ["keepalive", "keepidle=60", "keepintvl=10", "keepcnt=5"]
>  SOCAT_OPENSSL_OPTS = ["verify=1", "cipher=HIGH", "method=TLSv1"]
> -SOCAT_CONNECT_TIMEOUT = 60
>
>
>  # Global variable for options
> @@ -185,6 +187,12 @@ class StatusFile:
>     """
>     self._data.connected = True
>
> +  def GetConnected(self):
> +    """Determines whether the daemon is connected.
> +
> +    """
> +    return self._data.connected
> +
>   def SetExitStatus(self, exit_status, error_message):
>     """Sets the exit status and an error message.
>
> @@ -237,10 +245,11 @@ def _ProcessSocatOutput(status_file, level, msg):
>         status_file.SetListenPort(port)
>         return True
>
> -    m = TRANSFER_LOOP_RE.match(msg)
> -    if m:
> -      status_file.SetConnected()
> -      return True
> +    if not status_file.GetConnected():
> +      m = TRANSFER_LOOP_RE.match(msg)
> +      if m:
> +        status_file.SetConnected()
> +        return True
>
>   return False
>
> @@ -311,6 +320,11 @@ def GetSocatCommand(mode):
>     addr1 = [
>       "OPENSSL-LISTEN:%s" % port,
>       "reuseaddr",
> +
> +      # Retry to listen if connection wasn't established successfully, up to
> +      # 100 times a second. Note that this still leaves room for DoS attacks.
> +      "forever",
> +      "intervall=0.01",
>       ] + common_addr_opts
>     addr2 = ["stdout"]
>
> @@ -318,7 +332,13 @@ def GetSocatCommand(mode):
>     addr1 = ["stdin"]
>     addr2 = [
>       "OPENSSL:%s:%s" % (options.host, options.port),
> -      "connect-timeout=%s" % SOCAT_CONNECT_TIMEOUT,
> +
> +      # How long to wait per connection attempt
> +      "connect-timeout=%s" % options.connect_timeout,
> +
> +      # Retry a few times before giving up to connect (once per second)
> +      "retry=%s" % options.connect_retries,
> +      "intervall=1",
>       ] + common_addr_opts
>
>   else:
> @@ -400,7 +420,7 @@ def GetCommand(mode, socat_stderr_fd):
>
>
>  def ProcessChildIO(child, socat_stderr_read_fd, status_file, child_logger,
> -                   signal_notify, signal_handler):
> +                   signal_notify, signal_handler, mode):
>   """Handles the child processes' output.
>
>   """
> @@ -429,19 +449,38 @@ def ProcessChildIO(child, socat_stderr_read_fd, 
> status_file, child_logger,
>         utils.SetNonblockFlag(fd, True)
>         poller.register(fd, select.POLLIN)
>
> -      timeout_calculator = None
> +      if options.connect_timeout and mode == constants.IEM_IMPORT:
> +        listen_timeout = locking.RunningTimeout(options.connect_timeout, 
> True)
> +      else:
> +        listen_timeout = None
> +
> +      exit_timeout = None
> +
>       while True:
>         # Break out of loop if only signal notify FD is left
>         if len(fdmap) == 1 and signal_notify.fileno() in fdmap:
>           break
>
> -        if timeout_calculator:
> -          timeout = timeout_calculator.Remaining() * 1000
> +        timeout = None
> +
> +        if listen_timeout and not exit_timeout:
> +          if status_file.GetConnected():
> +            listen_timeout = None
> +          elif listen_timeout.Remaining() < 0:
> +            logging.info("Child process didn't establish connection in time")
> +            child.Kill(signal.SIGTERM)
> +            exit_timeout = \
> +              locking.RunningTimeout(CHILD_LINGER_TIMEOUT, True)
> +            # Next block will calculate timeout
> +          else:
> +            # Not yet connected, check again in a second
> +            timeout = 1000
> +
> +        if exit_timeout:
> +          timeout = exit_timeout.Remaining() * 1000
>           if timeout < 0:
>             logging.info("Child process didn't exit in time")
>             break
> -        else:
> -          timeout = None
>
>         for fd, event in utils.RetryOnSignal(poller.poll, timeout):
>           if event & (select.POLLIN | event & select.POLLPRI):
> @@ -456,13 +495,13 @@ def ProcessChildIO(child, socat_stderr_read_fd, 
> status_file, child_logger,
>                 # Signal handling
>                 if signal_handler.called:
>                   signal_handler.Clear()
> -                  if timeout_calculator:
> +                  if exit_timeout:
>                     logging.info("Child process still has about %0.2f seconds"
> -                                 " to exit", timeout_calculator.Remaining())
> +                                 " to exit", exit_timeout.Remaining())
>                   else:
>                     logging.info("Giving child process %0.2f seconds to exit",
>                                  CHILD_LINGER_TIMEOUT)
> -                    timeout_calculator = \
> +                    exit_timeout = \
>                       locking.RunningTimeout(CHILD_LINGER_TIMEOUT, True)
>             else:
>               poller.unregister(fd)
> @@ -478,7 +517,7 @@ def ProcessChildIO(child, socat_stderr_read_fd, 
> status_file, child_logger,
>
>       # If there was a timeout calculator, we were waiting for the child to
>       # finish, e.g. due to a signal
> -      return not bool(timeout_calculator)
> +      return not bool(exit_timeout)
>     finally:
>       socat_stderr_lines.close()
>   finally:
> @@ -510,6 +549,13 @@ def ParseOptions():
>                     help="Remote hostname")
>   parser.add_option("--port", dest="port", action="store", type="int",
>                     help="Remote port")
> +  parser.add_option("--connect-retries", dest="connect_retries", 
> action="store",
> +                    type="int", default=0,
> +                    help=("How many times the connection should be retried"
> +                          " (export only)"))
> +  parser.add_option("--connect-timeout", dest="connect_timeout", 
> action="store",
> +                    type="int", default=DEFAULT_CONNECT_TIMEOUT,
> +                    help="Timeout for connection to be established 
> (seconds)")
>   parser.add_option("--cmd-prefix", dest="cmd_prefix", action="store",
>                     type="string", help="Command prefix")
>   parser.add_option("--cmd-suffix", dest="cmd_suffix", action="store",
> @@ -635,8 +681,8 @@ def main():
>             utils.RetryOnSignal(os.close, socat_stderr_write_fd)
>
>             if ProcessChildIO(child, socat_stderr_read_fd, status_file,
> -                              child_logger, signal_wakeup,
> -                              signal_handler):
> +                              child_logger, signal_wakeup, signal_handler,
> +                              mode):
>               # The child closed all its file descriptors and there was no
>               # signal
>               # TODO: Implement timeout instead of waiting indefinitely
> diff --git a/test/import-export_unittest.bash 
> b/test/import-export_unittest.bash
> index 3b992ed..58ab812 100755
> --- a/test/import-export_unittest.bash
> +++ b/test/import-export_unittest.bash
> @@ -23,7 +23,7 @@ set -o pipefail
>
>  export PYTHON=${PYTHON:=python}
>
> -impexpd="$PYTHON daemons/import-export"
> +impexpd="$PYTHON daemons/import-export --connect-timeout=1 
> --connect-retries=1"
>
>  # Add "-d" for debugging
>  #impexpd+=' -d'
> --
> 1.7.0.4

LGTM

>
>
>
> --
> Subscription settings: 
> http://groups.google.com/group/ganeti-devel/subscribe?hl=en
>

Reply via email to