Rush has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/326153 )

Change subject: tools: job to copytruncate logs in place
......................................................................


tools: job to copytruncate logs in place

This is meant to help alleviate pressure on our storage
while we investigate more permanent logging solutions.

Proposal:
* Runs daily and tails n lines out of logs that have
  breached y size
* On the defined rotation day of the week it rotates
  logs even if they have not violated size limits
* Allows specifying a minimum size for rotation
* Handles permission issues that native things like
  logrotate suffer from in this multitenant environment
* Effectively keeps 4 weeks of history, assuming logging
  does not violate the safe size limits
* Defaults are stored in a config file; CLI args given when
  running directly will override the defaults
* Initial candidate is the Toolforge admin Tool

Bug: T152235
Change-Id: I86c819a80e66cb1997dd5ba8ac07d63b423d73ac
---
A modules/labstore/files/logcleanup.py
A modules/role/files/labs/labstore/secondary/logcleanup-config.yaml
M modules/role/manifests/labs/nfs/secondary.pp
3 files changed, 420 insertions(+), 13 deletions(-)

Approvals:
  Rush: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/modules/labstore/files/logcleanup.py 
b/modules/labstore/files/logcleanup.py
new file mode 100644
index 0000000..89d27ee
--- /dev/null
+++ b/modules/labstore/files/logcleanup.py
@@ -0,0 +1,351 @@
+#!/usr/bin/env python3
+
+from glob import glob
+from datetime import date
+
+import argparse
+import calendar
+import logging
+import os
+import shutil
+import sys
+import yaml
+
+# Text shown as the program description in --help output; it is passed to
+# argparse.ArgumentParser(description=...) in main().
+description = """
+Rotate and truncate log files in a directory agnostic to source.
+
+Dyanmic-ish cleanup and management of log files in a multitentant
+environment where the stock logrotate is not suited.
+
+Meant to be run daily to ensure files are not growing larger than
+max_truncate_size but will only do normal rotation operations if
+file is also larger than min_rotate_size and it is rotation_day.
+
+This will be invasive if run multiple times on the same day as
+there is no reliable mechanism to determined when the last rotation
+has occurred.
+
+If a '.norotate' file exists in a directory we will skip it.
+
+Defaults are stored in the config file at /etc/logcleanup.yaml
+"""
+
+
+def die(msg):
+    """ Log `msg` at error level, then terminate with exit status 1.
+    :param msg: str
+    """
+    logging.error(msg)
+    raise SystemExit(1)
+
+
+def tail(f, window=20):
+    """ Returns the last `window` lines of file `f` as a list.
+
+    Walks backwards from the end of the file in fixed-size blocks until
+    more than `window` newlines have been seen, so a large file is never
+    read in full.
+
+    When `f` is a text-mode file the walk is done on its underlying binary
+    stream (`f.buffer`): Python 3 text files refuse the arbitrary seeks
+    the block walk needs.  The collected bytes are decoded with the
+    encoding of `f` (utf-8 when unknown); undecodable bytes are replaced
+    so a block boundary inside a multi-byte character cannot raise.
+
+    The position of `f` after the call is unspecified.
+    stackoverflow 136168
+    :param f: seekable file object, text or binary mode
+    :param window: int
+    :return: list of str, line endings stripped
+    """
+    if window <= 0:
+        return []
+    BUFSIZ = 1024
+    stream = getattr(f, 'buffer', f)
+    stream.seek(0, 2)
+    remaining = stream.tell()
+    newlines_seen = 0
+    # blocks are collected last-to-first and reversed when joined
+    blocks = []
+    # one newline more than `window` guarantees `window` complete lines
+    while remaining > 0 and newlines_seen <= window:
+        step = min(BUFSIZ, remaining)
+        remaining -= step
+        stream.seek(remaining, 0)
+        block = stream.read(step)
+        blocks.append(block)
+        newline = b'\n' if isinstance(block, bytes) else '\n'
+        newlines_seen += block.count(newline)
+    if not blocks:
+        return []
+    # blocks[0][:0] is an empty str/bytes of the right type to join with
+    data = blocks[0][:0].join(reversed(blocks))
+    if isinstance(data, bytes):
+        encoding = getattr(f, 'encoding', None) or 'utf-8'
+        data = data.decode(encoding, 'replace')
+    return data.splitlines()[-window:]
+
+
+def truncate(file_path):
+    """ Empty a file in place, keeping its inode.
+
+    Opening for write truncates the existing file instead of replacing it.
+    :param file_path: str
+    """
+    handle = open(file_path, 'w')
+    try:
+        logging.info('truncate {}'.format(file_path))
+    finally:
+        handle.close()
+
+
+def rotate_option(lfile, rotation_guide):
+    """ Work out the next name in the rotation series for a rotated file.
+
+    The last two characters of `lfile` are taken as its rotation suffix.
+    :param lfile: str
+    :param rotation_guide: list of suffixes in rotation order
+    :return: str, or None when `lfile` is already at the final stage
+    """
+    stem, suffix = lfile[:-2], lfile[-2:]
+
+    # last stage of the guide: nothing to move on to
+    if suffix == rotation_guide[-1]:
+        logging.debug("end of rotation for {}".format(lfile))
+        return None
+
+    successor = rotation_guide[rotation_guide.index(suffix) + 1]
+    return stem + successor
+
+
+def today():
+    """ Name of the current weekday, as given by calendar.day_name.
+    :return: str
+    """
+    # weekday() is 0 for Monday, matching the indexing of calendar.day_name
+    weekday_index = date.today().weekday()
+    return calendar.day_name[weekday_index]
+
+
+def set_perms(sfile, dfile, perms=0o644):
+    """ - match owner/group for a src to a dst
+        - set the permission mode bits on dst
+    :param sfile: str, path whose uid/gid are copied
+    :param dfile: str, path that receives the ownership and mode
+    :param perms: int, mode passed to os.chmod
+    """
+    sfile_stat = os.stat(sfile)
+    fd = os.open(dfile, os.O_RDONLY)
+    try:
+        os.fchown(fd, sfile_stat.st_uid, sfile_stat.st_gid)
+    finally:
+        # always release the descriptor, even when fchown is refused
+        os.close(fd)
+    os.chmod(dfile, perms)
+
+
+def main():
+    """ Parse CLI args and config, then rotate/truncate matching logs.
+
+    Settings are read from the YAML file given by --config; any option
+    given on the command line overrides the value from the file.
+
+    Exits with status 1 when the config file cannot be read, a required
+    setting is missing or invalid, or none of the configured directory
+    globs matches an existing directory.  A failure while processing one
+    log file is logged and does not stop the remaining files.
+    """
+
+    argparser = argparse.ArgumentParser(
+        description=description,
+    )
+
+    rotation_guide = [
+        '.1',
+        '.2',
+        '.3',
+        '.4',
+    ]
+
+    dow = [
+        'monday',
+        'tuesday',
+        'wednesday',
+        'thursday',
+        'friday',
+        'saturday',
+        'sunday',
+        '*',
+    ]
+
+    argparser.add_argument(
+        '--dir',
+        action='append',
+        help='globs to find log directories on disk',
+    )
+
+    argparser.add_argument(
+        '--end-with',
+        action='append',
+        help='''Space separated logs extensions.  "." is prepended''',
+    )
+
+    argparser.add_argument(
+        '--min-rotate-size',
+        type=int,
+        help='In bytes',
+    )
+
+    argparser.add_argument(
+        '--max-copytruncate',
+        type=int,
+        help='In bytes',
+    )
+
+    argparser.add_argument(
+        '--tail-lines',
+        type=int,
+        help='Lines to tail if size exceeds max-copytruncate',
+    )
+
+    argparser.add_argument(
+        '--rotation-day',
+        type=str,
+        help='Day of the week to rotate. %s or "*"' % (str(dow)),
+    )
+
+    argparser.add_argument(
+        '--debug',
+        help='Turn on debug logging',
+        action='store_true',
+    )
+
+    argparser.add_argument(
+        '--config',
+        type=str,
+        help='''YAML config file, arguments specified on command line will
+             override config specifed in file''',
+        default='/etc/logcleanup.yaml',
+    )
+
+    args = argparser.parse_args()
+
+    logging.basicConfig(
+        format='%(asctime)s %(levelname)s %(message)s',
+        level=logging.DEBUG if args.debug else logging.INFO)
+
+    logging.debug(args)
+
+    try:
+        with open(args.config) as f:
+            # an empty YAML document loads as None
+            config = yaml.safe_load(f) or {}
+    except IOError:
+        die('reading config file  %s' % (args.config))
+
+    # Convert Namespace object args to dict
+    # Only populate keys with not None values; an explicit 0 on the
+    # command line (e.g. --max-copytruncate 0) must still override
+    cli_args = {arg: value
+                for arg, value in vars(args).items()
+                if value is not None}
+    config.update(cli_args)
+
+    logging.debug(config)
+
+    # compared lower-cased, the same way rotate() compares against today
+    rotation_day = str(config.get('rotation_day', '')).lower()
+    if rotation_day not in dow:
+        die('Invalid day of the week %s' % (config.get('rotation_day'),))
+
+    for key in ('dir', 'end_with'):
+        if config.get(key) is None:
+            die('missing config value %s' % (key,))
+
+    required_pints = ['max_copytruncate',
+                      'tail_lines',
+                      'min_rotate_size']
+
+    for p in required_pints:
+        if p not in config:
+            die('missing config value %s' % (p,))
+        if not isinstance(config[p], int):
+            die('%s is not a valid int' % (p,))
+
+    def in_rotation(lfile):
+        """ determine if a file is part of a log rotation set
+        :param lfile: str
+        :return: bool
+        """
+        return lfile.endswith(tuple(rotation_guide))
+
+    def rotate():
+        """ is today rotation day?
+        :return: bool
+        """
+        return rotation_day in ('*', today().lower())
+
+    def rotation_forward(rotated_file):
+        """ move forward (or purge) log archives based on rotation_guide
+
+            this should be done prior to primary file rotation to avoid
+            overwriting files that are currently in the rotation cycle.
+        :param rotated_file: str
+        """
+        # oldest stage first, so each rename lands on a free name
+        for end in reversed(rotation_guide):
+            rotation_stage = rotated_file + end
+            if os.path.exists(rotation_stage):
+                logging.debug("rotation {} exists".format(rotation_stage))
+                target = rotate_option(rotation_stage, rotation_guide)
+
+                if target is None:
+                    logging.debug("removing {}".format(rotation_stage))
+                    os.remove(rotation_stage)
+                else:
+                    logging.debug(
+                        "move {} to {}".format(rotation_stage, target))
+                    os.rename(rotation_stage, target)
+
+    def rotateable(all_files):
+        """ Find all files eligible for rotation
+        :param all_files: list of files
+        :return: list
+        """
+
+        # only consider files that do not appear to be in-rotation derivatives
+        candidate_logs = [f for f in all_files if not in_rotation(f)]
+
+        extensions = tuple('.' + ext for ext in config['end_with'])
+        valid_logs = [f for f in candidate_logs if f.endswith(extensions)]
+        logging.debug("Found {} valid files from {}".format(
+            len(valid_logs), config['end_with']))
+        return valid_logs
+
+    def process_logfile(fpath):
+        """ Process an individual log file and rotation series in a
+        directory
+        :param fpath: str
+        """
+
+        fpath_new = fpath + rotation_guide[0]
+        fpath_size = os.path.getsize(fpath)
+        logging.debug("{} is {} bytes".format(fpath, fpath_size))
+
+        if fpath_size < config['min_rotate_size']:
+            logging.debug('{} is too small to rotate'.format(fpath))
+            return
+
+        # Notice: given a max_copytruncate directive (size):
+        # we forceably rotate (tail) logs that are larger than we are
+        # willing to copytruncate safely //even if not rotation day//
+        max_copytruncate = config['max_copytruncate']
+        if max_copytruncate and fpath_size > max_copytruncate:
+
+            logging.warning(
+                "{} is larger than {}".format(fpath, max_copytruncate))
+            rotation_forward(fpath)
+
+            with open(fpath, 'r') as f:
+                tailed = tail(f, window=config['tail_lines'])
+
+            logging.debug('{} tailed to {}'.format(fpath, fpath_new))
+            with open(fpath_new, 'w') as f:
+                for line in tailed:
+                    f.write('{}\n'.format(line))
+
+            set_perms(fpath, fpath_new)
+            truncate(fpath)
+            return
+
+        if rotate():
+            rotation_forward(fpath)
+            logging.debug("rotating {} to {}".format(fpath, fpath_new))
+
+            shutil.copy2(fpath, fpath_new)
+            set_perms(fpath, fpath_new)
+            truncate(fpath)
+
+    try:
+        all_paths = []
+        for path in config['dir']:
+            all_paths.extend(glob(path))
+    except OSError as e:
+        logging.warning(str(e))
+        argparser.print_help()
+        sys.exit(1)
+
+    valid_paths = [d for d in all_paths if os.path.isdir(d)]
+    if not valid_paths:
+        logging.error('no valid path specified')
+        sys.exit(1)
+
+    logging.debug("found {} valid paths".format(len(valid_paths)))
+
+    all_logs = 0
+    for path in valid_paths:
+
+        if os.path.exists(os.path.join(path, '.norotate')):
+            logging.info("skipping {}".format(path))
+            continue
+
+        valid_logs = rotateable(os.listdir(path))
+        all_logs += len(valid_logs)
+
+        for f in valid_logs:
+            pfull = os.path.join(path, f)
+            logging.debug(pfull)
+            try:
+                process_logfile(pfull)
+            except Exception:
+                # best effort: one bad file must not stop the others
+                logging.exception('{} failed'.format(pfull))
+    logging.debug("processed {} logs".format(all_logs))
+
+
+# Script entry point: run main() only when executed directly.
+if __name__ == '__main__':
+    main()
diff --git a/modules/role/files/labs/labstore/secondary/logcleanup-config.yaml 
b/modules/role/files/labs/labstore/secondary/logcleanup-config.yaml
new file mode 100644
index 0000000..1313e3f
--- /dev/null
+++ b/modules/role/files/labs/labstore/secondary/logcleanup-config.yaml
@@ -0,0 +1,18 @@
+dir:
+  - /srv/tools/shared/tools/project/admin
+end_with:
+  - log
+  - err
+  - out
+# Force rotation on non-rotation days
+# if file exceeds 10Mb
+max_copytruncate: 10000000
+# Do not bother if <=1000 bytes (1Kb)
+min_rotate_size: 1000
+# If size>=max_copytruncate tail this many
+# lines on truncate
+tail_lines: 10000
+# Do rotation on this day even if not in
+# violation of max_copytruncate but ignore
+# if <=min_rotate_size
+rotation_day: 'wednesday'
diff --git a/modules/role/manifests/labs/nfs/secondary.pp 
b/modules/role/manifests/labs/nfs/secondary.pp
index 5824ae6..6a921a0 100644
--- a/modules/role/manifests/labs/nfs/secondary.pp
+++ b/modules/role/manifests/labs/nfs/secondary.pp
@@ -110,31 +110,69 @@
     class { 'labstore::monitoring::interfaces':
         monitor_iface => $monitor_iface,
     }
+
     class { 'labstore::monitoring::secondary':
         drbd_role     => $drbd_role,
         cluster_iface => $monitor_iface,
         cluster_ip    => $cluster_ip,
     }
 
+    file {'/usr/local/sbin/logcleanup':
+        source => 'puppet:///modules/labstore/logcleanup.py',
+        mode   => '0744',
+        owner  => 'root',
+        group  => 'root',
+    }
+
+    file {'/etc/logcleanup-config.yaml':
+        source => 
'puppet:///modules/role/labs/labstore/secondary/logcleanup-config.yaml',
+        mode   => '0644',
+        owner  => 'root',
+        group  => 'root',
+    }
+
+    file { '/usr/local/sbin/safe-du':
+        source => 'puppet:///modules/labstore/monitor/safe-du.sh',
+        mode   => '0744',
+        owner  => 'root',
+        group  => 'root',
+    }
+
+    sudo::user { 'diamond_dir_size_tracker':
+        user       => 'diamond',
+        privileges => ['ALL = NOPASSWD: /usr/local/sbin/safe-du'],
+        require    => File['/usr/local/sbin/safe-du'],
+    }
+
     if($drbd_role == 'primary') {
-
-        file { '/usr/local/sbin/safe-du':
-            source => 'puppet:///modules/labstore/monitor/safe-du.sh',
-            mode   => '0744',
-            owner  => 'root',
-            group  => 'root',
-        }
-
-        sudo::user { 'diamond_dir_size_tracker':
-            user       => 'diamond',
-            privileges => ['ALL = NOPASSWD: /usr/local/sbin/safe-du'],
-            require    => File['/usr/local/sbin/safe-du'],
-        }
 
         diamond::collector { 'DirectorySize':
             source      => 
'puppet:///modules/labstore/monitor/dir_size_tracker.py',
             config_file => 
'puppet:///modules/labstore/monitor/DirectorySizeCollector.conf',
             require     => Sudo::User['diamond_dir_size_tracker'],
         }
+
+        cron { 'logcleanup':
+            ensure      => present,
+            environment => 'MAILTO=labs-ad...@lists.wikimedia.org',
+            command     => '/usr/local/sbin/logcleanup --config 
/etc/logcleanup-config.yaml',
+            user        => 'root',
+            minute      => '0',
+            hour        => '14',
+            require     => [File['/usr/local/sbin/logcleanup'], 
File['/etc/logcleanup-config.yaml']],
+
+        }
+    }
+
+    if($drbd_role != 'primary') {
+        cron { 'logcleanup':
+            ensure      => absent,
+            environment => 'MAILTO=labs-ad...@lists.wikimedia.org',
+            command     => '/usr/local/sbin/logcleanup --config 
/etc/logcleanup-config.yaml',
+            user        => 'root',
+            minute      => '0',
+            hour        => '14',
+            require     => [File['/usr/local/sbin/logcleanup'], 
File['/etc/logcleanup-config.yaml']],
+        }
     }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/326153
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I86c819a80e66cb1997dd5ba8ac07d63b423d73ac
Gerrit-PatchSet: 20
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Rush <r...@wikimedia.org>
Gerrit-Reviewer: Madhuvishy <mviswanat...@wikimedia.org>
Gerrit-Reviewer: Rush <r...@wikimedia.org>
Gerrit-Reviewer: Volans <rcocci...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to