On 01/11/17 21:45, Michael Gilbert wrote:
Hi,

I’m just starting out with robinhood. I have gotten it setup and done an initial scan and can do the various rbh commands just fine. My problem is with setting up any of the actions or policies…I can’t seem to get any of them to work. One thing I’d really like to do is have an alert when the use of the filesystem goes over a certain percentage. I’ve followed the instructions here: https://github.com/cea-hpc/robinhood/wiki/How-to-setup-quota-alerts
Hi,

This doc was written for robinhood v2.x. There may have been some changes in v3.0.
See my comments inline in your config.

and I’ve fiddled with adding alert_trigger blocks in my config, but nothing ever gets sent to any of the logs. Right now our filesystem is sitting at 80% usage and I setup the purge trigger for alerting on 50% usage. But alas I’m not getting any alerts. Here are system specs and config file:

[root@tillit robinhood.d]# robinhood --version

Product:         robinhood
Version:         3.0-1
Build:           2016-12-02 13:57:02

Compilation switches:
    Lustre filesystems
    Lustre Version: 2.8
    Address entries by FID
    MDT Changelogs supported

Database binding: MySQL

=======================================================
Here’s my config file:

# include template for alerts
%include "includes/alerts.inc"
This include is useless for what you need. This pre-defined policy is to alert about particular entries in the filesystem.
But all you want is a warning about overall fs usage.

# include template for checksuming
%include "includes/check.inc"
Do you plan to checksum your FS entries?
If not, remove useless includes for better DB performance.


define_policy list_ricky {
     status_manager = none;
     scope { owner == 'ricky'}
     default_action = cmd("ls {fullpath} >> /tmp/ricky_ls.out");
     default_lru_sort_attr = none;
}

list_ricky_rules {
    rule default {
        condition { creation > 15w }
    }
}
This definition and rules look good.
You may need to define a "list_ricky_trigger" to run it at a regular interval:

list_ricky_trigger {
trigger_on =  scheduled;
check_interval     = 1d;
}


Purge_Trigger {
# this quota applies to the entire filesystem
    trigger_on         = global_usage;
    high_threshold_pct = 50;
    check_interval     = 1m;
    alert_high = yes;
}
This should raise an alert when usage > 50%.
But I notice that the "purge" policy has not been declared at this point.

Actually, what you want is a trigger without a real policy run behind it.

I suggest you define a dummy "df" policy:

define_policy df {
     status_manager = none;
     scope = all;
default_action = none;
     default_lru_sort_attr = none;
}

And the following trigger:

df_trigger {
    trigger_on = OST_usage;
    high_threshold_pct = 92;
    check_interval     = 1h;
    alert_high = yes;
}

then you can run robinhood with: --run=df --check-thresholds
It will check the triggers and send an alert if needed.

Thomas


Purge_Trigger {
# this quota applies to the OSTs
    trigger_on         = OST_usage;
    high_threshold_pct = 92;
    check_interval     = 1h;
    alert_high = yes;
}

alert_rules {
    ignore { last_mod < 1h }

    rule default {
        action = none;
        action_params { alert = clear; }
        condition = true;
    }
}

alert_trigger {
    trigger_on         = periodic;
    check_interval = 1h;
}

#### fileclass definitions ####

# fileclasses to display in reports (can still be used in policies)
FileClass empty_files {
    definition { type == file and size == 0 }
# report = yes (default)
}
FileClass small_files {
    definition { type == file and size > 0 and size <= 16MB }
# report = yes (default)
}
FileClass std_files {
    definition { type == file and size > 16MB and size <= 1GB }
}
FileClass big_files {
    definition { type == file and size > 1GB }
}

#FileClass largedir {
#    definition { type == directory and dircount > 10000 }
#}

#### end policy rules ####


General {
    fs_path = "/scratch";
# filesystem type, as displayed by 'mount' (e.g. ext4, xfs, lustre, ...)
    fs_type = lustre;
}

EntryProcessor {
    nb_threads = 32;
STAGE_GET_INFO_FS_threads_max = 16;
}

Log {
    debug_level = EVENT;
    log_file = "/var/log/robinhood/robinhood.log";
    report_file = "/var/log/robinhood/robinhood_actions.log";
    alert_file = "/var/log/robinhood/robinhood_alerts.log";
    alert_show_attrs = yes;
}

ListManager {
    MySQL {
        server = localhost;
        db = robinhood_lustre;
        user = robinhood;
        password_file = /etc/robinhood.d/.dbpassword;
    }
}

# Lustre 2.x only
ChangeLog {
    MDT {
        mdt_name = "MDT0000";
        reader_id = "cl1";
    }
    polling_interval = 1s;
}

==============================================
My /etc/sysconfig/robinhood file is set to run --readlog --check-thresholds, per the instructions.

Any help would be greatly appreciated!

Thank you,

Mike



------------------------------------------------------------------------------
Developer Access Program for Intel Xeon Phi Processors
Access to Intel Xeon Phi processor-based developer platforms.
With one year of Intel Parallel Studio XE.
Training and support from Colfax.
Order your platform today.http://sdm.link/xeonphi


_______________________________________________
robinhood-support mailing list
robinhood-support@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/robinhood-support


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
robinhood-support mailing list
robinhood-support@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/robinhood-support

Reply via email to