Hi,
The problem is that computing the system health statistics for the main admin 
page takes a long time. This will be solved in the next version by removing 
them from the main page and adding a link to them instead. For now, if you 
have v0.99.3, you can replace your webstat.py file with the attached one.
In newer versions, a configuration file (webstat.cfg) with content like this:
----------------------------------------
[general]
visitors_box = True
search_box = True
record_box = True
bibsched_box = True
basket_box = True
alert_box = True
loan_box = True
apache_box = True
uptime_box = True
----------------------------------------
lets you disable these statistics individually by setting the values to False. 
Since that support was added after v0.99.3, the attached patch is a temporary 
solution until you upgrade.
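For reference, here is a minimal sketch of how such a file could be read to 
decide which boxes to compute (this assumes plain ConfigParser semantics; the 
actual code in the newer version may differ):
----------------------------------------
from ConfigParser import ConfigParser

conf = ConfigParser()
conf.read('webstat.cfg')

# Compute an indicator only when its flag is set to True.
for box in ('visitors_box', 'search_box', 'record_box', 'bibsched_box',
            'basket_box', 'alert_box', 'loan_box', 'apache_box', 'uptime_box'):
    if conf.getboolean('general', box):
        print "would gather and display", box
----------------------------------------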
Best regards,
    Carmen
________________________________
From: Genis Musulmanbekov
Sent: Tuesday, 8 November 2011 00:53
To: project-invenio-general (Invenio users)
Subject: hanging-up of statistics

Hi All,
We have a problem with Invenio hanging while the statistics run from the 
administrator menu. The version is 0.99.3.
The same procedure works well on the test server with the same configuration 
but with less content.
Has anyone encountered this situation?

*********************
Genis Musulmanbekov
JINR
RU-141980 Dubna
Russia
e-mail:  [email protected]


## $Id$
##
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

__revision__ = "$Id$"
__lastupdated__ = "$Date$"

import os, time, re, datetime, cPickle, calendar

from invenio import template
from invenio.config import \
     CFG_SITE_NAME, \
     CFG_WEBDIR, \
     CFG_TMPDIR, \
     CFG_SITE_LANG
from invenio.search_engine import get_alphabetically_ordered_collection_list
from invenio.dbquery import run_sql, escape_string
from invenio.bibsched import is_task_scheduled, get_task_ids_by_descending_date, get_task_options

# Imports handling key events
from invenio.webstat_engine import get_keyevent_trend_collection_population
from invenio.webstat_engine import get_keyevent_trend_search_frequency
from invenio.webstat_engine import get_keyevent_trend_search_type_distribution
from invenio.webstat_engine import get_keyevent_trend_download_frequency
from invenio.webstat_engine import get_keyevent_snapshot_apache_processes
from invenio.webstat_engine import get_keyevent_snapshot_bibsched_status
from invenio.webstat_engine import get_keyevent_snapshot_uptime_cmd
from invenio.webstat_engine import get_keyevent_snapshot_sessions

# Imports handling custom events
from invenio.webstat_engine import get_customevent_table
from invenio.webstat_engine import get_customevent_trend
from invenio.webstat_engine import get_customevent_dump

# Imports for handling outputting
from invenio.webstat_engine import create_graph_trend
from invenio.webstat_engine import create_graph_dump

# Imports for handling exports
from invenio.webstat_engine import export_to_python
from invenio.webstat_engine import export_to_csv

TEMPLATES = template.load('webstat')

# Constants
WEBSTAT_CACHE_INTERVAL = 600 # Seconds. The cache_* functions are not affected by this,
                             # nor are files whose events are scheduled for caching by a
                             # BibSched webstatadmin task.
WEBSTAT_RAWDATA_DIRECTORY = CFG_TMPDIR + "/"
WEBSTAT_GRAPH_DIRECTORY = CFG_WEBDIR + "/img/"

TYPE_REPOSITORY = [ ('gnuplot', 'Image - Gnuplot'),
                    ('asciiart', 'Image - ASCII art'),
                    ('asciidump', 'Image - ASCII dump'),
                    ('python', 'Data - Python code', export_to_python),
                    ('csv', 'Data - CSV', export_to_csv) ]
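
# Note: entries with a third element (an export closure) are treated as export
# types by _is_type_export() and _get_export_closure() below.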

# Key event repository, add an entry here to support new key measures.
KEYEVENT_REPOSITORY = { 'collection population':
                          { 'fullname': 'Collection population',
                            'specificname': 'Population in collection "%(collection)s"',
                            'gatherer': get_keyevent_trend_collection_population,
                            'extraparams': {'collection': ('Collection', get_alphabetically_ordered_collection_list)},
                            'cachefilename': 'webstat_%(id)s_%(collection)s_%(timespan)s',
                            'ylabel': 'Number of records',
                            'multiple': None,
                           },
                        'search frequency':
                          { 'fullname': 'Search frequency',
                            'specificname': 'Search frequency',
                            'gatherer': get_keyevent_trend_search_frequency,
                            'extraparams': {},
                            'cachefilename': 'webstat_%(id)s_%(timespan)s',
                            'ylabel': 'Number of searches',
                            'multiple': None,
                           },
                        'search type distribution':
                          { 'fullname': 'Search type distribution',
                            'specificname': 'Search type distribution',
                            'gatherer': get_keyevent_trend_search_type_distribution,
                            'extraparams': {},
                            'cachefilename': 'webstat_%(id)s_%(timespan)s',
                            'ylabel': 'Number of searches',
                            'multiple': ['Simple searches', 'Advanced searches'],
                           },
                        'download frequency':
                          { 'fullname': 'Download frequency',
                            'specificname': 'Download frequency',
                            'gatherer': get_keyevent_trend_download_frequency,
                            'extraparams': {},
                            'cachefilename': 'webstat_%(id)s_%(timespan)s',
                            'ylabel': 'Number of downloads',
                            'multiple': None,
                           }
                       }
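
# A new key event would be added as another entry of the same shape, e.g.
# (hypothetical): 'basket usage': { 'fullname': ..., 'specificname': ...,
#                                   'gatherer': <a trend function from webstat_engine>,
#                                   'extraparams': {}, 'cachefilename': ...,
#                                   'ylabel': ..., 'multiple': None }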

# CLI

def create_customevent(id=None, name=None, cols=[]):
    """
    Creates a new custom event by setting up the necessary MySQL tables.

    @param id: Proposed human-readable id of the new event.
    @type id: str

    @param name: Optionally, a descriptive name.
    @type name: str

    @param cols: Optionally, the name of the additional columns.
    @type cols: [str]

    @return: A status message
    @type: str
    """
    if id is None:
        return "Please specify a human-readable ID for the event."

    # Only accept id and name with standard characters
    if re.search("[^\w]", str(id) + str(name)) is not None:
        return "Please note that both the event id and the event name must be written without any non-standard characters."

    # Make sure the chosen id is not already taken
    if len(run_sql("SELECT NULL FROM staEVENT WHERE id = %s", (id,))) != 0:
        return "Event id [%s] already exists! Aborted." % id

    # Check if the cols are valid titles
    for argument in cols:
        if (argument == "creation_time") or (argument == "id"):
            return "Invalid column title: %s! Aborted." % argument

    # Insert a new row into the events table describing the new event
    sql_param = [id]
    if name is not None:
        sql_name = "%s"
        sql_param.append(name)
    else:
        sql_name = "NULL"
    if len(cols) != 0:
        sql_cols = "%s"
        sql_param.append(cPickle.dumps(cols))
    else:
        sql_cols = "NULL"
    run_sql("INSERT INTO staEVENT (id, name, cols) VALUES (%s, " + sql_name + ", " + sql_cols + ")",
            tuple(sql_param))

    tbl_name = get_customevent_table(id)

    # Create a table for the new event
    sql_query = ["CREATE TABLE %s (" % tbl_name]
    sql_query.append("id MEDIUMINT unsigned NOT NULL auto_increment,")
    sql_query.append("creation_time TIMESTAMP DEFAULT NOW(),")
    for argument in cols:
        arg = escape_string(argument)
        sql_query.append("%s MEDIUMTEXT NULL," % arg)
        sql_query.append("INDEX %s (%s(50))," % (arg, arg))
    sql_query.append("PRIMARY KEY (id))")
    sql_str = ' '.join(sql_query)
    run_sql(sql_str)

    # We're done! Print notice containing the name of the event.
    return ("Event table [%s] successfully created.\n" +
            "Please use event id [%s] when registering an event.") % (tbl_name, id)

def destroy_customevent(id=None):
    """
    Removes an existing custom event by destroying the MySQL tables and
    the event data that might be around. Use with caution!

    @param id: Human-readable id of the event to be removed.
    @type id: str

    @return: A status message
    @type: str
    """
    if id is None:
        return "Please specify an existing event id."

    # Check if the specified id exists
    if len(run_sql("SELECT NULL FROM staEVENT WHERE id = %s", (id,))) == 0:
        return "Event id [%s] doesn't exist! Aborted." % id
    else:
        tbl_name = get_customevent_table(id)
        run_sql("DROP TABLE %s" % tbl_name)
        run_sql("DELETE FROM staEVENT WHERE id = %s", (id,))
        return ("Event with id [%s] was successfully destroyed.\n" +
                "Table [%s], with content, was destroyed.") % (id, tbl_name)

def register_customevent(id, *arguments):
    """
    Registers a custom event. Will add to the database's event tables
    as created by create_customevent().

    This function constitutes the "function hook" that should be
    called throughout CDS Invenio where one wants to register a
    custom event! Refer to the help section on the admin web page.

    @param id: Human-readable id of the event to be registered
    @type id: str

    @param *arguments: The rest of the parameters of the function call
    @type *arguments: [params]
    """
    res = run_sql("SELECT CONCAT('staEVENT', number),cols FROM staEVENT WHERE id = %s",  (id,))
    if not res:
        return # the id doesn't exist
    tbl_name = res[0][0]
    if res[0][1]:
        col_titles = cPickle.loads(res[0][1])
    else:
        col_titles = []
    if len(col_titles) != len(arguments[0]):
        return # the number of arguments differs from the number of columns

    # Make sql query
    if len(arguments[0]) != 0:
        sql_param = []
        sql_query = ["INSERT INTO %s (" % tbl_name]
        for title in col_titles:
            sql_query.append("%s" % title)
            sql_query.append(",")
        sql_query.pop() # del the last ','
        sql_query.append(") VALUES (")
        for argument in arguments[0]:
            sql_query.append("%s")
            sql_query.append(",")
            sql_param.append(argument)
        sql_query.pop() # del the last ','
        sql_query.append(")")
        sql_str = ''.join(sql_query)
        run_sql(sql_str, tuple(sql_param))
    else:
        run_sql("INSERT INTO %s () VALUES ()" % tbl_name)

def cache_keyevent_trend(ids=[]):
    """
    Runs the rawdata gatherer for the specific key events.
    Intended to be run mainly by the BibSched daemon interface.

    For a specific id, all possible timespans' rawdata is gathered.

    @param ids: The key event ids that are subject to caching.
    @type ids: []
    """
    args = {}
    timespans = _get_timespans()

    for id in ids:
        args['id'] = id
        extraparams = KEYEVENT_REPOSITORY[id]['extraparams']

        # Construct all combinations of extraparams and store them as
        # [{param name: arg value}] so that we can loop over them and
        # pattern-replace each dictionary against KEYEVENT_REPOSITORY[id]['cachefilename'].
        combos = [[]]
        for x in [[(param, x[0]) for x in extraparams[param][1]()] for param in extraparams]:
            combos = [i + [y] for y in x for i in combos]
        combos = [dict(x) for x in combos]
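        # Example: an extraparams entry whose option list yields
        # [('Books', ...), ('Pictures', ...)] gives
        # combos = [{'collection': 'Books'}, {'collection': 'Pictures'}];
        # with no extraparams, combos = [{}] so the timespan loop still runs once.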

        for i in range(len(timespans)):
            # Get timespans parameters
            args['timespan'] = timespans[i][0]
            args.update({ 't_start': timespans[i][2], 't_end': timespans[i][3], 'granularity': timespans[i][4],
                          't_format': timespans[i][5], 'xtic_format': timespans[i][6] })

            for combo in combos:
                args.update(combo)

                # Create unique filename for this combination of parameters
                filename = KEYEVENT_REPOSITORY[id]['cachefilename'] \
                            % dict([(param, re.subn("[^\w]", "_", args[param])[0]) for param in args])

                # Create closure of gatherer function in case cache needs to be refreshed
                gatherer = lambda: KEYEVENT_REPOSITORY[id]['gatherer'](args)

                # Get data file from cache, ALWAYS REFRESH DATA!
                _get_file_using_cache(filename, gatherer, True).read()

    return True

def cache_customevent_trend(ids=[]):
    """
    Runs the rawdata gatherer for the specific custom events.
    Intended to be run mainly by the BibSched daemon interface.

    For a specific id, all possible timespans' rawdata is gathered.

    @param ids: The custom event ids that are subject to caching.
    @type ids: []
    """
    args = {}
    timespans = _get_timespans()

    for id in ids:
        args['id'] = id
        args['cols'] = []

        for i in range(len(timespans)):
            # Get timespans parameters
            args['timespan'] = timespans[i][0]
            args.update({ 't_start': timespans[i][2], 't_end': timespans[i][3], 'granularity': timespans[i][4],
                          't_format': timespans[i][5], 'xtic_format': timespans[i][6] })

            # Create unique filename for this combination of parameters
            filename = "webstat_customevent_%(id)s_%(timespan)s" \
                        % { 'id': re.subn("[^\w]", "_", id)[0], 'timespan': re.subn("[^\w]", "_", args['timespan'])[0] }

            # Create closure of gatherer function in case cache needs to be refreshed
            gatherer = lambda: get_customevent_trend(args)

            # Get data file from cache, ALWAYS REFRESH DATA!
            _get_file_using_cache(filename, gatherer, True).read()

    return True

# WEB

def perform_request_index(ln=CFG_SITE_LANG):
    """
    Displays some informative text and the lists of key and custom events.
    (The system health box is disabled in this patched version, since
    gathering its statistics made the main page slow.)
    """
    out = TEMPLATES.tmpl_welcome(ln=ln)

    # Prepare the health base data
    # health_indicators = []
    # now = datetime.datetime.now()
    # yesterday = (now-datetime.timedelta(days=1)).strftime("%Y-%m-%d")
    # today = now.strftime("%Y-%m-%d")
    # tomorrow = (now+datetime.timedelta(days=1)).strftime("%Y-%m-%d")

    # Append session information to the health box
    # sess = get_keyevent_snapshot_sessions()
    # health_indicators.append(("Total active visitors", sum(sess)))
    # health_indicators.append(("    Logged in", sess[1]))
    # health_indicators.append(None)

    # Append searches information to the health box
    # args = { 't_start': today, 't_end': tomorrow, 'granularity': "day", 't_format': "%Y-%m-%d" }
    # searches = get_keyevent_trend_search_type_distribution(args)
    # health_indicators.append(("Searches since midnight", sum(searches[0][1])))
    # health_indicators.append(("    Simple", searches[0][1][0]))
    # health_indicators.append(("    Advanced", searches[0][1][1]))
    # health_indicators.append(None)

    # Append new records information to the health box
    # args = { 'collection': CFG_SITE_NAME, 't_start': today, 't_end': tomorrow, 'granularity': "day", 't_format': "%Y-%m-%d" }
    # try: tot_records = get_keyevent_trend_collection_population(args)[0][1]
    # except IndexError: tot_records = 0
    # args = { 'collection': CFG_SITE_NAME, 't_start': yesterday, 't_end': today, 'granularity': "day", 't_format': "%Y-%m-%d" }
    # try: new_records = tot_records - get_keyevent_trend_collection_population(args)[0][1]
    # except IndexError: new_records = 0
    # health_indicators.append(("Total records", tot_records))
    # health_indicators.append(("    New records since midnight", new_records))
    # health_indicators.append(None)

    # Append status of BibSched queue to the health box
    # bibsched = get_keyevent_snapshot_bibsched_status()
    # health_indicators.append(("BibSched queue", sum([x[1] for x in bibsched])))
    # for item in bibsched:
    #     health_indicators.append(("    " + item[0], str(item[1])))
    # health_indicators.append(None)

    # Append number of Apache processes to the health box
    # health_indicators.append(("Apache processes", get_keyevent_snapshot_apache_processes()))

    # Append uptime and load average to the health box
    # health_indicators.append(("Uptime cmd", get_keyevent_snapshot_uptime_cmd()))

    # Display the health box
    # out += TEMPLATES.tmpl_system_health(health_indicators, ln=ln)

    # Produce a list of the key statistics
    out += TEMPLATES.tmpl_keyevent_list(ln=ln)

    # Display the custom statistics
    out += TEMPLATES.tmpl_customevent_list(_get_customevents(), ln=ln)

    return out

def perform_display_keyevent(id=None, args={}, req=None, ln=CFG_SITE_LANG):
    """
    Display key events using a certain output type over the given time span.

    @param id: The id of the key event that is to be displayed.
    @type id: str

    @param args: { param name: argument value }
    @type args: { str: str }

    @param req: The Apache request object, necessary for export redirect.
    @type req:
    """
    # Get all the option lists: { parameter name: [(argument internal name, argument full name)]}
    options = dict([(param,
                     (KEYEVENT_REPOSITORY[id]['extraparams'][param][0],
                      KEYEVENT_REPOSITORY[id]['extraparams'][param][1]())) for param in
                    KEYEVENT_REPOSITORY[id]['extraparams']] +
                   [('timespan', ('Time span', _get_timespans())), ('format', ('Output format', _get_formats()))])
    # Order of options
    order = [param for param in KEYEVENT_REPOSITORY[id]['extraparams']] + ['timespan', 'format']
    # Build a dictionary for the selected parameters: { parameter name: argument internal name }
    choosed = dict([(param, args[param]) for param in KEYEVENT_REPOSITORY[id]['extraparams']] +
                   [('timespan', args['timespan']), ('format', args['format'])])
    # Send to template to prepare event customization FORM box
    out = TEMPLATES.tmpl_keyevent_box(options, order, choosed, ln=ln)

    # Arguments OK?

    # Check for existence. If nothing is chosen, only show the FORM box from above.
    if len(choosed) == 0:
        return out

    # Make sure extraparams are valid, if any
    for param in choosed:
        if not choosed[param] in [x[0] for x in options[param][1]]:
            return out + TEMPLATES.tmpl_error('Please specify a valid value for parameter "%s".'
                                               % options[param][0], ln=ln)

    # Arguments OK beyond this point!

    # Get unique name for caching purposes (make sure that the params used in the filename are safe!)
    filename = KEYEVENT_REPOSITORY[id]['cachefilename'] \
               % dict([(param, re.subn("[^\w]", "_", choosed[param])[0]) for param in choosed] +
                      [('id', re.subn("[^\w]", "_", id)[0])])

    # Get time parameters from repository
    # TODO: This should quite possibly be lifted out (webstat_engine?), in any case a cleaner repository
    _, t_fullname, t_start, t_end, granularity, t_format, xtic_format = \
        options['timespan'][1][[x[0] for x in options['timespan'][1]].index(choosed['timespan'])]
    args = { 't_start': t_start, 't_end': t_end, 'granularity': granularity,
             't_format': t_format, 'xtic_format': xtic_format }
    for param in KEYEVENT_REPOSITORY[id]['extraparams']:
        args[param] = choosed[param]

    # Create closure of frequency function in case cache needs to be refreshed
    gatherer = lambda: KEYEVENT_REPOSITORY[id]['gatherer'](args)

    # Determine if this particular file is due for scheduled caching; in that
    # case we must not allow refreshing of the rawdata.
    allow_refresh = not _is_scheduled_for_cacheing(id)

    # Get data file from cache (refresh if necessary)
    data = eval(_get_file_using_cache(filename, gatherer, allow_refresh=allow_refresh).read())

    # If type indicates an export, run the export function and we're done
    if _is_type_export(choosed['format']):
        _get_export_closure(choosed['format'])(data, req)
        return out

    # Prepare the graph settings that are being passed on to grapher
    settings = { "title": KEYEVENT_REPOSITORY[id]['specificname'] % choosed,
                  "xlabel": t_fullname + ' (' + granularity + ')',
                  "ylabel": KEYEVENT_REPOSITORY[id]['ylabel'],
                  "xtic_format": xtic_format,
                  "format": choosed['format'],
                  "multiple": KEYEVENT_REPOSITORY[id]['multiple'] }

    return out + _perform_display_event(data, os.path.basename(filename), settings, ln=ln)

def perform_display_customevent(ids=[], args={}, req=None, ln=CFG_SITE_LANG):
    """
    Display custom events using a certain output type over the given time span.

    @param ids: The ids for the custom events that are to be displayed.
    @type ids: [str]

    @param args: { param name: argument value }
    @type args: { str: str }

    @param req: The Apache request object, necessary for export redirect.
    @type req:
    """
    # Get all the option lists: { parameter name: [(argument internal name, argument full name)]}
    options = { 'ids': ('Custom event', _get_customevents()),
                'timespan': ('Time span', _get_timespans()),
                'format': ('Output format', _get_formats(True)),
                'cols': ('Column', _get_customevent_cols()) }
    # Order of options
    order = ['ids', 'timespan', 'format', 'cols']
    # Build a dictionary for the selected parameters: { parameter name: argument internal name }
    choosed = { 'ids': ids, 'timespan': args['timespan'], 'format': args['format'], 'cols': "" }
    # Send to template to prepare event customization FORM box
    out = TEMPLATES.tmpl_customevent_box(options, order, choosed, ln=ln)

    # Arguments OK?

    # Make sure extraparams are valid, if any
    for param in order:
        if param == 'cols': continue
        legalvalues = [x[0] for x in options[param][1]]

        if type(choosed[param]) is list:
            # If the argument is a list, like the content of 'ids', every value has to be checked
            if len(choosed[param]) == 0:
                return out + TEMPLATES.tmpl_error('Please specify a valid value for parameter "%s".' % options[param][0], ln=ln)
            for arg in choosed[param]:
                if not arg in legalvalues:
                    return out + TEMPLATES.tmpl_error('Please specify a valid value for parameter "%s".' % options[param][0], ln=ln)
        else:
            if not choosed[param] in legalvalues:
                return out + TEMPLATES.tmpl_error('Please specify a valid value for parameter "%s".' % options[param][0], ln=ln)

    # Calculate cols
    cols = []
    if args.has_key('cols') and args.has_key('col_value'):
        cols = zip(args['cols'], args['col_value'])

    # Fetch time parameters from repository
    _, t_fullname, t_start, t_end, granularity, t_format, xtic_format = \
        options['timespan'][1][[x[0] for x in options['timespan'][1]].index(choosed['timespan'])]
    args = { 't_start': t_start, 't_end': t_end, 'granularity': granularity,
            't_format': t_format, 'xtic_format': xtic_format, 'cols': cols }

    data_unmerged = []

    # ASCII dump data is different from the standard formats
    if choosed['format'] == 'asciidump':
        filename = "webstat_customevent_" + re.subn("[^\w]", "", ''.join(ids) + "_" + choosed['timespan'] + "_" + '-'.join([ ':'.join(col) for col in args['cols']]) + "_asciidump")[0]
        args['ids'] = ids
        gatherer = lambda: get_customevent_dump(args)
        data = eval(_get_file_using_cache(filename, gatherer).read())
    else:
        for id in ids:
            # Get unique name for the rawdata file (wash arguments!)
            filename = "webstat_customevent_" + re.subn("[^\w]", "", id + "_" + choosed['timespan'] + "_" + '-'.join([ ':'.join(col) for col in args['cols']]))[0]

            # Add the current id to the gatherer's arguments
            args['id'] = id

            # Prepare raw data gatherer, if cache needs refreshing.
            gatherer = lambda: get_customevent_trend(args)

            # Determine if this particular file is due for scheduled caching; in that
            # case we must not allow refreshing of the rawdata.
            allow_refresh = not _is_scheduled_for_cacheing(id)

            # Get file from cache, and evaluate it to trend data
            data_unmerged.append(eval(_get_file_using_cache(filename, gatherer, allow_refresh=allow_refresh).read()))

        # Merge data from the unmerged trends into the final destination
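        # e.g. data_unmerged = [[('2011-11-07', 1), ('2011-11-08', 2)],
        #                       [('2011-11-07', 3), ('2011-11-08', 4)]]
        # yields data = [('2011-11-07', (1, 3)), ('2011-11-08', (2, 4))]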
        data = [(x[0][0], tuple([y[1] for y in x])) for x in zip(*data_unmerged)]

    # If type indicates an export, run the export function and we're done
    if _is_type_export(choosed['format']):
        _get_export_closure(choosed['format'])(data, req)
        return out

    # Get full names, for those that have them
    names = []
    events = _get_customevents()
    for id in ids:
        temp = events[[x[0] for x in events].index(id)]
        if temp[1] != None:
            names.append(temp[1])
        else:
            names.append(temp[0])

    # Generate a filename for the graph
    filename = "tmp_webstat_customevent_" + ''.join([re.subn("[^\w]", "", id)[0] for id in ids]) + "_" + choosed['timespan']

    settings = { "title": 'Custom event',
                 "xlabel": t_fullname + ' (' + granularity + ')',
                 "ylabel": "Action quantity",
                 "xtic_format": xtic_format,
                 "format": choosed['format'],
                 "multiple": (type(ids) is list) and names or [] }

    return out + _perform_display_event(data, os.path.basename(filename), settings, ln=ln)

def perform_display_customevent_help(ln=CFG_SITE_LANG):
    """Display the custom event help"""
    return TEMPLATES.tmpl_customevent_help(ln=ln)

# INTERNALS

def _perform_display_event(data, name, settings, ln=CFG_SITE_LANG):
    """
    Retrieves a graph.

    @param data: The trend/dump data
    @type data: [(str, str|int|(str|int,...))] | [(str|int,...)]

    @param name: The name of the trend (to be used as basename of graph file)
    @type name: str

    @param settings: Dictionary of graph parameters
    @type settings: dict

    @return: The URL of the graph (ASCII or image)
    @type: str
    """
    path = WEBSTAT_GRAPH_DIRECTORY + "tmp_" + name

    # Generate, and insert using the appropriate template
    if settings["format"] != "asciidump":
        create_graph_trend(data, path, settings)
        if settings["format"] == "asciiart":
            return TEMPLATES.tmpl_display_event_trend_ascii(settings["title"], path, ln=ln)
        else:
            if settings["format"] == "gnuplot":
                try:
                    import Gnuplot
                except ImportError:
                    return 'Gnuplot is not installed. Returning ASCII art.' +\
                           TEMPLATES.tmpl_display_event_trend_ascii(settings["title"], path, ln=ln)

            return TEMPLATES.tmpl_display_event_trend_image(settings["title"], path, ln=ln)
    else:
        path += "_asciidump"
        create_graph_dump(data, path, settings)
        return TEMPLATES.tmpl_display_event_trend_ascii(settings["title"], path, ln=ln)

def _get_customevents():
    """
    Retrieves registered custom events from the database.

    @return: [(internal name, readable name)]
    @type: [(str, str)]
    """
    return [(x[0], x[1]) for x in run_sql("SELECT id, name FROM staEVENT")]

def _get_timespans(dt=None):
    """
    Helper function that generates possible time spans to be put in the
    drop-down in the generation box. Computes possible years, and also some
    pre-defined simpler values. Some items in the returned list also tweak the
    output graph, if any, since such values are closely related to the nature
    of the time span.

    @param dt: A datetime object indicating the current date and time
    @type dt: datetime.datetime

    @return [(Internal name, Readable name, t_start, t_end, granularity, format, xtic_format)]
    @type [(str, str, str, str, str, str, str)]
    """
    if dt is None:
        dt = datetime.datetime.now()

    format = "%Y-%m-%d"
    # Helper function to return a timediff object reflecting a diff of x days
    d_diff = lambda x: datetime.timedelta(days=x)
    # Helper function to return the number of days in the month x months ago
    d_in_m = lambda x: calendar.monthrange(((dt.month-x<1) and dt.year-1 or dt.year),
                                           (((dt.month-1)-x)%12+1))[1]
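    # e.g. with dt in November 2011, d_in_m(1) is 31 (October 2011) and
    # d_in_m(11) is 31 (December 2010, wrapping into the previous year)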
    to_str = lambda x: x.strftime(format)
    dt_str = to_str(dt)

    spans = [("today", "Today",
              dt_str,
              to_str(dt+d_diff(1)),
              "hour", format, "%H"),
             ("this week", "This week",
              to_str(dt-d_diff(dt.weekday())),
              to_str(dt+d_diff(1)),
              "day", format, "%a"),
             ("last week", "Last week",
              to_str(dt-d_diff(dt.weekday()+7)),
              to_str(dt-d_diff(dt.weekday())),
              "day", format, "%a"),
             ("this month", "This month",
              to_str(dt-d_diff(dt.day)+d_diff(1)),
              to_str(dt+d_diff(1)),
              "day", format, "%d"),
             ("last month", "Last month",
              to_str(dt-d_diff(d_in_m(1))-d_diff(dt.day)+d_diff(1)),
              to_str(dt-d_diff(dt.day)+d_diff(1)),
              "day", format, "%d"),
             ("last three months", "Last three months",
              to_str(dt-d_diff(d_in_m(1))-d_diff(d_in_m(2))-d_diff(d_in_m(3))-d_diff(dt.day)+d_diff(1)),
              to_str(dt-d_diff(dt.day)+d_diff(1)),
              "month", format, "%b")]

    # Get the first year, as indicated by the contents of bibrec
    try:
        y1 = run_sql("SELECT creation_date FROM bibrec ORDER BY creation_date LIMIT 1")[0][0].year
    except IndexError:
        y1 = dt.year

    y2 = time.localtime()[0]
    spans.extend([(str(x-1), str(x), str(x), str(x+1), "month", "%Y", "%b") for x in  range(y2, y1-1, -1)])

    return spans

def _get_formats(with_dump=False):
    """
    Helper function to retrieve a CDS Invenio friendly list of all possible
    output types (displaying and exporting) from the central repository as
    stored in the variable TYPE_REPOSITORY at the top of this module.

    @param with_dump: Optionally displays the custom-event only type 'asciidump'
    @type with_dump: bool

    @return [(Internal name, Readable name)]
    @type [(str, str)]
    """
    # The third tuple value is internal
    if with_dump:
        return [(x[0], x[1]) for x in TYPE_REPOSITORY]
    else:
        return [(x[0], x[1]) for x in TYPE_REPOSITORY if x[0] != 'asciidump']

def _get_customevent_cols():
    """
    List of all the different column names used in custom events.

    @return: [(internal name, readable name)]
    @type: [(str, str)]
    """
    cols = []
    for x in run_sql("SELECT cols FROM staEVENT"):
        if x[0]:
            cols.extend(cPickle.loads(x[0]))

    return [ (name, name) for name in set(cols) ]

def _is_type_export(typename):
    """
    Helper function that consults the central repository of types to determine
    whether the input parameter represents an export type.

    @param typename: Internal type name
    @type typename: str

    @return: Information whether a certain type exports data
    @type: bool
    """
    return len(TYPE_REPOSITORY[[x[0] for x in TYPE_REPOSITORY].index(typename)]) == 3

def _get_export_closure(typename):
    """
    Helper function that for a certain type, gives back the corresponding export
    closure.

    @param typename: Internal type name
    @type type: str

    @return: Closure that exports data to the type's format
    @type: function
    """
    return TYPE_REPOSITORY[[x[0] for x in TYPE_REPOSITORY].index(typename)][2]

def _get_file_using_cache(filename, closure, force=False, allow_refresh=True):
    """
    Uses the CDS Invenio cache, i.e. the tempdir, to see if there's a recent
    cached version of the sought-after file in there. If not, use the closure to
    compute a new one, and return that instead. Relies on the module constant
    WEBSTAT_CACHE_INTERVAL.

    @param filename: The name of the file that might be cached
    @type filename: str

    @param closure: A function, that executed will return data to be cached. The
                    function should return either a string, or something that
                    makes sense after being interpreted with str().
    @type closure: function

    @param force: Force a refresh, overriding the cache interval check.
    @type force: bool

    @param allow_refresh: Allow a stale cached file to be refreshed.
    @type allow_refresh: bool

    @return: An open read-only file handle to the cached file
    @type: file
    """
    # Absolute path to cached files, might not exist.
    filename = os.path.normpath(WEBSTAT_RAWDATA_DIRECTORY + filename)

    # Get the modification time of the cached file (if any).
    try:
        mtime = os.path.getmtime(filename)
    except OSError:
        # No cached version of this particular file exists, thus the modification
        # time is set to 0 for easy logic below.
        mtime = 0

    # Consider refreshing the cache if FORCE, or NO CACHE AT ALL, or CACHE EXISTS AND REFRESH IS ALLOWED.
    if force or mtime == 0 or (mtime > 0 and allow_refresh):

        # Is the file modification time recent enough?
        if force or (time.time() - mtime > WEBSTAT_CACHE_INTERVAL):

            # No! Use closure to compute new content
            content = closure()

            # Cache the data
            open(filename, 'w').write(str(content))

    # Return the (perhaps just) cached file
    return open(filename, 'r')

def _is_scheduled_for_cacheing(id):
    """
    @param id: The event id
    @type id: str

    @return: Indication of whether the event id is scheduled for BibSched execution.
    @type: bool
    """
    if not is_task_scheduled('webstatadmin'):
        return False

    # Get the task id
    try:
        task_id = get_task_ids_by_descending_date('webstatadmin', ['RUNNING', 'WAITING'])[0]
    except IndexError:
        return False
    else:
        args = get_task_options(task_id)
        return id in (args['keyevents'] + args['customevents'])
