Source code for jwql.jwql_monitors.monitor_cron_jobs

#! /usr/bin/env python

"""This module monitors the status of the ``jwql`` monitors via their
log files. Basic results (e.g. ``success``, ``failure``) are collected
and placed in a ``bokeh`` table for display on the web app.

Authors
-------

    - Bryan Hilbert

Use
---

    This module can be executed as such:

    ::

        from jwql.jwql_monitors import monitor_cron_jobs
        monitor_cron_jobs.status()

Dependencies
------------

    The user must have a configuration file named ``config.json``
    placed in the ``jwql`` directory.
"""

from datetime import datetime
import logging
import numpy as np
import os
import time

from bokeh.io import save, output_file
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import DataTable, DateFormatter, HTMLTemplateFormatter, TableColumn

from jwql.utils.logging_functions import log_info, log_fail
from jwql.utils.permissions import set_permissions
from jwql.utils.utils import get_config
from jwql.utils.monitor_utils import initialize_instrument_monitor, update_monitor_table
from jwql.utils.utils import ensure_dir_exists
from jwql.utils.protect_module import lock_module

SETTINGS = get_config()



[docs]
def create_table(status_dict):
    """Create interactive ``bokeh`` table containing the logfile status
    results.

    Parameters
    ----------
    status_dict : dict
        Nested dictionary with status results from all logfiles
    """
    # Rearrange the nested dictionary into a non-nested dict for the table
    filenames = []
    dates = []
    missings = []
    results = []
    for key in status_dict:
        filenames.append(status_dict[key]['logname'])
        dates.append(datetime.fromtimestamp(status_dict[key]['latest_time']))
        missings.append(str(status_dict[key]['missing_file']))
        results.append(status_dict[key]['status'])

    # div to color the boxes in the status column
    success_template = """
    <div style="background:<%=
        (function colorfromstr(){
            if(value == "success"){
                return("green")}
            else{return("red")}
            }()) %>;
        color: white">
    <%= value %></div>
    """

    # div to color the boxes in the column for possibly late logfiles
    missing_template = """
    <div style="background:<%=
        (function colorfrombool(){
            if(value == "True"){
                return("orange")}
            else{return("green")}
            }()) %>;
        color: white">
    <%= value %></div>
    """
    success_formatter = HTMLTemplateFormatter(template=success_template)
    missing_formatter = HTMLTemplateFormatter(template=missing_template)

    data = dict(name=list(status_dict.keys()), filename=filenames, date=dates, missing=missings,
                result=results)
    source = ColumnDataSource(data)

    datefmt = DateFormatter(format="RFC-2822")
    columns = [
        TableColumn(field="name", title="Monitor Name", width=200),
        TableColumn(field="filename", title="Most Recent File", width=350),
        TableColumn(field="date", title="Most Recent Time", width=200, formatter=datefmt),
        TableColumn(field="missing", title="Possible Missing File", width=200, formatter=missing_formatter),
        TableColumn(field="result", title="Status", width=100, formatter=success_formatter),
    ]
    data_table = DataTable(source=source, columns=columns, width=800, height=280, index_position=None)

    # Get output directory for saving the table files
    output_dir = SETTINGS['outputs']
    output_filename = 'cron_status_table'

    # verify/create output sub-directory
    output_dir = os.path.join(output_dir, 'monitor_cron_jobs')
    ensure_dir_exists(output_dir)

    # Save full html
    html_outfile = os.path.join(output_dir, '{}.html'.format(output_filename))
    output_file(html_outfile)
    save(data_table)
    try:
        set_permissions(html_outfile)
    except PermissionError:
        logging.warning('Unable to set permissions for {}'.format(html_outfile))
    logging.info('Saved Bokeh full HTML file: {}'.format(html_outfile))




[docs]
def find_latest(logfiles):
    """Given a list of log files in a directory, identify the most
    recent. The way that ``jwql.utils.logging_functions.make_log_file``
    is set up, log files for all monitors are guaranteed to be the name
    of the monitor followed by the datetime that they were run, so we
    should be able to simply sort the filenames and the last will be the
    most recent.

    Parameters
    ----------
    logfiles : list
        List of logfiles in the directory

    Returns
    -------
    latest : str
        Filename of the most recent file

    latest_time : float
        Time associated with the most recent log file
    """
    latest = sorted(logfiles)[-1]
    latest_time = os.path.getctime(latest)
    return (latest, latest_time)




[docs]
def get_cadence(filenames):
    """Calculate the cadence of the log files in a given directory.
    Use timestamps

    Parameters
    ----------
    filenames : list
        List of log files to examine

    Returns
    -------
    mean_delta : float
        Mean time in seconds between the appearance of consecutive log
        files

    stdev_delta : float
        Standard deviation in seconds between the appearance of
        consecutive log files
    """
    minimum_log_num = 3  # Set to a low value for now since we don't have many logfiles
    times = [os.path.getctime(filename) for filename in filenames]
    if len(times) >= minimum_log_num:
        sorted_times = np.array(sorted(times))
        delta_times = sorted_times[1:] - sorted_times[0:-1]
        mean_delta = np.mean(delta_times)
        stdev_delta = np.std(delta_times)
    else:
        # If there are < minimum_log_num logfiles, then let's assume we can't
        # get a reliable measure of cadence. Fall back to a value of
        # 1 year between files, to avoid accidentally flagging this monitor
        # as running late in the subsequent check
        mean_delta = 31556736.0  # sec per year
        stdev_delta = 31556736.0  # sec per year
    return mean_delta, stdev_delta




[docs]
def missing_file_check(avg_time_between, uncertainty, latest_file):
    """Given the name of the most recent log file, along with the
    historical average time between files and the stdev of the time
    between files, determine whether we expect a more recent log file
    than the file given. This could hint at a problem with the cron job
    used to create the log files.

    Parameters
    ----------
    avg_time_between : float
        Average number of seconds between log files

    uncertainty : float
        Standard deviation of the number of seconds between log files

    latest_file : str
        Name of the most recent log file

    Returns
    -------
    late : bool
        True = We expect a more recent file than that given
        False =  It is reasonable that the file given is the most
        recent
    """
    latest_time = os.path.getctime(latest_file)
    now = time.time()
    time_since_latest = now - latest_time
    if time_since_latest > (avg_time_between + 3 * uncertainty):
        late = True
    else:
        late = False
    return late




[docs]
@log_fail
@log_info
def status(production_mode=True):
    """Main function: determine the status of the instrument montiors
    by examining log files.

    Parameters
    ----------
    production_mode : bool
        If ``True``, look in the main log directory. If ``False``, look
        in the ``dev`` log file directory.

    Returns
    -------
    logfile_status : dict
        Nested dictionary containing the status for all monitors. Top
        level keys include all monitors. Within a given monitor, the
        value is a dictionary containing 'missing_file' and 'status'
        keys. 'missing_file' is a boolean describing whether or not
        there is a suspected missing log file based on the timestamps
        of the existing files. 'status' is a string that is either
        'success' or 'failure'.
    """
    # Begin logging
    logging.info("Beginning cron job status monitor")

    # Get main logfile path
    log_path = SETTINGS['log_dir']

    # If we are in development mode, the log files are in a slightly
    # different location than in production mode
    if production_mode:
        log_path = os.path.join(log_path, 'prod')
    else:
        log_path = os.path.join(log_path, 'dev')

    # Set up a dictionary to keep track of results
    logfile_status = {}

    # Get a list of the directories under the main logging directory.
    generator = os.walk(log_path, topdown=True)

    # Loop over monitors
    for subdir, subsubdir, filenames in generator:
        # When running in production mode, skip the 'dev' subdirectory,
        # as it contains the development version of the monitor logs
        if production_mode:
            subsubdir[:] = [dirname for dirname in subsubdir if dirname != 'dev']

        if len(filenames) > 0:
            monitor_name = subdir.split('/')[-1]

            # Avoid monitor_cron_jobs itseft
            if monitor_name != 'monitor_cron_jobs':

                log_file_list = [os.path.join(subdir, filename) for filename in filenames]

                # Find the cadence of the monitor
                delta_time, stdev_time = get_cadence(log_file_list)

                # Identify the most recent log file
                latest_log, latest_log_time = find_latest(log_file_list)

                # Check to see if we expect a file more recent than the latest
                missing_file = missing_file_check(delta_time, stdev_time, latest_log)
                if missing_file:
                    logging.warning('Expected a more recent {} logfile than {}'
                                    .format(monitor_name, os.path.basename(latest_log)))

                # Check the file for success/failure
                result = success_check(latest_log)
                logging.info('{}: Latest log file indicates {}'.format(monitor_name, result))

                # Add results to the dictionary
                logfile_status[monitor_name] = {'logname': os.path.basename(latest_log),
                                                'latest_time': latest_log_time,
                                                'missing_file': missing_file, 'status': result}

    # Create table of results using Bokeh
    create_table(logfile_status)
    logging.info('Cron job status monitor completed successfully.')




[docs]
def success_check(filename):
    """Parse the given log file and check whether the script execution
    was successful or not

    Parameters
    ----------
    filename : str
        Name of the log file to parse

    Returns
    -------
    execution : str
        ``success`` or ``failure``
    """
    with open(filename, 'r') as file_obj:
        all_lines = file_obj.readlines()
    final_line = all_lines[-1]
    if 'complete' in final_line.lower():
        execution = 'success'
    else:
        execution = 'failure'
    return execution




[docs]
@lock_module
def protected_code():
    """Protected code ensures only 1 instance of module will run at any given time"""
    module = os.path.basename(__file__).strip('.py')
    start_time, log_file = initialize_instrument_monitor(module)

    status()
    update_monitor_table(module, start_time, log_file)



if __name__ == '__main__':
    protected_code()