#! /usr/bin/env python
"""This module monitors the status of the ``jwql`` monitors via their
log files. Basic results (e.g. ``success``, ``failure``) are collected
and placed in a ``bokeh`` table for display on the web app.
Authors
-------
- Bryan Hilbert
Use
---
This module can be executed as such:
::
from jwql.jwql_monitors import monitor_cron_jobs
monitor_cron_jobs.status()
Dependencies
------------
The user must have a configuration file named ``config.json``
placed in the ``jwql`` directory.
"""
from datetime import datetime
import logging
import numpy as np
import os
import time
from bokeh.io import save, output_file
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import DataTable, DateFormatter, HTMLTemplateFormatter, TableColumn
from jwql.utils.logging_functions import log_info, log_fail
from jwql.utils.permissions import set_permissions
from jwql.utils.utils import get_config
from jwql.utils.monitor_utils import initialize_instrument_monitor, update_monitor_table
from jwql.utils.utils import ensure_dir_exists
from jwql.utils.protect_module import lock_module
SETTINGS = get_config()
def create_table(status_dict):
    """Create interactive ``bokeh`` table containing the logfile status
    results.

    Parameters
    ----------
    status_dict : dict
        Nested dictionary with status results from all logfiles. Each
        value must provide ``logname``, ``latest_time``,
        ``missing_file`` and ``status`` keys.
    """
    # Rearrange the nested dictionary into a non-nested dict for the table
    filenames = []
    dates = []
    missings = []
    results = []
    for key in status_dict:
        filenames.append(status_dict[key]['logname'])
        dates.append(datetime.fromtimestamp(status_dict[key]['latest_time']))
        missings.append(str(status_dict[key]['missing_file']))
        results.append(status_dict[key]['status'])

    # div to color the boxes in the status column
    success_template = """
    <div style="background:<%=
    (function colorfromstr(){
    if(value == "success"){
    return("green")}
    else{return("red")}
    }()) %>;
    color: white">
    <%= value %></div>
    """

    # div to color the boxes in the column for possibly late logfiles
    missing_template = """
    <div style="background:<%=
    (function colorfrombool(){
    if(value == "True"){
    return("orange")}
    else{return("green")}
    }()) %>;
    color: white">
    <%= value %></div>
    """
    success_formatter = HTMLTemplateFormatter(template=success_template)
    missing_formatter = HTMLTemplateFormatter(template=missing_template)

    data = dict(name=list(status_dict.keys()), filename=filenames, date=dates, missing=missings,
                result=results)
    source = ColumnDataSource(data)

    datefmt = DateFormatter(format="RFC-2822")
    columns = [
        TableColumn(field="name", title="Monitor Name", width=200),
        TableColumn(field="filename", title="Most Recent File", width=350),
        TableColumn(field="date", title="Most Recent Time", width=200, formatter=datefmt),
        TableColumn(field="missing", title="Possible Missing File", width=200, formatter=missing_formatter),
        TableColumn(field="result", title="Status", width=100, formatter=success_formatter),
    ]
    data_table = DataTable(source=source, columns=columns, width=800, height=280, index_position=None)

    # Get output directory for saving the table files
    output_dir = SETTINGS['outputs']
    output_filename = 'cron_status_table'

    # verify/create output sub-directory
    output_dir = os.path.join(output_dir, 'monitor_cron_jobs')
    ensure_dir_exists(output_dir)

    # Save full html
    html_outfile = os.path.join(output_dir, '{}.html'.format(output_filename))
    output_file(html_outfile)
    save(data_table)
    # set_permissions may fail on shared filesystems; log and continue
    # rather than abort the whole monitor run
    try:
        set_permissions(html_outfile)
    except PermissionError:
        logging.warning('Unable to set permissions for {}'.format(html_outfile))
    logging.info('Saved Bokeh full HTML file: {}'.format(html_outfile))
def find_latest(logfiles):
    """Given a list of log files in a directory, identify the most
    recent. The way that ``jwql.utils.logging_functions.make_log_file``
    is set up, log files for all monitors are guaranteed to be the name
    of the monitor followed by the datetime that they were run, so we
    should be able to simply sort the filenames and the last will be the
    most recent.

    Parameters
    ----------
    logfiles : list
        List of logfiles in the directory

    Returns
    -------
    latest : str
        Filename of the most recent file

    latest_time : float
        Time associated with the most recent log file
    """
    # Lexicographic sort is sufficient because filenames embed the run
    # datetime after the monitor name
    latest = sorted(logfiles)[-1]
    latest_time = os.path.getctime(latest)
    return (latest, latest_time)
def get_cadence(filenames):
    """Calculate the cadence of the log files in a given directory,
    using the files' creation timestamps.

    Parameters
    ----------
    filenames : list
        List of log files to examine

    Returns
    -------
    mean_delta : float
        Mean time in seconds between the appearance of consecutive log
        files

    stdev_delta : float
        Standard deviation in seconds between the appearance of
        consecutive log files
    """
    minimum_log_num = 3  # Set to a low value for now since we don't have many logfiles

    # Check the count first so we never stat files whose timestamps
    # would be discarded anyway
    if len(filenames) >= minimum_log_num:
        sorted_times = np.array(sorted(os.path.getctime(filename) for filename in filenames))
        delta_times = sorted_times[1:] - sorted_times[0:-1]
        mean_delta = np.mean(delta_times)
        stdev_delta = np.std(delta_times)
    else:
        # If there are < minimum_log_num logfiles, then let's assume we can't
        # get a reliable measure of cadence. Fall back to a value of
        # 1 year between files, to avoid accidentally flagging this monitor
        # as running late in the subsequent check
        mean_delta = 31556736.0  # sec per year
        stdev_delta = 31556736.0  # sec per year
    return mean_delta, stdev_delta
def missing_file_check(avg_time_between, uncertainty, latest_file):
    """Given the name of the most recent log file, along with the
    historical average time between files and the stdev of the time
    between files, determine whether we expect a more recent log file
    than the file given. This could hint at a problem with the cron job
    used to create the log files.

    Parameters
    ----------
    avg_time_between : float
        Average number of seconds between log files

    uncertainty : float
        Standard deviation of the number of seconds between log files

    latest_file : str
        Name of the most recent log file

    Returns
    -------
    late : bool
        True = We expect a more recent file than that given
        False = It is reasonable that the file given is the most
        recent
    """
    latest_time = os.path.getctime(latest_file)
    now = time.time()
    time_since_latest = now - latest_time

    # Flag as late only when the elapsed time exceeds the historical
    # cadence by more than three standard deviations
    late = time_since_latest > (avg_time_between + 3 * uncertainty)
    return late
@log_fail
@log_info
def status(production_mode=True):
    """Main function: determine the status of the instrument monitors
    by examining log files.

    Parameters
    ----------
    production_mode : bool
        If ``True``, look in the main log directory. If ``False``, look
        in the ``dev`` log file directory.

    Returns
    -------
    logfile_status : dict
        Nested dictionary containing the status for all monitors. Top
        level keys include all monitors. Within a given monitor, the
        value is a dictionary containing 'missing_file' and 'status'
        keys. 'missing_file' is a boolean describing whether or not
        there is a suspected missing log file based on the timestamps
        of the existing files. 'status' is a string that is either
        'success' or 'failure'.
    """
    # Begin logging
    logging.info("Beginning cron job status monitor")

    # Get main logfile path
    log_path = SETTINGS['log_dir']

    # If we are in development mode, the log files are in a slightly
    # different location than in production mode
    if production_mode:
        log_path = os.path.join(log_path, 'prod')
    else:
        log_path = os.path.join(log_path, 'dev')

    # Set up a dictionary to keep track of results
    logfile_status = {}

    # Get a list of the directories under the main logging directory.
    generator = os.walk(log_path, topdown=True)

    # Loop over monitors
    for subdir, subsubdir, filenames in generator:

        # When running in production mode, skip the 'dev' subdirectory,
        # as it contains the development version of the monitor logs.
        # Mutating subsubdir in place prunes os.walk's traversal.
        if production_mode:
            subsubdir[:] = [dirname for dirname in subsubdir if dirname != 'dev']

        if len(filenames) > 0:
            # basename is portable, unlike splitting on '/'
            monitor_name = os.path.basename(subdir)

            # Avoid monitor_cron_jobs itself
            if monitor_name != 'monitor_cron_jobs':
                log_file_list = [os.path.join(subdir, filename) for filename in filenames]

                # Find the cadence of the monitor
                delta_time, stdev_time = get_cadence(log_file_list)

                # Identify the most recent log file
                latest_log, latest_log_time = find_latest(log_file_list)

                # Check to see if we expect a file more recent than the latest
                missing_file = missing_file_check(delta_time, stdev_time, latest_log)
                if missing_file:
                    logging.warning('Expected a more recent {} logfile than {}'
                                    .format(monitor_name, os.path.basename(latest_log)))

                # Check the file for success/failure
                result = success_check(latest_log)
                logging.info('{}: Latest log file indicates {}'.format(monitor_name, result))

                # Add results to the dictionary
                logfile_status[monitor_name] = {'logname': os.path.basename(latest_log),
                                                'latest_time': latest_log_time,
                                                'missing_file': missing_file, 'status': result}

    # Create table of results using Bokeh
    create_table(logfile_status)
    logging.info('Cron job status monitor completed successfully.')

    # Return the collected results, as documented above
    return logfile_status
def success_check(filename):
    """Parse the given log file and check whether the script execution
    was successful or not.

    Parameters
    ----------
    filename : str
        Name of the log file to parse

    Returns
    -------
    execution : str
        ``success`` or ``failure``
    """
    with open(filename, 'r') as file_obj:
        all_lines = file_obj.readlines()

    # An empty log file means the monitor never logged anything; treat
    # it as a failure rather than raising IndexError below
    if not all_lines:
        return 'failure'

    # A successful run ends with a line containing 'complete'
    final_line = all_lines[-1]
    if 'complete' in final_line.lower():
        execution = 'success'
    else:
        execution = 'failure'
    return execution
@lock_module
def protected_code():
    """Protected code ensures only 1 instance of module will run at any given time."""
    # str.strip('.py') removes a *character set*, not a suffix (it would
    # mangle names like 'spy.py'); splitext drops the extension safely
    module = os.path.splitext(os.path.basename(__file__))[0]
    start_time, log_file = initialize_instrument_monitor(module)

    status()
    update_monitor_table(module, start_time, log_file)
# Script entry point: run the lock-protected monitor
if __name__ == '__main__':
    protected_code()