Source code for jwql.instrument_monitors.common_monitors.readnoise_monitor

#! /usr/bin/env python

"""This module contains code for the readnoise monitor, which monitors
the readnoise levels in dark exposures as well as the accuracy of
the pipeline readnoise reference files over time.

For each instrument, the readnoise, technically the correlated double
sampling (CDS) noise, is found by calculating the standard deviation
through a stack of consecutive frame differences in each dark exposure.
The sigma-clipped mean and standard deviation in each of these readnoise
images, as well as histogram distributions, are recorded in the
``<Instrument>ReadnoiseStats`` database table.

Next, each of these readnoise images are differenced with the current
pipeline readnoise reference file to identify the need for new reference
files. A histogram distribution of these difference images, as well as
the sigma-clipped mean and standard deviation, are recorded in the
``<Instrument>ReadnoiseStats`` database table. A png version of these
difference images is also saved for visual inspection.

Author
------
    - Ben Sunnquist

Use
---
    This module can be used from the command line as such:

    ::

        python readnoise_monitor.py
"""

from collections import OrderedDict
import datetime
import logging
import os
import shutil

from astropy.io import fits
from astropy.stats import sigma_clip
from astropy.time import Time
from astropy.visualization import ZScaleInterval
import crds
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt  # noqa: E402 (module level import not at top of file)
import numpy as np  # noqa: E402 (module level import not at top of file)
from pysiaf import Siaf  # noqa: E402 (module level import not at top of file)

from jwql.shared_tasks.shared_tasks import only_one, run_pipeline, run_parallel_pipeline  # noqa: E402 (module level import not at top of file)
from jwql.instrument_monitors import pipeline_tools  # noqa: E402 (module level import not at top of file)
from jwql.utils import instrument_properties, monitor_utils  # noqa: E402 (module level import not at top of file)
from jwql.utils.constants import JWST_INSTRUMENT_NAMES, JWST_INSTRUMENT_NAMES_MIXEDCASE  # noqa: E402 (module level import not at top of file)
from jwql.utils.constants import ON_GITHUB_ACTIONS, ON_READTHEDOCS  # noqa: E402 (module level import not at top of file)
from jwql.utils.logging_functions import log_info, log_fail  # noqa: E402 (module level import not at top of file)
from jwql.utils.monitor_utils import update_monitor_table  # noqa: E402 (module level import not at top of file)
from jwql.utils.permissions import set_permissions  # noqa: E402 (module level import not at top of file)
from jwql.utils.utils import ensure_dir_exists, filesystem_path, get_config, copy_files  # noqa: E402 (module level import not at top of file)

if not ON_GITHUB_ACTIONS and not ON_READTHEDOCS:
    # Need to set up django apps before we can access the models
    import django  # noqa: E402 (module level import not at top of file)
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jwql.website.jwql_proj.settings")
    django.setup()

    # Import * is okay here because this module specifically only contains database models
    # for this monitor
    from jwql.website.apps.jwql.monitor_models.readnoise import *  # noqa: E402 (module level import not at top of file)


[docs] class Readnoise(): """Class for executing the readnoise monitor. This class will search for new dark current files in the file system for each instrument and will run the monitor on these files. The monitor will create a readnoise image for each of the new dark files. It will then perform statistical measurements on these readnoise images, as well as their differences with the current pipeline readnoise reference file, in order to monitor the readnoise levels over time as well as ensure the pipeline readnoise reference file is sufficiently capturing the current readnoise behavior. Results are all saved to database tables. Attributes ---------- output_dir : str Path into which outputs will be placed. working_dir : str Path into which new dark files will be copied to be worked on. query_start : float MJD start date to use for querying MAST. query_end : float MJD end date to use for querying MAST. instrument : str Name of instrument used to collect the dark current data. aperture : str Name of the aperture used for the dark current (e.g. ``NRCA1_FULL``). """ def __init__(self): """Initialize an instance of the ``Readnoise`` class."""
[docs] def determine_pipeline_steps(self): """Determines the necessary JWST pipelines steps to run on a given dark file. Returns ------- pipeline_steps : collections.OrderedDict The pipeline steps to run. """ pipeline_steps = OrderedDict({}) # Determine if the file needs group_scale step run if self.read_pattern not in pipeline_tools.GROUPSCALE_READOUT_PATTERNS: pipeline_steps['group_scale'] = False else: pipeline_steps['group_scale'] = True # Run the DQ step on all files pipeline_steps['dq_init'] = True # Only run the superbias step for NIR instruments if self.instrument.upper() != 'MIRI': pipeline_steps['superbias'] = True else: pipeline_steps['superbias'] = False # Run the refpix step on all files pipeline_steps['refpix'] = True return pipeline_steps
[docs] def file_exists_in_database(self, filename): """Checks if an entry for filename exists in the readnoise stats database. Parameters ---------- filename : str The full path to the uncal filename. Returns ------- file_exists : bool ``True`` if filename exists in the readnoise stats database. """ results = self.stats_table.objects.filter(uncal_filename__iexact=filename).values() return (len(results) != 0)
[docs] def get_amp_stats(self, image, amps): """Calculates the sigma-clipped mean and stddev, as well as the histogram stats in the input image for each amplifier. Parameters ---------- image : numpy.ndarray 2D array on which to calculate statistics. amps : dict Dictionary containing amp boundary coordinates (output from ``amplifier_info`` function) ``amps[key] = [(xmin, xmax, xstep), (ymin, ymax, ystep)]`` Returns ------- amp_stats : dict Contains the image statistics for each amp. """ amp_stats = {} for key in amps: x_start, x_end, x_step = amps[key][0] y_start, y_end, y_step = amps[key][1] # Find sigma-clipped mean/stddev values for this amp amp_data = image[y_start: y_end: y_step, x_start: x_end: x_step] clipped = sigma_clip(amp_data, sigma=3.0, maxiters=5) amp_stats['amp{}_mean'.format(key)] = np.nanmean(clipped) amp_stats['amp{}_stddev'.format(key)] = np.nanstd(clipped) # Find the histogram stats for this amp n, bin_centers = self.make_histogram(amp_data) amp_stats['amp{}_n'.format(key)] = n amp_stats['amp{}_bin_centers'.format(key)] = bin_centers return amp_stats
[docs] def get_metadata(self, filename): """Collect basic metadata from a fits file. Parameters ---------- filename : str Name of fits file to examine. """ header = fits.getheader(filename) try: self.detector = header['DETECTOR'] self.read_pattern = header['READPATT'] self.subarray = header['SUBARRAY'] self.nints = header['NINTS'] self.ngroups = header['NGROUPS'] self.substrt1 = header['SUBSTRT1'] self.substrt2 = header['SUBSTRT2'] self.subsize1 = header['SUBSIZE1'] self.subsize2 = header['SUBSIZE2'] self.date_obs = header['DATE-OBS'] self.time_obs = header['TIME-OBS'] self.expstart = '{}T{}'.format(self.date_obs, self.time_obs) except KeyError as e: logging.error(e)
[docs] def identify_tables(self): """Determine which database tables to use for a run of the readnoise monitor. """ mixed_case_name = JWST_INSTRUMENT_NAMES_MIXEDCASE[self.instrument] self.query_table = eval('{}ReadnoiseQueryHistory'.format(mixed_case_name)) self.stats_table = eval('{}ReadnoiseStats'.format(mixed_case_name))
[docs] def image_to_png(self, image, outname): """Outputs an image array into a png file. Parameters ---------- image : numpy.ndarray 2D image array. outname : str The name given to the output png file. Returns ------- output_filename : str The full path to the output png file. """ output_filename = os.path.join(self.output_data_dir, '{}.png'.format(outname)) # Get image scale limits zscale = ZScaleInterval() vmin, vmax = zscale.get_limits(image) # Plot the image plt.figure(figsize=(12, 12)) im = plt.imshow(image, cmap='gray', origin='lower', vmin=vmin, vmax=vmax) plt.colorbar(im, label='Readnoise Difference (most recent dark - reffile) [DN]') plt.title('{}'.format(outname)) # Save the figure plt.savefig(output_filename, bbox_inches='tight', dpi=200) set_permissions(output_filename) logging.info('\t{} created'.format(output_filename)) return output_filename
[docs] def make_crds_parameter_dict(self): """Construct a paramter dictionary to be used for querying CRDS for the current reffiles in use by the JWST pipeline. Returns ------- parameters : dict Dictionary of parameters, in the format expected by CRDS. """ parameters = {} parameters['INSTRUME'] = self.instrument.upper() parameters['DETECTOR'] = self.detector.upper() parameters['READPATT'] = self.read_pattern.upper() parameters['SUBARRAY'] = self.subarray.upper() parameters['DATE-OBS'] = datetime.date.today().isoformat() current_date = datetime.datetime.now() parameters['TIME-OBS'] = current_date.time().isoformat() return parameters
[docs] def make_histogram(self, data): """Creates a histogram of the input data and returns the bin centers and the counts in each bin. Parameters ---------- data : numpy.ndarray The input data. Returns ------- counts : numpy.ndarray The counts in each histogram bin. bin_centers : numpy.ndarray The histogram bin centers. """ # Calculate the histogram range as that within 5 sigma from the mean data = data.flatten() clipped = sigma_clip(data, sigma=3.0, maxiters=5) mean, stddev = np.nanmean(clipped), np.nanstd(clipped) lower_thresh, upper_thresh = mean - 4 * stddev, mean + 4 * stddev # Some images, e.g. readnoise images, will never have values below zero if (lower_thresh < 0) & (len(data[data < 0]) == 0): lower_thresh = 0.0 # Make the histogram counts, bin_edges = np.histogram(data, bins='auto', range=(lower_thresh, upper_thresh)) bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2 return counts, bin_centers
[docs] def make_readnoise_image(self, data): """Calculates the readnoise for the given input dark current ramp. Parameters ---------- data : numpy.ndarray The input ramp data. The data shape is assumed to be a 4D array in DMS format (integration, group, y, x). Returns ------- readnoise : numpy.ndarray The 2D readnoise image. """ logging.info('\tCreating readnoise image') num_ints, num_groups, num_y, num_x = data.shape # Calculate the readnoise in slices to avoid memory issues on large files. slice_width = 20 cols_idx = np.array(np.arange(num_x)[::slice_width]) readnoise = np.zeros((num_y, num_x)) for idx in cols_idx: # Create a stack of correlated double sampling (CDS) images using input # ramp data, combining multiple integrations if necessary. for integration in range(num_ints): if num_groups % 2 == 0: cds = data[integration, 1::2, :, idx:idx + slice_width] - data[integration, ::2, :, idx:idx + slice_width] else: # Omit the last group if the number of groups is odd cds = data[integration, 1::2, :, idx:idx + slice_width] - data[integration, ::2, :, idx:idx + slice_width][:-1] if integration == 0: cds_stack = cds else: cds_stack = np.concatenate((cds_stack, cds), axis=0) # Calculate readnoise by taking the clipped stddev through CDS stack clipped = sigma_clip(cds_stack, sigma=3.0, maxiters=3, axis=0) readnoise_slice = np.ma.std(clipped, axis=0) # Add the readnoise in this slice to the full readnoise image readnoise[:, idx:idx + slice_width] = readnoise_slice return readnoise
[docs] def process(self, file_list): """The main method for processing darks. See module docstrings for further details. Parameters ---------- file_list : list List of filenames (including full paths) to the dark current files. """ files_to_calibrate = [] for file in file_list: processed_file = file.replace("uncal", "refpix") if not os.path.isfile(processed_file): files_to_calibrate.append(file) # Run the files through the necessary pipeline steps outputs = run_parallel_pipeline(files_to_calibrate, "uncal", "refpix", self.instrument) for filename in file_list: logging.info('\tWorking on file: {}'.format(filename)) # Get relevant header information for this file self.get_metadata(filename) if filename in outputs: processed_file = outputs[filename] else: refpix_file = filename.replace("uncal", "refpix") if os.path.isfile(refpix_file): processed_file = refpix_file else: # Processed file not available logging.warning("Calibrated file {} not found".format(refpix_file)) logging.warning("Skipping file {}".format(filename)) continue # Find amplifier boundaries so per-amp statistics can be calculated _, amp_bounds = instrument_properties.amplifier_info(processed_file, omit_reference_pixels=True) logging.info('\tAmplifier boundaries: {}'.format(amp_bounds)) # Get the ramp data; remove first 5 groups and last group for MIRI to avoid reset/rscd effects cal_data = fits.getdata(processed_file, 'SCI', uint=False) if self.instrument == 'MIRI': cal_data = cal_data[:, 5:-1, :, :] # Make the readnoise image readnoise_outfile = os.path.join(self.output_data_dir, os.path.basename(processed_file.replace('.fits', '_readnoise.fits'))) readnoise = self.make_readnoise_image(cal_data) # fits.writeto(readnoise_outfile, readnoise, overwrite=True) # logging.info('\tReadnoise image saved to {}'.format(readnoise_outfile)) # Calculate the full image readnoise stats clipped = sigma_clip(readnoise, sigma=3.0, maxiters=5) full_image_mean, full_image_stddev = np.nanmean(clipped), np.nanstd(clipped) full_image_n, full_image_bin_centers = self.make_histogram(readnoise) logging.info('\tReadnoise image stats: {:.5f} +/- {:.5f}'.format(full_image_mean, full_image_stddev)) # Calculate readnoise stats in each amp separately amp_stats = self.get_amp_stats(readnoise, amp_bounds) logging.info('\tReadnoise image stats by amp: {}'.format(amp_stats)) # Get the current JWST Readnoise Reference File data parameters = self.make_crds_parameter_dict() try: reffile_mapping = crds.getreferences(parameters, reftypes=['readnoise']) readnoise_file = reffile_mapping['readnoise'] logging.info('\tPipeline readnoise reffile is {}'.format(readnoise_file)) pipeline_readnoise = fits.getdata(readnoise_file) except Exception as e: logging.warning('\tError retrieving pipeline readnoise reffile - assuming all zeros.') logging.warning('\tError {} was raised'.format(e)) pipeline_readnoise = np.zeros(readnoise.shape) # Find the difference between the current readnoise image and the pipeline readnoise reffile, and record image stats. # Sometimes, the pipeline readnoise reffile needs to be cutout to match the subarray. if readnoise.shape != pipeline_readnoise.shape: try: p_substrt1, p_substrt2 = fits.getheader(readnoise_file)['SUBSTRT1'], fits.getheader(readnoise_file)['SUBSTRT2'] except KeyError: p_substrt1, p_substrt2 = 1, 1 x1, y1 = self.substrt1 - p_substrt1, self.substrt2 - p_substrt2 x2, y2 = x1 + self.subsize1, y1 + self.subsize2 pipeline_readnoise = pipeline_readnoise[y1:y2, x1:x2] if pipeline_readnoise.shape != readnoise.shape: logging.warning('\tError cutting out pipeline readnoise - assuming all zeros.') pipeline_readnoise = np.zeros(readnoise.shape) readnoise_diff = readnoise - pipeline_readnoise clipped = sigma_clip(readnoise_diff, sigma=3.0, maxiters=5) diff_image_mean, diff_image_stddev = np.nanmean(clipped), np.nanstd(clipped) diff_image_n, diff_image_bin_centers = self.make_histogram(readnoise_diff) logging.info('\tReadnoise difference image stats: {:.5f} +/- {:.5f}'.format(diff_image_mean, diff_image_stddev)) # Save a png of the readnoise difference image for visual inspection logging.info('\tCreating png of readnoise difference image') readnoise_diff_png = self.image_to_png(readnoise_diff, outname=os.path.basename(readnoise_outfile).replace('.fits', '_diff')) # Construct new entry for this file for the readnoise database table. # Can't insert values with numpy.float32 datatypes into database # so need to change the datatypes of these values. # # Store files as file name only (file path will be built at retrieval based # on runtime configuration) readnoise_db_entry = {'uncal_filename': os.path.basename(filename), 'aperture': self.aperture, 'detector': self.detector, 'subarray': self.subarray, 'read_pattern': self.read_pattern, 'nints': self.nints, 'ngroups': self.ngroups, 'expstart': self.expstart, 'readnoise_filename': os.path.basename(readnoise_outfile), 'full_image_mean': float(full_image_mean), 'full_image_stddev': float(full_image_stddev), 'full_image_n': list(full_image_n.astype(float)), 'full_image_bin_centers': list(full_image_bin_centers.astype(float)), 'readnoise_diff_image': os.path.basename(readnoise_diff_png), 'diff_image_mean': float(diff_image_mean), 'diff_image_stddev': float(diff_image_stddev), 'diff_image_n': list(diff_image_n.astype(float)), 'diff_image_bin_centers': list(diff_image_bin_centers.astype(float)), 'entry_date': datetime.datetime.now() } for key in amp_stats.keys(): if isinstance(amp_stats[key], (int, float)): readnoise_db_entry[key] = float(amp_stats[key]) else: readnoise_db_entry[key] = list(amp_stats[key].astype(float)) # Add this new entry to the readnoise database table entry = self.stats_table(**readnoise_db_entry) entry.save() logging.info('\tNew entry added to readnoise database table') # Remove the raw and calibrated files to save memory space os.remove(filename) os.remove(processed_file)
@log_fail @log_info @only_one(key='readnoise_monitor') def run(self): """The main method. See module docstrings for further details. """ logging.info('Begin logging for readnoise_monitor\n') # Get the output directory and setup a directory to store the data self.output_dir = os.path.join(get_config()['outputs'], 'readnoise_monitor') ensure_dir_exists(os.path.join(self.output_dir, 'data')) self.working_dir = os.path.join(get_config()['working'], 'readnoise_monitor') ensure_dir_exists(os.path.join(self.working_dir, 'data')) # Use the current time as the end time for MAST query self.query_end = Time.now().mjd # Loop over all instruments for instrument in JWST_INSTRUMENT_NAMES: self.instrument = instrument # Identify which database tables to use self.identify_tables() # Get a list of all possible apertures for this instrument siaf = Siaf(self.instrument) possible_apertures = list(siaf.apertures) for aperture in possible_apertures: logging.info('\nWorking on aperture {} in {}'.format(aperture, instrument)) self.aperture = aperture # Locate the record of the most recent MAST search; use this time # (plus a buffer to catch any missing files from the previous # run) as the start time in the new MAST search. most_recent_search = self.most_recent_search() self.query_start = most_recent_search - 70 # Query MAST for new dark files for this instrument/aperture logging.info('\tQuery times: {} {}'.format(self.query_start, self.query_end)) new_entries = monitor_utils.mast_query_darks(instrument, aperture, self.query_start, self.query_end) # Exclude ASIC tuning data len_new_darks = len(new_entries) new_entries = monitor_utils.exclude_asic_tuning(new_entries) len_no_asic = len(new_entries) num_asic = len_new_darks - len_no_asic logging.info("\tFiltering out ASIC tuning files removed {} dark files.".format(num_asic)) logging.info('\tAperture: {}, new entries: {}'.format(self.aperture, len(new_entries))) # Set up a directory to store the data for this aperture self.output_data_dir = os.path.join(self.output_dir, 'data/{}_{}'.format(self.instrument.lower(), self.aperture.lower())) if len(new_entries) > 0: ensure_dir_exists(self.output_data_dir) self.working_data_dir = os.path.join(self.working_dir, 'data/{}_{}'.format(self.instrument.lower(), self.aperture.lower())) if len(new_entries) > 0: ensure_dir_exists(self.working_data_dir) # Get any new files to process new_files = [] checked_files = [] for file_entry in new_entries: output_filename = os.path.join(self.working_data_dir, file_entry['filename'].replace('_dark', '_uncal')) # Sometimes both the dark and uncal name of a file is picked up in new_entries if output_filename in checked_files: logging.info('\t{} already checked in this run.'.format(output_filename)) continue checked_files.append(output_filename) # Dont process files that already exist in the readnoise stats database file_exists = self.file_exists_in_database(output_filename) if file_exists: logging.info('\t{} already exists in the readnoise database table.'.format(output_filename)) continue # Save any new uncal files with enough groups in the output directory; some dont exist in JWQL filesystem try: filename = filesystem_path(file_entry['filename']) uncal_filename = filename.replace('_dark', '_uncal') if not os.path.isfile(uncal_filename): logging.info('\t{} does not exist in JWQL filesystem, even though {} does'.format(uncal_filename, filename)) else: num_groups = fits.getheader(uncal_filename)['NGROUPS'] num_ints = fits.getheader(uncal_filename)['NINTS'] if instrument == 'miri': total_cds_frames = int((num_groups - 6) / 2) * num_ints else: total_cds_frames = int(num_groups / 2) * num_ints # Skip processing if the file doesnt have enough groups/ints to calculate the readnoise. # MIRI needs extra since they omit the first five and last group before calculating the readnoise. if total_cds_frames >= 10: shutil.copy(uncal_filename, self.working_data_dir) logging.info('\tCopied {} to {}'.format(uncal_filename, output_filename)) set_permissions(output_filename) new_files.append(output_filename) else: logging.info('\tNot enough groups/ints to calculate readnoise in {}'.format(uncal_filename)) except FileNotFoundError: logging.info('\t{} does not exist in JWQL filesystem'.format(file_entry['filename'])) # Run the readnoise monitor on any new files if len(new_files) > 0: self.process(new_files) monitor_run = True else: logging.info('\tReadnoise monitor skipped. {} new dark files for {}, {}.'.format(len(new_files), instrument, aperture)) monitor_run = False # Update the query history new_entry = {'instrument': instrument, 'aperture': aperture, 'start_time_mjd': self.query_start, 'end_time_mjd': self.query_end, 'entries_found': len(new_entries), 'files_found': len(new_files), 'run_monitor': monitor_run, 'entry_date': datetime.datetime.now()} stats_entry = self.query_table(**new_entry) stats_entry.save() logging.info('\tUpdated the query history table') logging.info('Readnoise Monitor completed successfully.')
if __name__ == '__main__': module = os.path.basename(__file__).strip('.py') start_time, log_file = monitor_utils.initialize_instrument_monitor(module) monitor = Readnoise() monitor.run() monitor_utils.update_monitor_table(module, start_time, log_file)