Source code for gcpy.examples.diagnostics.compare_diags

#!/usr/bin/env python
r"""
Example script that can compare diagnostics from two different netCDF
collections.  Similar to compute_diagnostics.ipynb, but can be used
without having to open a Jupyter notebook.  The parameters for the
configuration are specified in a YAML file whose name is passed
as an argument.

Examples
--------

#. Copy the :file:`compare_diags.yml` file to a different folder
#. In your copy of :file:`compare_diags.yml`, edit labels, file paths,
   etc. for the Ref and Dev versions.  Also select the types
   of output that you desire.
#. Run the following commands:

.. code-block:: console

   $ conda activate gcpy_env
   (gcpy_env) $ python -m gcpy.examples.diagnostics.compare_diags \
                /path/to/yaml/file
"""
import os
import sys
import warnings
import numpy as np
from gcpy.util import add_missing_variables, compare_varnames, \
    dataset_reader, read_config_file, rename_and_flip_gchp_rst_vars
from gcpy.constants import SKIP_THESE_VARS
from gcpy.plot.compare_single_level import compare_single_level
from gcpy.plot.compare_zonal_mean import compare_zonal_mean

# Tell matplotlib not to look for an X-window
os.environ["QT_QPA_PLATFORM"] = "offscreen"

# Suppress harmless run-time warnings (mostly about underflow in division)
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# ======================================================================
# Methods
# ======================================================================


[docs]
def create_dirs(config):
    """
    Create directories for plots and weights if they do not exist.

    Parameters
    ----------
    config : dict
        Configuration information read from a YAML file.
    """

    # Extract fields from the config object
    do_level_plot = config["options"]["level_plot"]["create_plot"]
    do_zonal_mean = config["options"]["zonal_mean"]["create_plot"]
    plotsdir = config["paths"]["plots_dir"]
    weightsdir = config["paths"]["weights_dir"]

    # Create plotsdir and weightsdir if necessary
    if do_level_plot or do_zonal_mean:
        if not os.path.isdir(plotsdir):
            os.mkdir(plotsdir)
            if not os.path.isdir(weightsdir):
                os.mkdir(weightsdir)




[docs]
def read_data(config):
    """
    Read data from the Ref and Dev datasets.

    Parameters
    ----------
    config : dict
        Configuration information read from a YAML file.

    Returns
    -------
    data : dict
        Contains Ref and Dev data as xarray Dataset fields.
    """

    # Root data path
    rootdir = config["paths"]["main_dir"]

    # Define paths to Ref & Dev files
    ref_file = os.path.join(
        rootdir,
        config["data"]["ref"]["dir"],
        config["data"]["ref"]["subdir"],
        config["data"]["ref"]["file"]
    )
    dev_file = os.path.join(
        rootdir,
        config["data"]["dev"]["dir"],
        config["data"]["dev"]["subdir"],
        config["data"]["dev"]["file"]
    )

    # Function to read the data
    reader = dataset_reader(
        multi_files=False,
        verbose=False
    )

    # Read Ref data
    try:
        refdata = reader(
            ref_file,
            drop_variables=SKIP_THESE_VARS
        )
    except FileNotFoundError as exc:
        msg = "Error reading " + ref_file
        raise FileNotFoundError(msg) from exc

    # Read Dev data
    try:
        devdata = reader(
            dev_file,
            drop_variables=SKIP_THESE_VARS
        )
    except FileNotFoundError as exc:
        msg = "Error reading " + dev_file
        raise FileNotFoundError(msg) from exc

    # If the data is from a GCHP restart file, rename variables and
    # flip levels to match the GEOS-Chem Classic naming and level
    # conventions.  Otherwise no changes will be made.
    refdata = rename_and_flip_gchp_rst_vars(refdata)
    devdata = rename_and_flip_gchp_rst_vars(devdata)

    # Define dictionary for return
    data = {
        "ref": refdata,
        "dev": devdata
    }

    return data




[docs]
def print_totals_and_diffs(config, refdata, devdata, varlist):
    """
    Prints totals and differences of data.  This is a quick way
    to see if there are nonzero differences between Ref and Dev
    datasets.

    Parameters
    ----------
    config : dict
        Configuration information read from a YAML file.
    refdata : xarray.Dataset
        Contains data from the Ref model run.
    devdata : xarray.Dataset
        Contains data from the Dev model run.
    varlist : list of str
        Contains a list of data variables.
    """

    # Get quantities from the config object
    filename = config["options"]["totals_and_diffs"]["filename"]
    do_screen = config["options"]["totals_and_diffs"]["print_to_screen"]

    # Determine if we will print percent or absolute differences
    do_percent_diff = False
    if any(x in config["options"]["totals_and_diffs"]["diff_type"] \
           for x in ["percent", "pctdiff", "%"]):
        do_percent_diff = True

    # Determine if we will print to a file
    do_file = len(filename) > 0
    if not do_file and not do_screen:
        print("...Printing to screen and file are both disabled!")
        return

    # Open a text file for output if a filename is specified
    if do_file:
        pathname = os.path.join(
            config["paths"]["plots_dir"],
            filename
        )
        ofile = open(pathname, 'w', encoding="UTF-8")


    # Percent threshold for reporting differences
    threshold = config["options"]["totals_and_diffs"]["small_diff_threshold"]

    # If we are skipping variables with small differences,
    # then alert the user (print to screen & file)
    if config["options"]["totals_and_diffs"]["skip_small_diffs"]:
        diff_label = f"|absolute difference| > {threshold}"
        if do_percent_diff:
            diff_label = f"|percent difference| > {threshold} %"
        line = f"... Only showing variables with {diff_label}"
        if do_file:
            print(line, file=ofile)
        else:
            print(line)

    # Print top header
    diff_label = "Dev - Ref"
    if do_percent_diff:
        diff_label = "(Dev-Ref)/Ref % diff"
    line = "{} Ref={} Dev={} {}".format(
        "Variable".ljust(22),
        config["data"]["ref"]["label"].ljust(20),
        config["data"]["dev"]["label"].ljust(20),
        diff_label
    )
    if do_file:
        print(line, file=ofile)
    if do_screen:
        print(line)

    # Always print nonzero differences, but only print zero differences
    # if the configuration option "skip_zero_diffs" is False.
    for var in varlist:

        # Absolute difference
        refsum = np.sum(refdata[var].values)
        devsum = np.sum(devdata[var].values)

        # Absolute difference
        absdiff = np.sum(devdata[var].values - refdata[var].values)

        # Compute percent difference if needed
        # otherwise we'll use the absolute difference for plotting
        diff = absdiff
        if do_percent_diff:
            diff = 0.0
            if np.abs(refsum) > 0.0:
                diff = (absdiff / refsum) * 100.0

        # Line to be printed
        line = "{} : {} | {} | {} ".format(
            var.ljust(20),
            str(refsum).ljust(22),
            str(devsum).ljust(22),
            diff
        )

        # Skip small values
        if np.abs(diff) > threshold:
            if do_file:
                print(line, file=ofile)
            if do_screen:
                print(line)

        # Or don't skip any values
        else:
            if not config["options"]["totals_and_diffs"]["skip_small_diffs"]:
                if do_file:
                    print(line, file=ofile)
                if do_screen:
                    print(line)

    # Close file
    if do_file:
        ofile.close()




[docs]
def compare_data(config, data):
    """
    Compares data from two different xarray datasets.

    Parameters
    ----------
    config : dict
        Configuration information read from a YAML file.
    data : dict
        Contains Ref and Dev data as xarray Dataset fields.
    """

    # Get xarray datasets from the data object
    refdata = data["ref"]
    devdata = data["dev"]

    # For each variable in refdata, but not non devdata, add an
    # array of NaN values to refdata. Ditto for devdata.  This will
    # allow us to show that the variable is missing in either
    # refdata or devdata.
    [refdata, devdata] = add_missing_variables(refdata, devdata)

    # Get the list of common variable names
    verbose = config["options"]["verbose"]
    quiet = not verbose
    vardict = compare_varnames(refdata, devdata, quiet=quiet)
    varlist_level = vardict["commonvars2D"] + vardict["commonvars3D"]
    varlist_zonal = vardict["commonvars3D"]

    # Restrict variables to those containing a given substring
    restrict_vars = config["options"]["restrict_vars"]
    if len(restrict_vars) > 0:
        varlist_level = [v for v in varlist_level if v in restrict_vars]
        varlist_zonal = [v for v in varlist_zonal if v in restrict_vars]

    # Determine if we need to flip levels in the vertical
    flip_ref = False
    flip_dev = False
    if "flip_levels" in config["data"]["ref"]:
        flip_ref = config["data"]["ref"]["flip_levels"]
    if "flip_levels" in config["data"]["dev"]:
        flip_dev = config["data"]["dev"]["flip_levels"]        
        
    # ==================================================================
    # Generate the single level comparison plot
    # ==================================================================
    if config["options"]["level_plot"]["create_plot"]:
        print('... Creating single-level plots')
        pdfname = os.path.join(
            config["paths"]["plots_dir"],
            config["options"]["level_plot"]["pdfname"]
        )
        compare_single_level(
            refdata,
            config["data"]["ref"]["label"],
            devdata,
            config["data"]["dev"]["label"],
            flip_ref=flip_ref,
            flip_dev=flip_dev,
            ilev=config["options"]["level_plot"]["level_to_plot"],
            varlist=varlist_level,
            pdfname=pdfname,
            weightsdir=config["paths"]["weights_dir"],
            n_job=config["options"]["n_cores"],
            verbose=verbose
        )

    # ==================================================================
    # Generate the zonal mean comparison plot
    # ==================================================================
    if config["options"]["zonal_mean"]["create_plot"]:
        print('... Creating zonal mean plots')
        pdfname = os.path.join(
            config["paths"]["plots_dir"],
            config["options"]["zonal_mean"]["pdfname"]
        )
        compare_zonal_mean(
            refdata,
            config["data"]["ref"]["label"],
            devdata,
            config["data"]["dev"]["label"],
            flip_ref=flip_ref,
            flip_dev=flip_dev,
            varlist=varlist_zonal,
            pdfname=pdfname,
            weightsdir=config["paths"]["weights_dir"],
            n_job=config["options"]["n_cores"],
            verbose=verbose
        )

    # ==================================================================
    # Print totals for each quantity
    # ==================================================================
    if config["options"]["totals_and_diffs"]["create_table"]:
        print('... Printing totals and differences')
        print_totals_and_diffs(
            config,
            refdata,
            devdata,
            varlist_level
        )




[docs]
def main(argv):
    """
    Main program, reads data and calls compare_data to make plots.
    """

    # Take the config file as the 2nd argument (or use a default)
    # NOTE: sys.argv[0] is always the program name!
    if len(argv) == 2:
        config_file = argv[1]
    else:
        config_file = "compare_diags.yml"

    # Get paths and options from the configuration file
    config = read_config_file(config_file)

    # Create dirs for plots & weights (if necessary)
    create_dirs(config)

    # Read and compare data
    compare_data(config, read_data(config))



# Only execute when we run as a standalone script
if __name__ == "__main__":
    main(sys.argv)