#!/usr/bin/env python3
"""
Specific utilities for creating plots from GEOS-Chem benchmark simulations.
"""
import os
import warnings
import itertools
import gc
import numpy as np
import pandas as pd
import xarray as xr
import sparselt.esmf
import sparselt.xr
from joblib import Parallel, delayed
from tabulate import tabulate
from gcpy.regrid import create_regridders
from gcpy.grid import get_troposphere_mask
from gcpy.util import \
add_bookmarks_to_pdf, add_missing_variables, add_nested_bookmarks_to_pdf, \
array_equals, compare_varnames, create_blank_dataarray, dataset_reader, \
dataset_mean, divide_dataset_by_dataarray, get_area_from_dataset, \
get_emissions_varnames, get_filepath, get_variables_from_dataset, \
insert_text_into_file, make_directory, print_totals, read_config_file, \
read_species_metadata, rename_and_flip_gchp_rst_vars, \
replace_whitespace, unique_values, verify_variable_type, wrap_text
from gcpy.units import convert_units
from gcpy.constants import \
COL_WIDTH, ENCODING, MW_AIR_g, SKIP_THESE_VARS, TABLE_WIDTH
from gcpy.plot.compare_single_level import compare_single_level
from gcpy.plot.compare_zonal_mean import compare_zonal_mean
from gcpy.benchmark.modules.benchmark_utils import \
AOD_SPC, EMISSION_SPC, EMISSION_INV, add_lumped_species_to_dataset, \
archive_lumped_species_definitions, get_species_categories, \
archive_species_categories, rename_speciesconc_to_speciesconcvv
# Suppress numpy divide by zero warnings to prevent output spam
np.seterr(divide="ignore", invalid="ignore")
[docs]
def create_total_emissions_table(
refdata,
refstr,
devdata,
devstr,
species,
spcdb_files,
outfilename,
ref_interval=[2678400.0],
dev_interval=[2678400.0],
template="Emis{}_",
refmetdata=None,
devmetdata=None,
):
"""
Creates a table of emissions totals (by sector and by inventory)
for a list of species in contained in two data sets. The data sets,
which typically represent output from two different model versions,
are usually contained in netCDF data files.
Parameters
----------
refdata : xr.Dataset
The first data set to be compared (aka "Reference" or "Ref").
refstr : str
A string that can be used to identify refdata
(e.g. a model version number or other identifier).
devdata : xr.Dataset
The second data set to be compared (aka "Development" or "Dev").
devstr : str
A string that can be used to identify the data set specified
by devfile (e.g. a model version number or other identifier).
species : dict
Dictionary containing the name of each species and the target
unit that emissions will be converted to. The format of
species is as follows:
{ species_name: target_unit", etc. }
where "species_name" and "target_unit" are strs.
spcdb_files : list
Paths to species_database.yml files in Ref & Dev rundirs.
outfilename : str
Name of the text file which will contain the table of
emissions totals.
ref_interval : float, optional
The length of the ref data interval in seconds. By default, interval
is set to the number of seconds in a 31-day month (86400 * 31),
which corresponds to typical benchmark simulation output.
Default value: [2678400.0]
dev_interval : float, optional
The length of the dev data interval in seconds. By default, interval
is set to the number of seconds in a 31-day month (86400 * 31),
which corresponds to typical benchmark simulation output.
Default value: [2678400.0]
template : str, optional
Template for the diagnostic names that are contained both
"Reference" and "Development" data sets. If not specified,
template will be set to "Emis{}", where {} will be replaced
by the species name.
Default value: "Emis{}_"
ref_area_varname : str, optional
Name of the variable containing the grid box surface areas
(in m2) in the ref dataset.
Default value: 'AREA'
dev_area_varname : str, optional
Name of the variable containing the grid box surface areas
(in m2) in the dev dataset.
Default value: 'AREA'
refmetdata : xr.Dataset, optional
Dataset containing ref meteorology and area.
Default value: None
devmetdata : xr.Dataset, optional
Dataset containing dev meteorology and area.
Default value: None
Notes
-----
This method is mainly intended for model benchmarking purposes,
rather than as a general-purpose tool.
Species metadata (such as molecular weights) are read from a
YAML file called "species_database.yml".
"""
# ==================================================================
# Initialization
# ==================================================================
verify_variable_type(refdata, xr.Dataset)
verify_variable_type(refstr, str)
verify_variable_type(devdata, xr.Dataset)
verify_variable_type(devstr, str)
verify_variable_type(species, dict)
verify_variable_type(spcdb_files, list)
verify_variable_type(outfilename, str)
# Get ref area [m2]
if "AREA" in refdata.data_vars.keys():
refarea = refdata["AREA"]
elif refmetdata is not None:
refarea = refmetdata["Met_AREAM2"]
else:
msg = "AREA variable is not in the ref Dataset and " + \
"optional dataset containing Met_AREAM2 is not passed!"
raise ValueError(msg)
# Get dev area [m2]
if "AREA" in devdata.data_vars.keys():
devarea = devdata["AREA"]
elif devmetdata is not None:
devarea = devmetdata["Met_AREAM2"]
else:
msg = "AREA variable is not in the dev Dataset and optional " + \
"dataset containing Met_AREAM2 is not passed!"
raise ValueError(msg)
# Read the species database files in the Ref & Dev rundirs,
# and return a dict for each containing the species metadata.
ref_metadata, dev_metadata = read_species_metadata(
spcdb_files,
quiet=True
)
# Replace whitespace in the ref and dev labels
refstr = replace_whitespace(refstr)
devstr = replace_whitespace(devstr)
# ==================================================================
# Get the list of emission variables for which we will print totals
# ==================================================================
# Find all common variables between the two datasets
# and get the lists of variables only in Ref and only in Dev,
vardict = compare_varnames(refdata, devdata, quiet=True)
cvars = vardict["commonvars"]
refonly = vardict["refonly"]
devonly = vardict["devonly"]
# Make sure that Ref and Dev datasets have the same variables.
# Variables that are in Ref but not in Dev will be added to Dev
# with all missing values (NaNs). And vice-versa.
[refdata, devdata] = add_missing_variables(refdata, devdata)
# =================================================================
# Open the file for output
# =================================================================
# Create file
try:
f = open(outfilename, "w", encoding=ENCODING)
except (IOError, OSError, FileNotFoundError) as e:
raise e(f"Could not open {outfilename} for writing!") from e
# Write a placeholder to the file that denotes where
# the list of species with differences will be written
placeholder = "@%% insert diff status here %%@"
print(f"{placeholder}\n\n", file=f)
# Define a list for differences
diff_list = []
# =================================================================
# Loop through all of the species are in species_dict
# =================================================================
for species_name, target_units in species.items():
# Get a list of emission variable names for each species
diagnostic_template = template.replace("{}", species_name)
varnames = get_emissions_varnames(cvars, diagnostic_template)
# Also add variables that might be in either Ref or Dev
# but not the other. This will allow us to print totals
# for all species (and print NaN for the missing ones).
if len(refonly) > 0:
matching = [v for v in refonly if diagnostic_template in v]
varnames = varnames + matching
if len(devonly) > 0:
matching = [v for v in devonly if diagnostic_template in v]
varnames = varnames + matching
# Sort the list again to account for new variables added above
varnames.sort()
# If no emissions are found, then skip to next species
if len(varnames) == 0:
msg = f"No emissions found for {species_name} ... skippping"
print(msg)
continue
# Check if there is a total emissions variable in the list
vartot = [v for v in varnames if "_TOTAL" in v.upper()]
# Push the total variable to the last list element
# so that it will be printed last of all
if len(vartot) == 1:
varnames.append(varnames.pop(varnames.index(vartot[0])))
# Title strings
if "Inv" in template:
print(f"Computing inventory totals for {species_name}")
title0 = f"for inventory {species_name}"
title1 = f"### Emissions totals {title0} [Tg]"
else:
print(f"Computing emissions totals for {species_name}")
title0 = f"for species {species_name}"
title1 = f"### Emissions totals {title0} [Tg]"
title2 = f"### Ref = {refstr}"
title3 = f"### Dev = {devstr}"
# Print header to file
print("#" * TABLE_WIDTH, file=f)
print(f"{title1 : <{TABLE_WIDTH-3}}{'###'}", file=f)
print(f"{title2 : <{TABLE_WIDTH-3}}{'###'}", file=f)
print(f"{title3 : <{TABLE_WIDTH-3}}{'###'}", file=f)
print("#" * TABLE_WIDTH, file=f)
print(f"{'' : <{COL_WIDTH-1}}{'Ref' : >{COL_WIDTH}}{'Dev' : >{COL_WIDTH}}{'Dev - Ref' : >{COL_WIDTH}}{'% diff' : >{COL_WIDTH}} {'diffs'}", file=f)
# =============================================================
# Loop over all emissions variables corresponding to this
# species and print their totals in Ref and Dev to the file.
# =============================================================
for v in varnames:
if "Inv" in template:
spc_name = v.split("_")[1]
else:
spc_name = species_name
# Get metadata for the given species
ref_species_metadata = ref_metadata.get(spc_name)
dev_species_metadata = dev_metadata.get(spc_name)
if ref_species_metadata is None and dev_species_metadata is None:
print(f"No metadata found for {spc_name} ... skipping")
continue
# Convert units of Ref and Dev and save to numpy ndarray objects
# (or set to NaN if the variable is not found in Ref or Dev)
if v in refonly and v not in devonly:
# Convert units of Ref
refarray = convert_units(
refdata[v],
spc_name,
ref_species_metadata,
target_units,
interval=ref_interval,
area_m2=refarea,
)
# Set Dev to NaN (missing values) everywhere
devarray = create_blank_dataarray(
name=refdata[v].name,
sizes=devdata.sizes,
coords=devdata.coords,
attrs=refdata[v].attrs,
)
elif v in devonly and v not in refonly:
# Convert units of Dev
devarray = convert_units(
devdata[v],
spc_name,
dev_species_metadata,
target_units,
interval=dev_interval,
area_m2=devarea,
)
# Set Ref to NaN (missing values) everywhere
refarray = create_blank_dataarray(
name=devdata[v].name,
sizes=refdata.sizes,
coords=refdata.coords,
attrs=devdata[v].attrs,
)
else:
# Convert units of both Ref and Dev
refarray = convert_units(
refdata[v],
spc_name,
ref_species_metadata,
target_units,
interval=ref_interval,
area_m2=refarea,
)
devarray = convert_units(
devdata[v],
spc_name,
dev_species_metadata,
target_units,
interval=dev_interval,
area_m2=devarea,
)
# ==========================================================
# Print emission totals for Ref and Dev
# ==========================================================
print_totals(
refarray,
devarray,
f,
diff_list
)
# Add newlines before going to the next species
print(file=f)
print(file=f)
# =================================================================
# Cleanup and quit
# =================================================================
# Close file
f.close()
# Reopen file and replace placeholder with list of diffs
insert_text_into_file(
filename=outfilename,
search_text=placeholder,
replace_text=diff_list_to_text(
refstr,
devstr,
diff_list),
width=TABLE_WIDTH
)
[docs]
def create_global_mass_table(
refdata,
refstr,
devdata,
devstr,
varlist,
met_and_masks,
spcdb_files,
ref_hdr_label="",
dev_hdr_label="",
trop_only=False,
outfilename="GlobalMass_TropStrat.txt",
verbose=False,
):
"""
Creates a table of global masses for a list of species in contained in
two data sets. The data sets, which typically represent output from two
different model versions, are usually contained in netCDF data files.
Parameters
----------
refdata : xr.Dataset
The first data set to be compared (aka "Reference").
refstr : str
A string that can be used to identify refdata
(e.g. a model version number or other identifier).
devdata : xr.Dataset
The second data set to be compared (aka "Development").
devstr : str
A string that can be used to identify the data set specified
by devfile (e.g. a model version number or other identifier).
varlist : list of str
List of species concentration variable names to include
in the list of global totals.
met_and_masks : dict of xr.DataArray
Dictionary containing the meteorological variables and
masks for the Ref and Dev datasets.
spcdb_files : list
Paths to species_database.yml files in Ref & Dev rundirs.
ref_hdr_label : str, optional
Label for Ref, placed after refstr in the file header.
Default value: ""
dev_hdr_label : str, optional
Label for Dev, placed after devstr in the file header.
Default value: ""
trop_only : bool, optional
Set this switch to True if you wish to print totals
only for the troposphere.
Default value: False (i.e. print whole-atmosphere totals).
outfilename : str, optional
Name of the text file which will contain the table of
emissions totals.
Default value: "GlobalMass_TropStrat.txt"
verbose : bool, optional
Set this switch to True if you wish to print out extra
informational messages.
Default value: False
Notes
-----
This method is mainly intended for model benchmarking purposes,
rather than as a general-purpose tool.
Species metadata (such as molecular weights) are read from a
YAML file called "species_database.yml".
"""
# ==================================================================
# Initialization
# ==================================================================
verify_variable_type(refdata, xr.Dataset)
verify_variable_type(devdata, xr.Dataset)
# Make sure required arguments are passed
if varlist is None:
raise ValueError('The "varlist" argument was not passed!')
if met_and_masks is None:
raise ValueError('The "met_and_masks" argument was not passed!')
# Read the species database files in the Ref & Dev rundirs, and
# return a dict containing metadata for each.
ref_metadata, dev_metadata = read_species_metadata(
spcdb_files,
quiet=True
)
# Replace whitespace in the ref and dev labels
refstr = replace_whitespace(refstr)
devstr = replace_whitespace(devstr)
# ==================================================================
# Open file for output
# ==================================================================
# Create file
try:
f = open(outfilename, "w", encoding=ENCODING)
except (IOError, OSError, FileNotFoundError) as e:
raise e(f"Could not open {outfilename} for writing!") from e
# Define a list for differences
diff_list = []
# Title strings
title1 = f"### Global mass (Gg) (Trop + Strat)"
if trop_only:
title1 = f"### Global mass (Gg) (Trop only)"
title2 = f"### Ref = {refstr} {ref_hdr_label}"
title3 = f"### Dev = {devstr} {dev_hdr_label}"
# Write a placeholder to the file that denotes where
# the list of species with differences will be written
placeholder = "@%% insert diff status here %%@"
# Print header to file
print("#" * TABLE_WIDTH, file=f)
print(f"{title1 : <{TABLE_WIDTH-3}}{'###'}", file=f)
print(f"{'###' : <{TABLE_WIDTH-3}}{'###'}", file=f)
print(f"{title2 : <{TABLE_WIDTH-3}}{'###'}", file=f)
print(f"{title3 : <{TABLE_WIDTH-3}}{'###'}", file=f)
print(f"{'###' : <{TABLE_WIDTH-3}}{'###'}", file=f)
print(f"{placeholder}", file=f)
print("#" * TABLE_WIDTH, file=f)
# Column headers
print(f"{'' : <{COL_WIDTH-1}}{'Ref' : >{COL_WIDTH}}{'Dev' : >{COL_WIDTH}}{'Dev - Ref' : >{COL_WIDTH}}{'% diff' : >{COL_WIDTH}} {'diffs'}", file=f)
# ==================================================================
# Print global masses for all species
#
# NOTE: By this point, all secies will be in both Ref and Dev'
# because we have added them in the calling routine
# ==================================================================
for v in varlist:
# Get the species name
spc_name = v.split("_")[1]
# Get metadta for the given species
ref_species_metadata = ref_metadata.get(spc_name)
dev_species_metadata = dev_metadata.get(spc_name)
if ref_species_metadata is None and dev_species_metadata is None:
if verbose:
msg = f"No metadata found for {spc_name} ... skippping"
print(msg)
continue
# Specify target units
target_units = "Gg"
# ==============================================================
# Convert units of Ref and save to a DataArray
# (or skip if Ref contains NaNs everywhere)
# ==============================================================
refarray = refdata[v]
if not np.isnan(refdata[v].values).all():
refarray = convert_units(
refarray,
spc_name,
ref_species_metadata,
target_units,
area_m2=met_and_masks["Ref_Area"],
delta_p=met_and_masks["Ref_Delta_P"],
box_height=met_and_masks["Ref_BxHeight"],
)
# ==============================================================
# Convert units of Dev and save to a DataArray
# (or skip if Dev contains NaNs everywhere)
# ==============================================================
devarray = devdata[v]
if not np.isnan(devdata[v].values).all():
devarray = convert_units(
devarray,
spc_name,
dev_species_metadata,
target_units,
area_m2=met_and_masks["Dev_Area"],
delta_p=met_and_masks["Dev_Delta_P"],
box_height=met_and_masks["Dev_BxHeight"],
)
# ==============================================================
# Print global masses for Ref and Dev
# (we will mask out tropospheric boxes in print_totals)
# ==============================================================
if trop_only:
print_totals(
refarray,
devarray,
f,
diff_list,
masks=met_and_masks
)
else:
print_totals(
refarray,
devarray,
f,
diff_list
)
# ==================================================================
# Cleanup and quit
# ==================================================================
# Close file
f.close()
# Reopen file and replace placeholder text by diff_text
insert_text_into_file(
filename=outfilename,
search_text=placeholder,
replace_text=diff_list_to_text(
refstr,
devstr,
diff_list,
fancy_format=True
),
width=TABLE_WIDTH
)
[docs]
def create_mass_accumulation_table(
refdatastart,
refdataend,
refstr,
refperiodstr,
devdatastart,
devdataend,
devstr,
devperiodstr,
varlist,
met_and_masks,
label,
spcdb_files,
trop_only=False,
outfilename="GlobalMassAccum_TropStrat.txt",
verbose=False,
):
"""
Creates a table of global mass accumulation for a list of species in
two data sets. The data sets, which typically represent output from two
different model versions, are usually contained in netCDF data files.
Parameters
----------
refdatastart : xr.Dataset
The first data set to be compared (aka "Reference").
refdataend : xr.Dataset
The first data set to be compared (aka "Reference").
refstr : str
A string that can be used to identify refdata
(e.g. a model version number or other identifier).
refperiodstr : str
Ref simulation period start and end.
devdatastart : xr.Dataset
The second data set to be compared (aka "Development").
devdataend : xr.Dataset
The second data set to be compared (aka "Development").
devstr : str
A string that can be used to identify the data set specified
by devfile (e.g. a model version number or other identifier).
devperiodstr : str
Dev simulation period start and end.
varlist : list of str
List of species concentration variable names to include
in the list of global totals.
met_and_masks : dict of xr.DataArray
Dictionary containing the meteorological variables and
masks for the Ref and Dev datasets.
label : str
Label to go in the header string. Can be used to
pass the month & year.
spcdb_files : list
Paths to species_database.yml files in Ref & Dev rundirs.
trop_only : bool, optional
Set this switch to True if you wish to print totals
only for the troposphere.
Default value: False (i.e. print whole-atmosphere totals).
outfilename : str, optional
Name of the text file which will contain the table of
emissions totals.
Default value: "GlobalMass_TropStrat.txt"
verbose : bool, optional
Set this switch to True if you wish to print out extra
informational messages.
Default value: False
Notes
-----
This method is mainly intended for model benchmarking purposes,
rather than as a general-purpose tool.
Species metadata (such as molecular weights) are read from a
YAML file called "species_database.yml".
"""
# ==================================================================
# Initialization
# ==================================================================
verify_variable_type(refdatastart, xr.Dataset)
verify_variable_type(refdataend, xr.Dataset)
verify_variable_type(devdatastart, xr.Dataset)
verify_variable_type(devdataend, xr.Dataset)
# Make sure required arguments are passed
if varlist is None:
raise ValueError('The "varlist" argument was not passed!')
if met_and_masks is None:
raise ValueError('The "met_and_masks" argument was not passed!')
# Read the species database files in the Ref & Dev rundirs, and
# return a dict containing metadata for each.
ref_metadata, dev_metadata = read_species_metadata(
spcdb_files,
quiet=True
)
# Replace whitespace in the ref and dev labels
refstr = replace_whitespace(refstr)
devstr = replace_whitespace(devstr)
# ==================================================================
# Open file for output
# ==================================================================
# Create file
try:
f = open(outfilename, "w", encoding=ENCODING)
except (IOError, OSError, FileNotFoundError) as e:
raise e(f"Could not open {outfilename} for writing!") from e
# Define a list for differences
diff_list = []
# Title strings
title1 = f"### Global mass accumulation (Gg) {label} (Trop + Strat)"
if trop_only:
title1 = f"### Global mass accumulation (Gg) {label} (Trop only)"
title2 = "### Computed as change in instantaneous mass across period"
title3 = f"### Ref = {refstr}"
title4 = f"### Dev = {devstr}"
title5 = f"### Ref period: {refperiodstr}"
title6 = f"### Dev period: {devperiodstr}"
# Write a placeholder to the file that denotes where
# the list of species with differences will be written
placeholder = "@%% insert diff status here %%@"
# Print header to file
print("#" * TABLE_WIDTH, file=f)
print(f"{title1 : <{TABLE_WIDTH-3}}{'###'}", file=f)
print(f"{'###' : <{TABLE_WIDTH-3}}{'###'}", file=f)
print(f"{title2 : <{TABLE_WIDTH-3}}{'###'}", file=f)
print(f"{'###' : <{TABLE_WIDTH-3}}{'###'}", file=f)
print(f"{title3 : <{TABLE_WIDTH-3}}{'###'}", file=f)
print(f"{title4 : <{TABLE_WIDTH-3}}{'###'}", file=f)
print(f"{'###' : <{TABLE_WIDTH-3}}{'###'}", file=f)
print(f"{title5 : <{TABLE_WIDTH-3}}{'###'}", file=f)
print(f"{title6 : <{TABLE_WIDTH-3}}{'###'}", file=f)
print(f"{'###' : <{TABLE_WIDTH-3}}{'###'}", file=f)
print(f"{placeholder}", file=f)
print("#" * TABLE_WIDTH, file=f)
# Column headers
print(f"{'' : <{COL_WIDTH-1}}{'Ref' : >{COL_WIDTH}}{'Dev' : >{COL_WIDTH}}{'Dev - Ref' : >{COL_WIDTH}}{'% diff' : >{COL_WIDTH}} {'diffs'}", file=f)
# ==================================================================
# Print global masses for all species
#
# NOTE: By this point, all secies will be in both Ref and Dev'
# because we have added them in the calling routine
# ==================================================================
for v in varlist:
# Get the species name
spc_name = v.split("_")[1]
# Get a list of metadata for the given species
ref_species_metadata = ref_metadata.get(spc_name)
dev_species_metadata = dev_metadata.get(spc_name)
if ref_species_metadata is None and dev_species_metadata is None:
if verbose:
msg = f"No properties found for {spc_name} ... skippping"
print(msg)
continue
# Specify target units
target_units = "Gg"
# ==============================================================
# Convert units of Ref and save to a DataArray
# (or skip if Ref contains NaNs everywhere)
# ==============================================================
refarrays = refdatastart[v]
if not np.isnan(refdatastart[v].values).all():
refarrays = convert_units(
refarrays,
spc_name,
ref_species_metadata,
target_units,
area_m2=met_and_masks["Refs_Area"],
delta_p=met_and_masks["Refs_Delta_P"],
box_height=met_and_masks["Refs_BxHeight"],
)
refarraye = refdataend[v]
if not np.isnan(refdataend[v].values).all():
refarraye = convert_units(
refarraye,
spc_name,
ref_species_metadata,
target_units,
area_m2=met_and_masks["Refe_Area"],
delta_p=met_and_masks["Refe_Delta_P"],
box_height=met_and_masks["Refe_BxHeight"],
)
refarray = refarrays
refarray.values = refarraye.values - refarrays.values
# ==============================================================
# Convert units of Dev and save to a DataArray
# (or skip if Dev contains NaNs everywhere)
# ==============================================================
devarrays = devdatastart[v]
if not np.isnan(devdatastart[v].values).all():
devarrays = convert_units(
devarrays,
spc_name,
dev_species_metadata,
target_units,
area_m2=met_and_masks["Devs_Area"],
delta_p=met_and_masks["Devs_Delta_P"],
box_height=met_and_masks["Devs_BxHeight"],
)
#print('devarrays: {}'.format(devarrays.values))
devarraye = devdataend[v]
if not np.isnan(devdataend[v].values).all():
devarraye = convert_units(
devarraye,
spc_name,
dev_species_metadata,
target_units,
area_m2=met_and_masks["Deve_Area"],
delta_p=met_and_masks["Deve_Delta_P"],
box_height=met_and_masks["Deve_BxHeight"],
)
devarray = devarrays
devarray.values = devarraye.values - devarrays.values
# ==============================================================
# Print global masses for Ref and Dev
# (we will mask out tropospheric boxes in print_totals)
# ==============================================================
# ewl: for now trop_only is always false for accumulation table
if trop_only:
print_totals(
refarray,
devarray,
f,
diff_list,
masks=met_and_masks
)
else:
print_totals(
refarray,
devarray,
f,
diff_list
)
# ==================================================================
# Cleanup and quit
# ==================================================================
# Close file
f.close()
# Reopen file and replace placeholder text by diff_text
insert_text_into_file(
filename=outfilename,
search_text=placeholder,
replace_text=diff_list_to_text(
refstr,
devstr,
diff_list,
fancy_format=True
),
width=TABLE_WIDTH
)
[docs]
def make_benchmark_conc_plots(
ref,
refstr,
dev,
devstr,
spcdb_files,
dst="./benchmark",
subdst=None,
overwrite=False,
verbose=False,
collection="SpeciesConc",
benchmark_type="FullChemBenchmark",
cmpres=None,
plot_by_spc_cat=True,
restrict_cats=[],
plots=["sfc", "500hpa", "zonalmean"],
use_cmap_RdBu=False,
log_color_scale=False,
sigdiff_files=None,
normalize_by_area=False,
cats_in_ugm3=["Aerosols", "Secondary_Organic_Aerosols"],
areas=None,
refmet=None,
devmet=None,
weightsdir='.',
n_job=-1,
second_ref=None,
second_dev=None,
time_mean=False,
):
"""
Creates PDF files containing plots of species concentration
for model benchmarking purposes.
Parameters
----------
ref : str
Path name for the "Ref" (aka "Reference") data set.
refstr : str
A string to describe ref (e.g. version number).
dev : str
Path name for the "Dev" (aka "Development") data set.
This data set will be compared against the "Reference"
data set.
devstr : str
A string to describe dev (e.g. version number).
spcdb_files : list
Paths to species_database.yml files in Ref & Dev rundirs.
dst : str, optional
A string denoting the destination folder where a PDF
file containing plots will be written.
Default value: ./benchmark
subdst : str, optional
A string denoting the sub-directory of dst where PDF
files containing plots will be written. In practice,
subdst is only needed for the 1-year benchmark output,
and denotes a date string (such as "Jan2016") that
corresponds to the month that is being plotted.
Default value: None
overwrite : bool, optional
Set this flag to True to overwrite files in the
destination folder (specified by the dst argument).
Default value: False
verbose : bool, optional
Set this flag to True to print extra informational output.
Default value: False
collection : str, optional
Name of collection to use for plotting.
Default value: "SpeciesConc"
benchmark_type : str, optional
A string denoting the type of benchmark output to plot, options are
FullChemBenchmark, TransportTracersBenchmark, or CH4Benchmark.
Default value: "FullChemBenchmark"
cmpres : str, optional
Grid resolution at which to compare ref and dev data, e.g. '1x1.25'.
plot_by_spc_cat : bool, optional
Set this flag to False to send plots to one file rather
than separate file per category.
Default value: True
restrict_cats : list of str, optional
List of benchmark categories in benchmark_categories.yml to make
plots for. If empty, plots are made for all categories.
Default value: empty
plots : list of str, optional
List of plot types to create.
Default value: ['sfc', '500hpa', 'zonalmean']
log_color_scale : bool, optional
Set this flag to True to enable plotting data (not diffs)
on a log color scale.
Default value: False
normalize_by_area : bool, optional
Set this flag to true to enable normalization of data
by surface area (i.e. kg s-1 --> kg s-1 m-2).
Default value: False
cats_in_ugm3 : list of str, optional
List of benchmark categories to to convert to ug/m3.
Default value: ["Aerosols", "Secondary_Organic_Aerosols"]
areas : dict of xr.DataArray, optional
Grid box surface areas in m2 on Ref and Dev grids.
Default value: None
refmet : str, optional
Path name for ref meteorology.
Default value: None
devmet : str, optional
Path name for dev meteorology.
Default value: None
sigdiff_files : list of str, optional
Filenames that will contain the lists of species having
significant differences in the 'sfc', '500hpa', and
'zonalmean' plots. These lists are needed in order to
fill out the benchmark approval forms.
Default value: None
weightsdir : str, optional
Directory in which to place (and possibly reuse) xESMF regridder
netCDF files.
Default value: '.'
n_job : int, optional
Defines the number of simultaneous workers for parallel plotting.
Set to 1 to disable parallel plotting. Value of -1 allows the
application to decide.
Default value: -1
second_ref : str, optional
Path name for a second "Ref" (aka "Reference") data set for
diff-of-diffs plotting. This dataset should have the same model
type and grid as ref.
Default value: None
second_dev : str, optional
Path name for a second "Ref" (aka "Reference") data set for
diff-of-diffs plotting. This dataset should have the same model
type and grid as ref.
Default value: None
time_mean : bool, optional
Determines if we should average the datasets over time.
Default value: False
"""
# NOTE: this function could use some refactoring;
# abstract processing per category?
# ==================================================================
# Initialization and data read
# ==================================================================
verify_variable_type(ref, (str, list))
verify_variable_type(refstr, str)
verify_variable_type(dev, (str, list))
verify_variable_type(devstr, str)
verify_variable_type(spcdb_files, list)
# Create the destination folder
make_directory(dst, overwrite)
# Define extra title text (usually a date string)
# for the top-title of the plot
if subdst is not None:
extra_title_txt = subdst
else:
extra_title_txt = None
# Replace whitespace in the ref and dev labels
refstr = replace_whitespace(refstr)
devstr = replace_whitespace(devstr)
# Pick the proper function to read the data
reader = dataset_reader(time_mean, verbose=verbose)
# Open datasets
refds = reader(ref, drop_variables=SKIP_THESE_VARS)
devds = reader(dev, drop_variables=SKIP_THESE_VARS)
# Rename SpeciesConc_ to SpeciesConcVV_ for consistency with new
# naming introduced in GEOS-Chem 14.1.0
refds = rename_speciesconc_to_speciesconcvv(refds)
devds = rename_speciesconc_to_speciesconcvv(devds)
# -----------------------------------------------------------------
# Kludge, rename wrong variable name
if "SpeciesConcVV_PFE" in refds.data_vars.keys():
refds = refds.rename({"SpeciesConcVV_PFE": "SpeciesConcVV_pFe"})
if "SpeciesConcVV_PFE" in devds.data_vars.keys():
devds = devds.rename({"SpeciesConcVV_PFE": "SpeciesConcVV_pFe"})
# -----------------------------------------------------------------
if verbose:
print('\nPrinting refds (comparison ref)\n')
print(refds)
print('\nPrinting devds (comparison dev)\n')
print(devds)
# Open met datasets if passed as arguments
refmetds = None
devmetds = None
if refmet:
refmetds = reader(refmet, drop_variables=SKIP_THESE_VARS)
if devmet:
devmetds = reader(devmet, drop_variables=SKIP_THESE_VARS)
# Determine if doing diff-of-diffs
diff_of_diffs = second_ref is not None and second_dev is not None
if diff_of_diffs:
# --------------------------------------------------------------
# %%%%% We are plotting diff-of-diffs %%%%%
#
# Open the second Ref and Dev datasets, if they have been
# passed as keyword arguments, as these are needed for the
# diff-of-diffs plots. Regrid to the same # horizontal grid
# resolution if the two Refs or two Devs are not on the same
# grid.
#
# Also, do not take the time mean (e.g. Annual Mean) of the
# datasets, as this will compute the the difference of means.
# Instead, we need to compute the mean of differences. This
# will be done in the plotting functions compare_single_level
# and compare_zonal_mean.
# --------------------------------------------------------------
second_refds = reader(second_ref, drop_variables=SKIP_THESE_VARS)
second_devds = reader(second_dev, drop_variables=SKIP_THESE_VARS)
if verbose:
print('\nPrinting second_refds (dev of ref for diff-of-diffs)\n')
print(second_refds)
print('\nPrinting second_devds (dev of dev for diff-of-diffs)\n')
print(second_devds)
# Only regrid if ref grid resolutions differ.
# Assume only may differ if both cubed-sphere.
# If different, assume the two resolutions are C24 and C48,
# and do the comparison at C48.
regrid_ref = False
if "Xdim" in refds.dims and "Xdim" in second_refds.dims:
if refds['Xdim'].size != second_refds['Xdim'].size:
regrid_ref = True
regrid_dev = False
if "Xdim" in devds.dims and "Xdim" in second_devds.dims:
if devds['Xdim'].size != second_devds['Xdim'].size:
regrid_dev = True
if regrid_ref or regrid_dev:
# Assume regridding C24 to C48 to compute the difference at C48
regridfile=os.path.join(weightsdir,'regrid_weights_c24_to_c48.nc')
transform = sparselt.esmf.load_weights(
regridfile,
input_dims=[('nf', 'Ydim', 'Xdim'), (6, 24, 24)],
output_dims=[('nf', 'Ydim', 'Xdim'), (6, 48, 48)],
)
if regrid_ref and refds['Xdim'].size == 24:
print('\nRegridding ref dataset 1 from C24 to C48\n')
refds = sparselt.xr.apply(transform, refds)
print(refds)
print('\nRegrid complete\n')
if regrid_ref and second_refds['Xdim'].size == 24:
print('\nRegridding ref dataset 2 from C24 to C48\n')
second_refds = sparselt.xr.apply(transform, second_refds)
print(second_refds)
print('\nRegrid complete\n')
if regrid_dev and devds['Xdim'].size == 24:
print('\nRegridding dev dataset 1 from C24 to C48\n')
devds = sparselt.xr.apply(transform, devds)
print(devds)
print('\nRegrid complete\n')
if regrid_dev and second_devds['Xdim'].size == 24:
print('\nRegridding dev dataset 2 from C24 to C48\n')
second_devds = sparselt.xr.apply(transform, second_devds)
print(second_devds)
print('\nRegrid complete\n')
else:
# --------------------------------------------------------------
# %%%%% We are not plotting diff-of-diffs %%%%%
#
# We do not need the second Ref and Dev data.
# We can also compute the time mean (e.g. Annual Mean) here.
# --------------------------------------------------------------
second_refds = None
second_devds = None
if time_mean:
refds = dataset_mean(refds)
devds = dataset_mean(devds)
refmetds = dataset_mean(refmetds)
devmetds = dataset_mean(devmetds)
# Create regridding files if necessary while not in parallel loop
[_ for _ in create_regridders(refds, devds, weightsdir=weightsdir)]
# If we are normalizing by area, then merge the surface areas
# on the Ref & Dev grids into the Ref & Dev datasets, but only
# if they are not already there. The area variables should both
# be called 'AREA' and be in units of m2.
if normalize_by_area:
if areas is not None:
if 'AREA' not in refds.data_vars:
refds = xr.merge([refds, areas["Ref"]])
if 'AREA' not in devds.data_vars:
devds = xr.merge([devds, areas["Dev"]])
if diff_of_diffs:
if 'AREA' not in second_refds.data_vars:
second_refds = xr.merge([second_refds, areas["Ref"]])
if 'AREA' not in second_devds.data_vars:
second_devds = xr.merge([second_devds, areas["Dev"]])
else:
msg = "ERROR: normalize_by_area = True but " \
+ "the 'areas' argument was not passed!"
raise ValueError(msg)
# ==================================================================
# If sending plots to one file then do all plots here and return.
# Keep original units for plots.
# ==================================================================
if not plot_by_spc_cat:
[refds, devds] = add_missing_variables(refds, devds)
var_prefix = 'SpeciesConcVV_'
varlist = [k for k in refds.data_vars.keys() if var_prefix in k]
varlist.sort()
# Surface
pdfname = os.path.join(dst, 'SpeciesConc_Sfc.pdf')
compare_single_level(
refds,
refstr,
devds,
devstr,
varlist=varlist,
cmpres=cmpres,
pdfname=pdfname,
use_cmap_RdBu=use_cmap_RdBu,
log_color_scale=log_color_scale,
extra_title_txt=extra_title_txt,
normalize_by_area=normalize_by_area,
weightsdir=weightsdir,
second_ref=second_refds,
second_dev=second_devds,
spcdb_files=spcdb_files
)
add_bookmarks_to_pdf(
pdfname,
varlist,
remove_prefix=var_prefix,
verbose=verbose
)
# 500 hPa
pdfname = os.path.join(dst, 'SpeciesConc_500hPa.pdf')
compare_single_level(
refds,
refstr,
devds,
devstr,
ilev=22,
varlist=varlist,
cmpres=cmpres,
pdfname=pdfname,
use_cmap_RdBu=use_cmap_RdBu,
log_color_scale=log_color_scale,
normalize_by_area=normalize_by_area,
extra_title_txt=extra_title_txt,
weightsdir=weightsdir,
second_ref=second_refds,
second_dev=second_devds,
spcdb_files=spcdb_files
)
add_bookmarks_to_pdf(
pdfname,
varlist,
remove_prefix=var_prefix,
verbose=verbose
)
# Zonal mean
pdfname = os.path.join(dst, 'SpeciesConc_ZnlMn.pdf')
compare_zonal_mean(
refds,
refstr,
devds,
devstr,
varlist=varlist,
pdfname=pdfname,
use_cmap_RdBu=use_cmap_RdBu,
log_color_scale=log_color_scale,
normalize_by_area=normalize_by_area,
extra_title_txt=extra_title_txt,
weightsdir=weightsdir,
second_ref=second_refds,
second_dev=second_devds,
spcdb_files=spcdb_files
)
add_bookmarks_to_pdf(
pdfname,
varlist,
remove_prefix=var_prefix,
verbose=verbose
)
return
# ==================================================================
# Otherwise plot by categories. Convert units to ug/m3 for
# aerosol categories: Aerosols and Secondary Organic Aerosols.
# ==================================================================
# FullChemBenchmark has lumped species (TransportTracers, CH4 do not)
if "FullChem" in benchmark_type:
print("\nComputing lumped species for full chemistry benchmark")
print("-->Adding lumped species to ref dataset")
refds = add_lumped_species_to_dataset(refds)
print("-->Adding lumped species to dev dataset")
devds = add_lumped_species_to_dataset(devds)
if diff_of_diffs:
print("-->Adding lumped species to second ref and dev datasets")
second_refds = add_lumped_species_to_dataset(second_refds)
second_devds = add_lumped_species_to_dataset(second_devds)
archive_lumped_species_definitions(dst)
print("Lumped species computation complete.\n")
# Get the list of species categories
catdict = get_species_categories(benchmark_type)
archive_species_categories(dst)
# Make sure that Ref and Dev datasets have the same variables.
# Variables that are in Ref but not in Dev will be added to Dev
# with all missing values (NaNs). And vice-versa.
[refds, devds] = add_missing_variables(refds, devds)
if diff_of_diffs:
[refds, second_refds] = add_missing_variables(refds, second_refds)
[devds, second_devds] = add_missing_variables(devds, second_devds)
# Collection prefix
if "SpeciesConc" in collection:
coll_prefix = "SpeciesConcVV_"
else:
coll_prefix = collection.strip() + "_"
# ==================================================================
# Create the plots!
# ==================================================================
# Use dictionaries to maintain order of significant difference categories
dict_sfc = {}
dict_500 = {}
dict_zm = {}
def createplots(filecat):
cat_diff_dict = {'sfc': [], '500': [], 'zm': []}
# Plots units ug/m3 for certain species categories
convert_to_ugm3 = False
if cats_in_ugm3 is not None and filecat in cats_in_ugm3:
convert_to_ugm3 = True
# Suppress harmless run-time warnings from all threads
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=UserWarning)
# If restrict_cats list is passed,
# skip all categories except those in the list
if restrict_cats and filecat not in restrict_cats:
return {filecat: cat_diff_dict}
# Create a directory for each category.
# If subdst is passed, then create a subdirectory in each
# category directory (e.g. as for the 1-year benchmark).
catdir = os.path.join(dst, filecat)
if not os.path.isdir(catdir):
os.mkdir(catdir)
if subdst is not None:
catdir = os.path.join(catdir, subdst)
if not os.path.isdir(catdir):
os.mkdir(catdir)
# Get the list of variables in both Ref and Dev for each category
# (this is computationally efficient)
ref_vars = set(refds.data_vars)
dev_vars = set(devds.data_vars)
candidates = [
coll_prefix + spc
for subcat in catdict[filecat]
for spc in catdict[filecat][subcat]
]
varlist = \
[var for var in candidates \
if var in ref_vars and var in dev_vars
]
warninglist = \
[var for var in candidates \
if var not in ref_vars or var not in dev_vars
]
# Create new datasets containing only the variables for a
# given category, as this will optimize performance.
refds_cat = refds[varlist]
devds_cat = devds[varlist]
second_refds_cat = None
if second_refds is not None:
second_refds_cat = second_refds[varlist]
second_devds_cat = None
if second_devds is not None:
second_devds_cat = second_devds[varlist]
# -----------------------
# Surface plots
# -----------------------
if "sfc" in plots:
if subdst is not None:
pdfname = os.path.join(
catdir,
f"{filecat}_Surface_{subdst}.pdf"
)
print(f"creating {pdfname}")
else:
pdfname = os.path.join(
catdir,
f"{filecat}_Surface.pdf"
)
diff_sfc = []
compare_single_level(
refds_cat,
refstr,
devds_cat,
devstr,
varlist=varlist,
ilev=0,
cmpres=cmpres,
refmet=refmetds,
devmet=devmetds,
pdfname=pdfname,
use_cmap_RdBu=use_cmap_RdBu,
log_color_scale=log_color_scale,
normalize_by_area=normalize_by_area,
extra_title_txt=extra_title_txt,
sigdiff_list=diff_sfc,
weightsdir=weightsdir,
convert_to_ugm3=convert_to_ugm3,
second_ref=second_refds_cat,
second_dev=second_devds_cat,
n_job=n_job,
spcdb_files=spcdb_files,
)
diff_sfc[:] = [v.replace(coll_prefix, "") for v in diff_sfc]
cat_diff_dict['sfc'] = diff_sfc
add_nested_bookmarks_to_pdf(
pdfname,
filecat,
catdict,
warninglist,
remove_prefix=coll_prefix
)
# -----------------------
# 500 hPa plots
# -----------------------
if "500hpa" in plots:
if subdst is not None:
pdfname = os.path.join(
catdir,
f"{filecat}_500hPa_{subdst}.pdf"
)
else:
pdfname = os.path.join(
catdir,
f"{filecat}_500hPa.pdf"
)
diff_500 = []
compare_single_level(
refds_cat,
refstr,
devds_cat,
devstr,
varlist=varlist,
ilev=22,
cmpres=cmpres,
refmet=refmetds,
devmet=devmetds,
pdfname=pdfname,
use_cmap_RdBu=use_cmap_RdBu,
log_color_scale=log_color_scale,
normalize_by_area=normalize_by_area,
extra_title_txt=extra_title_txt,
sigdiff_list=diff_500,
weightsdir=weightsdir,
convert_to_ugm3=convert_to_ugm3,
second_ref=second_refds_cat,
second_dev=second_devds_cat,
n_job=n_job,
spcdb_files=spcdb_files
)
diff_500[:] = [v.replace(coll_prefix, "") for v in diff_500]
#dict_500[filecat] = diff_500
cat_diff_dict['500'] = diff_500
add_nested_bookmarks_to_pdf(
pdfname,
filecat,
catdict,
warninglist,
remove_prefix=coll_prefix
)
# -----------------------
# Zonal mean plots
# -----------------------
if "zonalmean" in plots or "zm" in plots:
if subdst is not None:
pdfname = os.path.join(
catdir,
f"{filecat}_FullColumn_ZonalMean_{subdst}.pdf"
)
else:
pdfname = os.path.join(
catdir,
f"{filecat}_FullColumn_ZonalMean.pdf"
)
diff_zm = []
compare_zonal_mean(
refds_cat,
refstr,
devds_cat,
devstr,
varlist=varlist,
refmet=refmetds,
devmet=devmetds,
pdfname=pdfname,
use_cmap_RdBu=use_cmap_RdBu,
log_color_scale=log_color_scale,
normalize_by_area=normalize_by_area,
extra_title_txt=extra_title_txt,
sigdiff_list=diff_zm,
weightsdir=weightsdir,
convert_to_ugm3=convert_to_ugm3,
second_ref=second_refds_cat,
second_dev=second_devds_cat,
n_job=n_job,
spcdb_files=spcdb_files
)
diff_zm[:] = [v.replace(coll_prefix, "") for v in diff_zm]
#dict_zm = diff_zm
cat_diff_dict['zm'] = diff_zm
add_nested_bookmarks_to_pdf(
pdfname,
filecat,
catdict,
warninglist,
remove_prefix=coll_prefix
)
# Strat_ZonalMean plots will use a log-pressure Y-axis, with
# a range of 1..100 hPa, as per GCSC request. (bmy, 8/13/19)
if subdst is not None:
pdfname = os.path.join(
catdir,
f"{filecat}_Strat_ZonalMean_{subdst}.pdf"
)
else:
pdfname = os.path.join(
catdir,
f"{filecat}_Strat_ZonalMean.pdf"
)
compare_zonal_mean(
refds_cat,
refstr,
devds_cat,
devstr,
varlist=varlist,
refmet=refmetds,
devmet=devmetds,
pdfname=pdfname,
use_cmap_RdBu=use_cmap_RdBu,
pres_range=[1, 100],
log_yaxis=True,
extra_title_txt=extra_title_txt,
log_color_scale=log_color_scale,
normalize_by_area=normalize_by_area,
convert_to_ugm3=convert_to_ugm3,
weightsdir=weightsdir,
second_ref=second_refds_cat,
second_dev=second_devds_cat,
n_job=n_job,
spcdb_files=spcdb_files
)
add_nested_bookmarks_to_pdf(
pdfname,
filecat,
catdict,
warninglist,
remove_prefix=coll_prefix
)
return {filecat: cat_diff_dict}
# --------------------------------------------
# Create the plots in parallel
# Turn off parallelization if n_job=1
if n_job != 1:
results = Parallel(n_jobs=n_job)(
delayed(createplots)(filecat)
for _, filecat in enumerate(catdict)
)
else:
results = []
for _, filecat in enumerate(catdict):
results.append(createplots(filecat))
# --------------------------------------------
dict_sfc = {list(result.keys())[0]: result[list(
result.keys())[0]]['sfc'] for result in results}
dict_500 = {list(result.keys())[0]: result[list(
result.keys())[0]]['500'] for result in results}
dict_zm = {list(result.keys())[0]: result[list(
result.keys())[0]]['zm'] for result in results}
# ==============================================================
# Write the list of species having significant differences,
# which we need to fill out the benchmark approval forms.
# ==============================================================
if sigdiff_files is not None:
for filename in sigdiff_files:
if "sfc" in plots:
if "sfc" in filename:
with open(filename, "a+", encoding=ENCODING) as f:
for c, diff_list in dict_sfc.items():
print(f"* {c}: ", file=f, end="")
for v in diff_list:
print(f"{v} ", file=f, end="")
print(file=f)
f.close()
if "500hpa" in plots:
if "500hpa" in filename:
with open(filename, "a+", encoding=ENCODING) as f:
for c, diff_list in dict_500.items():
print(f"* {c}: ", file=f, end="")
for v in diff_list:
print(f"{v} ", file=f, end="")
print(file=f)
f.close()
if "zonalmean" in plots or "zm" in plots:
if "zonalmean" in filename or "zm" in filename:
with open(filename, "a+", encoding=ENCODING) as f:
for c, diff_list in dict_zm.items():
print(f"* {c}: ", file=f, end="")
for v in diff_list:
print(f"{v} ", file=f, end="")
print(file=f)
f.close()
# -------------------------------------------
# Clean up
# -------------------------------------------
del refds
del devds
del refmetds
del devmetds
del second_ref
del second_dev
gc.collect()
[docs]
def make_benchmark_emis_plots(
ref,
refstr,
dev,
devstr,
spcdb_files,
dst="./benchmark",
subdst=None,
plot_by_spc_cat=False,
plot_by_hco_cat=False,
benchmark_type="FullChemBenchmark",
cmpres=None,
overwrite=False,
verbose=False,
flip_ref=False,
flip_dev=False,
log_color_scale=False,
sigdiff_files=None,
weightsdir='.',
n_job=-1,
time_mean=False,
):
"""
Creates PDF files containing plots of emissions for model
benchmarking purposes. This function is compatible with benchmark
simulation output only. It is not compatible with transport tracers
emissions diagnostics.
Parameters
----------
ref : str
Path name for the "Ref" (aka "Reference") data set.
refstr : str
A string to describe ref (e.g. version number).
dev : str
Path name for the "Dev" (aka "Development") data set.
This data set will be compared against the "Reference"
data set.
devstr : str
A string to describe dev (e.g. version number).
spcdb_files : list
Paths to species_database.yml files in Ref & Dev rundirs.
dst : str, optional
A string denoting the destination folder where
PDF files containing plots will be written.
Default value: './benchmark'
subdst : str, optional
A string denoting the sub-directory of dst where PDF
files containing plots will be written. In practice,
and denotes a date string (such as "Jan2016") that
corresponds to the month that is being plotted.
Default value: None
plot_by_spc_cat : bool, optional
Set this flag to True to separate plots into PDF files
according to the benchmark species categories (e.g. Oxidants,
Aerosols, Nitrogen, etc.) These categories are specified
in the YAML file benchmark_categories.yml.
Default value: False
plot_by_hco_cat : bool, optional
Set this flag to True to separate plots into PDF files
according to HEMCO emissions categories (e.g. Anthro,
Aircraft, Bioburn, etc.)
Default value: False
benchmark_type : str, optional
A string denoting the type of benchmark output to plot, options are
FullChemBenchmark, TransportTracersBenchmark, or CH4Benchmark.
Default value: "FullChemBenchmark"
cmpres : str, optional
Grid resolution at which to compare ref and dev data, e.g. '1x1.25'.
overwrite : bool, optional
Set this flag to True to overwrite files in the
destination folder (specified by the dst argument).
Default value: False
verbose : bool, optional
Set this flag to True to print extra informational output.
Default value: False
flip_ref : bool, optional
Set this flag to True to reverse the vertical level
ordering in the "Ref" dataset (in case "Ref" starts
from the top of atmosphere instead of the surface).
Default value: False
flip_dev : bool, optional
Set this flag to True to reverse the vertical level
ordering in the "Dev" dataset (in case "Dev" starts
from the top of atmosphere instead of the surface).
Default value: False
log_color_scale : bool, optional
Set this flag to True to enable plotting data (not diffs)
on a log color scale.
Default value: False
sigdiff_files : list of str, optional
Filenames that will contain the lists of species having
significant differences in the 'sfc', '500hpa', and
'zonalmean' plots. These lists are needed in order to
fill out the benchmark approval forms.
Default value: None
weightsdir : str, optional
Directory in which to place (and possibly reuse) xESMF regridder
netCDF files.
Default value: '.'
n_job : int, optional
Defines the number of simultaneous workers for parallel plotting.
Set to 1 to disable parallel plotting.
Value of -1 allows the application to decide.
Default value: -1
time_mean : bool, optional
Determines if we should average the datasets over time.
Default value: False
Notes
-----
1. If both plot_by_spc_cat and plot_by_hco_cat are
False, then all emission plots will be placed into the
same PDF file.
2. Emissions that are 3-dimensional will be plotted as
column sums.
"""
# =================================================================
# Initialization and data read
# =================================================================
verify_variable_type(ref, (str,list))
verify_variable_type(refstr, str)
verify_variable_type(dev, (str,list))
verify_variable_type(devstr, str)
verify_variable_type(spcdb_files, list)
# Create the destination folder
make_directory(dst, overwrite)
# Create the "Emissions" category folder. If subdst is passed,
# then create a sub-folder (needed for the 1-year benchmarks).
emisdir = os.path.join(dst, "Emissions")
if not os.path.isdir(emisdir):
os.mkdir(emisdir)
if subdst is not None:
emisdir = os.path.join(emisdir, subdst)
if not os.path.isdir(emisdir):
os.mkdir(emisdir)
extra_title_txt = subdst
else:
extra_title_txt = None
# Replace whitespace in the ref and dev labels
refstr = replace_whitespace(refstr)
devstr = replace_whitespace(devstr)
# Get the function that will read the dataset
reader = dataset_reader(time_mean, verbose=verbose)
# Ref dataset
try:
refds = reader(ref, drop_variables=SKIP_THESE_VARS)
except (OSError, IOError, FileNotFoundError) as e:
raise e(f"Could not find Ref file: {ref}") from e
# Dev dataset
try:
devds = reader(dev, drop_variables=SKIP_THESE_VARS)
except (OSError, IOError, FileNotFoundError) as e:
raise e(f"Could not find Ref file: {dev}") from e
# Compute mean of data over the time dimension (if time_mean=True)
if time_mean:
refds = dataset_mean(refds)
devds = dataset_mean(devds)
# Make sure that Ref and Dev datasets have the same variables.
# Variables that are in Ref but not in Dev will be added to Dev
# with all missing values (NaNs). And vice-versa.
[refds, devds] = add_missing_variables(refds, devds)
# Create regridding files if necessary while not in parallel loop
[_ for _ in create_regridders(refds, devds, weightsdir=weightsdir)]
# Combine 2D and 3D variables into an overall list
vardict = compare_varnames(refds, devds, quiet=not verbose)
vars2D = vardict["commonvars2D"]
vars3D = vardict["commonvars3D"]
varlist = vars2D + vars3D
# ==================================================================
# Compute column sums for 3D emissions
# Make sure not to clobber the DataArray attributes
# ==================================================================
with xr.set_options(keep_attrs=True):
for v in vars3D:
if "lev" in refds[v].dims:
refds[v] = refds[v].sum(dim="lev")
if "lev" in devds[v].dims:
devds[v] = devds[v].sum(dim="lev")
# ==================================================================
# If inputs plot_by* are both false, plot all emissions in same file
# ==================================================================
if not plot_by_spc_cat and not plot_by_hco_cat:
if subdst is not None:
pdfname = os.path.join(
emisdir,
f"Emissions_{subdst}.pdf"
)
else:
pdfname = os.path.join(
emisdir,
"Emissions.pdf"
)
compare_single_level(
refds,
refstr,
devds,
devstr,
varlist=varlist,
cmpres=cmpres,
pdfname=pdfname,
log_color_scale=log_color_scale,
extra_title_txt=extra_title_txt,
weightsdir=weightsdir,
n_job=n_job,
spcdb_files=spcdb_files,
)
add_bookmarks_to_pdf(
pdfname,
varlist,
remove_prefix="Emis",
verbose=verbose
)
return
# Get emissions variables (non-inventory), categories, and species
emis_vars = [v for v in varlist if v[:4] == "Emis"]
emis_cats = sorted(set([v.split("_")[1] for v in emis_vars]))
emis_spc = sorted(set([v.split("_")[0][4:] for v in emis_vars]))
# This is fixed in 12.3.2, comment out for now (bmy, 5/1/19)
# # Handle Bioburn and BioBurn as same categories (temporary until 12.3.1)
# emis_cats.remove('BioBurn')
# Sort alphabetically (assume English characters)
emis_vars.sort(key=str.lower)
# ==================================================================
# if plot_by_hco_cat is true, make a file for each HEMCO emissions
# category that is in the diagnostics file
#
# Also write the list of emission quantities that have significant
# diffs. We'll need that to fill out the benchmark forms.
# ==================================================================
if plot_by_hco_cat:
emisspcdir = os.path.join(dst, "Emissions")
if not os.path.isdir(emisspcdir):
os.mkdir(emisspcdir)
if subdst is not None:
emisspcdir = os.path.join(emisspcdir, subdst)
if not os.path.isdir(emisspcdir):
os.mkdir(emisspcdir)
# for c in emis_cats:
def createfile_hco_cat(c):
# Handle cases of bioburn and bioBurn (temporary until 12.3.1)
if c == "Bioburn":
varnames = [k for k in emis_vars
if any(b in k for b in ["Bioburn", "BioBurn"])
]
else:
varnames = [k for k in emis_vars if c in k]
# Create the PDF name. If subdst is passed, then also add
# subdst to the file name (e.g. as for 1-year benchmarks).
if subdst is not None:
pdfname = os.path.join(
emisspcdir,
f"{c}_Emissions_{subdst}.pdf"
)
else:
pdfname = os.path.join(
emisspcdir,
f"{c}_Emissions.pdf"
)
diff_dict = {}
diff_emis = []
compare_single_level(
refds,
refstr,
devds,
devstr,
varlist=varnames,
cmpres=cmpres,
ilev=0,
pdfname=pdfname,
log_color_scale=log_color_scale,
extra_title_txt=extra_title_txt,
sigdiff_list=diff_emis,
weightsdir=weightsdir,
n_job=n_job,
spcdb_files=spcdb_files
)
add_bookmarks_to_pdf(
pdfname,
varnames,
remove_prefix="Emis",
verbose=verbose
)
# Save the list of quantities with significant differences for
# this category into the diff_dict dictionary for use below
diff_emis[:] = [v.replace("Emis", "") for v in diff_emis]
diff_emis[:] = [v.replace("_" + c, "") for v in diff_emis]
diff_dict[c] = diff_emis
return diff_dict
# ---------------------------------------
# Create plots in parallel
# Turn off parallelization if n_job=1
if n_job != 1:
results = Parallel(n_jobs=n_job)(
delayed(createfile_hco_cat)(c)
for c in emis_cats
)
else:
results = []
for c in emis_cats:
results.append(createfile_hco_cat(c))
# ---------------------------------------
dict_emis = {list(result.keys())[0]: result[list(result.keys())[0]]
for result in results}
# =============================================================
# Write the list of species having significant differences,
# which we need to fill out the benchmark approval forms.
# =============================================================
if sigdiff_files is not None:
for filename in sigdiff_files:
if "emis" in filename:
with open(filename, "w+", encoding=ENCODING) as f:
for c, diff_list in dict_emis.items():
print(f"* {c}: ", file=f, end="")
for v in diff_list:
print(f"{v} ", file=f, end="")
print(file=f)
f.close()
# ==================================================================
# if plot_by_spc_cat is true, make a file for each benchmark
# species category with emissions in the diagnostics file
# ==================================================================
if plot_by_spc_cat:
catdict = get_species_categories(benchmark_type)
# in case any emissions are skipped (for use in nested pdf bookmarks)
warninglist = []
# for checking if emissions species not defined in benchmark category
# file
allcatspc = []
emisdict = {} # used for nested pdf bookmarks
# for i, filecat in enumerate(catdict):
def createfile_bench_cat(filecat):
# Get emissions for species in this benchmark category
varlist = []
emisdict[filecat] = {}
catspc = []
for subcat in catdict[filecat]:
for spc in catdict[filecat][subcat]:
catspc.append(spc)
if spc in emis_spc:
emisdict[filecat][spc] = []
emisvars = [v for v in emis_vars
if spc == v.split("_")[0][4:]]
for var in emisvars:
emisdict[filecat][spc].append(
var.replace("Emis", ""))
varlist.append(var)
if not varlist:
print(
"\nWarning: no emissions species in benchmark species" + \
f"category {filecat}"
)
return catspc
# Use same directory structure as for concentration plots
catdir = os.path.join(dst, filecat)
if not os.path.isdir(catdir):
os.mkdir(catdir)
if subdst is not None:
catdir = os.path.join(catdir, subdst)
if not os.path.isdir(catdir):
os.mkdir(catdir)
# Create emissions file for this benchmark species category
# If subdst is passed, add it to the pdf name (e.g. as
# is needed for the 1-year benchmarks).
if subdst is not None:
pdfname = os.path.join(
catdir,
f"{filecat}_Emissions_{subdst}.pdf"
)
else:
pdfname = os.path.join(
catdir,
f"{filecat}_Emissions.pdf"
)
# Create the PDF
compare_single_level(
refds,
refstr,
devds,
devstr,
varlist=varlist,
cmpres=cmpres,
ilev=0,
pdfname=pdfname,
flip_ref=flip_ref,
flip_dev=flip_dev,
log_color_scale=log_color_scale,
extra_title_txt=extra_title_txt,
weightsdir=weightsdir,
n_job=n_job,
spcdb_files=spcdb_files,
)
add_nested_bookmarks_to_pdf(
pdfname,
filecat,
emisdict,
warninglist
)
return catspc
#------------------------------------------------
# Create plots in parallel
# Turn of parallalization if n_job=1
if n_job != 1:
results = Parallel(n_jobs=n_job)(
delayed(createfile_bench_cat)(filecat)
for _, filecat in enumerate(catdict)
)
else:
results = []
for _, filecat in enumerate(catdict):
results.append(createfile_bench_cat(filecat))
#------------------------------------------------
allcatspc = [spc for result in results for spc in result]
# Give warning if emissions species is not assigned a benchmark
# category
for spc in emis_spc:
if spc not in allcatspc:
print(\
f"Warning: species {spc} has emissions diagnostics but is not"
" in benchmark_categories.yml"
)
# -------------------------------------------
# Clean up
# -------------------------------------------
del refds
del devds
gc.collect()
[docs]
def make_benchmark_emis_tables(
reflist,
refstr,
devlist,
devstr,
spcdb_files,
dst="./benchmark",
benchmark_type="FullChemBenchmark",
refmet=None,
devmet=None,
overwrite=False,
ref_interval=[2678400.0],
dev_interval=[2678400.0],
):
"""
Creates a text file containing emission totals by species and
category for benchmarking purposes.
Parameters
----------
reflist : str or list of str
Path name(s) of the emissions file(s) that constitute
the "Ref" (aka "Reference") data set.
refstr : str
A string to describe ref (e.g. version number).
devlist : str or list of str
Path name(s) of the emissions file(s) that constitute
the "Dev" (aka "Development") data set.
devstr : str
A string to describe dev (e.g. version number).
spcdb_files : list
Paths to species_database.yml files in Ref & Dev rundirs.
dst : str, optional
A string denoting the destination folder where the file
containing emissions totals will be written.
Default value: ./benchmark
benchmark_type : str, optional
A string denoting the type of benchmark output to plot, options are
FullChemBenchmark, TransportTracersBenchmark or CH4Benchmark.
Default value: "FullChemBenchmark"
refmet : str, optional
Path name for ref meteorology.
Default value: None
devmet : str, optional
Path name for dev meteorology.
Default value: None
overwrite : bool, optional
Set this flag to True to overwrite files in the
destination folder (specified by the dst argument).
Default value: False
ref_interval : list of float, optional
The length of the ref data interval in seconds. By default, interval
is set to [2678400.0], which is the number of seconds in July
(our 1-month benchmarking month).
Default value: [2678400.0]
dev_interval : list of float, optional
The length of the dev data interval in seconds. By default, interval
is set to [2678400.0], which is the number of seconds in July
(our 1-month benchmarking month).
Default value: [2678400.0]
"""
# ==================================================================
# Initialization
# ==================================================================
verify_variable_type(reflist, (str,list))
verify_variable_type(refstr, str)
verify_variable_type(devlist, (str,list))
verify_variable_type(devstr, str)
verify_variable_type(spcdb_files, list)
# Create the destination folder
make_directory(dst, overwrite)
# Create the "Tables" category folder if it does not exist
emisdir = os.path.join(dst, "Tables")
if not os.path.isdir(emisdir):
os.mkdir(emisdir)
# Replace whitespace in the ref and dev labels
refstr = replace_whitespace(refstr)
devstr = replace_whitespace(devstr)
# ==================================================================
# Read data from netCDF into Dataset objects
# ==================================================================
# Read the input datasets
# Also read the meteorology datasets if passed. These are optional
# since the refds and devds have variable AREA already (always true)
# and unit conversions do not require any meteorology.
if isinstance(reflist, str):
reflist = [reflist]
if isinstance(devlist, str):
devlist = [devlist]
refmetds = None
devmetds = None
refds = xr.open_mfdataset(
reflist,
drop_variables=SKIP_THESE_VARS
)
devds = xr.open_mfdataset(
devlist,
drop_variables=SKIP_THESE_VARS
)
if refmet is not None:
refmetds = xr.open_mfdataset(
refmet,
drop_variables=SKIP_THESE_VARS
)
if devmet is not None:
devmetds = xr.open_mfdataset(
devmet,
drop_variables=SKIP_THESE_VARS
)
# ==================================================================
# Create table of emissions
# ==================================================================
# Emissions species dictionary
spc_dict = read_config_file(
os.path.join(
os.path.dirname(__file__),
EMISSION_SPC
),
quiet=True
)
species=spc_dict[benchmark_type]
inv_dict = read_config_file(
os.path.join(
os.path.dirname(__file__),
EMISSION_INV
),
quiet=True
)
inventories=inv_dict[benchmark_type]
# Destination files
file_emis_totals = os.path.join(emisdir, "Emission_totals.txt")
file_inv_totals = os.path.join(emisdir, "Inventory_totals.txt")
# Create table of emissions by species
create_total_emissions_table(
refds,
refstr,
devds,
devstr,
species,
spcdb_files,
file_emis_totals,
ref_interval,
dev_interval,
template="Emis{}_",
refmetdata=refmetds,
devmetdata=devmetds,
)
# Create table of emissions by inventory
create_total_emissions_table(
refds,
refstr,
devds,
devstr,
inventories,
spcdb_files,
file_inv_totals,
ref_interval,
dev_interval,
template="Inv{}_",
refmetdata=refmetds,
devmetdata=devmetds,
)
# -------------------------------------------
# Clean up
# -------------------------------------------
del refds
del devds
del refmetds
del devmetds
gc.collect()
[docs]
def make_benchmark_jvalue_plots(
ref,
refstr,
dev,
devstr,
spcdb_files,
varlist=None,
dst="./benchmark",
subdst=None,
local_noon_jvalues=False,
cmpres=None,
plots=["sfc", "500hpa", "zonalmean"],
overwrite=False,
verbose=False,
flip_ref=False,
flip_dev=False,
log_color_scale=False,
sigdiff_files=None,
weightsdir='.',
n_job=-1,
time_mean=False,
):
"""
Creates PDF files containing plots of J-values for model
benchmarking purposes.
Parameters
----------
ref : str
Path name for the "Ref" (aka "Reference") data set.
refstr : str
A string to describe ref (e.g. version number).
dev : str
Path name for the "Dev" (aka "Development") data set.
This data set will be compared against the "Reference"
data set.
devstr : str
A string to describe dev (e.g. version number).
spcdb_files : list
Paths to species_database.yml files in Ref & Dev rundirs.
varlist : list of str, optional
List of J-value variables to plot. If not passed,
then all J-value variables common to both dev
and ref will be plotted. The varlist argument can be
a useful way of restricting the number of variables
plotted to the pdf file when debugging.
Default value: None
dst : str, optional
A string denoting the destination folder where a
PDF file containing plots will be written.
Default value: ./benchmark.
subdst : str, optional
A string denoting the sub-directory of dst where PDF
files containing plots will be written. In practice,
subdst is only needed for the 1-year benchmark output,
and denotes a date string (such as "Jan2016") that
corresponds to the month that is being plotted.
Default value: None
local_noon_jvalues : bool, optional
Set this flag to plot local noon J-values. This will
divide all J-value variables by the JNoonFrac counter,
which is the fraction of the time that it was local noon
at each location.
Default value: False
cmpres : str, optional
Grid resolution at which to compare ref and dev data, e.g. '1x1.25'.
plots : list of str, optional
List of plot types to create.
Default value: ['sfc', '500hpa', 'zonalmean']
overwrite : bool, optional
Set this flag to True to overwrite files in the
destination folder (specified by the dst argument).
Default value: False.
verbose : bool, optional
Set this flag to True to print extra informational output.
Default value: False
flip_ref : bool, optional
Set this flag to True to reverse the vertical level
ordering in the "Ref" dataset (in case "Ref" starts
from the top of atmosphere instead of the surface).
Default value: False
flip_dev : bool, optional
Set this flag to True to reverse the vertical level
ordering in the "Dev" dataset (in case "Dev" starts
from the top of atmosphere instead of the surface).
Default value: False
log_color_scale : bool, optional
Set this flag to True if you wish to enable plotting data
(not diffs) on a log color scale.
Default value: False
sigdiff_files : list of str, optional
Filenames that will contain the lists of J-values having
significant differences in the 'sfc', '500hpa', and
'zonalmean' plots. These lists are needed in order to
fill out the benchmark approval forms.
Default value: None
weightsdir : str, optional
Directory in which to place (and possibly reuse) xESMF regridder
netCDF files.
Default value: '.'
n_job : int, optional
Defines the number of simultaneous workers for parallel plotting.
Set to 1 to disable parallel plotting. Value of -1 allows the
application to decide.
Default value: -1
time_mean : bool, optional
Determines if we should average the datasets over time.
Default value: False
Notes
-----
Will create 4 files containing J-value plots:
1. Surface values
2. 500 hPa values
3. Full-column zonal mean values
4. Stratospheric zonal mean values
These can be toggled on/off with the plots keyword argument.
At present, we do not yet have the capability to split the
plots up into separate files per category (e.g. Oxidants,
Aerosols, etc.). This is primarily due to the fact that
we archive J-values from GEOS-Chem for individual species
but not family species. We could attempt to add this
functionality later if there is sufficient demand.
"""
# ==================================================================
# Initialization
# ==================================================================
# Create the directory for output
make_directory(dst, overwrite)
# Replace whitespace in the ref and dev labels
refstr = replace_whitespace(refstr)
devstr = replace_whitespace(devstr)
# Get the function that will read file(s) into a Dataset
reader = dataset_reader(time_mean, verbose=verbose)
# Ref dataset
try:
refds = reader(ref, drop_variables=SKIP_THESE_VARS)
except (OSError, IOError, FileNotFoundError) as e:
raise e(f"Could not find Ref file: {ref}") from e
# Dev dataset
try:
devds = reader(dev, drop_variables=SKIP_THESE_VARS)
except (OSError, IOError, FileNotFoundError) as e:
raise e(f"Could not find Ref file: {dev}") from e
# Compute mean of data over the time dimension (if time_mean=True)
if time_mean:
refds = dataset_mean(refds)
devds = dataset_mean(devds)
# Make sure that Ref and Dev datasets have the same variables.
# Variables that are in Ref but not in Dev will be added to Dev
# with all missing values (NaNs). And vice-versa.
[refds, devds] = add_missing_variables(refds, devds)
# Create regridding files if necessary
[_ for _ in create_regridders(refds, devds, weightsdir=weightsdir)]
# Get a list of the 3D variables in both datasets
if varlist is None:
vardict = compare_varnames(refds, devds, quiet=not verbose)
cmn = vardict["commonvars3D"]
# ==================================================================
# Local noon or continuously-averaged J-values?
# ==================================================================
if local_noon_jvalues:
# Get a list of local noon J-value variables
# (or use the varlist passed via tha argument list)
prefix = "JNoon_"
if varlist is None:
varlist = [v for v in cmn if prefix in v]
# Make sure JNoonFrac (fraction of times it was local noon
# in each column) is present in both Ref and Dev datasets
if "JNoonFrac" not in cmn:
msg = "JNoonFrac is not common to Ref and Dev datasets!"
raise ValueError(msg)
# JNoon_* are cumulative sums of local noon J-values; we need
# to divide these by JNoonFrac to get the average value
refds = divide_dataset_by_dataarray(refds,
refds["JNoonFrac"], varlist)
devds = divide_dataset_by_dataarray(devds,
devds["JNoonFrac"], varlist)
# Subfolder of dst where PDF files will be printed
catdir = "JValuesLocalNoon"
else:
# Get a list of continuously averaged J-value variables
# (or use the varlist passed via tha argument list)
prefix = "Jval"
if varlist is None:
varlist = [v for v in cmn if prefix in v]
# Subfolder of dst where PDF files will be printed
catdir = "JValues"
# ==================================================================
# Create the plots
# ==================================================================
# Make the output folder if it doesn't exist. If subdst is passed,
# then create a sub-folder of this directory (e.g. which is needed
# for the 1-year benchmarks)
jvdir = os.path.join(dst, catdir)
if not os.path.isdir(jvdir):
os.mkdir(jvdir)
if subdst is not None:
jvdir = os.path.join(jvdir, subdst)
if not os.path.isdir(jvdir):
os.mkdir(jvdir)
extra_title_txt = subdst
else:
extra_title_txt = None
# Surface plots
if "sfc" in plots:
if subdst is not None:
pdfname = os.path.join(
jvdir,
f"{prefix}_Surface_{subdst}.pdf"
)
else:
pdfname = os.path.join(
jvdir,
f"{prefix}_Surface.pdf"
)
diff_sfc = []
compare_single_level(
refds,
refstr,
devds,
devstr,
varlist=varlist,
ilev=0,
pdfname=pdfname,
flip_ref=flip_ref,
flip_dev=flip_dev,
log_color_scale=log_color_scale,
extra_title_txt=extra_title_txt,
sigdiff_list=diff_sfc,
weightsdir=weightsdir,
n_job=n_job,
spcdb_files=spcdb_files,
)
diff_sfc[:] = [v.replace(prefix, "") for v in diff_sfc]
add_bookmarks_to_pdf(
pdfname,
varlist,
remove_prefix=prefix,
verbose=verbose)
# 500hPa plots
if "500hpa" in plots:
if subdst is not None:
pdfname = os.path.join(
jvdir,
f"{prefix}_500hPa_{subdst}.pdf"
)
else:
pdfname = os.path.join(
jvdir, f"{prefix}_500hPa.pdf"
)
diff_500 = []
compare_single_level(
refds,
refstr,
devds,
devstr,
varlist=varlist,
cmpres=cmpres,
ilev=22,
pdfname=pdfname,
flip_ref=flip_ref,
flip_dev=flip_dev,
log_color_scale=log_color_scale,
extra_title_txt=extra_title_txt,
sigdiff_list=diff_500,
weightsdir=weightsdir,
n_job=n_job,
spcdb_files=spcdb_files,
)
diff_500[:] = [v.replace(prefix, "") for v in diff_500]
add_bookmarks_to_pdf(
pdfname,
varlist,
remove_prefix=prefix,
verbose=verbose
)
# Full-column zonal mean plots
if "zonalmean" in plots:
if subdst is not None:
pdfname = os.path.join(
jvdir,
f"{prefix}_FullColumn_ZonalMean_{subdst}.pdf"
)
else:
pdfname = os.path.join(
jvdir, f"{prefix}_FullColumn_ZonalMean.pdf"
)
diff_zm = []
compare_zonal_mean(
refds,
refstr,
devds,
devstr,
varlist=varlist,
pdfname=pdfname,
flip_ref=flip_ref,
flip_dev=flip_dev,
log_color_scale=log_color_scale,
extra_title_txt=extra_title_txt,
sigdiff_list=diff_zm,
weightsdir=weightsdir,
n_job=n_job,
spcdb_files=spcdb_files,
)
diff_zm[:] = [v.replace(prefix, "") for v in diff_zm]
add_bookmarks_to_pdf(
pdfname,
varlist,
remove_prefix=prefix,
verbose=verbose)
# Strat_ZonalMean plots will use a log-pressure Y-axis, with
# a range of 1..100 hPa, as per GCSC request. (bmy, 8/13/19)
if subdst is not None:
pdfname = os.path.join(
jvdir,
f"{prefix}_Strat_ZonalMean_{subdst}.pdf"
)
else:
pdfname = os.path.join(
jvdir,
f"{prefix}_Strat_ZonalMean.pdf"
)
compare_zonal_mean(
refds,
refstr,
devds,
devstr,
varlist=varlist,
pdfname=pdfname,
pres_range=[1, 100],
log_yaxis=True,
flip_ref=flip_ref,
flip_dev=flip_dev,
extra_title_txt=extra_title_txt,
log_color_scale=log_color_scale,
weightsdir=weightsdir,
n_job=n_job,
spcdb_files=spcdb_files,
)
add_bookmarks_to_pdf(
pdfname,
varlist,
remove_prefix=prefix,
verbose=verbose
)
# ==============================================================
# Write the lists of J-values that have significant differences,
# which we need to fill out the benchmark approval forms.
# ==============================================================
if sigdiff_files is not None:
for filename in sigdiff_files:
if "sfc" in plots:
if "sfc" in filename:
with open(filename, "a+", encoding=ENCODING) as f:
print("* J-Values: ", file=f, end="")
for v in diff_sfc:
print(f"{v} ", file=f, end="")
print(file=f)
f.close()
if "500" in plots:
if "500" in filename:
with open(filename, "a+", encoding=ENCODING) as f:
print("* J-Values: ", file=f, end="")
for v in diff_500:
print(f"{v} ", file=f, end="")
print(file=f)
f.close()
if "zonalmean" in plots or "zm" in plots:
if "zonalmean" in filename or "zm" in filename:
with open(filename, "a+", encoding=ENCODING) as f:
print("* J-Values: ", file=f, end="")
for v in diff_zm:
print(f"{v} ", file=f, end="")
print(file=f)
f.close()
# -------------------------------------------
# Clean up
# -------------------------------------------
del refds
del devds
gc.collect()
[docs]
def make_benchmark_collection_2d_var_plots(
ref,
refstr,
dev,
devstr,
colname,
spcdb_files,
var_prefix=None,
varlist=None,
dst="./benchmark",
cmpres=None,
overwrite=False,
verbose=False,
flip_ref=False,
flip_dev=False,
log_color_scale=False,
weightsdir='.',
n_job=-1,
time_mean=False,
):
"""
Creates PDF file containing plots comparing all 2D variables in
a collection without special handling.
Parameters
----------
ref : str
Path name for the "Ref" (aka "Reference") data set.
refstr : str
A string to describe ref (e.g. version number).
dev : str
Path name for the "Dev" (aka "Development") data set.
This data set will be compared against the "Reference"
data set.
devstr : str
A string to describe dev (e.g. version number).
colname : str
Name of file collection, to be used in PDF name.
spcdb_files : list
Paths to species_database.yml files in Ref & Dev rundirs.
var_prefix : str, optional
Variable name prefix in file to search for. If excluded then all 2D
variables will be plotted regardless of name.
Default value: None
varlist : list of str, optional
List of variables to plot. If not passed,
then all variables common to both dev
and ref will be plotted. The varlist argument can be
a useful way of restricting the number of variables
plotted to the pdf file when debugging.
Default value: None
dst : str, optional
A string denoting the destination folder where a
PDF file containing plots will be written.
Default value: ./benchmark.
cmpres : str, optional
Grid resolution at which to compare ref and dev data, e.g. '1x1.25'.
overwrite : bool, optional
Set this flag to True to overwrite files in the
destination folder (specified by the dst argument).
Default value: False.
verbose : bool, optional
Set this flag to True to print extra informational output.
Default value: False
flip_ref : bool, optional
Set this flag to True to reverse the vertical level
ordering in the "Ref" dataset (in case "Ref" starts
from the top of atmosphere instead of the surface).
Default value: False
flip_dev : bool, optional
Set this flag to True to reverse the vertical level
ordering in the "Dev" dataset (in case "Dev" starts
from the top of atmosphere instead of the surface).
Default value: False
log_color_scale : bool, optional
Set this flag to True if you wish to enable plotting data
(not diffs) on a log color scale.
Default value: False
weightsdir : str, optional
Directory in which to place (and possibly reuse) xESMF regridder
netCDF files.
Default value: '.'
n_job : int, optional
Defines the number of simultaneous workers for parallel plotting.
Set to 1 to disable parallel plotting. Value of -1 allows the
application to decide.
Default value: -1
time_mean : bool, optional
Determines if we should average the datasets over time.
Default value: False
Notes
-----
Will create 4 files containing the following plots:
1. Surface values
2. 500 hPa values
3. Full-column zonal mean values.
4. Stratospheric zonal mean values
These can be toggled on/off with the plots keyword argument.
"""
# ==================================================================
# Initialization
# ==================================================================
# Create the directory for output
make_directory(dst, overwrite)
# Replace whitespace in the ref and dev labels
refstr = replace_whitespace(refstr)
devstr = replace_whitespace(devstr)
# Get the function that will read file(s) into a Dataset
reader = dataset_reader(time_mean, verbose=verbose)
# Ref dataset
try:
refds = reader(ref, drop_variables=SKIP_THESE_VARS)
except (OSError, IOError, FileNotFoundError) as e:
raise e(f"Could not find Ref file: {ref}") from e
# Dev dataset
try:
devds = reader(dev, drop_variables=SKIP_THESE_VARS)
except (OSError, IOError, FileNotFoundError) as e:
raise e(f"Could not find Ref file: {dev}") from e
# Compute mean of data over the time dimension (if time_mean=True)
if time_mean:
refds = dataset_mean(refds)
devds = dataset_mean(devds)
# Make sure that Ref and Dev datasets have the same variables.
# Variables that are in Ref but not in Dev will be added to Dev
# with all missing values (NaNs). And vice-versa.
[refds, devds] = add_missing_variables(refds, devds)
# Create regridding files if necessary
[_ for _ in create_regridders(refds, devds, weightsdir=weightsdir)]
# Get a list of the 2D variables in both datasets
if varlist is None:
vardict = compare_varnames(refds, devds, quiet=not verbose)
cmn = vardict["commonvars2D"]
# Get a list of variables
# (or use the varlist passed via tha argument list)
if varlist is None:
if var_prefix is None:
varlist = cmn
else:
varlist = [v for v in cmn if var_prefix in v]
# ==================================================================
# Create the plots
# ==================================================================
# Make the output folder if it doesn't exist
bdir = os.path.join(dst, colname)
if not os.path.isdir(bdir):
os.mkdir(bdir)
extra_title_txt = None
pdfname = os.path.join(
bdir,
f"{colname}_2D.pdf"
)
compare_single_level(
refds,
refstr,
devds,
devstr,
varlist=varlist,
ilev=0,
pdfname=pdfname,
flip_ref=flip_ref,
flip_dev=flip_dev,
log_color_scale=log_color_scale,
extra_title_txt=extra_title_txt,
weightsdir=weightsdir,
n_job=n_job,
spcdb_files=spcdb_files,
)
add_bookmarks_to_pdf(
pdfname,
varlist,
verbose=verbose
)
# -------------------------------------------
# Clean up
# -------------------------------------------
del refds
del devds
gc.collect()
[docs]
def make_benchmark_collection_3d_var_plots(
ref,
refstr,
dev,
devstr,
colname,
spcdb_files,
var_prefix=None,
varlist=None,
dst="./benchmark",
cmpres=None,
plots=["sfc", "500hpa", "zonalmean"],
overwrite=False,
verbose=False,
flip_ref=False,
flip_dev=False,
log_color_scale=False,
weightsdir='.',
n_job=-1,
time_mean=False,
):
"""
Creates PDF files containing plots comparing all 3D variables in
a collection without special handling.
Parameters
----------
ref : str
Path name for the "Ref" (aka "Reference") data set.
refstr : str
A string to describe ref (e.g. version number).
dev : str
Path name for the "Dev" (aka "Development") data set.
This data set will be compared against the "Reference"
data set.
devstr : str
A string to describe dev (e.g. version number).
colname : str
Name of file collection, to be used in PDF name.
spcdb_files : list
Paths to species_database.yml files in Ref & Dev rundirs.
var_prefix : str, optional
Variable name prefix in file to search for. If excluded then all 2D
variables will be plotted regardless of name.
Default value: None
varlist : list of str, optional
List of J-value variables to plot. If not passed,
then all J-value variables common to both dev
and ref will be plotted. The varlist argument can be
a useful way of restricting the number of variables
plotted to the pdf file when debugging.
Default value: None
dst : str, optional
A string denoting the destination folder where a
PDF file containing plots will be written.
Default value: ./benchmark.
cmpres : str, optional
Grid resolution at which to compare ref and dev data, e.g. '1x1.25'.
plots : list of str, optional
List of plot types to create.
Default value: ['sfc', '500hpa', 'zonalmean']
overwrite : bool, optional
Set this flag to True to overwrite files in the
destination folder (specified by the dst argument).
Default value: False.
verbose : bool, optional
Set this flag to True to print extra informational output.
Default value: False
flip_ref : bool, optional
Set this flag to True to reverse the vertical level
ordering in the "Ref" dataset (in case "Ref" starts
from the top of atmosphere instead of the surface).
Default value: False
flip_dev : bool, optional
Set this flag to True to reverse the vertical level
ordering in the "Dev" dataset (in case "Dev" starts
from the top of atmosphere instead of the surface).
Default value: False
log_color_scale : bool, optional
Set this flag to True if you wish to enable plotting data
(not diffs) on a log color scale.
Default value: False
weightsdir : str, optional
Directory in which to place (and possibly reuse) xESMF regridder
netCDF files.
Default value: '.'
n_job : int, optional
Defines the number of simultaneous workers for parallel plotting.
Set to 1 to disable parallel plotting. Value of -1 allows the
application to decide.
Default value: -1
time_mean : bool, optional
Determines if we should average the datasets over time.
Default value: False
Notes
-----
Will create 4 files containing the following plots:
1. Surface values
2. 500 hPa values
3. Full-column zonal mean values.
4. Stratospheric zonal mean values
These can be toggled on/off with the plots keyword argument.
"""
# ==================================================================
# Initialization
# ==================================================================
# Create the directory for output
make_directory(dst, overwrite)
# Replace whitespace in the ref and dev labels
refstr = replace_whitespace(refstr)
devstr = replace_whitespace(devstr)
# Get the function that will read file(s) into a Dataset
reader = dataset_reader(time_mean, verbose=verbose)
# Ref dataset
try:
refds = reader(ref, drop_variables=SKIP_THESE_VARS)
except (OSError, IOError, FileNotFoundError) as e:
raise e(f"Could not find Ref file: {ref}") from e
# Dev dataset
try:
devds = reader(dev, drop_variables=SKIP_THESE_VARS)
except (OSError, IOError, FileNotFoundError) as e:
raise e(f"Could not find Ref file: {dev}") from e
# Compute mean of data over the time dimension (if time_mean=True)
if time_mean:
refds = dataset_mean(refds)
devds = dataset_mean(devds)
# Make sure that Ref and Dev datasets have the same variables.
# Variables that are in Ref but not in Dev will be added to Dev
# with all missing values (NaNs). And vice-versa.
[refds, devds] = add_missing_variables(refds, devds)
# Create regridding files if necessary
[_ for _ in create_regridders(refds, devds, weightsdir=weightsdir)]
# Get a list of the 3D variables in both datasets
if varlist is None:
vardict = compare_varnames(refds, devds, quiet=not verbose)
cmn = vardict["commonvars3D"]
# Get a list of variables
# (or use the varlist passed via tha argument list)
if varlist is None:
if var_prefix is None:
varlist = cmn
else:
varlist = [v for v in cmn if var_prefix in v]
# ==================================================================
# Create the plots
# ==================================================================
# Make the output folder if it doesn't exist
bdir = os.path.join(dst, colname)
if not os.path.isdir(bdir):
os.mkdir(bdir)
extra_title_txt = None
pdfname = os.path.join(
bdir,
f"{colname}.pdf"
)
# Surface plots
if "sfc" in plots:
pdfname = os.path.join(
bdir,
f"{colname}_Surface.pdf"
)
compare_single_level(
refds,
refstr,
devds,
devstr,
varlist=varlist,
ilev=0,
pdfname=pdfname,
flip_ref=flip_ref,
flip_dev=flip_dev,
log_color_scale=log_color_scale,
extra_title_txt=extra_title_txt,
weightsdir=weightsdir,
n_job=n_job,
spcdb_files=spcdb_files
)
add_bookmarks_to_pdf(
pdfname,
varlist,
verbose=verbose
)
# 500hPa plots
if "500hpa" in plots:
pdfname = os.path.join(
bdir,
f"{colname}_500hPa.pdf"
)
compare_single_level(
refds,
refstr,
devds,
devstr,
varlist=varlist,
cmpres=cmpres,
ilev=22,
pdfname=pdfname,
flip_ref=flip_ref,
flip_dev=flip_dev,
log_color_scale=log_color_scale,
extra_title_txt=extra_title_txt,
weightsdir=weightsdir,
n_job=n_job,
spcdb_files=spcdb_files,
)
add_bookmarks_to_pdf(
pdfname,
varlist,
verbose=verbose
)
# Full-column zonal mean plots
if "zonalmean" in plots:
pdfname = os.path.join(
bdir,
f"{colname}_ZonalMean.pdf"
)
compare_zonal_mean(
refds,
refstr,
devds,
devstr,
varlist=varlist,
pdfname=pdfname,
flip_ref=flip_ref,
flip_dev=flip_dev,
log_color_scale=log_color_scale,
extra_title_txt=extra_title_txt,
weightsdir=weightsdir,
n_job=n_job,
spcdb_files=spcdb_files
)
add_bookmarks_to_pdf(
pdfname,
varlist,
verbose=verbose
)
# Strat_ZonalMean plots will use a log-pressure Y-axis, with
# a range of 1..100 hPa, as per GCSC request. (bmy, 8/13/19)
pdfname = os.path.join(
bdir,
f"{colname}_Strat_ZonalMean.pdf"
)
compare_zonal_mean(
refds,
refstr,
devds,
devstr,
varlist=varlist,
pdfname=pdfname,
pres_range=[1, 100],
log_yaxis=True,
flip_ref=flip_ref,
flip_dev=flip_dev,
extra_title_txt=extra_title_txt,
log_color_scale=log_color_scale,
weightsdir=weightsdir,
n_job=n_job,
spcdb_files=spcdb_files
)
add_bookmarks_to_pdf(
pdfname,
varlist,
verbose=verbose
)
# -------------------------------------------
# Clean up
# -------------------------------------------
del refds
del devds
gc.collect()
[docs]
def make_benchmark_aod_plots(
ref,
refstr,
dev,
devstr,
spcdb_files,
varlist=None,
dst="./benchmark",
subdst=None,
cmpres=None,
overwrite=False,
verbose=False,
log_color_scale=False,
sigdiff_files=None,
weightsdir='.',
n_job=-1,
time_mean=False,
):
"""
Creates PDF files containing plots of column aerosol optical
depths (AODs) for model benchmarking purposes.
Parameters
----------
ref : str
Path name for the "Ref" (aka "Reference") data set.
refstr : str
A string to describe ref (e.g. version number).
dev : str
Path name for the "Dev" (aka "Development") data set.
This data set will be compared against the "Reference"
data set.
devstr : str
A string to describe dev (e.g. version number).
spcdb_files : list
Paths to species_database.yml files in Ref & Dev rundirs.
varlist : list of str, optional
List of AOD variables to plot. If not passed, then all
AOD variables common to both Dev and Ref will be plotted.
Use the varlist argument to restrict the number of
variables plotted to the pdf file when debugging.
Default value: None
dst : str, optional
A string denoting the destination folder where a
PDF file containing plots will be written.
Default value: ./benchmark.
subdst : str, optional
A string denoting the sub-directory of dst where PDF
files containing plots will be written. In practice,
subdst is only needed for the 1-year benchmark output,
and denotes a date string (such as "Jan2016") that
corresponds to the month that is being plotted.
Default value: None
cmpres : str, optional
Grid resolution at which to compare ref and dev data, e.g. '1x1.25'.
overwrite : bool, optional
Set this flag to True to overwrite files in the
destination folder (specified by the dst argument).
Default value: False.
verbose : bool, optional
Set this flag to True to print extra informational output.
Default value: False
log_color_scale : bool, optional
Set this flag to True to enable plotting data (not diffs)
on a log color scale.
Default value: False
sigdiff_files : list of str, optional
Filenames that will contain the list of quantities having
having significant differences in the column AOD plots.
These lists are needed in order to fill out the benchmark
approval forms.
Default value: None
weightsdir : str, optional
Directory in which to place (and possibly reuse) xESMF regridder
netCDF files.
Default value: '.'
n_job : int, optional
Defines the number of simultaneous workers for parallel plotting.
Set to 1 to disable parallel plotting. Value of -1 allows the
application to decide.
Default value: -1
time_mean : bool, optional
Determines if we should average the datasets over time.
Default value: False
"""
# ==================================================================
# Initialization and also read data
# ==================================================================
# Create destination plots directory
make_directory(dst, overwrite)
# Create the "Aerosols" directory as a subfolder of dst.
# If subdst is passed, then create a subdirectory of the "Aerosols"
# directory (e.g. which is needed for the 1-year benchmarks).
aoddir = os.path.join(dst, "Aerosols")
if not os.path.isdir(aoddir):
os.mkdir(aoddir)
if subdst is not None:
aoddir = os.path.join(aoddir, subdst)
if not os.path.isdir(aoddir):
os.mkdir(aoddir)
extra_title_txt = subdst
else:
extra_title_txt = None
# Replace whitespace in the ref and dev labels
refstr = replace_whitespace(refstr)
devstr = replace_whitespace(devstr)
# Get the function that will read file(s) into a dataset
reader = dataset_reader(time_mean, verbose=verbose)
# Read the Ref dataset
try:
refds = reader(ref, drop_variables=SKIP_THESE_VARS)
except (OSError, IOError, FileNotFoundError) as e:
raise e(f"Could not find Ref file: {ref}") from e
# Read the Dev dataset
try:
devds = reader(dev, drop_variables=SKIP_THESE_VARS)
except (OSError, IOError, FileNotFoundError) as e:
raise e(f"Could not find Ref file: {dev}") from e
# Compute mean of data over the time dimension (if time_mean=True)
if time_mean:
refds = dataset_mean(refds)
devds = dataset_mean(devds)
# Create regridding files if necessary
[_ for _ in create_regridders(refds, devds, weightsdir=weightsdir)]
# NOTE: GCHP diagnostic variable exports are defined before the
# input.geos file is read. This means "WL1" will not have been
# replaced with "550nm" in the variable names. Do this string
# replace operation here, so that we can compare GCC and GCHP
# data directly. (bmy, 4/29/19)
with xr.set_options(keep_attrs=True):
# Rename variables in the Ref dataset
old2new = {}
for v in refds.data_vars.keys():
if "WL1" in v:
newname = v.replace("WL1", "550nm")
old2new[v] = newname
refds = refds.rename(old2new)
# Rename variables in the Dev dataset
old2new = {}
for v in devds.data_vars.keys():
if "WL1" in v:
newname = v.replace("WL1", "550nm")
old2new[v] = newname
devds = devds.rename(old2new)
# Make sure that Ref and Dev datasets have the same variables.
# Variables that are in Ref but not in Dev will be added to Dev
# with all missing values (NaNs). And vice-versa.
[refds, devds] = add_missing_variables(refds, devds)
# Find common AOD variables in both datasets
# (or use the varlist passed via keyword argument)
if varlist is None:
vardict = compare_varnames(refds, devds, quiet=not verbose)
cmn3D = vardict["commonvars3D"]
varlist = [v for v in cmn3D if "AOD" in v]
# Dictionary and list for new display names
newvars = read_config_file(
os.path.join(
os.path.dirname(__file__),
AOD_SPC
),
quiet=True
)
newvarlist = []
# ==================================================================
# Compute the total AOD by summing over the constituent members
# ==================================================================
# Take one of the variables so we can use its dims, coords,
# attrs to create the DataArray object for total AOD
v = varlist[0]
# Create a DataArray to hold total column AOD
# This is the same shape as the DataArray objects in refds
reftot = xr.DataArray(
np.zeros(refds[v].values.shape),
name="AODTotal",
dims=refds[v].dims,
coords=refds[v].coords,
attrs=refds[v].attrs,
)
# Create a DataArray to hold total column AOD
# This is the same shape as the DataArray objects in devds
devtot = xr.DataArray(
np.zeros(devds[v].values.shape),
name="AODTotal",
dims=devds[v].dims,
coords=devds[v].coords,
attrs=devds[v].attrs,
)
# Save the variable attributes so that we can reattach them
refattrs = reftot.attrs
devattrs = devtot.attrs
# Compute the sum of all AOD variables
# Avoid double-counting the following:
# (1) Individual dust AODs, use the column AOD total instead
# (2) SOA from aqueous isoprene, which is already accounted
# for in AODHyg550nm_OCPI. Also see Github issue:
# https://github.com/geoschem/gcpy/issues/65
for v in varlist:
if "_bin" in v or "AODSOAfromAqIsoprene550nm" in v:
continue
reftot = reftot + refds[v]
devtot = devtot + devds[v]
# Reattach the variable attributes
reftot.name = "AODTotal"
reftot.attrs = refattrs
reftot.attrs["long_name"] = "Total aerosol optical depth"
devtot.name = "AODTotal"
devtot.attrs = devattrs
devtot.attrs["long_name"] = "Total aerosol optical depth"
# Merge these variables back into the dataset
refds = xr.merge([refds, reftot])
devds = xr.merge([devds, devtot])
# Also add AODTotal to the list
varlist.append("AODTotal")
# ==================================================================
# Compute column AODs
# Create a new DataArray for each column AOD variable,
# using the new display name, and preserving attributes.
# Merge the new DataArrays back into the DataSets.
# ==================================================================
for v in varlist:
# Get the new name for each AOD variable (it's easier to display)
if v in newvars:
newname = newvars[v]
newvarlist.append(newname)
else:
raise ValueError(f"Could not find a display name for {v}")
# Don't clobber existing DataArray and Dataset attributes
with xr.set_options(keep_attrs=True):
# Add column AOD of newname to Ref
array = refds[v].sum(dim="lev")
array.name = newname
array.attrs["units"] = "1"
refds = xr.merge([refds, array])
# Add column AOD of newname to Dev
array = devds[v].sum(dim="lev")
array.name = newname
array.attrs["units"] = "1"
devds = xr.merge([devds, array])
# ==================================================================
# Create the plots
# ==================================================================
if subdst is not None:
pdfname = os.path.join(
aoddir,
f"Aerosols_ColumnOptDepth_{subdst}.pdf"
)
else:
pdfname = os.path.join(
aoddir,
"Aerosols_ColumnOptDepth.pdf"
)
diff_aod = []
compare_single_level(
refds,
refstr,
devds,
devstr,
varlist=newvarlist,
cmpres=cmpres,
ilev=0,
pdfname=pdfname,
log_color_scale=log_color_scale,
extra_title_txt=extra_title_txt,
sigdiff_list=diff_aod,
weightsdir=weightsdir,
n_job=n_job,
spcdb_files=spcdb_files
)
diff_aod[:] = [v.replace("Column_AOD_", "") for v in diff_aod]
add_bookmarks_to_pdf(
pdfname,
newvarlist,
remove_prefix="Column_AOD_",
verbose=verbose
)
# ==================================================================
# Write the list of AOD quantities having significant differences,
# which we will need to fill out the benchmark forms.
# ==================================================================
if sigdiff_files is not None:
for filename in sigdiff_files:
if "sfc" in filename:
with open(filename, "a+", encoding=ENCODING) as f:
print("* Column AOD: ", file=f, end="")
for v in diff_aod:
print(f"{v} ", file=f, end="")
print(file=f)
f.close()
# -------------------------------------------
# Clean up
# -------------------------------------------
del refds
del devds
gc.collect()
[docs]
def make_benchmark_mass_tables(
ref,
refstr,
dev,
devstr,
spcdb_files,
varlist=None,
dst="./benchmark",
subdst=None,
overwrite=False,
verbose=False,
ref_hdr_label="",
dev_hdr_label="",
ref_met_extra=None,
dev_met_extra=None,
):
"""
Creates a text file containing global mass totals by species and
category for benchmarking purposes.
Parameters
----------
ref : str
Pathname that will constitute
the "Ref" (aka "Reference") data set.
refstr : str
A string to describe ref (e.g. version number).
dev : str
Pathname that will constitute
the "Dev" (aka "Development") data set. The "Dev"
data set will be compared against the "Ref" data set.
devstr : str
A string to describe dev (e.g. version number).
spcdb_files : list
Paths to species_database.yml files in Ref & Dev rundirs.
varlist : list of str, optional
List of variables to include in the list of totals.
If omitted, then all variables that are found in either
"Ref" or "Dev" will be included. The varlist argument
can be a useful way of reducing the number of
variables during debugging and testing.
Default value: None
dst : str, optional
A string denoting the destination folder where the file
containing emissions totals will be written.
Default value: ./benchmark
subdst : str, optional
A string denoting the sub-directory of dst where PDF
files containing plots will be written. In practice,
subdst is only needed for the 1-year benchmark output,
and denotes a date string (such as "Jan2016") that
corresponds to the month that is being plotted.
Default value: None
overwrite : bool, optional
Set this flag to True to overwrite files in the
destination folder (specified by the dst argument).
Default value: False
verbose : bool, optional
Set this flag to True to print extra informational output.
Default value: False.
ref_hdr_label : str, optional
Label for Ref, placed after refstr in the file header.
Default value: ""
dev_hdr_label : str, optional
Label for Dev, placed after devstr in the file header.
Default value: ""
ref_met_extra : str, optional
Path to ref Met file containing area data for use with restart files
which do not contain the Area variable.
Default value: ''
dev_met_extra : str, optional
Path to dev Met file containing area data for use with restart files
which do not contain the Area variable.
Default value: ''
"""
# ==================================================================
# Initialization
# ==================================================================
# Replace whitespace in the ref and dev labels
refstr = replace_whitespace(refstr)
devstr = replace_whitespace(devstr)
# ==================================================================
# Define destination directory
# ==================================================================
if os.path.isdir(dst) and not overwrite:
msg = "Directory {} exists. Pass overwrite=True to overwrite " \
+ "files in that directory, if any."
msg = msg.format(dst)
raise ValueError(msg)
if not os.path.isdir(dst):
try:
os.makedirs(dst)
except FileExistsError:
pass
# ==================================================================
# Read data from netCDF into Dataset objects
# ==================================================================
# Read data
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=xr.SerializationWarning)
refds = xr.open_dataset(ref, drop_variables=SKIP_THESE_VARS)
devds = xr.open_dataset(dev, drop_variables=SKIP_THESE_VARS)
# ==================================================================
# Update GCHP restart dataset (if any)
# ==================================================================
# If the data is from a GCHP restart file, rename variables and
# flip levels to match the GEOS-Chem Classic naming and level
# conventions. Otherwise no changes will be made.
refds = rename_and_flip_gchp_rst_vars(refds)
devds = rename_and_flip_gchp_rst_vars(devds)
# ==================================================================
# Make sure that all necessary meteorological variables are found
# ==================================================================
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=xr.SerializationWarning)
# Find the area variable in Ref
if ref_met_extra is None:
ref_area = get_area_from_dataset(refds)
else:
ref_area = get_area_from_dataset(
xr.open_dataset(
ref_met_extra,
drop_variables=SKIP_THESE_VARS
)
)
# Find the area variable in Dev
if dev_met_extra is None:
dev_area = get_area_from_dataset(devds)
else:
dev_area = get_area_from_dataset(
xr.open_dataset(
dev_met_extra,
drop_variables=SKIP_THESE_VARS
)
)
# Find required meteorological variables in Ref
# (or exit with an error if we can't find them)
metvar_list = ["Met_DELPDRY", "Met_BXHEIGHT", "Met_TropLev"]
refmet = get_variables_from_dataset(refds, metvar_list)
devmet = get_variables_from_dataset(devds, metvar_list)
# ==================================================================
# Make sure that all necessary species are found
# ==================================================================
# Get lists of variables names in datasets
vardict = compare_varnames(refds, devds, quiet=not verbose)
commonvars = vardict["commonvars3D"]
refonly = vardict['refonly']
devonly = vardict['devonly']
# Narrow down the lists to only include species
commonspc = [v for v in commonvars if "SpeciesRst_" in v]
refonlyspc = [v for v in refonly if v.startswith('SpeciesRst_')]
devonlyspc = [v for v in devonly if v.startswith('SpeciesRst_')]
# Add ref only species to dev dataset with all nan values
if refonlyspc:
for v in refonlyspc:
devds[v] = devds[commonspc[0]]
devds[v].data = np.full(devds[v].shape, np.nan)
devds[v].attrs['units'] = refds[v].units
commonspc.append(v)
# Add dev only species to ref dataset with all nan values
if devonlyspc:
for v in devonlyspc:
refds[v] = refds[commonspc[0]]
refds[v].data = np.full(refds[v].shape, np.nan)
devds[v].attrs['units'] = refds[v].units
commonspc.append(v)
# Set list of variables to print in mass table. If this list was passed
# as argument, check that all the vars are now in commonspc to ensure
# in both datasets.
if varlist:
for v in varlist:
if v not in commonspc:
raise ValueError(
f"{dst} folder error: Variable {v} in varlist passed to make_benchmark_mass_tables is not present in Ref and Dev datasets"
)
else:
varlist = commonspc
# Sort the list of species to be printed alphabetically
varlist.sort()
# ==================================================================
# Create the mask arrays for the troposphere for Ref and Dev
# ==================================================================
ref_tropmask = get_troposphere_mask(refmet)
dev_tropmask = get_troposphere_mask(devmet)
# ==================================================================
# Create a dictionary to hold all of the meterological
# variables and mask variables that we need to pass down
# ==================================================================
met_and_masks = {
"Ref_Area": ref_area,
"Dev_Area": dev_area,
"Ref_Delta_P": refmet["Met_DELPDRY"],
"Dev_Delta_P": devmet["Met_DELPDRY"],
"Ref_BxHeight": refmet["Met_BXHEIGHT"],
"Dev_BxHeight": devmet["Met_BXHEIGHT"],
"Ref_TropMask": ref_tropmask,
"Dev_TropMask": dev_tropmask,
}
# ==================================================================
# Create global mass table
# ==================================================================
if subdst is not None:
mass_filename = f"GlobalMass_TropStrat_{subdst}.txt"
else:
mass_filename = "GlobalMass_TropStrat.txt"
mass_file = os.path.join(dst, mass_filename)
create_global_mass_table(
refds,
refstr,
devds,
devstr,
varlist,
met_and_masks,
spcdb_files,
ref_hdr_label=ref_hdr_label,
dev_hdr_label=dev_hdr_label,
outfilename=mass_file,
verbose=verbose,
)
# ==================================================================
# Create tropospheric mass table
# ==================================================================
# If a file name has not been specified, then use the "filename"
# keyword argument. Otherwise generate a default filename.
if subdst is not None:
mass_filename = f"GlobalMass_Trop_{subdst}.txt"
else:
mass_filename = 'GlobalMass_Trop.txt'
mass_file = os.path.join(dst, mass_filename)
create_global_mass_table(
refds,
refstr,
devds,
devstr,
varlist,
met_and_masks,
spcdb_files,
ref_hdr_label=ref_hdr_label,
dev_hdr_label=dev_hdr_label,
outfilename=mass_file,
trop_only=True,
verbose=verbose,
)
# -------------------------------------------
# Clean up
# -------------------------------------------
del refds
del devds
gc.collect()
[docs]
def make_benchmark_mass_accumulation_tables(
ref_start,
ref_end,
refstr,
refperiodstr,
dev_start,
dev_end,
devstr,
devperiodstr,
spcdb_files,
varlist=None,
dst="./benchmark",
subdst=None,
overwrite=False,
verbose=False,
label="at end of simulation",
):
"""
Creates a text file containing global mass totals by species and
category for benchmarking purposes.
Parameters
----------
ref_start : list of str
Pathname that will constitute
the "Ref" (aka "Reference") data set.
ref_end : list of str
Pathname that will constitute
the "Ref" (aka "Reference") data set.
refstr : str
A string to describe ref (e.g. version number).
refperiodstr : str
Ref simulation period start and end.
dev_start : list of str
Pathname that will constitute
the "Dev" (aka "Development") data set. The "Dev"
data set will be compared against the "Ref" data set.
dev_end : list of str
Pathname that will constitute
the "Dev" (aka "Development") data set. The "Dev"
data set will be compared against the "Ref" data set.
devstr : str
A string to describe dev (e.g. version number).
devperiodstr : str
Dev simulation period start and end.
spcdb_files : list
Paths to species_database.yml files in Ref & Dev rundirs.
varlist : list of str, optional
List of variables to include in the list of totals.
If omitted, then all variables that are found in either
"Ref" or "Dev" will be included. The varlist argument
can be a useful way of reducing the number of
variables during debugging and testing.
Default value: None
dst : str, optional
A string denoting the destination folder where the file
containing emissions totals will be written.
Default value: ./benchmark
subdst : str, optional
A string denoting the sub-directory of dst where PDF
files containing plots will be written. In practice,
subdst is only needed for the 1-year benchmark output,
and denotes a date string (such as "Jan2016") that
corresponds to the month that is being plotted.
Default value: None
overwrite : bool, optional
Set this flag to True to overwrite files in the
destination folder (specified by the dst argument).
Default value: False
verbose : bool, optional
Set this flag to True to print extra informational output.
Default value: False.
"""
# ==================================================================
# Initialization
# ==================================================================
# Replace whitespace in the ref and dev labels
refstr = replace_whitespace(refstr)
devstr = replace_whitespace(devstr)
# ==================================================================
# Define destination directory
# ==================================================================
if os.path.isdir(dst) and not overwrite:
msg = "Directory {} exists. Pass overwrite=True to overwrite " \
+ "files in that directory, if any."
msg = msg.format(dst)
raise ValueError(msg)
if not os.path.isdir(dst):
try:
os.makedirs(dst)
except FileExistsError:
pass
# ==================================================================
# Read data from netCDF into Dataset objects
# ==================================================================
print('Creating mass accumulation tables from four restart files:')
print(f' Ref start: {ref_start}')
print(f' Ref end: {ref_end}')
print(f' Dev start: {dev_start}')
print(f' Dev end: {dev_end}')
# Read data
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=xr.SerializationWarning)
refSds = xr.open_dataset(ref_start, drop_variables=SKIP_THESE_VARS)
refEds = xr.open_dataset(ref_end, drop_variables=SKIP_THESE_VARS)
devSds = xr.open_dataset(dev_start, drop_variables=SKIP_THESE_VARS)
devEds = xr.open_dataset(dev_end, drop_variables=SKIP_THESE_VARS)
# ==================================================================
# Update GCHP restart dataset if needed
# ==================================================================
# If the data is from a GCHP restart file, rename variables and
# flip levels to match the GEOS-Chem Classic naming and level
# conventions. Otherwise no changes will be made.
refSds = rename_and_flip_gchp_rst_vars(refSds)
refEds = rename_and_flip_gchp_rst_vars(refEds)
devSds = rename_and_flip_gchp_rst_vars(devSds)
devEds = rename_and_flip_gchp_rst_vars(devEds)
# Add area to start restart dataset if area in end but not start
# Need to consider area variable names used in both GC-Classic and GCHP
# Should put this in a function (todo)
refSkeys = refSds.data_vars.keys()
refEkeys = refEds.data_vars.keys()
devSkeys = devSds.data_vars.keys()
devEkeys = devEds.data_vars.keys()
areaVars = ["Met_AREAM2", "AREA"]
for areaVar in areaVars:
if areaVar in refEkeys and areaVar not in refSkeys:
refSds[areaVar] = refEds[areaVar]
if areaVar in devEkeys and areaVar not in devSkeys:
devSds[areaVar] = devEds[areaVar]
# ==================================================================
# Make sure that all necessary meteorological variables are found
# ==================================================================
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=xr.SerializationWarning)
# Find the area variable in Ref
refs_area = get_area_from_dataset(refSds)
refe_area = get_area_from_dataset(refEds)
# Find the area variable in Dev
devs_area = get_area_from_dataset(devSds)
deve_area = get_area_from_dataset(devEds)
# Find required meteorological variables in Ref
# (or exit with an error if we can't find them)
metvar_list = ["Met_DELPDRY", "Met_BXHEIGHT", "Met_TropLev"]
refsmet = get_variables_from_dataset(refSds, metvar_list)
refemet = get_variables_from_dataset(refEds, metvar_list)
devsmet = get_variables_from_dataset(devSds, metvar_list)
devemet = get_variables_from_dataset(devEds, metvar_list)
# ==================================================================
# Make sure that all necessary species are found
# ==================================================================
# Get lists of variables names in datasets
vardict = compare_varnames(refSds, devSds, quiet=not verbose)
commonvars = vardict["commonvars3D"]
refonly = vardict['refonly']
devonly = vardict['devonly']
# Narrow down the lists to only include species
commonspc = [v for v in commonvars if "SpeciesRst_" in v]
refonlyspc = [v for v in refonly if v.startswith('SpeciesRst_')]
devonlyspc = [v for v in devonly if v.startswith('SpeciesRst_')]
# Add ref only species to dev dataset with all nan values
if refonlyspc:
for v in refonlyspc:
devSds[v] = devSds[commonspc[0]]
devSds[v].data = np.full(devSds[v].shape, np.nan)
devSds[v].attrs['units'] = refSds[v].units
devEds[v] = devEds[commonspc[0]]
devEds[v].data = np.full(devEds[v].shape, np.nan)
devEds[v].attrs['units'] = refEds[v].units
commonspc.append(v)
# Add dev only species to ref dataset with all nan values
if devonlyspc:
for v in devonlyspc:
refSds[v] = refSds[commonspc[0]]
refSds[v].data = np.full(refSds[v].shape, np.nan)
devSds[v].attrs['units'] = refSds[v].units
refEds[v] = refEds[commonspc[0]]
refEds[v].data = np.full(refEds[v].shape, np.nan)
devEds[v].attrs['units'] = refEds[v].units
commonspc.append(v)
# Set list of variables to print in mass table. If this list was passed
# as argument, check that all the vars are now in commonspc to ensure
# in both datasets.
if varlist:
for v in varlist:
if v not in commonspc:
raise ValueError(
f"{dst} folder error: Variable {v} in varlist passed to make_benchmark_mass_tables is not present in Ref and Dev datasets"
)
else:
varlist = commonspc
# Sort the list of species to be printed alphabetically
varlist.sort()
# ==================================================================
# Create the mask arrays for the troposphere for Ref and Dev
# ==================================================================
refs_tropmask = get_troposphere_mask(refsmet)
refe_tropmask = get_troposphere_mask(refemet)
devs_tropmask = get_troposphere_mask(devsmet)
deve_tropmask = get_troposphere_mask(devemet)
# ==================================================================
# Create a dictionary to hold all of the meterological
# variables and mask variables that we need to pass down
# ==================================================================
met_and_masks = {
"Refs_Area": refs_area,
"Refe_Area": refe_area,
"Devs_Area": devs_area,
"Deve_Area": deve_area,
"Refs_Delta_P": refsmet["Met_DELPDRY"],
"Refe_Delta_P": refemet["Met_DELPDRY"],
"Devs_Delta_P": devsmet["Met_DELPDRY"],
"Deve_Delta_P": devemet["Met_DELPDRY"],
"Refs_BxHeight": refsmet["Met_BXHEIGHT"],
"Refe_BxHeight": refemet["Met_BXHEIGHT"],
"Devs_BxHeight": devsmet["Met_BXHEIGHT"],
"Deve_BxHeight": devemet["Met_BXHEIGHT"],
"Refs_TropMask": refs_tropmask,
"Refe_TropMask": refe_tropmask,
"Devs_TropMask": devs_tropmask,
"Deve_TropMask": deve_tropmask,
}
# ==================================================================
# Create global mass accumulation table
# ==================================================================
if subdst is not None:
mass_filename = f"GlobalMassAccumulation_TropStrat_{subdst}.txt"
else:
mass_filename = "GlobalMassAccumulation_TropStrat.txt"
mass_file = os.path.join(dst, mass_filename)
create_mass_accumulation_table(
refSds,
refEds,
refstr,
refperiodstr,
devSds,
devEds,
devstr,
devperiodstr,
varlist,
met_and_masks,
label,
outfilename=mass_file,
verbose=verbose,
spcdb_files=spcdb_files,
)
## ==================================================================
## Create tropospheric mass table
## ==================================================================
#if subdst is not None:
# mass_filename = f"GlobalMassAccumulation_Trop_{subdst}.txt"
#else:
# mass_filename = 'GlobalMassAccumulation_Trop.txt'
#mass_file = os.path.join(dst, mass_filename)
#create_mass_accumulation_table(
# refSds,
# refEds,
# refstr,
# devSds,
# devEds,
# devstr,
# varlist,
# met_and_masks,
# label,
# outfilename=mass_file,
# trop_only=True,
# verbose=verbose,
# spcdb_files=spcdb_files,
#)
# -------------------------------------------
# Clean up
# -------------------------------------------
del refSds
del refEds
del devSds
del devEds
gc.collect()
[docs]
def make_benchmark_oh_metrics(
ref,
refmet,
refstr,
dev,
devmet,
devstr,
dst="./benchmark",
overwrite=False,
):
"""
Creates a text file containing metrics of global mean OH, MCF lifetime,
and CH4 lifetime for benchmarking purposes.
Parameters
----------
ref : str
Path name of "Ref" (aka "Reference") data set file.
refmet : str
Path name of ref meteorology data set.
refstr : str
A string to describe ref (e.g. version number).
dev : str
Path name of "Dev" (aka "Development") data set file.
The "Dev" data set will be compared against the "Ref" data set.
devmet : list of str
Path name of dev meteorology data set.
devstr : str
A string to describe dev (e.g. version number).
dst : str, optional
A string denoting the destination folder where the file
containing emissions totals will be written.
Default value: "./benchmark"
overwrite : bool, optional
Set this flag to True to overwrite files in the
destination folder (specified by the dst argument).
Default value: False
"""
# ==================================================================
# Initialization
# ==================================================================
# Define destination directory
make_directory(dst, overwrite)
# Replace whitespace in the ref and dev labels
refstr = replace_whitespace(refstr)
devstr = replace_whitespace(devstr)
# ==================================================================
# Read data from netCDF into Dataset objects
# ==================================================================
refds = xr.open_dataset(ref, drop_variables=SKIP_THESE_VARS)
devds = xr.open_dataset(dev, drop_variables=SKIP_THESE_VARS)
refmetds = xr.open_dataset(refmet, drop_variables=SKIP_THESE_VARS)
devmetds = xr.open_dataset(devmet, drop_variables=SKIP_THESE_VARS)
# ==================================================================
# Get tropopause mask
# ==================================================================
# Find the area variables in Ref and Dev
#ref_area = get_area_from_dataset(refds)
#dev_area = get_area_from_dataset(devds)
# Find required meteorological variables in Ref
# (or exit with an error if we can't find them)
metvar_list = [
"Met_AD",
"Met_AIRDEN",
"Met_BXHEIGHT",
"Met_T",
"Met_TropLev",
"FracOfTimeInTrop",
]
refmet = get_variables_from_dataset(refmetds, metvar_list)
devmet = get_variables_from_dataset(devmetds, metvar_list)
# Create the mask arrays for the troposphere for Ref and Dev
ref_tropmask = get_troposphere_mask(refmetds)
dev_tropmask = get_troposphere_mask(devmetds)
# ==================================================================
# Compute mass-weighted OH in the troposphere
# ==================================================================
# Ref
ref_oh_trop = np.ma.masked_array(refds["OHconcAfterChem"].values,
ref_tropmask)
ref_airmass_trop = np.ma.masked_array(refmetds["Met_AD"].values,
ref_tropmask)
ref_oh_mass = ref_oh_trop * ref_airmass_trop
ref_total_ohmass = np.sum(ref_oh_mass)
ref_total_airmass = np.sum(ref_airmass_trop)
ref_mean_oh = (ref_total_ohmass / ref_total_airmass) / 1e5
# Dev
dev_oh_trop = np.ma.masked_array(devds["OHconcAfterChem"].values,
dev_tropmask)
dev_airmass_trop = np.ma.masked_array(devmetds["Met_AD"].values,
dev_tropmask)
dev_oh_mass = dev_oh_trop * dev_airmass_trop
dev_total_ohmass = np.sum(dev_oh_mass)
dev_total_airmass = np.sum(dev_airmass_trop)
dev_mean_oh = (dev_total_ohmass / dev_total_airmass) / 1e5
# Diff
oh_diff = dev_mean_oh - ref_mean_oh
oh_pctdiff = ((dev_mean_oh - ref_mean_oh) / ref_mean_oh) * 100.0
# ==================================================================
# Compute MCF and CH4 lifetimes
# ==================================================================
# Select only boxes that are purely tropospheric
# This excludes influence from the stratosphere
ref_timetrop_mask = refmetds["FracOfTimeInTrop"].values != 1.0
dev_timetrop_mask = devmetds["FracOfTimeInTrop"].values != 1.0
# Get grid box volumes [cm3] (trop + strat)
ref_vol = (refmetds["Met_BXHEIGHT"] * refmetds["AREA"]) * 1e6
dev_vol = (devmetds["Met_BXHEIGHT"] * devmetds["AREA"]) * 1e6
# Get grid box volumes [cm3] (trop only)
ref_vol_trop = np.ma.masked_array(ref_vol.values, ref_timetrop_mask)
dev_vol_trop = np.ma.masked_array(dev_vol.values, dev_timetrop_mask)
# Get MCF and CH4 density [molec/cm3] (trop + strat)
# Assume that species is evenly distributed in air, with
# a mixing ratio of 1. Thus species density = air density.
ref_dens = refmetds["Met_AIRDEN"] / 1e6
dev_dens = devmetds["Met_AIRDEN"] / 1e6
# Get MCF and CH4 density [molec/cm3] (trop only)
ref_dens_trop = np.ma.masked_array(ref_dens.values, ref_timetrop_mask)
dev_dens_trop = np.ma.masked_array(dev_dens.values, dev_timetrop_mask)
# Get temperature [K] (trop only)
ref_temp = np.ma.masked_array(refmetds["Met_T"].values, ref_timetrop_mask)
dev_temp = np.ma.masked_array(devmetds["Met_T"].values, dev_timetrop_mask)
# Compute Arrhenius parameter K [cm3/molec/s]
ref_mcf_k = 1.64e-12 * np.exp(-1520e0 / ref_temp)
dev_mcf_k = 1.64e-12 * np.exp(-1520e0 / dev_temp)
ref_ch4_k = 2.45e-12 * np.exp(-1775e0 / ref_temp)
dev_ch4_k = 2.45e-12 * np.exp(-1775e0 / dev_temp)
# Numerator: Total atmospheric (trop+strat) burden
ref_num = np.sum(ref_dens.values * ref_vol.values)
dev_num = np.sum(dev_dens.values * dev_vol.values)
# Denominator: Loss rate in troposphere
ref_mcf_denom = np.sum(ref_mcf_k * ref_oh_trop *
ref_dens_trop * ref_vol_trop)
dev_mcf_denom = np.sum(dev_mcf_k * dev_oh_trop *
dev_dens_trop * dev_vol_trop)
ref_ch4_denom = np.sum(ref_ch4_k * ref_oh_trop *
ref_dens_trop * ref_vol_trop)
dev_ch4_denom = np.sum(dev_ch4_k * dev_oh_trop *
dev_dens_trop * dev_vol_trop)
# Compute lifetimes [years]
sec_to_year = 365.25 * 86400.0
ref_mcf_lifetime = (ref_num / ref_mcf_denom) / sec_to_year
dev_mcf_lifetime = (dev_num / dev_mcf_denom) / sec_to_year
ref_ch4_lifetime = (ref_num / ref_ch4_denom) / sec_to_year
dev_ch4_lifetime = (dev_num / dev_ch4_denom) / sec_to_year
# Compute differences
mcf_diff = dev_mcf_lifetime - ref_mcf_lifetime
ch4_diff = dev_ch4_lifetime - ref_ch4_lifetime
mcf_pctdiff = ((dev_mcf_lifetime - ref_mcf_lifetime) /
ref_mcf_lifetime) * 100.0
ch4_pctdiff = ((dev_ch4_lifetime - ref_ch4_lifetime) /
ref_ch4_lifetime) * 100.0
# ==================================================================
# Define function for writing metrics to file
# ==================================================================
def print_metrics_to_file(f, title1, title2, ref, dev, diff, pctdiff):
print("#" * 79, file=f)
print(f"{title1 : <76}{'###'}", file=f)
print(f"{title2 : <76}{'###'}", file=f)
print("#" * 79, file=f)
print("'{Ref' : <15}{'Dev' : <13}{'Dev - Ref` : <13}{'% diff' : <11}",
file=f)
print("{ref:11.6f} {dev:11.6f} {diff:11.6f} {pctdiff:9.4f}", file=f)
# ==================================================================
# Print metrics to file
# ==================================================================
# Create file
outfilename = os.path.join(dst, "OH_metrics.txt")
f = open(outfilename, "w")
# Write mean OH
title1 = "### Global mass-weighted OH concentration [1e5 molec/cm3]"
title2 = f"### Ref = {refstr}; Dev = {devstr}"
print_metrics_to_file(f, title1, title2, ref_mean_oh, dev_mean_oh,
oh_diff, oh_pctdiff)
# Write MCF lifetime
title1 = "### MCF lifetime w/r/t tropospheric OH [years]"
title2 = f"### Ref = {refstr}; Dev = {devstr}"
print_metrics_to_file(f, title1, title2, ref_mcf_lifetime,
dev_mcf_lifetime, mcf_diff, mcf_pctdiff)
# Write CH4 lifetime
title1 = "### CH4 lifetime w/r/t tropospheric OH [years]"
title2 = f"### Ref = {refstr}; Dev = {devstr}"
print_metrics_to_file(f, title1, title2, ref_ch4_lifetime,
dev_ch4_lifetime, ch4_diff, ch4_pctdiff)
# -------------------------------------------
# Clean up
# -------------------------------------------
del refds
del devds
del refmetds
del devmetds
gc.collect()
[docs]
def make_benchmark_wetdep_plots(
ref,
refstr,
dev,
devstr,
collection,
spcdb_files,
dst="./benchmark",
cmpres=None,
datestr=None,
overwrite=False,
verbose=False,
benchmark_type="TransportTracersBenchmark",
plots=["sfc", "500hpa", "zonalmean"],
log_color_scale=False,
normalize_by_area=False,
areas=None,
refmet=None,
devmet=None,
weightsdir='.',
n_job=-1,
time_mean=False,
):
"""
Creates PDF files containing plots of species concentration
for model benchmarking purposes.
Parameters
----------
ref : str
Path name for the "Ref" (aka "Reference") data set.
refstr : str
A string to describe ref (e.g. version number).
dev : str
Path name for the "Dev" (aka "Development") data set.
This data set will be compared against the "Reference"
data set.
devstr : str
A string to describe dev (e.g. version number).
collection : str
String name of collection to plot comparisons for.
spcdb_files : list
Paths to species_database.yml files in Ref & Dev rundirs.
dst : str, optional
A string denoting the destination folder where a PDF
file containing plots will be written.
Default value: ./benchmark
cmpres : str, optional
Grid resolution at which to compare ref and dev data, e.g. '1x1.25'.
datestr : str, optional
A string with date information to be included in both the
plot pdf filename and as a destination folder subdirectory
for writing plots.
Default value: None
overwrite : bool, optional
Set this flag to True to overwrite files in the
destination folder (specified by the dst argument).
Default value: False.
verbose : bool, optional
Set this flag to True to print extra informational output.
Default value: False.
benchmark_type : str, optional
A string denoting the type of benchmark output to plot, options are
FullChemBenchmark, TransportTracersBenchmark, or CH4Benchmark.
Default value: "FullChemBenchmark"
plots : list of str, optional
List of plot types to create.
Default value: ['sfc', '500hpa', 'zonalmean']
log_color_scale : bool, optional
Set this flag to True to enable plotting data (not diffs)
on a log color scale.
normalize_by_area : bool, optional
Set this flag to true to enable normalization of data
by surface area (i.e. kg s-1 --> kg s-1 m-2).
Default value: False
areas : dict of xr.DataArray, optional
Grid box surface areas in m2 on Ref and Dev grids.
Default value: None
refmet : str, optional
Path name for ref meteorology.
Default value: None
devmet : str, optional
Path name for dev meteorology.
Default value: None
weightsdir : str, optional
Directory in which to place (and possibly reuse) xESMF regridder
netCDF files.
Default value: '.'
n_job : int, optional
Defines the number of simultaneous workers for parallel plotting.
Set to 1 to disable parallel plotting. Value of -1 allows the
application to decide.
Default value: -1
time_mean : bool, optional
Determines if we should average the datasets over time.
Default value: False
"""
# Create destination plot directory
make_directory(dst, overwrite)
# Make a collection subdirectory
targetdst = os.path.join(dst, collection)
if not os.path.isdir(targetdst):
os.mkdir(targetdst)
# If datestr is passed, create a further subdirectory
if datestr is not None:
targetdst = os.path.join(targetdst, datestr)
if not os.path.isdir(targetdst):
os.mkdir(targetdst)
# Replace whitespace in the ref and dev labels
refstr = replace_whitespace(refstr)
devstr = replace_whitespace(devstr)
# Get the function that will read file(s) into a dataset
reader = dataset_reader(time_mean, verbose=verbose)
# Open datasets
refds = reader(ref, drop_variables=SKIP_THESE_VARS)
devds = reader(dev, drop_variables=SKIP_THESE_VARS)
# Open met datasets if passed as arguments
refmetds = None
devmetds = None
if refmet is not None:
refmetds = reader(refmet, drop_variables=SKIP_THESE_VARS)
if devmet is not None:
devmetds = reader(devmet, drop_variables=SKIP_THESE_VARS)
# Compute mean of data over the time dimension (if time_mean=True)
if time_mean:
refds = dataset_mean(refds)
devds = dataset_mean(devds)
if refmet is not None:
refmetds = dataset_mean(refmetds)
if devmet is not None:
devmetds = dataset_mean(devmetds)
# Make sure that Ref and Dev datasets have the same variables.
# Variables that are in Ref but not in Dev will be added to Dev
# with all missing values (NaNs). And vice-versa.
# Turn this off for now since add_missing_variables inserts GCC area into
# GCHP files, which causes problems with area normalization (ewl)
#[refds, devds] = add_missing_variables(refds, devds)
# Get list of variables in collection
vardict = compare_varnames(refds, devds, quiet=not verbose)
varlist = [v for v in vardict["commonvars3D"] if collection + "_" in v]
varlist.sort()
# Surface plots
if "sfc" in plots:
if datestr is not None:
plotfilename = f"{collection}_Surface_{datestr}.pdf"
else:
plotfilename = f"{collection}_Surface.pdf"
pdfname = os.path.join(targetdst, plotfilename)
compare_single_level(
refds,
refstr,
devds,
devstr,
varlist=varlist,
cmpres=cmpres,
ilev=0,
refmet=refmetds,
devmet=devmetds,
pdfname=pdfname,
normalize_by_area=normalize_by_area,
extra_title_txt=datestr,
weightsdir=weightsdir,
n_job=n_job,
spcdb_files=spcdb_files,
)
add_bookmarks_to_pdf(
pdfname,
varlist,
remove_prefix=collection + '_',
verbose=verbose)
# 500 hPa plots
if "500hpa" in plots:
if datestr is not None:
plotfilename = f"{collection}_500hPa_{datestr}.pdf"
else:
plotfilename = f"{collection}_500hPa.pdf"
pdfname = os.path.join(targetdst, plotfilename)
compare_single_level(
refds,
refstr,
devds,
devstr,
varlist=varlist,
cmpres=cmpres,
ilev=22,
refmet=refmetds,
devmet=devmetds,
pdfname=pdfname,
normalize_by_area=normalize_by_area,
extra_title_txt=datestr,
weightsdir=weightsdir,
n_job=n_job,
spcdb_files=spcdb_files,
)
add_bookmarks_to_pdf(
pdfname,
varlist,
remove_prefix=collection + '_',
verbose=verbose
)
# Zonal mean plots
if "zonalmean" in plots or "zm" in plots:
# Full column
if datestr is not None:
plotfilename = f"{collection}_FullColumn_ZonalMean_{datestr}.pdf"
else:
plotfilename = f"{collection}_FullColumn_ZonalMean.pdf"
pdfname = os.path.join(targetdst, plotfilename)
compare_zonal_mean(
refds,
refstr,
devds,
devstr,
varlist=varlist,
refmet=refmetds,
devmet=devmetds,
pdfname=pdfname,
log_color_scale=log_color_scale,
normalize_by_area=normalize_by_area,
extra_title_txt=datestr,
weightsdir=weightsdir,
n_job=n_job,
spcdb_files=spcdb_files,
)
add_bookmarks_to_pdf(
pdfname,
varlist,
remove_prefix=collection + '_',
verbose=verbose
)
# Stratosphere
if datestr is not None:
plotfilename = f"{collection}_Strat_ZonalMean_{datestr}.pdf"
else:
plotfilename = f"{collection}_Strat_ZonalMean.pdf"
pdfname = os.path.join(targetdst, plotfilename)
compare_zonal_mean(
refds,
refstr,
devds,
devstr,
varlist=varlist,
refmet=refmetds,
devmet=devmetds,
pdfname=pdfname,
pres_range=[1, 100],
log_yaxis=True,
extra_title_txt=datestr,
normalize_by_area=normalize_by_area,
weightsdir=weightsdir,
n_job=n_job,
spcdb_files=spcdb_files
)
add_bookmarks_to_pdf(
pdfname,
varlist,
remove_prefix=collection + '_',
verbose=verbose
)
# -------------------------------------------
# Clean up
# -------------------------------------------
del refds
del devds
del refmetds
del devmetds
gc.collect()
[docs]
def make_benchmark_aerosol_tables(
devdir,
devlist_aero,
devlist_spc,
devlist_met,
devstr,
year,
days_per_mon,
spcdb_files,
dst='./benchmark',
overwrite=False,
is_gchp=False,
):
"""
Compute FullChemBenchmark aerosol budgets & burdens.
Parameters
----------
devdir : str
Path to development ("Dev") data directory.
devlist_aero : list of str
List of Aerosols collection files (different months).
devlist_spc : list of str
List of SpeciesConc collection files (different months).
devlist_met : list of str
List of meteorology collection files (different months).
devstr : str
Descriptive string for datasets (e.g. version number).
year : str
The year of the benchmark simulation (e.g. '2016').
days_per_mon : list of int
List of number of days per month for all months.
spcdb_files : list
Paths to species_database.yml files in Ref & Dev rundirs.
dst : str, optional
Directory where budget tables will be created.
Default value: './benchmark'
overwrite : bool, optional
Overwrite burden & budget tables?
Default value: False
is_gchp : bool, optional
Whether datasets are for GCHP.
Default value: False
"""
# Create destination directory
make_directory(dst, overwrite)
# List of species (and subsets for the trop & strat)
species_list = [
"BCPI", "OCPI", "SO4", "DST1", "DST2", "DST3", "DST4",
"DSTbin1", "DSTbin2", "DSTbin3", "DSTbin4", "DSTbin5",
"DSTbin6", "DSTbin7", "SALA", "SALC"
]
# Read the species database files in the Ref & Dev rundirs, and
# return a dict containing metadata for the union of species.
# We'll need properties such as mol. wt. for unit conversions, etc.
_, dev_metadata = read_species_metadata(spcdb_files, quiet=True)
# Get the list of relevant AOD diagnostics from a YAML file
ifile= AOD_SPC
aod = read_config_file(
os.path.join(
os.path.dirname(__file__),
ifile,
),
quiet=True
)
ds_aer = xr.open_mfdataset(
devlist_aero,
drop_variables=SKIP_THESE_VARS
)
ds_spc = xr.open_mfdataset(
devlist_spc,
drop_variables=SKIP_THESE_VARS
)
ds_met = xr.open_mfdataset(
devlist_met,
drop_variables=SKIP_THESE_VARS
)
# Rename SpeciesConc_ to SpeciesConcVV_ for consistency with new
# naming introduced in GEOS-Chem 14.1.0
ds_spc = rename_speciesconc_to_speciesconcvv(ds_spc)
# Trim species_list to the variables that are only in the dataset
varlist = [
var for var in species_list if f"SpeciesConcVV_{var}" in ds_spc.data_vars
]
# Molecular weights [g mol-1], as taken from the species database
mw = {}
full_names = {}
for var in varlist:
full_names[var] = dev_metadata[var]["FullName"].strip()
mw[var] = dev_metadata[var]["MW_g"]
mw["Air"] = MW_AIR_g
# Get troposphere mask
tropmask = get_troposphere_mask(ds_met)
# Get number of months
n_mon = len(days_per_mon)
# ------------------------------------------------------------------
# Surface area
# (kludgey but it works - revisit this)
# ------------------------------------------------------------------
# Get number of vertical levels
N_LEVS = ds_spc.dims["lev"]
if is_gchp:
area = ds_met["Met_AREAM2"].values
a = area.shape
area_m2 = np.zeros([a[0], N_LEVS, a[1], a[2], a[3]])
for t in range(n_mon):
for k in range(N_LEVS):
area_m2[t, k, :, :, :] = area[t, :, :, :]
total_area_m2 = np.sum(area_m2[0, 0, :, :, :])
else:
area = ds_met["AREA"].values
a = area.shape
area_m2 = np.zeros([a[0], N_LEVS, a[1], a[2]])
for t in range(n_mon):
for k in range(N_LEVS):
area_m2[t, k, :, :] = area[t, :, :]
total_area_m2 = np.sum(area_m2[0, 0, :, :])
# ------------------------------------------------------------------
# Conversion factors and time increments
# ------------------------------------------------------------------
# v/v dry --> Tg
vv_to_Tg = {}
for var in varlist:
if f"SpeciesConcVV_{var}" in ds_spc.data_vars:
vv_to_Tg[var] = \
ds_met["Met_AD"].values * (mw[var] / mw["Air"]) * 1e-9
# Days in the benchmark duration
days_per_yr = np.sum(days_per_mon)
# ------------------------------------------------------------------
# Define function to print tables
# ------------------------------------------------------------------
def print_aerosol_metrics(data, varlist, namelist, filename, title, label):
with open(filename, "w+", encoding=ENCODING) as ofile:
# Print top header
print("%" * 79, file=ofile)
print(f" {title} for {year} in {devstr}", file=ofile)
print(" (weighted by the number of days per month)", file=ofile)
print("%" * 79, file=ofile)
line = "\n" + " " *67 + "Strat Trop Strat+Trop\n"
line += " " * 67 + "----------- ---------- ----------"
print(line, file=ofile)
# Print data
for var in varlist:
line = f"{namelist[var] : <41} ({var : <7}) {label} : {data[var + '_s']:11.9f} {data[var + '_t']:10.8f} {data[var + '_f']:10.8f}\n"
print(line, file=ofile)
# ------------------------------------------------------------------
# Compute aerosol burdens [Tg] and print
# ------------------------------------------------------------------
# Table info
filename = f"{dst}/Aerosol_Burdens.txt"
if n_mon == 12:
title = "Annual average global aerosol burdens"
else:
title = f"Average global aerosol burdens across {n_mon} months"
label = "burden [Tg]"
# Initialize
q = {}
q_sum_f = np.zeros(n_mon)
q_sum_t = np.zeros(n_mon)
q_sum_s = np.zeros(n_mon)
burdens = {}
# Define the axes we need to sum over to make monthly sums
if is_gchp:
sum_axes = (1, 2, 3, 4)
else:
sum_axes = (1, 2, 3)
# Loop over species
for var in varlist:
# Whole-atmosphere and trop-only quantities [g]
# NOTE: DryDep is by nature trop-only
varname = f"SpeciesConcVV_{var}"
q[var + "_f"] = ds_spc[varname].values * vv_to_Tg[var]
q[var + "_t"] = np.ma.masked_array(q[var + "_f"], tropmask)
# Compute monthly sums, weighted by the number of days per month
q_sum_f = np.sum(q[var + "_f"], axis=sum_axes) * days_per_mon
q_sum_t = np.sum(q[var + "_t"], axis=sum_axes) * days_per_mon
q_sum_s = q_sum_f - q_sum_t
# Compute annual averages
burdens[var + "_f"] = np.sum(q_sum_f) / days_per_yr
burdens[var + "_t"] = np.sum(q_sum_t) / days_per_yr
burdens[var + "_s"] = np.sum(q_sum_s) / days_per_yr
print_aerosol_metrics(burdens, varlist, full_names, filename, title, label)
# ------------------------------------------------------------------
# Define function to print tables
# ------------------------------------------------------------------
def print_aods(data, varlist, filename, title):
with open(filename, "w+", encoding=ENCODING) as ofile:
# Print top header
print("%" * 79, file=ofile)
print(f" {title} for {year} in {devstr}", file=ofile)
print(" (weighted by the number of days per month)", file=ofile)
print("%" * 79, file=ofile)
line = "\n" + " " *32 + "Strat Trop Strat+Trop\n"
line += " " * 32 + "----------- ---------- ----------"
print(line, file=ofile)
# Print data
for var in varlist:
line = f"{var : <4} column optical depth [1]: {data[var + '_s']:11.9f} {data[var + '_t']:10.8f} {data[var + '_f']:10.8f}\n"
print(line, file=ofile)
# ------------------------------------------------------------------
# Compute average AOD's [Tg] and print
# ------------------------------------------------------------------
# Table info
filename = f"{dst}/Global_Mean_AOD.txt"
if n_mon == 12:
title = "Annual average global AODs"
else:
title = f"Average global AODs across {n_mon} months"
label = "mean AOD [1]"
# Initialize
q = {}
q_sum_f = np.zeros(n_mon)
q_sum_t = np.zeros(n_mon)
q_sum_s = np.zeros(n_mon)
aods = {}
# Define axes to sum over, and total surface area
if is_gchp:
sum_axes = (1, 2, 3, 4)
else:
sum_axes = (1, 2, 3)
# List of AOD species to plot
varlist = [var for var in aod.keys() if "Dust" in var or "Hyg" in var]
if is_gchp:
varlist = [var.replace('550nm', 'WL1') for var in varlist]
# Loop over AOD variables
aod_list = []
for varname in varlist:
# Extract the s
if "Dust" in varname:
var = "Dust"
elif "Total" in varname:
continue
else:
var = varname.split("_")[1]
aod_list.append(var)
# Whole-atmosphere AOD [1]
q[var + "_f"] = ds_aer[varname].values
# Tropospheric-only AOD [1]
q[var + "_t"] = np.ma.masked_array(q[var + "_f"], tropmask)
# Create monthly sums, weighted by the number of days per month
q_sum_f = np.sum(q[var + "_f"] * area_m2, axis=sum_axes) * days_per_mon
q_sum_t = np.sum(q[var + "_t"] * area_m2, axis=sum_axes) * days_per_mon
q_sum_s = q_sum_f - q_sum_t
# Take annual averages
aods[var + "_f"] = np.sum(q_sum_f) / total_area_m2 / days_per_yr
aods[var + "_t"] = np.sum(q_sum_t) / total_area_m2 / days_per_yr
aods[var + "_s"] = np.sum(q_sum_s) / total_area_m2 / days_per_yr
print_aods(aods, aod_list, filename, title)
# ------------------------------------------------------------------
# Clean up
# ------------------------------------------------------------------
del ds_aer
del ds_spc
del ds_met
gc.collect()
[docs]
def make_benchmark_operations_budget(
refstr,
reffiles,
devstr,
devfiles,
ref_interval,
dev_interval,
spcdb_files,
benchmark_type=None,
label=None,
col_sections=["Full", "Trop", "PBL", "FixedLevs", "Strat"],
operations=["Chemistry", "Convection", "EmisDryDep",
"Mixing", "Transport", "WetDep"],
compute_accum=True,
compute_restart=False,
require_overlap=False,
dst='.',
species=None,
overwrite=True,
verbose=False,
):
"""
Prints the "operations budget" (i.e. change in mass after
each operation) from a GEOS-Chem benchmark simulation.
Parameters
----------
refstr : str
Labels denoting the "Ref" versions.
reffiles : list of str
Lists of files to read from the "Ref" version.
devstr : str
Labels denoting the "Dev" versions.
devfiles : list of str
Lists of files to read from "Dev" version.
ref_interval : float
Number of seconds in the ref diagnostic interval.
dev_interval : float
Number of seconds in the dev diagnostic interval.
spcdb_files : list
Paths to species_database.yml files in Ref & Dev rundirs.
benchmark_type : str, optional
A string denoting the type of benchmark output to plot, options are
FullChemBenchmark, TransportTracersBenchmark, or CH4Benchmark.
Default value: None
label : str, optional
Contains the date or date range for each dataframe title.
Default value: None
col_sections : list of str, optional
List of column sections to calculate global budgets for. May
include Strat eventhough not calculated in GEOS-Chem, but Full
and Trop must also be present to calculate Strat.
Default value: ["Full", "Trop", "PBL", "FixedLevs", "Strat"]
operations : list of str, optional
List of operations to calculate global budgets for. Accumulation
should not be included. It will automatically be calculated if
all GEOS-Chem budget operations are passed and optional arg
compute_accum is True.
Default value: ["Chemistry","Convection","EmisDryDep",
"Mixing","Transport","WetDep"]
compute_accum : bool, optional
Optionally turn on/off accumulation calculation. If True, will
only compute accumulation if all six GEOS-Chem operations budgets
are computed. Otherwise a message will be printed warning that
accumulation will not be calculated.
Default value: True
compute_restart : bool, optional
Optionally turn on/off calculation of mass change based on restart
file. Only functional for "Full" column section.
Default value: False
require_overlap : bool, optional
Whether to calculate budgets for only species that are present in
both Ref or Dev.
Default value: False
dst : str, optional
Directory where plots & tables will be created.
Default value: '.' (directory in which function is called)
species : list of str, optional
List of species for which budgets will be created.
Default value: None (all species)
overwrite : bool, optional
Denotes whether to overwrite existing budget file.
Default value: True
verbose : bool, optional
Set this switch to True if you wish to print out extra
informational messages.
Default value: False
"""
# Replace whitespace in the ref and dev labels
refstr = replace_whitespace(refstr)
devstr = replace_whitespace(devstr)
# ------------------------------------------
# Column sections
# ------------------------------------------
# Print info. Only allow Strat if Trop and Full are present
print("Column sections:")
for col_section in col_sections:
print(f" {col_section}")
n_sections = len(col_sections)
compute_strat = False
if "Strat" in col_sections:
compute_strat = True
if "Full" not in col_sections or "Trop" not in col_sections:
msg = "Strat budget table requires Full and Trop column sections"
raise ValueError(msg)
print("*** Will compute Strat budget as difference of Full"
" and Trop ***")
# Make a subset of column sections not including Strat
gc_sections = [r for r in col_sections if "Strat" not in r]
# ------------------------------------------
# Operations
# ------------------------------------------
# Rename the optional argument to be clear it is GEOS-Chem budget
# operations
gc_operations = operations
# Handle whether to compute accumulation.
all_operations = gc_operations
if compute_accum and len(gc_operations) == 6:
all_operations = gc_operations + ["ACCUMULATION"]
if compute_restart:
all_operations = gc_operations + ["RESTART"]
n_ops = len(all_operations)
# Print info
print("Operations:")
for all_operation in all_operations:
print(f" {all_operation}")
if compute_accum:
if "ACCUMULATION" in all_operations:
print("*** Will compute ACCUMULATION operation as sum of all "
"GEOS-Chem operations ***")
else:
print("***Will not compute ACCUMULATION since not all GEOS-Chem"
" operation budgets will be computed.")
if compute_restart:
print("*** Will compute RESTART operation as mass change "
"based on simulation start and end restart files ***")
# ------------------------------------------
# Read data
# ------------------------------------------
# Assume this will be annual budget if interval greater than 3e7 sec
annual = ref_interval > 3.0e7
# Read data from disk (either one month or 12 months)
print('Opening ref and dev data')
skip_vars = SKIP_THESE_VARS
if annual:
ref_ds = xr.open_mfdataset(reffiles, drop_variables=skip_vars)
dev_ds = xr.open_mfdataset(devfiles, drop_variables=skip_vars)
else:
ref_ds = xr.open_dataset(reffiles, drop_variables=skip_vars)
dev_ds = xr.open_dataset(devfiles, drop_variables=skip_vars)
# TODO: Add section for reading files for computing mass from restart file
# ------------------------------------------
# Species
# ------------------------------------------
# Get information about variables in data files
vardict = compare_varnames(ref_ds, dev_ds, quiet=True)
refonly = vardict["refonly"]
devonly = vardict["devonly"]
cmnvars = vardict["commonvars2D"]
# Reduce each list to only variables containing "Budget" and not "Strat"
refonly = [v for v in refonly if "Budget" in v and "Strat" not in v]
devonly = [v for v in devonly if "Budget" in v and "Strat" not in v]
cmnvars = [v for v in cmnvars if "Budget" in v and "Strat" not in v]
# Special handling for fixed level budget diagnostic
# Get variable name prefix, e.g. Levs1to35. Check that all fixed level
# vars have the same prefix. Update section names used in table.
fixedlevvars = [v for v in cmnvars if "Budget" in v and "Levs" in v]
if fixedlevvars is not None:
fixedlevnames = [v[v.index('Levs'):].split("_")[0] for v in fixedlevvars]
if len(set(fixedlevnames)) > 1:
msg = "Budget fixed level diagnostic name must be constant!"
raise ValueError(msg)
col_sections = [v.replace('FixedLevs',fixedlevnames[0]) for v in col_sections]
gc_sections = [v.replace('FixedLevs',fixedlevnames[0]) for v in gc_sections]
# Get the species list, depending on if species was passed as argument.
if species is not None:
spclist = species
else:
# For each column section, get the union or intersection (depending
# on optional argument require_overlap) of all budget diagnostic
# species and sort alphabetically. A species is counted even if only
# present for as few as one operation.
spclists = {}
for gc_section in gc_sections:
refspc = [v.split("_")[1] for v in refonly if gc_section in v]
devspc = [v.split("_")[1] for v in devonly if gc_section in v]
cmnspc = [v.split("_")[1] for v in cmnvars if gc_section in v]
if require_overlap:
section_spc = list(set(cmnspc))
else:
section_spc = list(set(refspc + devspc + cmnspc))
section_spc.sort()
spclists[gc_section] = section_spc
# If calculating Strat, use intersection of Trop and Full
if "Strat" in col_sections:
spclists["Strat"] = list(set(spclists["Trop"] + spclists["Full"]))
# For now, define one species list as species combined across all
# column sections. Compute budgets of these species for each section.
# NOTE: eventually would want to be able to compute budget for only
# species per column section, not all species for all section. If that
# is implemented then handling of optional species list needs to be
# revisited.
spclist = []
for s in spclists:
spclist = spclist + spclists[s]
# Make list items unique and put in alphabetical order
spclist = list(set(spclist))
spclist.sort()
n_spc = len(spclist)
# ------------------------------------------
# Concentration units and if a wetdep species
# ------------------------------------------
# Load a YAML file containing species properties
ref_metadata, dev_metadata = read_species_metadata(
spcdb_files,
quiet=True
)
# Determine what the converted units and conversion factor should be
# based on benchmark type and species (tracer) name. Assume raw data [kg/s]
ref_conv_fac = {}
dev_conv_fac = {}
units = {}
is_wetdep = {}
for spc in spclist:
# Identify wetdep species
is_wetdep[spc] = None
ref_species_metadata = ref_metadata.get(spc)
dev_species_metadata = dev_metadata.get(spc)
if ref_species_metadata is not None and \
dev_species_metadata is not None:
is_wetdep[spc] = \
ref_species_metadata.get("Is_WetDep") or \
dev_species_metadata.get("Is_WetDep")
# Unit conversion factors and units
ref_conv_fac[spc] = ref_interval * 1e-6
dev_conv_fac[spc] = dev_interval * 1e-6
units[spc] = '[Gg]'
if benchmark_type is not None:
if "TransportTracers" in benchmark_type and "Tracer" not in spc:
ref_conv_fac[spc] = ref_interval
dev_conv_fac[spc] = dev_interval
if annual:
units[spc] = '[kg/yr]'
else:
units[spc] = '[kg]'
elif annual:
ref_conv_fac[spc] = ref_interval * 1e-9
dev_conv_fac[spc] = dev_interval * 1e-9
units[spc] = '[Tg/yr]'
# ------------------------------------------
# Create dataframe
# ------------------------------------------
columns = ["Column_Section", "Species", "Operation", "Ref_raw", "Dev_raw",
"Units_converted", "Ref", "Dev", "Diff", "Pct_diff"]
# Make column data to initialize with
col_section = list(itertools.chain.from_iterable(
itertools.repeat(x, n_ops * n_spc) for x in col_sections))
col_spc = list(itertools.chain.from_iterable(
itertools.repeat(x, n_ops) for x in spclist)) * n_sections
col_ops = all_operations * n_sections * n_spc
# Put the column data together into a dictionary
data = {
'Species': col_spc,
'Operation': col_ops,
'Column_Section': col_section,
}
# Create the dataframe from the data dictionary and column names list
df = pd.DataFrame(data, columns=columns)
# ------------------------------------------
# Populate dataframe for GEOS-Chem operations and column sections
# ------------------------------------------
print('Calculating budgets for all data operations and column sections...')
# Loop over sections (only those with data in files)
for gc_section in gc_sections:
# Keep track of progress in log
print(f" {gc_section}")
# Loop over species in that section
for i, spc in enumerate(spclist):
# Keep track of progress (debugging print)
#if (i + 1) % 50 == 0:
# print(f" {gc_section}: species {i + 1} of {n_spc}")
# Loop over operations (only those with data in files)
for gc_operation in gc_operations:
# Get the dataframe row to fill. Skip if not found.
dfrow = (df["Column_Section"] == gc_section) \
& (df["Species"] == spc) \
& (df["Operation"] == gc_operation)
if not any(dfrow):
continue
# Get the variable name in the datasets
varname = "Budget" + gc_operation + gc_section + "_" + spc
# Calculate Ref and dev raw as global sum
if varname in ref_ds.data_vars.keys():
refraw = ref_ds[varname].values.sum()
elif gc_operation == "WetDep" and is_wetdep[spc] is None:
refraw = 0.0
else:
refraw = np.nan
if varname in dev_ds.data_vars.keys():
devraw = dev_ds[varname].values.sum()
elif gc_operation == "WetDep" and is_wetdep[spc] is None:
devraw = 0.0
else:
devraw = np.nan
# Convert units
refconv = refraw * ref_conv_fac[spc]
devconv = devraw * dev_conv_fac[spc]
# Calculate diff and % diff from conc with converted units
if not np.isnan(refconv) and not np.isnan(devconv):
diff = devconv - refconv
try:
pctdiff = diff / refconv * 100
except BaseException:
pctdiff = np.nan
else:
diff = np.nan
pctdiff = np.nan
# Fill dataframe
df.loc[dfrow, "Ref_raw"] = refraw
df.loc[dfrow, "Dev_raw"] = devraw
df.loc[dfrow, "Units_converted"] = units[spc]
df.loc[dfrow, "Ref"] = refconv
df.loc[dfrow, "Dev"] = devconv
df.loc[dfrow, "Diff"] = diff
df.loc[dfrow, "Pct_diff"] = pctdiff
# ------------------------------------------
# Compute Strat for each data operation (if applicable)
# ------------------------------------------
if compute_strat:
print('Computing Strat budgets from Trop and Full')
# Loop over species
for i, spc in enumerate(spclist):
# Keep track of progress (debugging print)
#if (i + 1) % 50 == 0:
# print(f" Strat: species {i + 1} of {n_spc}")
# Loop over operations (only those with data in files)
for gc_operation in gc_operations:
# Get the strat dataframe row to fill. Skip if not found.
dfrow = (df["Column_Section"] == "Strat") \
& (df["Species"] == spc) \
& (df["Operation"] == gc_operation)
if not any(dfrow):
continue
# Get the "Full" row to use in the calculation
dfrow_full = (df["Column_Section"] == "Full") \
& (df["Species"] == spc) \
& (df["Operation"] == gc_operation)
if not any(dfrow_full):
continue
# Get the "Trop" row to use in the calculation
dfrow_trop = (df["Column_Section"] == "Trop") \
& (df["Species"] == spc) \
& (df["Operation"] == gc_operation)
if not any(dfrow_trop):
continue
# Calculate strat concentrations as Full minus Trop. Do
# not bother with raw value computation.
refstrat = df.loc[dfrow_full, "Ref"].values[0] \
- df.loc[dfrow_trop, "Ref"].values[0]
devstrat = df.loc[dfrow_full, "Dev"].values[0] \
- df.loc[dfrow_trop, "Dev"].values[0]
# Calculate diff and % diff
if not np.isnan(refstrat) and not np.isnan(devstrat):
diff = devstrat - refstrat
try:
pctdiff = diff / refstrat * 100
except BaseException:
pctdiff = np.nan
else:
diff = np.nan
pctdiff = np.nan
# Fill dataframe
df.loc[dfrow, "Units_converted"] = units[spc]
df.loc[dfrow, "Ref"] = refstrat
df.loc[dfrow, "Dev"] = devstrat
df.loc[dfrow, "Diff"] = diff
df.loc[dfrow, "Pct_diff"] = pctdiff
# ------------------------------------------
# Compute Accumulation for each column section (if applicable)
# ------------------------------------------
if compute_accum:
print('Computing ACCUMULATION operation budgets...')
# Loop over all column sections
for col_section in col_sections:
# Keep track of progress in log
print(f" {col_section}")
# Loop over species
for i, spc in enumerate(spclist):
# Keep track of progress (debugging print)
#if (i + 1) % 50 == 0:
# print(f" {col_section}: species {i + 1} of {n_spc}")
# Get the accumulation dataframe row to fill.Skip if not found.
dfrow = (df["Column_Section"] == col_section) \
& (df["Species"] == spc) \
& (df["Operation"] == "ACCUMULATION")
if not any(dfrow):
continue
# Get the rows to sum
dfrows_to_sum = (df["Column_Section"] == col_section) \
& (df["Species"] == spc) \
& (df["Operation"].isin(gc_operations))
# Sum the concentrations. If there is one or more NaN values
# then set to NaN since not enough data for accumulation.
refsum = df.loc[dfrows_to_sum, "Ref"].sum()
devsum = df.loc[dfrows_to_sum, "Dev"].sum()
# Calculate diff and % diff
if not np.isnan(refsum) and not np.isnan(devsum):
diff = devsum - refsum
try:
pctdiff = diff / refsum * 100
except BaseException:
pctdiff = np.nan
else:
diff = np.nan
pctdiff = np.nan
# Fill dataframe
df.loc[dfrow, "Units_converted"] = units[spc]
df.loc[dfrow, "Ref"] = refsum
df.loc[dfrow, "Dev"] = devsum
df.loc[dfrow, "Diff"] = diff
df.loc[dfrow, "Pct_diff"] = pctdiff
# ------------------------------------------
# Compute mass change in restarts for each column section (if applicable)
# ------------------------------------------
if compute_restart:
print('Computing RESTART operation budgets...')
# Read the species database files in the Ref & Dev rundirs,
# and return a dict containing metadata for each.
ref_metadata, dev_metadata = read_species_metadata(
spcdb_files,
quiet=True
)
# Loop over all column sections
for col_section in col_sections:
# Loop over species
for i, spc in enumerate(spclist):
# Keep track of progress
if (i + 1) % 50 == 0:
print(f" {col_section}: species {i + 1} of {n_spc}")
# Get the accumulation dataframe row to fill. Skip if not found
# of if not Full column section.
dfrow = (df["Column_Section"] == "Full") \
& (df["Species"] == spc) \
& (df["Operation"] == "RESTART")
if not any(dfrow):
continue
# Get ref and dev mass
# Get species metadata for unit conversion. If none, skip.
ref_species_metadata = ref_metadata.get(spc)
dev_species_metadata = dev_metadata.get(spc)
if ref_species_metadata is None and \
dev_species_metadata is None:
continue
# Get molecular weights
#ref_mol_wt_g = get_molwt_from_metadata(
# ref_species_metadata,
# spc
#)
#dev_mol_wt_g = get_molwt_from_metadata(
# ref_species_metadata,
# spc
#)
# Specify target units
target_units = "Gg"
# ==============================================================
# Convert units of Ref and save to a DataArray
# (or skip if Ref contains NaNs everywhere)
# ==============================================================
refarray = refdata[v]
if not np.isnan(refdata[v].values).all():
refarray = convert_units(
refarray,
spc,
ref_species_metadata,
target_units,
area_m2=met_and_masks["Ref_Area"],
delta_p=met_and_masks["Ref_Delta_P"],
box_height=met_and_masks["Ref_BxHeight"],
)
# ==============================================================
# Convert units of Dev and save to a DataArray
# (or skip if Dev contains NaNs everywhere)
# ==============================================================
devarray = devdata[v]
if not np.isnan(devdata[v].values).all():
devarray = convert_units(
devarray,
spc_name,
dev_species_metadata,
target_units,
area_m2=met_and_masks["Dev_Area"],
delta_p=met_and_masks["Dev_Delta_P"],
box_height=met_and_masks["Dev_BxHeight"],
)
# Compute ref mass as end mass minus start mass in ref
# TODO
# Compute dev mass as end mass minus start mass in dev
# TODO - copy above once compete. The rest should just work.
# Calculate diff and % diff
if not np.isnan(refmass) and not np.isnan(devmass):
diff = devmass - refmass
try:
pctdiff = diff / refmass * 100
except BaseException:
pctdiff = np.nan
else:
diff = np.nan
pctdiff = np.nan
# Fill dataframe
df.loc[dfrow, "Units_converted"] = units[spc]
df.loc[dfrow, "Ref"] = refmass
df.loc[dfrow, "Dev"] = devmass
df.loc[dfrow, "Diff"] = diff
df.loc[dfrow, "Pct_diff"] = pctdiff
# Sanity check write to csv (for testing. Keep commented out otherwise)
#df.to_csv('df.csv', na_rep='NA')
# ------------------------------------------
# Make budget file
# ------------------------------------------
# Create the target output directory hierarchy if it doesn't already exist
make_directory(dst, overwrite)
# Print budgets to file
filename = f"{dst}/Budgets_After_Operations.txt"
with open(filename, "w+", encoding=ENCODING) as f:
print("#" * 78, file=f)
if label is not None and benchmark_type is not None:
print(f"{benchmark_type} budgets for {label}", file=f)
else:
print(f"Budgets across {ref_interval}/{dev_interval} sec", file=f)
print("\n", file=f)
print("NOTES:", file=f)
msg = " - When using the non-local mixing scheme (default), "\
"'Mixing' includes\n emissions and dry deposition "\
"applied below the PBL. 'EmisDryDep'\n therefore only "\
"captures fluxes above the PBL.\n - When using full mixing, "\
"'Mixing' and 'EmisDryDep' are fully separated.\n - Budgets "\
"are calculated as the sum of [kg/s] tendencies\n - Strat "\
"budget is calculated as Full minus Trop\n - ACCUMULATION "\
"is calculated as sum of all other operations"
print(msg, file=f)
print("#" * 78, file=f)
print(file=f)
# Loop over species
for i, spc in enumerate(spclist):
print(f"{spc} budgets (Ref={refstr}; Dev={devstr})", file=f)
# Print a table for each column section
for col_section in col_sections:
# Get the dataframe rows. Skip if none found.
dfrows = (df["Column_Section"] == col_section) \
& (df["Species"] == spc) \
& (df["Operation"].isin(all_operations))
if not any(dfrows):
continue
# Print dataframe subset to file
print(f"{col_section} {units[spc]} : {spc}", file=f)
print(tabulate(
df.loc[dfrows,
["Operation",
"Ref",
"Dev",
"Diff",
"Pct_diff"]],
headers='keys',
tablefmt='psql',
showindex=False,
floatfmt=(".5f", ".5f", ".5f", ".5f", ".5f"),
), file=f
)
print("\n", file=f)
# ------------------------------------------
# Clean up
# ------------------------------------------
del df
del ref_ds
del dev_ds
gc.collect()
[docs]
def create_benchmark_summary_table(
refpath,
refstr,
refdate,
devpath,
devstr,
devdate,
collections,
dst="./benchmark",
overwrite=False,
outfilename="Summary.txt",
verbose=False,
ref_gchp=False,
dev_gchp=False
):
"""
Creates a benchmark summary table that shows which data collections
have differences. Useful for scanning the 1-hr and 1-month benchmark
outputs.
Parameters
----------
refpath : str
Path to the first data set to be compared (aka "Ref").
refstr : str
A string that can be used to identify refdata
(e.g. a model version number or other identifier).
refdate : np.datetime64
Date/time stamp used by the "Ref" data files.
devpath : str
Path to the second data set to be compared (aka "Dev").
devstr : str
A string that can be used to identify the data set specified
by devfile (e.g. a model version number or other identifier).
devdate : np.datetime64
Date/time stamp used by the "Dev" data files.
collections : list of str
List of diagnostic collections to examine.
dst : str, optional
A string denoting the destination folder where the file
containing emissions totals will be written.
Default value: "./benchmark"
overwrite : bool, optional
Set this flag to True to overwrite files in the
destination folder (specified by the dst argument).
Default value: False
outfilename : str, optional
Name of the text file which will contain the table of
emissions totals.
Default value: "Summary.txt"
verbose : bool, optional
Set this switch to True if you wish to print out extra
informational messages.
Default value: False
ref_gchp : bool, optional
Set to True if the "Ref" data comes from a GCHP run.
Default value: False
dev_gchp : bool, optional
Set to True if the "Dev" data comes from a GCHP run.
Default value: False
Notes
-----
This method is mainly intended for model benchmarking purposes,
rather than as a general-purpose tool.
Species properties (such as molecular weights) are read from a
YAML file called "species_database.yml".
"""
# ==================================================================
# Open file for output
# ==================================================================
# Replace whitespace in the ref and dev labels
refstr = replace_whitespace(refstr)
devstr = replace_whitespace(devstr)
# Create the directory for output
make_directory(dst, overwrite)
# Create file
try:
f = open(os.path.join(dst, outfilename), "w", encoding=ENCODING)
except (IOError, OSError, FileNotFoundError) as e:
msg = f"Could not open {outfilename} for writing!"
raise e(msg) from e
# Title strings
title1 = "### Benchmark summary table"
title2 = f"### Ref = {refstr}"
title3 = f"### Dev = {devstr}"
# Print header to file
print("#" * 80, file=f)
print(f"{title1 : <77}{'###'}", file=f)
print(f"{'###' : <77}{'###'}", file=f)
print(f"{title2 : <77}{'###'}", file=f)
print(f"{title3 : <77}{'###'}", file=f)
print("#" * 80, file=f)
print(file=f)
# ==================================================================
# Read data and look differences btw Ref & Dev versions
# ==================================================================
# Variables to skip
skip_vars = SKIP_THESE_VARS
skip_vars.append("AREA")
# Pick the proper function to read the data
reader = dataset_reader(
multi_files=False,
verbose=verbose
)
# Loop over diagnostic files
for col in collections:
# Read Ref data
refdata = reader(
get_filepath(
refpath,
col,
refdate,
is_gchp=ref_gchp
),
drop_variables=skip_vars
)
# Get Dev data
devdata = reader(
get_filepath(
devpath,
col,
devdate,
is_gchp=dev_gchp
),
drop_variables=skip_vars
)
# Make sure that Ref and Dev datasets have the same variables.
# Variables that are in Ref but not in Dev will be added to Dev
# with all missing values (NaNs). And vice-versa.
[refdata, devdata] = add_missing_variables(
refdata,
devdata
)
# Find all common variables between the two datasets
vardict = compare_varnames(
refdata,
devdata,
quiet=True
)
# List of differences for this collection
diff_list = []
# Keep track of which variables are different
# NOTE: Use 32-point float for comparisons since this is
# the precision used for History diagnostics.
for v in vardict["commonvarsData"]:
if not array_equals(
refdata[v],
devdata[v],
dtype=np.float32
):
diff_list.append(v)
# Drop duplicate values from diff_list
diff_list = unique_values(diff_list, drop=[None])
if len(diff_list) == 0:
print("-" * 79, file=f)
print(f"{col}: {devstr} is identical to {refstr}", file=f)
print(file=f)
else:
print("-" * 79, file=f)
print(f"{col}: {devstr} differs from {refstr}", file=f)
print("\n Diagnostics that differ", file=f)
for i, v in enumerate(diff_list):
print(f" {v}", file=f)
if i > 10:
print(f" ... and {len(diff_list) - 10} others", file=f)
break
print(file=f)
# ==================================================================
# Close files
# ==================================================================
f.close()
[docs]
def diff_list_to_text(
refstr,
devstr,
diff_list,
fancy_format=False
):
"""
Converts a list of species/emissions/inventories/diagnostics that
show differences between GEOS-Chem versions to a printable text
string.
Parameters
----------
refstr : str
Label for the Ref version.
devstr : str
Label for the Dev version.
diff_list : list
List to be converted into text. "None" values will be dropped.
fancy_format : bool, optional
Set to True if you wish output text to be bookended with '###'.
Returns
-------
diff_text : str
String with concatenated list values.
"""
verify_variable_type(diff_list, list)
# Use "Dev" and "Ref" for inserting into a header
if fancy_format:
refstr = "Ref"
devstr = "Dev"
# Strip out duplicates from diff_list
# Prepare a message about species differences (or alternate msg)
diff_list = unique_values(diff_list, drop=[None])
# Print the text
n_diff = len(diff_list)
if n_diff > 0:
diff_text = f"{devstr} and {refstr} show {n_diff} differences"
else:
diff_text = f"{devstr} and {refstr} are identical"
# If we are placing the text in a header, trim the length of diff_text
# to fit. NOTE: TABLE_WIDTH-7 leaves room for the '### ' at the start
# of the string and the '###' at the end of the string,
if fancy_format:
diff_text = f"### {diff_text : <{TABLE_WIDTH-7}}{'###'}"
diff_text = wrap_text(
diff_text,
width=TABLE_WIDTH
)
return diff_text.strip()
[docs]
def diff_of_diffs_toprow_title(config, model):
"""
Creates the diff-of-diffs plot title for the top row of the
six-plot output. If the title string is too long (as empirically
determined), then a newline will be inserted in order to prevent
the title strings from overlapping.
Parameters
----------
config : dict
Dictionary containing the benchmark options (as read from a
YAML file such as 1mo_benchmark.yml, etc.)
model : str
The model to plot. Accepted values are "gcc" or "gchp".
Returns
-------
title : str
The plot title string for the diff-of-diffs.
"""
verify_variable_type(config, dict)
verify_variable_type(model, str)
if not "gcc" in model and not "gchp" in model:
msg = "The 'model' argument must be either 'gcc' or 'gchp'!"
raise ValueError(msg)
title = (
config["data"]["dev"][model]["version"]
+ " - "
+ config["data"]["ref"][model]["version"]
)
if len(title) > 40:
title = (
config["data"]["dev"][model]["version"]
+ " -\n"
+ config["data"]["ref"][model]["version"]
)
return title
[docs]
def create_benchmark_sanity_check_table(
devpath,
devstr,
devdate,
collections,
dst="./benchmark",
is_gchp=False,
overwrite=False,
outfilename="Diagnostic_Sanity_Check.txt",
verbose=False,
):
"""
Creates a diagnostic sanity check table that shows which diagnostic
variables are zero or NaN everywhere. This can help to identify
bugs in diagnostic output.
Parameters
----------
devpath : str
Path to the data set to be compared (aka "Dev").
devstr : str
A string that can be used to identify the data set specified
by devfile (e.g. a model version number or other identifier).
devdate : np.datetime64
Date/time stamp used by the "Dev" data files.
collections : list of str
List of diagnostic collections to examine.
dst : str, optional
A string denoting the destination folder where the file
containing emissions totals will be written.
Default value: "./benchmark"
is_gchp : bool, optional
Set this flag to true to denote if the data is from GCHP.
overwrite : bool, optional
Set this flag to True to overwrite files in the
destination folder (specified by the dst argument).
Default value: False
outfilename : str, optional
Name of the text file which will contain the table of
emissions totals.
Default value: "Summary.txt"
verbose : bool, optional
Set this switch to True if you wish to print out extra
informational messages.
Default value: False
Notes
-----
This method is mainly intended for model benchmarking purposes,
rather than as a general-purpose tool.
"""
# ==================================================================
# Initial preparations
# ==================================================================
# Replace whitespace in the ref and dev labels
devstr = replace_whitespace(devstr)
# Create the directory for output (if necessary)
make_directory(dst, overwrite)
outfilename = os.path.join(dst, outfilename)
# Pick the proper function to read the data
reader = dataset_reader(
multi_files=False,
verbose=verbose
)
# Variables to skip
skip_vars = SKIP_THESE_VARS
skip_vars.append("AREA")
# ==================================================================
# Open output file and write header
# ==================================================================
with open(outfilename, "w", encoding=ENCODING) as ofile:
# Title strings
title1 = "### Benchmark diagnostic sanity check table"
title2 = f"### Dev = {devstr}"
# Print header to file
print("#" * 80, file=ofile)
print(f"{title1 : <77}{'###'}", file=ofile)
print(f"{'###' : <77}{'###'}", file=ofile)
print(f"{title2 : <77}{'###'}", file=ofile)
print("#" * 80, file=ofile)
# ==============================================================
# Loop over diagnostic collections and scan files
# ==============================================================
for col in collections:
# Read data into an xr.DataSet object
file_name = get_filepath(
devpath,
col,
devdate,
is_gchp=is_gchp,
)
dset = reader(
file_name,
drop_variables=skip_vars
)
# Determine which variables are all zeroes or NaN
all_zeros_or_nans = []
for var in dset.data_vars:
data = dset[var].values
if np.all(data == 0) or np.all(data == np.nan):
all_zeros_or_nans.append(var)
# ===========================================================
# Print results for each collection
# ===========================================================
print("", file=ofile)
print("="*80, file=ofile)
print(f"{os.path.basename(file_name)}", file=ofile)
print("="*80, file=ofile)
print("", file=ofile)
if len(all_zeros_or_nans) == 0:
print("No variables were all zero or all NaN", file=ofile)
else:
print("These variables were all zero or all NaN:", file=ofile)
for var in all_zeros_or_nans:
print(f" {var}", file=ofile)