Source code for gcpy.benchmark.modules.budget_ox

"""
Computes the budget of Ox from 1-year GEOS-Chem Classic
or GCHP benchmark simulations.
"""

# ======================================================================
# Imports etc.
# ======================================================================

import os
import warnings
from calendar import monthrange
import gc
import numpy as np
import xarray as xr
from gcpy.constants import \
    AVOGADRO, ENCODING, G, MW_AIR_g, SKIP_THESE_VARS
from gcpy.grid import get_troposphere_mask
from gcpy.util import \
    get_filepath, read_config_file, read_species_metadata, \
    rename_and_flip_gchp_rst_vars, reshape_MAPL_CS, replace_whitespace
from gcpy.benchmark.modules.benchmark_utils import \
    add_lumped_species_to_dataset, get_lumped_species_definitions

# Suppress harmless run-time warnings (mostly about underflow in division)
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# ======================================================================
# Define a class for passing global variables to the methods below
# ======================================================================

class _GlobVars:
    """
    Private class _GlobVars contains global data that needs to be
    shared among the methods in this module.
    """
    def __init__(
            self,
            devstr,
            devdir,
            devrstdir,
            year,
            dst,
            overwrite,
            spcdb_files,
            is_gchp,
            gchp_res,
            gchp_is_pre_14_0
    ):
        """
        Initializes the _GlobVars class.

        Parameters
        ----------
        devstr : str
            Label denoting the "Dev" version.
        devdir : str
            Directory where diagnostic files are found.
        devrstdir : str
            Directory where restart files are found.
        dst : str
            Directory where plots & tables will be created.
        year : int
            Year of the benchmark simulation.
        overwrite : bool
            Denotes whether to overwrite existing budget tables.
        spcdb_files : list
            Paths to species_database.yml files in Ref & Dev rundirs.
        is_gchp : bool
            Denotes if this is GCHP (True) or GCC (False) data.
        gchp_res : str
            GCHP resolution string (e.g. "c24", "c48", etc.)
        gchp_is_pre_14_0 : bool
            Denotes if the GCHP version is prior to 14.0.0 (True)
            or not (False).
        """
        # --------------------------------------------------------------
        # Arguments from outside
        # --------------------------------------------------------------
        self.devstr = replace_whitespace(devstr)
        self.devdir = devdir
        self.devrstdir = devrstdir
        self.dst = dst
        self.overwrite = overwrite
        self.spcdb_files = spcdb_files
        self.is_gchp = is_gchp
        self.gchp_res = gchp_res
        self.gchp_is_pre_14_0 = gchp_is_pre_14_0

        # ---------------------------------------------------------------
        # Benchmark year
        # ---------------------------------------------------------------
        self.y0 = int(year)
        self.y1 = self.y0 + 1
        self.y0_str = f"{self.y0}"
        self.y1_str = f"{self.y1}"

        # --------------------------------------------------------------
        # Read data into datasets
        # --------------------------------------------------------------

        # Lumped species definition for Ox (looks in rundir first)
        self.lspc_dict = self.get_lumped_species_dict()

        # Read initial and final restart files
        self.ds_ini = self.read_rst(self.y0_str)
        self.ds_end = self.read_rst(self.y1_str)

        # Change the restart datasets into format similar to GCC, and flip
        # vertical axis.  Also test if the restart files have the BXHEIGHT
        # variable contained within them.
        if is_gchp:
            self.ds_ini = rename_and_flip_gchp_rst_vars(self.ds_ini)
            self.ds_end = rename_and_flip_gchp_rst_vars(self.ds_end)

        # Read diagnostics
        self.get_diag_paths()
        self.ds_dry = self.read_diag("DryDep", prefix="DryDep_")
        self.ds_pl = self.read_diag("ProdLoss")
        self.ds_met = self.read_diag("StateMet", prefix="Met_")
        self.ds_wcv = self.read_diag("WetLossConv", prefix="WetLossConv_")
        self.ds_wls = self.read_diag("WetLossLS", prefix="WetLossLS_")

        # --------------------------------------------------------------
        # Get other quantities for the budget computations
        # --------------------------------------------------------------
        self.get_area_and_volume()
        self.tropmask = get_troposphere_mask(self.ds_met)
        self.get_time_info()
        self.get_conv_factors(spcdb_files)


    # ==================================================================
    # Instance methods
    # ==================================================================
    def get_lumped_species_dict(self):
        """
        Returns a dictionary of lumped species definitions.

        Returns
        -------
        lspc_dict : dict
            Dictionary of lumped species definitions.
        """
        # First look in the current folder
        lspc_path = "lumped_species.yml"
        if os.path.exists(lspc_path):
            lspc_dict = read_config_file(lspc_path, quiet=True)
            return lspc_dict

        # Then look in the same folder where the species database is
        lspc_path = os.path.join(__file__, "lumped_species.yml")
        if os.path.exists(lspc_path):
            lspc_dict = read_config_file(lspc_path, quiet=True)
            return lspc_dict

        # Then look in the GCPy source code folder
        lspc_dict = get_lumped_species_definitions()
        return lspc_dict


    def rst_file_path(self, ystr):
        """
        Returns the restart file path.

        Parameters
        ----------
        ystr : str
            Year string in YYYY format.
        """
        return get_filepath(
            self.devrstdir,
            "Restart",
            np.datetime64(f"{ystr}-01-01T00:00:00"),
            is_gchp=self.is_gchp,
            gchp_res=self.gchp_res,
            gchp_is_pre_14_0=self.gchp_is_pre_14_0
        )


    def get_diag_paths(self):
        """
        Generates data paths for diagnostic collection files.
        """

        # List of diagnostic collections
        collections = [
            'DryDep',
            'ProdLoss',
            'StateMet',
            'WetLossConv',
            'WetLossLS'
        ]

        # Path where files in each collection are found
        self.pathlist = {}
        for c in collections:
            self.pathlist[c] = os.path.join(
                self.devdir,
                f"*.{c}.{self.y0_str}*.nc4"
            )


    def read_rst(self, ystr):
        """
        Reads a restart file into an xarray Dataset.

        Parameters
        ----------
        ystr : str
            String containing the year (YYYY format).

        Returns
        -------
        ds : xr.Dataset
            The restart dataset.
        """
        ds = xr.open_dataset(
            self.rst_file_path(ystr),
            drop_variables=SKIP_THESE_VARS
        )

        if self.is_gchp:
            rst_prefix="SPC_"
        else:
            rst_prefix="SpeciesRst_"

        ds = add_lumped_species_to_dataset(
            ds,
            lspc_dict=self.lspc_dict,
            verbose=False,
            prefix=rst_prefix
        )

        return ds


    def read_diag(self, collection, prefix=None):
        """
        Reads a restart file into an xarray Dataset.

        Parameters
        ----------
        collection : str
            Name of the collection to read.

        Returns
        -------
        ds : xr.Dataset
            The diagnostic dataset.
        """
        ds = xr.open_mfdataset(
            self.pathlist[collection],
            drop_variables=SKIP_THESE_VARS,
            combine="nested",
            concat_dim="time"
        )

        if prefix is not None:
            ds = add_lumped_species_to_dataset(
                ds,
                lspc_dict=self.lspc_dict,
                verbose=False,
                prefix=prefix
            )

        return ds


    def get_area_and_volume(self):
        """
        Returns area and volume quantities for the budget computations.
        """
        # The area in m2 is on the restart file grid
        # The area in cm2 is on the History diagnostic grid
        # Both grids are identical in GCClassic but differ in GCHP
        if self.is_gchp:
            if 'Met_AREAM2' not in self.ds_met.data_vars.keys():
                msg = 'Could not find Met_AREAM2 in StateMet_avg collection!'
                raise ValueError(msg)
            area_m2 = self.ds_met["Met_AREAM2"].isel(time=0)
            area_m2 = reshape_MAPL_CS(area_m2)
            self.area_m2 = area_m2
            self.area_cm2 = self.ds_met["Met_AREAM2"].isel(time=0) * 1.0e4
        else:
            self.area_m2 = self.ds_met["AREA"].isel(time=0)
            self.area_cm2 = self.area_m2 * 1.0e4

        # Box volume [cm3]
        self.vol_cm3 = self.ds_met["Met_AIRVOL"].values * 1.0e6


    def get_time_info(self):
        """
        Returns time information for the budget computations.
        """
        # Months
        self.n_months = 12

        # Days per month in the benchmark year
        self.d_per_m = np.zeros(self.n_months)
        self.s_per_m = np.zeros(self.n_months)
        for t in range(self.n_months):
            self.d_per_m[t] = monthrange(self.y0, t + 1)[1] * 1.0
            self.s_per_m[t] = self.d_per_m[t] * 86400.0

        # Days and seconds in the benchmark year
        self.d_per_a = np.sum(self.d_per_m)
        self.s_per_a = np.sum(self.s_per_m)

        # Fraction of year occupied by each month
        self.frac_of_a = np.zeros(self.n_months)
        for t in range(self.n_months):
            self.frac_of_a[t] = self.d_per_m[t] / self.d_per_a


    def get_conv_factors(self, spcdb_files):
        """
        Gets conversion factors used in budget computations.

        Parameters
        ----------
        spcdb_files : str
            Paths to the species_database.yml file in Ref & Dev.
        """
        # Read the species database file from the Dev model
        _, spcdb = read_species_metadata(spcdb_files, quiet=True)

        # Molecular weights [kg mol-1], as taken from the species database
        self.mw = {}
        self.mw["O3"] = spcdb["O3"]["MW_g"] * 1.0e-3
        self.mw["Ox"] = self.mw["O3"]
        self.mw["Air"] = MW_AIR_g * 1.0e-3

        # kg/s --> Tg/d
        self.kg_s_to_tg_a_value = 86400.0 * self.d_per_a * 1e-9


# ======================================================================
# Methods
# ======================================================================


[docs] def init_and_final_mass( globvars, species ): """ Computes global species mass from the initial & final restart files. Parameters ---------- globvars : _GlobVars Global variables needed for budget computations. species : list List of species names to compute mass for. Returns ------- result : dict Contains initial & final tropospheric mass of O3. """ # Error checks if species is None: raise ValueError("Argument species is undefined!") # Meterological quantities (from restart file met) if 'time' in globvars.ds_ini["Met_DELPDRY"].dims: deltap_ini = globvars.ds_ini["Met_DELPDRY"].isel(time=0).values else: deltap_ini = globvars.ds_ini["Met_DELPDRY"].values if 'time' in globvars.ds_end["Met_DELPDRY"].dims: deltap_end = globvars.ds_end["Met_DELPDRY"].isel(time=0).values else: deltap_end = globvars.ds_end["Met_DELPDRY"].values g100 = 100.0 / G airmass_ini = (deltap_ini * globvars.area_m2.values) * g100 airmass_end = (deltap_end * globvars.area_m2.values) * g100 # Conversion factors mw_ratio = globvars.mw["O3"] / globvars.mw["Air"] kg_to_tg = 1.0e-9 vv_to_tg_ini = airmass_ini * (mw_ratio * kg_to_tg) vv_to_tg_end = airmass_end * (mw_ratio * kg_to_tg) # Loop over the species list result = {} for s in species: v = "SpeciesRst_" + s # Determine if restart file variables have a time dimension # (if one does, they all do) if 'time' in globvars.ds_ini[v].dims: mass_ini = globvars.ds_ini[v].isel(time=0).values * vv_to_tg_ini else: mass_ini = globvars.ds_ini[v].values * vv_to_tg_ini if 'time' in globvars.ds_end[v].dims: mass_end = globvars.ds_end[v].isel(time=0).values * vv_to_tg_end else: mass_end = globvars.ds_end[v].values * vv_to_tg_end # Compute tropospheric masses [Tg Ox] tropmass_ini = np.ma.masked_array( mass_ini, globvars.tropmask[0, :, :, :] ) tropmass_end = np.ma.masked_array( mass_end, globvars.tropmask[globvars.n_months-1, :, :, :] ) # Create a dict to return values result[s + "_ini"] = np.sum(tropmass_ini) result[s + "_end"] = np.sum(tropmass_end) result[s + "_acc"] = result[s + "_end"] - result[s + "_ini"] return result
[docs] def annual_average_prodloss( globvars ): """ Parameters ---------- globvars : _GlobVars Global variables needed for budget computations. Returns ------- result : dict Contains annual monthly-weighted average of Prod_Ox and Loss_Ox. """ # Conversion factors mw_avo = globvars.mw["Ox"] / AVOGADRO kg_to_tg = 1.0e-9 # Tropospheric P(Ox) and L(Ox) [molec/s] prod_trop = np.ma.masked_array( globvars.ds_pl["Prod_Ox"].values * globvars.vol_cm3, globvars.tropmask ) loss_trop = np.ma.masked_array( globvars.ds_pl["Loss_Ox"].values * globvars.vol_cm3, globvars.tropmask ) # Compute monthly-weighted averages [Tg Ox] prod_tot = 0.0 loss_tot = 0.0 for t in range(globvars.n_months): prod_tot += np.sum(prod_trop[t, :, :, :]) * globvars.frac_of_a[t] loss_tot += np.sum(loss_trop[t, :, :, :]) * globvars.frac_of_a[t] prod_tot *= globvars.s_per_a * kg_to_tg * mw_avo loss_tot *= globvars.s_per_a * kg_to_tg * mw_avo # Make a dict for returning results result = {} result["POx"] = np.sum(prod_tot) result["LOx"] = np.sum(loss_tot) result["POx-LOx"] = result["POx"] - result["LOx"] return result
[docs] def annual_average_drydep( globvars ): """ Parameters ---------- globvars : _GlobVars Global variables needed for budget computations. Returns ------- result : float Annual average dry deposition [Tg Ox]. """ # Conversion factors and area mw_avo = globvars.mw["Ox"] / AVOGADRO kg_to_tg = 1.0e-9 area_cm2 = globvars.area_cm2.values # Get drydep flux of Ox [molec/cm2/s] dry = globvars.ds_dry["DryDep_Ox"].values # Convert to Tg Ox dry_tot = 0.0 for t in range(globvars.n_months): dry_tot += np.nansum(dry[t, :, :] * area_cm2) * globvars.frac_of_a[t] result = dry_tot * globvars.s_per_a * kg_to_tg * mw_avo return result
[docs] def annual_average_wetdep(globvars): """ Parameters ---------- globvars : _GlobVars Global variables needed for budget computations. Returns ------- result : dict Annual average wet deposition [Tg Ox], with keys "CV" (convective), "LS" (large-scale), and "Total". """ # Conversion factors kg_to_tg = 1.0e-9 # Tropospheric wet loss (convective & large scale) # Take only tropospheric values wetcv_trop = np.ma.masked_array( globvars.ds_wcv["WetLossConv_Ox"].values, globvars.tropmask ) wetls_trop = np.ma.masked_array( globvars.ds_wls["WetLossLS_Ox"].values, globvars.tropmask ) # Monthly-weighted conv & LS wet losses [kg HNO3/s] wetcv_tot = 0.0 wetls_tot = 0.0 for t in range(globvars.n_months): wetcv_tot += np.nansum(wetcv_trop[t, :, :, :]) * globvars.frac_of_a[t] wetls_tot += np.nansum(wetls_trop[t, :, :, :]) * globvars.frac_of_a[t] # Convert [kg HNO3/s] to [Tg Ox/a] wetcv_tot *= globvars.s_per_a * kg_to_tg #* hno3_to_ox wetls_tot *= globvars.s_per_a * kg_to_tg #* hno3_to_ox # Create a dict to return the results result = {} result["CV"] = wetcv_tot result["LS"] = wetls_tot result["Total"] = result["CV"] + result["LS"] return result
[docs] def annual_metrics( globvars, mass, prodloss, wetdep, drydep ): """ Computes the following terms: 1. "dyn": Ox subsiding from the stratosphere 2. "net": Net Ox = (POx-LOx) + Dyn - Drydep - Wetdep 3. "life": Ox lifetime (d) = Ox burden / (LOx + Drydep + Wetdep) Parameters ---------- globvars : _GlobVars Global variables needed for budget computations. mass : dict Mass terms. prodloss : dict Production and loss terms. wetdep : dict Wet deposition terms. drydep : numpy.float64 Dry deposition term. Returns ------- result : dict Contains dyn, net, and life terms. """ result = {} acc = mass["Ox_acc"] burden = mass["Ox_end"] chem = prodloss["POx-LOx"] lox = prodloss["LOx"] wetd = wetdep["Total"] result["dyn"] = acc - (chem - wetd - drydep) result["net"] = (chem + result["dyn"]) - (wetd + drydep) result["life"] = (burden / (lox + wetd + drydep)) * globvars.d_per_a return result
[docs] def global_ox_budget( devstr, devdir, devrstdir, year, spcdb_file, dst='./1yr_benchmark', overwrite=True, is_gchp=False, gchp_res="c24", gchp_is_pre_14_0=False ): """ Main program to compute Ox budgets from GEOS-Chem Classic or GCHP benchmark simulations. Parameters ---------- devstr : str Label for the Dev version. devdir : str Path to the Dev data directory. devrstdir : str Path to the Dev restart file directory. year : int Year of the benchmark simulation. spcdb_file : str Path to the Dev species_database.yml file. dst : str, optional Directory where tables will be written. overwrite : bool, optional Should existing tables be overwritten? is_gchp : bool, optional Is Dev from a GCHP benchmark simulation? gchp_res : str, optional GCHP resolution string (e.g. "c24"). gchp_is_pre_14_0 : bool, optional Is Dev from a GCHP version prior to 14.0.0? """ # Store global variables in a private class globvars = _GlobVars( devstr, devdir, devrstdir, year, dst, overwrite, spcdb_file, is_gchp, gchp_res, gchp_is_pre_14_0, ) # ================================================================== # Compute Ox budget [Tg a-1] # ================================================================== # Mass from initial & final restart file mass = init_and_final_mass( globvars, ["O3", "Ox"] ) # Sources and sinks prodloss = annual_average_prodloss( globvars ) wetdep = annual_average_wetdep( globvars ) drydep = annual_average_drydep( globvars ) # Dynamics, net Ox, lifetime in days metrics = annual_metrics( globvars, mass, prodloss, wetdep, drydep ) # ================================================================== # Print budgets to file # ================================================================== print_budget( globvars, mass, prodloss, wetdep, drydep, metrics ) # ================================================================== # Force garbage collection # ================================================================== del globvars del mass del prodloss del wetdep del drydep del metrics gc.collect()