Plot Timeseries

This example script may also be found at gcpy/examples/plotting/plot_single_panel.py.

#!/usr/bin/env python
'''
Example of plotting timeseries data from GEOS-Chem and saving
the output to a PDF file.  You can modify this for your particular
diagnostic output.  This also contains a good overview of

This example script creates a PDF file with 2 pages.

   Page 1:
   -------
       O3 from the first model layer (from the "SpeciesConc"
       diagnostic collection is) plotted in blue.

       O3 at 10 meter height (from the "SpeciesConc_10m"
       diagnostic collection) is plotted in red.

   Page 2:
   -------
       HNO3 from the first model layer (from the SpeciesConc
       diagnostic collection is) plotted in blue.

       HNO3 at 10 meter height (from the SpeciesConc_10m
       diagnostic collection) is plotted in red.

You can of course modify this for your own particular applications.

Author:
-------
Bob Yantosca
yantosca@seas.harvard.edu
23 Aug 2019
'''

# Imports
import os
import warnings
import numpy as np
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import xarray as xr
from gcpy import constants


# Tell matplotlib not to look for an X-window, as we are plotting to
# a file and not to the screen.  This will avoid some warning messages.
os.environ['QT_QPA_PLATFORM'] = 'offscreen'

# Suppress harmless run-time warnings (mostly about underflow in division)
warnings.filterwarnings('ignore', category=RuntimeWarning)
warnings.filterwarnings('ignore', category=UserWarning)


def find_files_in_dir(path, substrs):
    '''
    Returns a list of all files in a directory that match one or more
    substrings.

    Args:
    -----
        path : str
            Path to the directory in which to search for files.

        substrs : list of str
            List of substrings used in the search for files.

    Returns:
    --------
        file_list : list of str
            List of files in the directory (specified by path)
            that match all substrings (specified in substrs).
    '''

    # Initialize
    file_list = []

    # Walk through the given data directory.  Then for each file found,
    # add it to file_list if it matches text in search_list.
    for root, directory, files in os.walk(path):
        for f in files:
            for s in substrs:
                if s in f:
                    file_list.append(os.path.join(root, f))

    # Return an alphabetically sorted list of files
    file_list.sort()
    return file_list


def find_value_index(seq, val):
    '''
    Finds the index of a numpy array that is close to a value.

    Args:
    -----
        seq : numpy ndarray
            An array of numeric values.

        val : number
            The value to search for in seq.

    Returns:
    --------
        result : integer
            The index of seq that has a value closest to val.

    Remarks:
    --------
    This algorithm was found on this page:
    https://stackoverflow.com/questions/48900977/find-all-indexes-of-a-numpy-array-closest-to-a-value
    '''
    r = np.where(np.diff(np.sign(seq - val)) != 0)
    idx = r + (val - seq[r]) / (seq[r + np.ones_like(r)] - seq[r])
    idx = np.append(idx, np.where(seq == val))
    idx = np.sort(idx)
    result = np.round(idx)

    # NOTE: xarray needs integer values, so convert here!
    return int(result[0])


def read_geoschem_data(path, collections):
    '''
    Returns an xarray Dataset containing timeseries data.

    Args:
    -----
        path : str
            Directory path where GEOS-Chem diagnostic output
            files may be found.

        collections: list of str
            List of GEOS-Chem collections.  Files for these
            collections will be read into the xarray Dataset.

    Returns:
    --------
        ds : xarray Dataset
            A Dataset object containing the GEOS-Chem diagnostic
            output corresponding to the collections that were
            specified.
    '''

    # Get a list of variables that GCPy should not read.
    # These are mostly variables introduced into GCHP with the MAPL v1.0.0
    # update.  These variables contain either repeated or non-standard
    # dimensions that can cause problems in xarray when combining datasets.
    skip_vars = constants.skip_these_vars

    # Find all files in the given
    file_list = find_files_in_dir(path, collections)

    # Return a single xarray Dataset containing data from all files
    # NOTE: Need to add combine="nested" for xarray 0.15 and higher
    v = xr.__version__.split(".")
    if int(v[0]) == 0 and int(v[1]) >= 15:
        return xr.open_mfdataset(file_list,
                                 drop_variables=skip_vars,
                                 combine="nested",
                                 concat_dim=None)
    else:
        return xr.open_mfdataset(file_list,
                                 drop_variables=skip_vars)


def plot_timeseries_data(ds, site_coords):
    '''
    Plots a timseries of data at a given (lat,lon) location.

    Args:
    -----
        ds : xarray Dataset
            Dataset containing GEOS-Chem timeseries data.

        site_coords : tuple
            Contains the coordinate (lat, lon) of a site location
            at which the timeseries data will be plotted.
    '''

    # ----------------------------------------------------------------------
    # Get the GEOS-Chem data for O3 and HNO3 corresponding to the
    # location of the observational station.  We will save these into
    # xarray DataArray objects, which we'll need for plotting.
    #
    # YOU CAN EDIT THIS FOR YOUR OWN PARTICULAR APPLICATION!
    # ----------------------------------------------------------------------

    # Find the indices corresponding to the site lon and lat
    lat_idx = find_value_index(ds.lat.values, site_coords[0])
    lon_idx = find_value_index(ds.lon.values, site_coords[1])

    # Save O3 from the first level (~60m height) (ppb) into a DataArray
    O3_L1 = ds['SpeciesConc_O3'].isel(lon=lon_idx, lat=lat_idx, lev=0)
    O3_L1 *= 1.0e9
    O3_L1.attrs['units'] = 'ppbv'

    # Save O3 @ 10m height into a DataArray
    O3_10m = ds['SpeciesConc10m_O3'].isel(lon=lon_idx, lat=lat_idx)
    O3_10m *= 1.0e9
    O3_10m.attrs['units'] = 'ppbv'

    # Save HNO3 from the first level (~60m height) into a DataArray
    HNO3_L1 = ds['SpeciesConc_HNO3'].isel(lon=lon_idx, lat=lat_idx, lev=0)
    HNO3_L1 *= 1.0e9
    HNO3_L1.attrs['units'] = 'ppbv'

    # Save HNO3 @ 10m height into a DataArray
    HNO3_10m = ds['SpeciesConc10m_HNO3'].isel(lon=lon_idx, lat=lat_idx)
    HNO3_10m *= 1.0e9
    HNO3_10m.attrs['units'] = 'ppbv'

    # ----------------------------------------------------------------------
    # Create a PDF file of the plots
    # ----------------------------------------------------------------------

    # Get min & max days of the plot span (for setting the X-axis range).
    # To better center the plot, add a cushion of 12 hours on either end.
    time = ds['time'].values
    datemin = np.datetime64(time[0]) - np.timedelta64(12, 'h')
    datemax = np.datetime64(time[-1]) + np.timedelta64(12, 'h')

    # Define a PDF object so that we can save the plots to PDF
    pdf = PdfPages('O3_and_HNO3.pdf')

    # Loop over number of desired pages (in this case, 2)
    for i in range(0, 2):

        # Create a new figure: 1 plot per page, 2x as wide as high
        figs, ax0 = plt.subplots(1, 1, figsize=[12, 6])

        # -----------------------------
        # Plot O3 on the first page
        # -----------------------------
        if i == 0:

            # 1st model level
            O3_L1.plot.line(ax=ax0, x='time', color='blue',
                            marker='o', label='O3 from 1st model level',
                            linestyle='-')

            # 10 mheight
            O3_10m.plot.line(ax=ax0, x='time', color='red',
                             marker='x', label='O3 at 10m height',
                             linestyle='-')

            # Set title (has to be after the line plots are drawn)
            ax0.set_title('O3 from the 1st model level and at 10m height')

            # Set Y-axis minor tick marks at every 2 ppb (5 intervals)
            ax0.yaxis.set_minor_locator(mticker.AutoMinorLocator(5))

            # Set y-axis title
            ax0.set_ylabel('O3 (ppbv)')

        # -----------------------------
        # Plot HNO3 on the second page
        # -----------------------------
        if i == 1:

            # 1st model level
            HNO3_L1.plot.line(ax=ax0, x='time', color='blue',
                              marker='o', label='HNO3 from 1st model level',
                              linestyle='-')

            # 10m height
            HNO3_10m.plot.line(ax=ax0, x='time', color='red',
                               marker='x', label='HNO3 at 10m height',
                               linestyle='-')

            # Set title (has to be after the line plots are drawn
            ax0.set_title('HNO3 from the 1st model level and at 10m height')

            # Set Y-axis minor tick marks at every 0.05 ppb (4 intervals)
            ax0.yaxis.set_minor_locator(mticker.AutoMinorLocator(4))

            # Set y-axis title
            ax0.set_ylabel('HNO3 (ppbv)')

        # -----------------------------
        # Set general plot parameters
        # -----------------------------

        # Add the plot legend
        ax0.legend()

        # Set the X-axis range
        ax0.set_xlim(datemin, datemax)

        # Set the X-axis major tickmarks
        locator = mdates.DayLocator()
        formatter = mdates.DateFormatter('%d')
        ax0.xaxis.set_major_locator(locator)
        ax0.xaxis.set_major_formatter(formatter)

        # Set X-axis minor tick marks at noon of each day
        # (i.e. split up the major interval into 2 bins)
        ax0.xaxis.set_minor_locator(mticker.AutoMinorLocator(2))

        # Don't rotate the X-axis jtick labels
        ax0.xaxis.set_tick_params(rotation=0)

        # Center the X-axis tick labels
        for tick in ax0.xaxis.get_major_ticks():
            tick.label1.set_horizontalalignment('center')

        # Set X-axis and Y-axis labels
        ax0.set_xlabel('Day of July (and August) 2016')

        # -----------------------------
        # Save this page to PDF
        # -----------------------------
        pdf.savefig(figs)
        plt.close(figs)

    # ----------------------------------------------------------------------
    # Save the PDF file to disk
    # ----------------------------------------------------------------------
    pdf.close()


def main():
    '''
    Main program.
    '''
    # Path where the data files live
    # (YOU MUST EDIT THIS FOR YUR OWN PARTICULAR APPLICATION!)
    path_to_data = '/path/to/GEOS-Chem/diagnostic/data/files'

    # Get a list of files in the ConcAboveSfc and SpeciesConc collections
    # (YOU CAN EDIT THIS FOR YOUR OWN PARTICULAR APPLICATION!)
    collections = ['ConcAboveSfc', 'SpeciesConc']

    # Read GEOS-Chem data into an xarray Dataset
    ds = read_geoschem_data(path_to_data, collections)

    # Plot timeseries data at Centerville, AL (32.94N, 87.18W)
    # (YOU CAN EDIT THIS FOR YOUR OWN PARTICULAR APPLICATION!)
    site_coords = (32.94, -87.18)
    plot_timeseries_data(ds, site_coords)


if __name__ == "__main__":
    main()