Source code for gcpy.kpp.kppsa_utils

#!/usr/bin/env python3
"""
Utility functions for visualizing output from the
KPP-Standalone box model.
"""

# Imports
from os.path import expanduser, join
from glob import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from gcpy.constants import ENCODING
from gcpy.util import get_element_of_series, verify_variable_type


[docs] def kppsa_get_file_list(input_dir, pattern=""): """ Returns a list of KPP-Standalone log files matching a search pattern. Parameters ---------- input_dir : str Directory containing KPP-Standalone log files. pattern : str, optional Glob pattern used to filter filenames. All files containing this string will be returned. Default: ``""`` (matches all files) Returns ------- file_list : list of str List of file paths matching the search criteria. """ return glob(join(expanduser(input_dir), f"*{pattern}*"))
[docs] def kppsa_read_one_csv_file(file_name): """ Reads a single KPP-Standalone log file in CSV format into a pandas DataFrame. Metadata fields (location, timestamp, longitude, latitude, vertical level, and pressure) are extracted from the file header and appended as additional columns. Parameters ---------- file_name : str Path to the CSV-format KPP-Standalone log file to be read. Returns ------- dframe : pandas.DataFrame DataFrame containing species concentrations and associated metadata extracted from the file header. Added columns are: ``Location``, ``DateTime``, ``Longitude``, ``Latitude``, ``Level``, and ``Pressure``. """ verify_variable_type(file_name, str) # Initialize variables latitude = "" level = "" longitude = "" location = "" pressure = "" timestamp = "" # Read file header contents with open(file_name, "r", encoding=ENCODING) as ifile: # Find the number of rows to skip skiprows = int(ifile.readline().strip()) + 1 # Read the rest of the header contents for line in ifile: line = line.strip() # Extract selected metadata fields if "Location" in line: location = line.split(":")[1].strip() if "Timestamp" in line: substrs = line.split(":") timestamp = substrs[1].strip() + ":" + substrs[2].strip() timestamp = timestamp.replace("/", "-").replace(" ", "T") if "Longitude" in line: longitude = line.split(":")[1].strip() if "Latitude" in line: latitude = line.split(":")[1].strip() if "GEOS-Chem Vertical Level" in line: level = line.split(":")[1].strip() if "Pressure (hPa)" in line: pressure = line.split(":")[1].strip() # Exit the loop once the column header row is reached if "Species Name" in line: break # Read the CSV into a DataFrame object dframe = pd.read_csv( file_name, skiprows=skiprows, delimiter=",", dtype={ "Initial Concentration (molec/cm3)": np.float64, "Final Concentration (molec/cm3)": np.float64, }, engine="c" ) # Add series with metadata obtained from the header dframe = dframe.assign(Location=location) dframe = dframe.assign(DateTime=np.datetime64(timestamp)) dframe = dframe.assign(Longitude=float(longitude)) dframe = dframe.assign(Latitude=float(latitude)) dframe = dframe.assign(Level=int(level)) dframe = dframe.assign(Pressure=float(pressure)) return dframe
[docs] def kppsa_read_csv_files(file_list): """ Reads multiple KPP-Standalone log files and concatenates them into a single pandas DataFrame. Parameters ---------- file_list : list of str Paths to KPP-Standalone CSV log files to be read. Returns ------- dframe_all : pandas.DataFrame Concatenated DataFrame containing data from all files in ``file_list``. """ dframe_all = pd.DataFrame() for file_name in file_list: dframe = kppsa_read_one_csv_file(file_name) dframe_all = pd.concat([dframe_all, dframe], ignore_index=True) del dframe return dframe_all
[docs] def kppsa_prepare_site_data(dframe, site_name, species): """ Returns a DataFrame and plot title for a given species and observation site. Data is filtered to include only observations at or below 500 hPa (surface to lower stratosphere) and sorted by descending pressure. Parameters ---------- dframe : pandas.DataFrame KPP-Standalone output data as returned by :func:`kppsa_read_csv_files`. site_name : str Name of the observation site to extract data for. species : str Name of the species to extract data for. Returns ------- site_data : pandas.DataFrame or None DataFrame filtered to the given site and species, sorted by descending pressure, and limited to pressures >= 500 hPa. Returns ``None`` if ``dframe`` is ``None``. site_title : str or None Plot title string including the site name, cardinal coordinates, and timestamp (e.g. ``"Mace Head (53°N,10°W) at 2019-07-01T00:00"``). Returns ``None`` if ``dframe`` is ``None``. """ # Exit if the data frame is set to None if dframe is None: return None, None # Get data for a given species at all locations site_data = dframe.loc[dframe["Location"] == site_name] site_data = site_data.loc[site_data["Species Name"] == species] site_data = site_data.loc[site_data["Pressure"] >= 500] site_data = site_data.sort_values(by="Pressure", ascending=False) # Create the top title for the subplot for this observation site # (use integer lon & lat values and N/S lat and E/W lon notation) lat = int(round(get_element_of_series(site_data["Latitude"], 0))) lon = int(round(get_element_of_series(site_data["Longitude"], 0))) time = get_element_of_series(site_data["DateTime"], 0) ystr = "S" if lat < 0 else "N" xstr = "W" if lon < 0 else "E" lon = abs(lon) lat = abs(lat) site_title = \ f"{site_name.strip()} ({lat}$^\\circ${ystr},{lon}$^\\circ${xstr})" site_title += f" at {time}" return site_data, site_title
[docs] def kppsa_plot_single_site( fig, rows_per_page, cols_per_page, subplot_index, subplot_title, ref_data, ref_label, dev_data, dev_label, species, font_scale, ): """ Plots initial and final species concentrations as vertical profiles at a single observation site. Parameters ---------- fig : matplotlib.figure.Figure Figure object into which the subplot will be added. rows_per_page : int Number of subplot rows on the page. cols_per_page : int Number of subplot columns on the page. subplot_index : int Zero-based index of this subplot within the page grid. subplot_title : str Title string displayed above this subplot. ref_data : pandas.DataFrame KPP-Standalone output for the ``Ref`` model version at this site, as returned by :func:`kppsa_prepare_site_data`. ref_label : str Legend label for the ``Ref`` model final concentration. dev_data : pandas.DataFrame or None KPP-Standalone output for the ``Dev`` model version at this site. Pass ``None`` to omit the Dev profile. dev_label : str or None Legend label for the ``Dev`` model final concentration. Ignored if ``dev_data`` is ``None``. species : str Name of the species being plotted, used for axis labels and error checking. font_scale : float Multiplicative scale factor applied to all font sizes, allowing text to be enlarged or reduced uniformly. Raises ------ ValueError If ``species`` is not found in ``ref_data``. ValueError If ``dev_data`` is not ``None`` and ``species`` is not found in ``dev_data``. Notes ----- The Y-axis (pressure) is plotted from 1020 hPa at the bottom to 500 hPa at the top. The Y-axis label is only applied to leftmost panels (``subplot_index`` is 0 or a multiple of ``cols_per_page``). """ # Error checks if species not in list(ref_data["Species Name"]): raise ValueError("Species not found in Ref model output!") if dev_data is not None: if species not in list(dev_data["Species Name"]): raise ValueError("Species not found in Dev model output!") # Create matplotlib axes object for this subplot axes_subplot = fig.add_subplot( rows_per_page, cols_per_page, subplot_index + 1, ) # Title for each subplot axes_subplot.set_title( subplot_title, weight='bold', fontsize=8*font_scale, ) # Initial concentration axes_subplot.plot( ref_data["Initial Concentration (molec/cm3)"].astype(np.float64), ref_data["Pressure"], color='k', marker='o', markersize=3*font_scale, lw=1, label=f"Initial {species}", ) # Final concentration (Ref) axes_subplot.plot( ref_data["Final Concentration (molec/cm3)"].astype(np.float64), ref_data["Pressure"], color='r', marker='o', markersize=3*font_scale, lw=1, label=f"{ref_label}", ) # Final concentration (Dev) if dev_data is not None: axes_subplot.plot( dev_data["Final Concentration (molec/cm3)"].astype(np.float64), dev_data["Pressure"], color='b', marker='o', markersize=3*font_scale, lw=1, label=f"{dev_label}", ) # Set X and Y axis labels axes_subplot.set_xlabel( f"{species} (molec/cm3)", fontsize=8*font_scale, ) # Apply y-axis label only if this is a leftmost plot panel if subplot_index == 0 or subplot_index % cols_per_page == 0: axes_subplot.set_ylabel( "Pressure (hPa)", fontsize=8*font_scale, ) # Set Y-axis range and ticks axes_subplot.set_ylim(1020.0, 500.0) axes_subplot.set_yticks([1000, 900, 800, 700, 600, 500]) axes_subplot.tick_params( axis='both', which='major', labelsize=8*font_scale )
[docs] def kppsa_plot_one_page( pdf, site_names, ref_dframe, ref_label, dev_dframe, dev_label, species="O3", rows_per_page=3, cols_per_page=3, font_scale=1.0, ): """ Plots a single page of Ref vs. Dev vertical profiles for a list of observation sites and saves it to a PDF file. Parameters ---------- pdf : matplotlib.backends.backend_pdf.PdfPages Open PDF file object to which the page will be saved. site_names : list of str Names of the observation sites to plot on this page. ref_dframe : pandas.DataFrame KPP-Standalone output for the ``Ref`` model version, as returned by :func:`kppsa_read_csv_files`. ref_label : str Legend label for the ``Ref`` model data. dev_dframe : pandas.DataFrame KPP-Standalone output for the ``Dev`` model version, as returned by :func:`kppsa_read_csv_files`. dev_label : str Legend label for the ``Dev`` model data. species : str, optional Name of the species to plot. Default: ``"O3"`` rows_per_page : int, optional Number of subplot rows on the page. Default: ``3`` cols_per_page : int, optional Number of subplot columns on the page. Default: ``3`` font_scale : float, optional Multiplicative scale factor applied to all font sizes. Default: ``1.0`` Notes ----- The page is landscape-oriented (11" × 8"). A shared legend is placed at the top of the page. Vertical spacing between subplots is adjusted automatically. """ # Define a new matplotlib.figure.Figure object for this page # Landscape width: 11" x 8" fig = plt.figure(figsize=(11, 8)) fig.tight_layout() # Loop over all of the stations that fit on the page for subplot_index, site_name in enumerate(site_names): # Get the data for the given species and site ref_site_data, site_title = kppsa_prepare_site_data( ref_dframe, site_name, species, ) dev_site_data, _ = kppsa_prepare_site_data( dev_dframe, site_name, species, ) # Plot species vertical profile at a given site kppsa_plot_single_site( fig, rows_per_page, cols_per_page, subplot_index, site_title, ref_site_data, ref_label, dev_site_data, dev_label, species, font_scale, ) # Add extra spacing around plots plt.subplots_adjust(hspace=0.4, top=0.9) # Add top-of-page legend plt.legend( ncol=3, bbox_to_anchor=(0.5, 0.98), bbox_transform=fig.transFigure, loc='upper center' ) # Save this page to the PDF file pdf.savefig(fig)
[docs] def kppsa_get_unique_site_names(dframe): """ Returns a list of unique observation site names from KPP-Standalone output, ordered from north to south. Parameters ---------- dframe : pandas.DataFrame KPP-Standalone output data as returned by :func:`kppsa_read_csv_files`. Returns ------- unique_site_names : list of str Unique site names sorted by descending latitude (north to south). """ # Sort the sites from north to south site_names = dframe[["Location", "Latitude"]].sort_values( by="Latitude", ascending=False ) # Get a list of unique site names, preserving the ordering unique_site_names = [] seen = set() for site_name in site_names["Location"]: if site_name not in seen: seen.add(site_name) unique_site_names.append(site_name) return unique_site_names