#!/usr/bin/env python
# coding: utf-8
"""
Plots ozone sonde data versus data from GEOS-Chem 1 year
benchmark simulations.
"""
import os
import numpy as np
import pandas as pd
import xarray as xr
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
from gcpy.cstools import extract_grid
from gcpy.grid import get_nearest_model_data, get_vert_grid
from gcpy.util import make_directory, replace_whitespace, verify_variable_type
from gcpy.benchmark.modules.benchmark_utils import \
read_ref_and_dev, rename_speciesconc_to_speciesconcvv
# Tell matplotlib not to look for an X-window
os.environ["QT_QPA_PLATFORM"] = "offscreen"
[docs]
def get_ref_and_dev_model_data(
ref_filepaths,
dev_filepaths,
varname="SpeciesConcVV_O3",
):
"""
Returns GEOS-Chem model ozone data.
Parameters
----------
ref_filepaths : str or list
Path(s) to the Ref model data files.
dev_filepaths : str or list
Path(s) to the Dev model data files.
varname : str, optional
Variable name to read.
Returns
-------
ref_data : xr.DataArray
Ozone data from the Ref model [ppbv].
dev_data : xr.DataArray
Ozone data from the Dev model [ppbv].
"""
verify_variable_type(ref_filepaths, (str,list))
verify_variable_type(dev_filepaths, (str,list))
# Get the model data
ref_data, dev_data = read_ref_and_dev(
ref_filepaths,
dev_filepaths,
multi_file=True
)
# Rename variables starting with "SpeciesConc_" to "SpeciesConcVV_",
# for backwards compatibility with GC versions prior to 14.1.0.
ref_data = rename_speciesconc_to_speciesconcvv(ref_data)
dev_data = rename_speciesconc_to_speciesconcvv(dev_data)
# Return the species of interest and convert to ppbv
return ref_data[varname]*1.0e9, dev_data[varname]*1.0e9
[docs]
def get_obs_ozone_data(file_path):
"""
Returns the ozone sonde observations as a pandas DataFrame object.
Parameters
----------
file_path : str
Path to ozone sonde file.
Returns
-------
data : pd.DataFrame
Sonde observations (ppbv).
"""
verify_variable_type(file_path, str)
data = pd.read_csv(file_path, delimiter=',', index_col=0)
data = data.rename(columns={"Latitude": "lat", "Longitude": "lon"})
return data
[docs]
def get_site_coords(
obs_data,
obs_site_metadata,
site_name,
):
"""
Returns the lon, lat, and surface pressure at each sonde site.
Parameters
----------
obs_data : pd.DataFrame
Observational data.
obs_site_metadata : pd.DataFrame
Surface pressure at observation sites.
site_name : str
Observation site name.
Returns
-------
lat : float
Latitude at observation site.
lon : float
Longitude at observation site.
p_sfc : float
Surface pressure (hPa) at obs site.
"""
search = obs_data['Site'] == site_name
lat = obs_data[search]['lat'].iloc[0]
lon = obs_data[search]['lon'].iloc[0]
search = obs_site_metadata['Site'] == site_name
p_sfc = obs_site_metadata[search]["Surface Pressure (hPa)"].iloc[0]
return lat, lon, p_sfc
[docs]
def get_seasonal_means(
obs_data,
ref_data,
dev_data,
months,
varname="SpeciesConcVV_O3",
):
"""
Returns seasonally averaged data for the observations
and models.
Parameters
----------
obs_data : pd.DataFrame
O3 observations (ppbv).
ref_data : pd.DataFrame
O3 from Ref model (ppbv).
dev_data : pd.DataFrame
O3 from Dev model (ppbv).
months : list
Months in each season.
varname : str, optional
GEOS-Chem diagnostic variable name.
Returns
-------
obs_seas_mean : pd.DataFrame
Seasonal mean O3 observations (ppbv).
std_dev : pd.DataFrame
Standard deviation in mean_obs (ppbv).
ref_seas_mean : pd.DataFrame
Seasonal mean O3 from Ref (ppbv).
dev_seas_mean : pd.DataFrame
Seasonal mean O3 from Dev (ppbv).
"""
verify_variable_type(obs_data, pd.DataFrame)
verify_variable_type(ref_data, pd.DataFrame)
verify_variable_type(dev_data, pd.DataFrame)
verify_variable_type(months, list)
# Filter data for season
obs_this_season = obs_data[(obs_data['month'].isin(months))]
ref_this_season = ref_data[(ref_data["month"].isin(months))]
dev_this_season = dev_data[(dev_data["month"].isin(months))]
# Take the seasonal means
obs_seas_mean = obs_this_season.groupby('pressure')['o3_ppb'].mean()
std_dev = obs_this_season.groupby('pressure')['o3_ppb_sd'].mean()
ref_seas_mean = ref_this_season.groupby('pressure')[varname].mean()
dev_seas_mean = dev_this_season.groupby('pressure')[varname].mean()
return obs_seas_mean, std_dev, ref_seas_mean, dev_seas_mean
[docs]
def get_nearest_model_data_to_obs(
data,
lon,
lat,
p_sfc,
cs_grid=None,
):
"""
Returns the nearest GEOS-Chem data to an observation. Also
inserts the GEOS-Chem pressure levels into the dataset.
Parameters
----------
data : xr.DataArray
GEOS-Chem data.
lon : float
Longitude at obs site.
lat : float
Latitude at obs site.
p_sfc : float
Surface pressure at obs site.
cs_grid : xr.Dataset or None, optional
Cubed-sphere grid metadata.
Returns
-------
nearest : pd.DataFrame
GEOS-Chem data nearest to the observation site, with pressure
and month columns added.
"""
verify_variable_type(data, xr.DataArray)
verify_variable_type(cs_grid, (xr.Dataset, type(None)))
# Nearest data to the observation (pd.DataFrame)
nearest = get_nearest_model_data(
data,
lon,
lat,
cs_grid
).reset_index()
# Vertical pressure grid for GEOS-Chem.
_, p_mid, _ = get_vert_grid(data, p_sfc=p_sfc)
# Repeat surface pressure for 12 months of data
p_mid = np.tile(p_mid, 12)
# Add "Pressure" and "month" columns
nearest.insert(4, "pressure", p_mid)
nearest["month"] = nearest["time"].dt.month
return nearest
[docs]
def plot_one_site(
axes_subplot,
season,
season_idx,
site_idx,
obs_seas_mean,
std_dev,
ref_label,
ref_seas_mean,
dev_label,
dev_seas_mean,
):
"""
Plots ozonesonde vs. model data for all seasons at a single site
(i.e. one column of a page).
Parameters
----------
axes_subplot : matplotlib.axes.Axes
The current subplot.
season : str
Name of the season.
season_idx : int
Index of the current season.
site_idx : int
Index of the current site.
obs_seas_mean : pd.Series
Seasonal mean O3 obs (ppbv).
std_dev : pd.Series
Std dev of mean_obs (ppbv).
ref_label : str
Label for the Ref version.
ref_seas_mean : pd.Series
Seasonal mean O3 from Ref (ppbv).
dev_label : str
Label for the Dev version.
dev_seas_mean : pd.Series
Seasonal mean O3 from Dev (ppbv).
"""
verify_variable_type(season, str)
verify_variable_type(season_idx, int)
verify_variable_type(site_idx, int)
verify_variable_type(obs_seas_mean, pd.Series)
verify_variable_type(std_dev, pd.Series)
verify_variable_type(ref_label, str)
verify_variable_type(ref_seas_mean, pd.Series)
verify_variable_type(dev_label, str)
verify_variable_type(dev_seas_mean, pd.Series)
# Plotting observations with error bars
axes_subplot.errorbar(
obs_seas_mean,
obs_seas_mean.index,
xerr=std_dev,
fmt='-o',
color='black',
markersize=3,
label='Observations'
)
# Plotting model data
axes_subplot.plot(
ref_seas_mean,
ref_seas_mean.index,
color='r',
marker='o',
markersize=3,
lw=1,
label=ref_label
)
axes_subplot.plot(
dev_seas_mean,
dev_seas_mean.index,
color='g',
marker='o',
markersize=3,
lw=1,
label=dev_label,
)
# Inverting y-axis and setting y-axis to start from 1000 hPa
axes_subplot.invert_yaxis()
axes_subplot.set_ylim(1000, 50)
# Setting x-axis scale to 0-160 ppb
axes_subplot.set_xlim(0, 160)
# X/Y ticks
axes_subplot.tick_params(axis='both', labelsize=12)
# Get current x-axis tick values
xticks = axes_subplot.get_xticks()
# Exclude '0' from the x-axis tick labels
xtick_labels = [f'{tick:.0f}' if tick != 0 else '' for tick in xticks]
# Set modified tick labels
axes_subplot.set_xticks(xticks)
axes_subplot.set_xticklabels(xtick_labels)
# Add a legend in the fourth column and first row
if season_idx == 1 and site_idx == 4:
axes_subplot.legend(
loc='lower right',
fontsize='large'
)
# Adding latitude and season as text inside the plot
if site_idx == 1:
axes_subplot.text(
0.95,
0.2,
f'{season}',
transform=axes_subplot.transAxes,
fontsize=20,
verticalalignment='top',
horizontalalignment="right",
)
[docs]
def page_adjustments(fig):
"""
Adjusts the page settings after all the subplots have been made.
Parameters
----------
fig : matplotlib.figure.Figure
Figure object.
"""
# Eliminating space between subplots completely
plt.subplots_adjust(wspace=0, hspace=0)
# Adjust subplot layout
plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05)
# Setting common labels
fig.text(
0.5,
0.01,
'Ozone Concentration (ppb)',
ha='center',
fontsize=20
)
fig.text(
0.0,
0.5,
'Pressure (hPa)',
va='center',
rotation='vertical',
fontsize=20
)
[docs]
def sort_sites_by_lat(
obs_data
):
"""
Returns a list of sonde sites sorted from N to S in latitude.
Parameters
----------
obs_data : pd.DataFrame
Observations from sondes (all sites).
Returns
-------
site_names : list
Sorted list of site names N to S.
"""
sites = obs_data[['Site', "lat"]].sort_values(by=["lat"], ascending=False)
return sites["Site"].unique()
[docs]
def plot_the_data(
obs_data,
obs_site_metadata,
ref_label,
ref_data,
dev_label,
dev_data,
dst="./benchmark",
varname="SpeciesConcVV_O3",
):
"""
Creates plots of model data vs. ozonesonde data.
Parameters
----------
obs_data : pd.DataFrame
Observational data.
obs_site_metadata : pd.DataFrame
Surface pressure at observation sites.
ref_label : str
Label for Ref model data.
ref_data : xr.DataArray
Ref model data.
dev_label : str
Label for Dev model data.
dev_data : xr.DataArray
Dev model data.
dst : str, optional
Destination folder.
varname : str, optional
Name of variable to plot.
"""
# ==================================================================
# Initialization
# ==================================================================
# Get cubed-sphere metadata (will be None for non-CS grids)
# This is needed to find the nearest grid box to the observations
ref_cs_grid = extract_grid(ref_data)
dev_cs_grid = extract_grid(dev_data)
# Define seasons
seasons = {
'DJF': [12, 1, 2],
'JJA': [3, 4, 5],
'MAM': [6, 7, 8],
'SON': [9, 10, 11]
}
# List of site names from N to S latitude
sorted_sites = sort_sites_by_lat(obs_data)
# Creating the PDF with no space between subplots
# Open the plot as a PDF document
pdf_file = f"{dst}/models_vs_sondes.{varname.split('_')[1]}.pdf"
pdf_pages = PdfPages(pdf_file)
# ==================================================================
# Loop over pages (4 sites per page)
# ==================================================================
for page_idx in range(0, len(sorted_sites), 4):
fig, axes = plt.subplots(
4, 4,
figsize=(15, 15),
sharex=True,
sharey=True
)
# Selecting the next four sites for this page
sites_for_page = sorted_sites[page_idx:page_idx+4]
# ==============================================================
# Loop over sites
# ==============================================================
for site_idx, site in enumerate(sites_for_page, 1):
# Get coordinates of observation site
lat, lon, p_sfc = get_site_coords(
obs_data,
obs_site_metadata,
site,
)
# Get nearest model data to the observation as pd.DataFrame
nearest_ref = get_nearest_model_data_to_obs(
ref_data,
lon,
lat,
p_sfc,
cs_grid=ref_cs_grid,
)
nearest_dev = get_nearest_model_data_to_obs(
dev_data,
lon,
lat,
p_sfc,
cs_grid=dev_cs_grid,
)
# Adding site names at the top of each column
axes[0, site_idx-1].set_title(f'{site} ({lat}°)', size=15)
# ==========================================================
# Loop over seasons (DJF, JJA, MAM, SON)
# ==========================================================
for season_idx, (season, months) in enumerate(seasons.items(), 1):
# Get the seasonal means of observations & model data
obs_seas_mean, std_dev, \
ref_seas_mean, dev_seas_mean = get_seasonal_means(
obs_data[obs_data["Site"] == site],
nearest_ref,
nearest_dev,
months
)
# Create a plot for a single site (all seasons)
axes_subplot = axes[season_idx-1, site_idx-1]
plot_one_site(
axes_subplot,
season,
season_idx,
site_idx,
obs_seas_mean,
std_dev,
ref_label,
ref_seas_mean,
dev_label,
dev_seas_mean,
)
# ================================================================
# Global page adjustments
# ================================================================
page_adjustments(fig)
pdf_pages.savefig()
plt.close()
# ================================================================
# Save the PDF
# ================================================================
pdf_pages.close()
[docs]
def make_benchmark_models_vs_sondes_plots(
obs_data_file,
obs_site_file,
ref_filepaths,
ref_label,
dev_filepaths,
dev_label,
dst="./benchmark",
overwrite=False,
varname="SpeciesConcVV_O3",
):
"""
Creates plots of sonde data vs. GEOS-Chem output. For use in the
1-year benchmark plotting workflow.
Parameters
----------
obs_data_file : str
File containing sonde data.
obs_site_file : str
File containing sonde site metadata.
ref_filepaths : str or list
Files for the GEOS-Chem Ref version.
ref_label : str
GEOS-Chem Ref version label.
dev_filepaths : str or list
Files for the GEOS-Chem Dev version.
dev_label : str
GEOS-Chem Dev version label.
dst : str, optional
Folder where PDF w/ plots will be created.
overwrite : bool, optional
Overwrite contents of dst folder?
varname : str, optional
GEOS-Chem diagnostic variable name.
"""
verify_variable_type(obs_data_file, str)
verify_variable_type(obs_site_file, str)
verify_variable_type(ref_filepaths, (str, list))
verify_variable_type(ref_label, str)
verify_variable_type(dev_filepaths, (str, list))
verify_variable_type(dev_label, str)
# Replace whitespace in the ref and dev labels
ref_label = replace_whitespace(ref_label)
dev_label = replace_whitespace(dev_label)
# Create the destination folder
make_directory(
dst,
overwrite=overwrite
)
# Get the observational data and site metadata
obs_data = get_obs_ozone_data(obs_data_file)
obs_site_metadata = pd.read_csv(obs_site_file)
# Get the model data from Ref and Dev versions
ref_data, dev_data = get_ref_and_dev_model_data(
ref_filepaths,
dev_filepaths,
varname=varname,
)
# Create plots
plot_the_data(
obs_data,
obs_site_metadata,
ref_label,
ref_data,
dev_label,
dev_data,
dst,
)