Source code for gcpy.benchmark.modules.benchmark_gcclassic_stats

#!/usr/bin/env python3
r"""
Script to scrape statistics from a 1-month GEOS-Chem Classic benchmark run,
which can then be placed in the "GEOS-Chem 1-month Benchmark Stats"
Google spreadsheet.

Examples
--------

.. code-block:: console

   $ conda activate gcpy_env
   $ python -m gcpy.benchmark.modules.benchmark_gcclassic_stats \
     14.5.0-alpha.5 \
     14.5.0-alpha.6
"""
import sys
import requests
from gcpy.util import replace_whitespace, verify_variable_type

# ----------------------------------------------------------------------
# Global variables
# ----------------------------------------------------------------------

# Base URL of the S3 bucket from which both URL templates below are built.
ROOT = "https://s3.amazonaws.com/benchmarks-cloud"

# URL template for the benchmark run log.  The literal "ID" is a
# placeholder that main() replaces with the Dev benchmark identifier.
LOG_TEMPLATE = f"{ROOT}/benchmarks/1Mon/gcc/ID/RunGCC.txt"

# URL template for the OH metrics table.  The literal "ID" is a
# placeholder that main() replaces with the Ref-vs-Dev identifier.
METRICS_TEMPLATE = f"{ROOT}/diff-plots/1Mon/ID/BenchmarkResults/Tables/OH_metrics.txt"

# Timer labels that scrape_stats() looks for in each line of text.
# Each entry is matched as a substring of the line, so the padding
# and the trailing ":" are part of the pattern and must not be altered.
TIMERS = [
    "GEOS-Chem                     :",
    "HEMCO                         :",
    "=> Gas-phase chem             :",
    "=> Photolysis                 :",
    "=> Aerosol chem               :",
    "=> Linearized chem            :",
    "Transport                     :",
    "Convection                    :",
    "Boundary layer mixing         :",
    "Dry deposition                :",
    "Wet deposition                :",
    "Diagnostics                   :",
    "Unit conversions              :",
]

# ----------------------------------------------------------------------
# Functions
# ----------------------------------------------------------------------





def format_timer(timer):
    """
    Strips spaces, the preceding "=>" marker, and any ":" from a
    GEOS-Chem Classic timer name.

    Parameters
    ----------
    timer : str
        Timer label, e.g. an entry of TIMERS or the text that
        precedes the first ":" on a line of timing output.

    Returns
    -------
    str
        The bare timer name with no leading or trailing whitespace.
    """
    # Strip whitespace last: the padding between the name and the ":"
    # only becomes trailing whitespace once the ":" has been removed,
    # so stripping first would leave it in the result.
    return timer.replace("=> ", "").replace(":", "").strip()
def parse_timer(timer):
    """
    Splits one line of GEOS-Chem Classic timing output into the
    timer name and the time in seconds.

    Parameters
    ----------
    timer : str
        Line of text with GEOS-Chem Classic timing output.

    Returns
    -------
    name : str
        Timer name, cleaned up by format_timer.
    seconds : str
        The second whitespace-separated token found after the third
        ":" on the line, returned as text (not converted to a number).
    """
    fields = timer.split(":")

    # Everything before the first ":" is the (padded) timer name
    name = format_timer(fields[0])

    # After the third ":" the second token is taken as the seconds value
    tail_tokens = fields[3].split()
    return name, tail_tokens[1]
def scrape_stats(text):
    """
    Extracts timing statistics and OH metrics from the given text.

    Parameters
    ----------
    text : str
        Text scraped from the log file and metrics file.

    Returns
    -------
    stats : dict
        Maps statistic names ("CH4", "CH3CCl3", "Mean OH", "Wall Time",
        "Memory", plus one key per timer that was found) to their
        values as strings.  Only statistics that were found are present.
    """
    # Header that marks the start of the timers section, written with
    # single spaces.  It is compared against a whitespace-collapsed copy
    # of each line so that the match does not depend on how many spaces
    # separate the letter-spaced words.
    timers_header = "G E O S - C H E M T I M E R S"

    # Line positions (counted from the end of the text, starting at 0)
    # at which a "Dev" line is taken as the value of the given metric.
    metric_at = {2: "CH4", 10: "CH3CCl3", 18: "Mean OH"}

    stats = {}

    # Read the text backwards since the timers and OH are at the end
    for line_count, line in enumerate(reversed(text.splitlines())):

        # Skip reading the rest of the file once we have
        # found the start of the timers section
        if timers_header in " ".join(line.split()):
            break

        # Look for the various metrics
        metric = metric_at.get(line_count)
        if metric is not None and "Dev" in line:
            stats[metric] = line.split(":")[1].strip()

        # Skip commands
        if "++ sed" in line:
            continue

        # Wall time
        if "wall clock" in line:
            stats["Wall Time"] = line.split("m:ss):")[1].strip()

        # Memory (GB)
        if "Maximum resident set size" in line:
            stats["Memory"] = str(float(line.split(":")[1]) / 1.0e6)

        # GEOS-Chem Classic timers (seconds rounded to the nearest integer)
        for timer in TIMERS:
            if timer in line:
                name, seconds = parse_timer(line)
                stats[name] = str(round(float(seconds)))

    return stats
def get_text_from_web(url):
    """
    Returns the text from a file located on the web.

    Parameters
    ----------
    url : str
        URL of the file to be parsed.

    Returns
    -------
    text : str
        Body of the HTTP response, decoded as text.

    Raises
    ------
    FileNotFoundError
        If the server responds with an HTTP error status.

    Notes
    -----
    Connection failures and timeouts raised by requests.get are not
    intercepted and propagate to the caller unchanged.
    """
    response = requests.get(url, timeout=10)

    # requests.get does not raise for HTTP error statuses (e.g. 404).
    # Without this check the body of the error response would be
    # returned and scraped as if it were the requested file.
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as exc:
        err_msg = f"Could not download {url} from AWS!"
        raise FileNotFoundError(err_msg) from exc

    return response.text
def main(ref_label, dev_label):
    """
    Main program.  Given the labels from two benchmark simulations
    (ref and dev), downloads the Dev log file and the Ref-vs-Dev OH
    metrics file from AWS, passes the combined text to function
    "scrape_stats", and prints the resulting statistics.

    Parameters
    ----------
    ref_label : str
        Label for the Ref version.
    dev_label : str
        Label for the Dev version.
    """
    verify_variable_type(ref_label, str)
    verify_variable_type(dev_label, str)

    # Replace whitespace in the ref and dev labels
    ref_label = replace_whitespace(ref_label)
    dev_label = replace_whitespace(dev_label)

    # Download the log file of the Dev benchmark
    dev_id = f"gcc-4x5-1Mon-{dev_label}"
    log_url = LOG_TEMPLATE.replace("ID", dev_id)
    log_text = get_text_from_web(log_url)

    # Download the metrics file of the Ref-vs-Dev comparison
    diff_id = f"diff-gcc-4x5-1Mon-{ref_label}-gcc-4x5-1Mon-{dev_label}"
    metrics_url = METRICS_TEMPLATE.replace("ID", diff_id)
    metrics_text = get_text_from_web(metrics_url)

    # Scrape the log text followed by the metrics text, then print
    # the statistics to stdout
    print_stats(scrape_stats(log_text + metrics_text))
# ----------------------------------------------------------------------
# For use from the command line
# ----------------------------------------------------------------------
if __name__ == '__main__':

    # Exactly two arguments are expected after the script name:
    # the Ref label followed by the Dev label
    if len(sys.argv) == 3:
        main(sys.argv[1], sys.argv[2])
    else:
        ERR_MSG = "Usage: stats.py REF-LABEL DEV-LABEL"
        raise ValueError(ERR_MSG)