Source code for pycmor.data_request.table

import json
import pathlib
from abc import abstractmethod
from dataclasses import dataclass
from importlib.resources import files
from typing import List, Optional

import pendulum
from semver.version import Version

from ..core.factory import MetaFactory
from ..core.logging import logger
from .variable import (
    CMIP6DataRequestVariable,
    CMIP7DataRequestVariable,
    DataRequestVariable,
)

################################################################################
# BLUEPRINTS: Abstract classes for the data request tables
################################################################################


@dataclass
class DataRequestTable(metaclass=MetaFactory):
    """Blueprint for a data request table.

    Concrete tables must supply a name, a list of variables, a header, a
    by-name variable lookup, and the two alternate constructors declared
    below. ``table_id`` is provided here as a ready-made alias.
    """

    @property
    @abstractmethod
    def table_name(self) -> str:
        """Return the name of this table."""
        raise NotImplementedError

    @property
    def table_id(self) -> str:
        """Return ``table_name``; both names refer to the same value."""
        return self.table_name

    @property
    @abstractmethod
    def variables(self) -> List[DataRequestVariable]:
        """Return the variables contained in this table."""
        raise NotImplementedError

    @abstractmethod
    def get_variable(self, name: str) -> DataRequestVariable:
        """Look up a single variable of this table by its name."""
        raise NotImplementedError

    @property
    @abstractmethod
    def header(self) -> "DataRequestTableHeader":
        """Return the header object describing this table."""
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def from_dict(cls, data: dict) -> "DataRequestTable":
        """Alternate constructor: build a table from a dictionary."""
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def table_dict_from_directory(cls, path: str) -> dict:
        """Alternate constructor: build a dictionary of tables from a directory."""
        raise NotImplementedError
################################################################################
# TODO(PG): In general, this class needs to be reworked to determine which fields
# are generic and which are specific to CMIP6 or CMIP7. The current implementation
# was based on CMIP6, under the assumption that all fields will also be present
# in CMIP7.
@dataclass
class DataRequestTableHeader(metaclass=MetaFactory):
    """Abstract base class for the header of a data request table.

    Every header field is exposed as a read-only abstract property, so
    concrete headers decide how each value is stored or derived.
    """

    @property
    @abstractmethod
    def data_specs_version(self) -> Version:
        """Data specifications version."""
        raise NotImplementedError

    @property
    @abstractmethod
    def cmor_version(self) -> Version:
        """CMOR version."""
        raise NotImplementedError

    @property
    @abstractmethod
    def table_id(self) -> str:
        """Name of the table."""
        raise NotImplementedError

    @property
    @abstractmethod
    def realm(self) -> List[str]:
        """Realm(s) of the table.

        Annotated as a list because both concrete headers in this module
        store and return a list of realm names.
        """
        raise NotImplementedError

    @property
    @abstractmethod
    def table_date(self) -> pendulum.Date:
        """Date of the table."""
        raise NotImplementedError

    @property
    @abstractmethod
    def missing_value(self) -> float:
        """Missing value."""
        raise NotImplementedError

    @property
    @abstractmethod
    def int_missing_value(self) -> int:
        """Integer missing value."""
        raise NotImplementedError

    @property
    @abstractmethod
    def product(self) -> str:
        """Product."""
        raise NotImplementedError

    @property
    @abstractmethod
    def approx_interval(self) -> Optional[float]:
        """Approximate interval (time in days), or ``None`` if there is none."""
        raise NotImplementedError

    # TODO(PG): Find out if this is needed for *all* tables, or if it is
    #           something specific only to CMIP6!
    @property
    @abstractmethod
    def generic_levels(self) -> List[str]:
        """Generic levels."""
        raise NotImplementedError

    @property
    @abstractmethod
    def mip_era(self) -> str:
        """MIP era."""
        raise NotImplementedError

    @property
    @abstractmethod
    def Conventions(self) -> str:
        """Conventions."""
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def from_dict(cls, data: dict) -> "DataRequestTableHeader":
        """Create a DataRequestTableHeader from a dictionary."""
        raise NotImplementedError
################################################################################
# END BLUEPRINTS
################################################################################
@dataclass
class CMIP7DataRequestTableHeader(DataRequestTableHeader):
    """Header of a CMIP7 data request table.

    The table id, realm(s), approximate interval and generic levels must be
    supplied per table; all remaining header fields fall back to the
    class-level defaults declared below.

    NOTE(review): this class does not override ``from_dict``, so calling it
    reaches the abstract base implementation, which raises
    ``NotImplementedError``. Use ``from_all_var_info`` to build a header.
    """

    ############################################################################
    # Attributes without known defaults:
    _table_id: str
    _realm: List[str]
    _approx_interval: Optional[float]  # None for the "fx" frequency
    _generic_levels: List[str]

    @property
    def table_id(self) -> str:
        """Name of the table."""
        return self._table_id

    @property
    def realm(self) -> List[str]:
        """Realm(s) of the table."""
        return self._realm

    @property
    def approx_interval(self) -> Optional[float]:
        """Approximate interval in days, or ``None`` for "fx" tables."""
        return self._approx_interval

    @property
    def generic_levels(self) -> List[str]:
        """Generic levels."""
        return self._generic_levels

    ############################################################################

    ############################################################################
    # Attributes with known defaults:
    _data_specs_version: Version = Version.parse("1", optional_minor_and_patch=True)
    _cmor_version: Version = Version.parse("3.5", optional_minor_and_patch=True)
    _mip_era: str = "CMIP7"
    _Conventions: str = "CF-1.7 CMIP-7.0"
    _missing_value: float = 1.0e20
    _int_missing_value: int = -999
    _product: str = "model-output"
    # NOTE(PG): We refer here to the CMIP7 Data Request publication date, which
    # is on GitHub: https://github.com/CMIP-Data-Request/CMIP7_DReq_Software/tree/v1.0
    # Tag was created on: 22 Nov 2024
    _table_date: pendulum.Date = pendulum.Date(2024, 11, 22)

    @property
    def data_specs_version(self) -> Version:
        """Data specifications version."""
        return self._data_specs_version

    @property
    def cmor_version(self) -> Version:
        """CMOR version."""
        return self._cmor_version

    @property
    def mip_era(self) -> str:
        """MIP era."""
        return self._mip_era

    @property
    def Conventions(self) -> str:
        """Conventions."""
        return self._Conventions

    @property
    def missing_value(self) -> float:
        """Missing value."""
        return self._missing_value

    @property
    def int_missing_value(self) -> int:
        """Integer missing value."""
        return self._int_missing_value

    @property
    def product(self) -> str:
        """Product."""
        return self._product

    @property
    def table_date(self) -> pendulum.Date:
        """Date of the table."""
        return self._table_date

    ############################################################################

    ############################################################################
    # Constructor methods:
    @classmethod
    def from_all_var_info(
        cls, table_name: str, all_var_info: Optional[dict] = None
    ) -> "CMIP7DataRequestTableHeader":
        """Build a header for ``table_name`` from an "all_var_info" mapping.

        Parameters
        ----------
        table_name : str
            Table whose variables are inspected. A variable belongs to the
            table when the part of its "Compound Name" key before the first
            dot equals ``table_name``.
        all_var_info : dict, optional
            Parsed content of ``all_var_info.json``. When omitted, the copy
            shipped in the ``pycmor.data.cmip7`` package is loaded.

        Returns
        -------
        CMIP7DataRequestTableHeader
            Header using the class defaults for all fields except table id,
            realm, approximate interval and generic levels (left empty).

        Raises
        ------
        ValueError
            If the table's variables do not share exactly one approximate
            interval, or if a variable has an unrecognized frequency.
        """
        if all_var_info is None:
            _all_var_info = files("pycmor.data.cmip7").joinpath("all_var_info.json")
            # Context manager so the file handle is closed deterministically.
            with open(_all_var_info, "r") as f:
                all_var_info = json.load(f)

        realms = set()
        approx_intervals = set()
        for compound_name, var in all_var_info["Compound Name"].items():
            # Exact match on the table part of "<table>.<variable>". A plain
            # startswith() would also pick up tables whose id merely begins
            # with table_name (e.g. "Emon" vs. "EmonZ").
            if compound_name.split(".", 1)[0] != table_name:
                continue
            realms.add(var["modeling_realm"])
            approx_intervals.add(cls._approx_interval_from_frequency(var["frequency"]))

        # We assume that all variables in the table have the same approx_interval
        # If not, we need to raise an error
        if len(approx_intervals) != 1:
            raise ValueError(
                f"approx_interval in the table is not consistent: {approx_intervals}"
            )

        # Build a table header, always using defaults for known fields
        return cls(
            _table_id=table_name,
            # Sorted so the result does not depend on set iteration order.
            _realm=sorted(realms, key=str),
            _approx_interval=approx_intervals.pop(),
            _generic_levels=[],
        )

    ############################################################################

    ############################################################################
    # Static methods: Useful stuff that doesn't need to be on an instance
    @staticmethod
    def _approx_interval_from_frequency(frequency: str) -> Optional[float]:
        """Translate a frequency label into an approximate interval in days.

        Returns ``None`` for "fx". Raises ``ValueError`` for any label that
        is not listed in the lookup table below.
        """
        days_by_frequency = {
            "1hr": 1.0 / 24.0,
            "3hr": 0.125,
            "6hr": 0.25,
            "day": 1.0,
            "dec": 365.0 * 10.0,
            "fx": None,  # Maybe this should be 0.0?
            "mon": 30.0,
            "subhr": 1.0 / 60.0,  # Not sure about this one...
            "yr": 365.0,
        }
        # Membership test rather than .get(): None is a legitimate value ("fx").
        if frequency in days_by_frequency:
            return days_by_frequency[frequency]
        raise ValueError(f"Frequency {frequency} not recognized.")
@dataclass
class CMIP6DataRequestTableHeader(DataRequestTableHeader):
    """Header of a CMIP6 data request table.

    Table id, realm, table date, approximate interval and generic levels have
    no defaults; the remaining fields default to the values declared below.
    """

    ############################################################################
    # NOTE(PG): The defaults here refer to the CMIP6 Data Request Tables
    #           found in commit 1131220 of the cmip6-cmor-tables repository. Some
    #           of these defaults might not be correct for later versions.
    #
    # Manual cleanup in the hard-coded defaults:
    # - data_specs_version: "01.00.33" -> "1.0.33" to match semver
    ############################################################################

    # Properties without defaults:
    # ----------------------------
    _table_id: str
    _realm: List[str]
    _table_date: pendulum.Date
    _approx_interval: Optional[float]  # None when the source value is empty
    _generic_levels: List[str]

    # Properties with known defaults:
    # -------------------------------
    # NOTE(PG): I don't like doing it this way, but it is fastest to
    #           implement for right now...
    # Key: Value --> Old: New
    # Left un-annotated: @dataclass only turns annotated names into fields, so
    # this stays a plain class attribute.
    _HARD_CODED_DATA_SPECS_REPLACEMENTS = {
        "01.00.33": "1.0.33",
        "01.00.27": "1.0.27",
    }
    _data_specs_version: Version = Version.parse(
        "1.0.33",
        optional_minor_and_patch=True,
    )
    _cmor_version: Version = Version.parse(
        "3.5",
        optional_minor_and_patch=True,
    )
    _mip_era: str = "CMIP6"
    _Conventions: str = "CF-1.7 CMIP-6.2"
    _missing_value: float = 1.0e20
    _int_missing_value: int = -999
    _product: str = "model-output"

    @classmethod
    def from_dict(cls, data: dict) -> "CMIP6DataRequestTableHeader":
        """Create a header from the "Header" dictionary of a CMIP6 table.

        Parameters
        ----------
        data : dict
            Must contain ``table_id``, ``realm``, ``table_date``,
            ``approx_interval`` and ``generic_levels``. Any other key that
            matches a dataclass field name (without the leading underscore)
            overrides the corresponding default.

        Returns
        -------
        CMIP6DataRequestTableHeader

        Raises
        ------
        KeyError
            If one of the required keys is missing from ``data``.
        """
        # Remove the literal "Table " prefix. str.lstrip("Table ") would strip
        # any leading run of the characters T/a/b/l/e/space and could eat the
        # start of the actual table id. The trailing lstrip() keeps tolerating
        # extra whitespace after the prefix.
        table_id = data["table_id"].removeprefix("Table ").lstrip()

        # The input dict needs to have these, since we have no defaults:
        extracted_data = dict(
            _table_id=table_id,
            _realm=[data["realm"]],
            _table_date=pendulum.parse(data["table_date"], strict=False).date(),
            # This might be None, if the approx interval is an empty string...
            _approx_interval=(
                float(data["approx_interval"]) if data["approx_interval"] else None
            ),
            _generic_levels=data["generic_levels"].split(" "),
        )

        # Optionally get the rest, which might not be present:
        for field_name in cls.__dataclass_fields__:
            source_key = field_name.lstrip("_")
            if source_key in data and field_name not in extracted_data:
                extracted_data[field_name] = data[source_key]

        # Handle Version conversions
        if "_data_specs_version" in extracted_data:
            raw_version = extracted_data["_data_specs_version"]
            # Rewrite known zero-padded version strings into the hard-coded
            # replacements before parsing.
            for old_value, new_value in cls._HARD_CODED_DATA_SPECS_REPLACEMENTS.items():
                raw_version = raw_version.replace(old_value, new_value)
            extracted_data["_data_specs_version"] = Version.parse(
                raw_version,
                optional_minor_and_patch=True,
            )
        if "_cmor_version" in extracted_data:
            extracted_data["_cmor_version"] = Version.parse(
                extracted_data["_cmor_version"],
                optional_minor_and_patch=True,
            )

        # Handle types for missing_value and int_missing_value
        if "_missing_value" in extracted_data:
            extracted_data["_missing_value"] = float(extracted_data["_missing_value"])
        if "_int_missing_value" in extracted_data:
            extracted_data["_int_missing_value"] = int(
                extracted_data["_int_missing_value"]
            )
        return cls(**extracted_data)

    @property
    def table_id(self) -> str:
        """Name of the table."""
        return self._table_id

    @property
    def realm(self) -> List[str]:
        """Realm(s) of the table."""
        return self._realm

    @property
    def table_date(self) -> pendulum.Date:
        """Date of the table."""
        return self._table_date

    @property
    def missing_value(self) -> float:
        """Missing value."""
        return self._missing_value

    @property
    def int_missing_value(self) -> int:
        """Integer missing value."""
        return self._int_missing_value

    @property
    def product(self) -> str:
        """Product."""
        return self._product

    @property
    def approx_interval(self) -> Optional[float]:
        """Approximate interval, or ``None`` if the source value was empty."""
        return self._approx_interval

    @property
    def generic_levels(self) -> List[str]:
        """Generic levels."""
        return self._generic_levels

    @property
    def mip_era(self) -> str:
        """MIP era."""
        return self._mip_era

    @property
    def Conventions(self) -> str:
        """Conventions."""
        return self._Conventions

    @property
    def data_specs_version(self) -> Version:
        """Data specifications version."""
        return self._data_specs_version

    @property
    def cmor_version(self) -> Version:
        """CMOR version."""
        return self._cmor_version
################################################################################
@dataclass
class CMIP6JSONDataRequestTableHeader(CMIP6DataRequestTableHeader):
    """CMIP6 table header that can be read straight from a table JSON file."""

    @classmethod
    def from_json_file(cls, jfile) -> "CMIP6JSONDataRequestTableHeader":
        """Load ``jfile`` as JSON and build a header from its "Header" entry."""
        with open(jfile, "r") as handle:
            header_section = json.load(handle)["Header"]
        return cls.from_dict(header_section)
################################################################################
class CMIP6DataRequestTable(DataRequestTable):
    """DataRequestTable for CMIP6."""

    # FIXME(PG): This might bite itself in the ass...
    def __init__(
        self,
        header: CMIP6DataRequestTableHeader,
        variables: List[DataRequestVariable],
    ):
        """Store the given header and list of variables on the instance."""
        self._header = header
        self._variables = variables

    @property
    def variables(self) -> List[DataRequestVariable]:
        """Variables of this table (variable objects, not their names)."""
        return self._variables

    @property
    def header(self) -> CMIP6DataRequestTableHeader:
        """Header of this table."""
        return self._header

    @property
    def table_name(self) -> str:
        """Name of the table, taken from the header's ``table_id``."""
        return self.header.table_id

    def get_variable(self, name: str, find_by="name") -> DataRequestVariable:
        """Return the first variable whose ``find_by`` attribute equals ``name``.

        Parameters
        ----------
        name : str
            Value to look for.
        find_by : str, optional
            Name of the variable attribute to compare against ``name``.
            Defaults to ``"name"``.

        Returns
        -------
        DataRequestVariable

        Raises
        ------
        ValueError
            If no variable in the table matches.
        """
        for v in self._variables:
            if getattr(v, find_by) == name:
                return v
        raise ValueError(
            f"A Variable with the attribute {find_by}={name} not found in the table."
        )

    @classmethod
    def from_dict(cls, data: dict) -> "CMIP6DataRequestTable":
        """Build a table from a dict with "Header" and "variable_entry" keys."""
        header = CMIP6DataRequestTableHeader.from_dict(data["Header"])
        variables = [
            CMIP6DataRequestVariable.from_dict(v)
            for v in data["variable_entry"].values()
        ]
        return cls(header, variables)

    @classmethod
    def table_dict_from_directory(cls, path) -> dict:
        """Load every table JSON file in ``path`` into a dict keyed by table id.

        Files listed in ``_skip_files`` and entries that are not ``.json``
        files are ignored.
        """
        # We need to know which files to skip...
        _skip_files = [
            "CMIP6_CV_test.json",
            "CMIP6_coordinate.json",
            "CMIP6_CV.json",
            "CMIP6_formula_terms.json",
            "CMIP6_grids.json",
            "CMIP6_input_example.json",
        ]
        path = pathlib.Path(path)  # noop if already a Path
        tables = {}
        for file in path.iterdir():
            if file.name in _skip_files:
                continue
            if file.is_file() and file.suffix == ".json":
                table = cls.from_json_file(file)
                tables[table.table_id] = table
        return tables

    @classmethod
    def from_json_file(cls, jfile) -> "CMIP6DataRequestTable":
        """Load ``jfile`` as JSON and build a table from its content."""
        with open(jfile, "r") as f:
            data = json.load(f)
        return cls.from_dict(data)
################################################################################
@dataclass
class CMIP7DataRequestTable(DataRequestTable):
    """DataRequestTable for CMIP7."""

    # FIXME(PG): This might bite itself in the ass...
    # NOTE(review): the class is decorated with @dataclass but declares no
    # fields and defines its own __init__ -- confirm this combination is
    # intended.
    def __init__(
        self,
        header: CMIP7DataRequestTableHeader,
        variables: List[DataRequestVariable],
    ):
        """Store the given header and list of variables on the instance."""
        self._header = header
        self._variables = variables

    @property
    def variables(self) -> List[DataRequestVariable]:
        """Variables of this table (variable objects, not their names)."""
        return self._variables

    @property
    def header(self) -> CMIP7DataRequestTableHeader:
        """Header of this table."""
        return self._header

    @property
    def table_name(self) -> str:
        """Name of the table, taken from the header's ``table_id``."""
        return self.header.table_id

    def get_variable(self, name: str, find_by="name") -> DataRequestVariable:
        """Return the first variable whose ``find_by`` attribute equals ``name``.

        Parameters
        ----------
        name : str
            Value to look for.
        find_by : str, optional
            Name of the variable attribute to compare against ``name``.
            Defaults to ``"name"``.

        Returns
        -------
        DataRequestVariable

        Raises
        ------
        ValueError
            If no variable in the table matches.
        """
        for v in self._variables:
            if getattr(v, find_by) == name:
                return v
        raise ValueError(
            f"A Variable with the attribute {find_by}={name} not found in the table."
        )

    @classmethod
    def from_dict(cls, data: dict) -> "CMIP7DataRequestTable":
        """Build a table from a dict with "Header" and "Compound Name" keys.

        Each "Compound Name" key is expected to look like
        ``"<table>.<variable>"``; both parts are handed to the variable
        constructor as ``table_name`` and ``name``.
        """
        header = CMIP7DataRequestTableHeader.from_dict(data["Header"])
        variables = []
        for var_key, var_data in data["Compound Name"].items():
            table_name, var_name = var_key.split(".")
            # Pass an augmented copy so the caller's dictionaries are left
            # untouched.
            var_entry = dict(var_data, table_name=table_name, name=var_name)
            variables.append(CMIP7DataRequestVariable.from_dict(var_entry))
        return cls(header, variables)

    @classmethod
    def from_all_var_info_json(cls, table_name: str) -> "CMIP7DataRequestTable":
        """Build the table ``table_name`` from the packaged all_var_info.json."""
        _all_var_info = files("pycmor.data.cmip7").joinpath("all_var_info.json")
        # Context manager so the file handle is closed deterministically.
        with open(_all_var_info, "r") as f:
            all_var_info = json.load(f)
        return cls.from_all_var_info(table_name, all_var_info)

    @classmethod
    def from_all_var_info(cls, table_name: str, all_var_info: Optional[dict] = None):
        """Build the table ``table_name`` from an "all_var_info" mapping.

        Parameters
        ----------
        table_name : str
            Table to build. A variable is included when its
            ``cmip6_cmor_table`` entry equals this name.
        all_var_info : dict, optional
            Parsed content of ``all_var_info.json``. When omitted, the copy
            shipped in the ``pycmor.data.cmip7`` package is loaded.

        Returns
        -------
        CMIP7DataRequestTable
        """
        if all_var_info is None:
            _all_var_info = files("pycmor.data.cmip7").joinpath("all_var_info.json")
            with open(_all_var_info, "r") as f:
                all_var_info = json.load(f)
        header = CMIP7DataRequestTableHeader.from_all_var_info(table_name, all_var_info)
        variables = [
            CMIP7DataRequestVariable.from_dict(var_dict)
            for var_dict in all_var_info["Compound Name"].values()
            if var_dict["cmip6_cmor_table"] == table_name
        ]
        return cls(header, variables)

    @classmethod
    def table_dict_from_directory(cls, path) -> dict:
        """Build all tables described by ``<path>/all_var_info.json``.

        Returns a dict mapping each table id (the part of every
        "Compound Name" key before the first dot) to its table.

        Raises
        ------
        FileNotFoundError
            If ``all_var_info.json`` does not exist in ``path``.
        """
        path = pathlib.Path(path)  # noop if already a Path
        try:
            with open(path / "all_var_info.json", "r") as f:
                all_var_info = json.load(f)
        except FileNotFoundError:
            logger.error(f"No all_var_info.json found in {path}.")
            logger.error(
                "It is currently possible to only create tables from the all_var_info.json file!"
            )
            logger.error("Sorry...")
            # Re-raise the original error so its filename/errno are preserved.
            raise
        # Sorted so the resulting dict has a reproducible insertion order.
        table_ids = sorted({k.split(".")[0] for k in all_var_info["Compound Name"]})
        return {
            table_id: cls.from_all_var_info(table_id, all_var_info)
            for table_id in table_ids
        }

    @classmethod
    def from_json_file(cls, jfile) -> "CMIP7DataRequestTable":
        """Load ``jfile`` as JSON and build a table from its content."""
        with open(jfile, "r") as f:
            data = json.load(f)
        return cls.from_dict(data)

    @property
    def table_id(self) -> str:
        """Alias for table_name."""
        return self.table_name
################################################################################