Source code for pycmor.data_request.variable

"""
This module defines the ``DataRequestVariable`` abstract base class and its
concrete implementation ``CMIP6DataRequestVariable``.

The ``DataRequestVariable`` class outlines the necessary properties and methods
that any variable class should implement. It includes properties such as frequency,
modeling realm, standard name, units, cell methods, cell measures, long name,
comment, dimensions, out name, type, positive direction, valid minimum and
maximum values, acceptable minimum and maximum mean absolute values, and the
table name.

The ``CMIP6DataRequestVariable`` class is a concrete implementation of the
``DataRequestVariable`` class, specifically for CMIP6 variables. It uses the
``dataclass`` decorator to automatically generate the ``__init__``, ``__repr__``,
and other special methods.

The module also provides class methods for constructing ``DataRequestVariable``
instances from dictionaries and JSON files, as well as a method for converting
a ``DataRequestVariable`` instance to a dictionary representation.
"""

import copy
import json
from abc import abstractmethod
from dataclasses import dataclass
from importlib.resources import files
from typing import Optional

from ..core.factory import MetaFactory


[docs] @dataclass class DataRequestVariable(metaclass=MetaFactory): """Abstract base class for a generic variable.""" _type_strings = { "real": float, "integer": int, "string": str, "double": float, "float": float, "char": str, "int": int, "long": int, "short": int, "boolean": bool, "logical": bool, "character": str, } """dict: conversion of string names in the tables to actual Python types""" ################################################################# # Properties a DataRequestVariable needs to have ################################################################# @property @abstractmethod def name(self) -> str: """Name of the variable""" raise NotImplementedError @property def variable_id(self) -> str: """Variable ID""" return self.name @property @abstractmethod def frequency(self) -> str: # Or should this return Frequency? """Frequency of this variable""" raise NotImplementedError @property @abstractmethod def modeling_realm(self) -> str: """Modeling Realm of this variable""" raise NotImplementedError @property @abstractmethod def standard_name(self) -> str: """The CF standard name of the variable""" raise NotImplementedError @property @abstractmethod def units(self) -> str: """The units of the variable""" raise NotImplementedError @property @abstractmethod def cell_methods(self) -> str: """Methods applied to the cell""" # FIXME(PG): I have no idea what this is raise NotImplementedError @property @abstractmethod def cell_measures(self) -> str: """What this cell measure""" # FIXME(PG): I have no idea what this is raise NotImplementedError @property @abstractmethod def long_name(self) -> str: """The CF long name for this variable""" raise NotImplementedError @property @abstractmethod def comment(self) -> str: """Comment for NetCDF attributes""" raise NotImplementedError @property @abstractmethod def dimensions(self) -> tuple[str, ...]: """Dimensions of this variable""" raise NotImplementedError @property @abstractmethod def out_name(self) -> str: """Short name (array name) of this variable""" raise NotImplementedError @property @abstractmethod def typ(self) -> type: """The type of this array: int, float, str""" raise NotImplementedError @property @abstractmethod def positive(self) -> str: """For 3-D variables, which direction is up/down""" raise NotImplementedError @property @abstractmethod def valid_min(self) -> float: """Valid minimum""" raise NotImplementedError @property @abstractmethod def valid_max(self) -> float: """Valid maximum""" raise NotImplementedError @property @abstractmethod def ok_min_mean_abs(self) -> float: """ok minimum, mean, and absolute value""" raise NotImplementedError @property @abstractmethod def ok_max_mean_abs(self) -> float: """ok maximum, mean, and absolute value""" raise NotImplementedError @property @abstractmethod def table_name(self) -> Optional[str]: """The table this variable is define in""" raise NotImplementedError @property @abstractmethod def attrs(self) -> dict: """Attributes to update the Xarray DataArray with""" raise NotImplementedError ################################################################# # Class methods for construction #################################################################
[docs] @classmethod def from_dict(cls, data: dict) -> "DataRequestVariable": """Create a DataRequestVariable instance from a dictionary.""" raise NotImplementedError
[docs] @classmethod def from_json_file(cls, jfile: str, varname: str) -> "DataRequestVariable": """Create a DataRequestVariable instance from a JSON file.""" raise NotImplementedError
################################################################# # Methods for serialization #################################################################
[docs] def to_dict(self) -> dict: """Convert the variable to a dictionary representation""" return self.__dict__
################################################################# # Other methods #################################################################
[docs] @abstractmethod def global_attrs(self, override_dict: dict = None) -> dict: """Global attributes for this variable, used to set on the xr.Dataset""" raise NotImplementedError
[docs] @abstractmethod def clone(self) -> "DataRequestVariable": """Create a copy of this variable""" raise NotImplementedError
[docs] @dataclass class CMIP6DataRequestVariable(DataRequestVariable): _variable_id: str _name: str _frequency: str _modeling_realm: str _standard_name: str _units: str _cell_methods: str _cell_measures: str _long_name: str _comment: str _dimensions: tuple[str, ...] _out_name: str _typ: type _positive: str _valid_min: float _valid_max: float _ok_min_mean_abs: float _ok_max_mean_abs: float _table_name: Optional[str] = None
[docs] @classmethod def from_dict(cls, data: dict) -> "CMIP6DataRequestVariable": """Create a DataRequestVariable instance from a dictionary.""" typ = cls._type_strings.get(data["type"]) if typ is None: raise ValueError(f"Unsupported type: {data['type']}") return cls( # NOTE(PG): This one is self-defined, ``name`` is not in the dict, but useful _name=data["out_name"], _variable_id=data["out_name"], _frequency=data["frequency"], _modeling_realm=data["modeling_realm"], _standard_name=data["standard_name"], _units=data["units"], _cell_methods=data["cell_methods"], _cell_measures=data["cell_measures"], _long_name=data["long_name"], _comment=data["comment"], # NOTE(PG): tuple, because of immutability _dimensions=tuple(data["dimensions"].split(" ")), _out_name=data["out_name"], _typ=cls._type_strings[data["type"]], _positive=data["positive"], _valid_min=data["valid_min"], _valid_max=data["valid_max"], _ok_min_mean_abs=data["ok_min_mean_abs"], _ok_max_mean_abs=data["ok_max_mean_abs"], _table_name=data.get("table_name"), )
@property def name(self) -> str: return self._name @property def frequency(self) -> str: return self._frequency @property def modeling_realm(self) -> str: return self._modeling_realm @property def standard_name(self) -> str: return self._standard_name @property def units(self) -> str: return self._units @property def cell_methods(self) -> str: return self._cell_methods @property def cell_measures(self) -> str: return self._cell_measures @property def long_name(self) -> str: return self._long_name @property def comment(self) -> str: return self._comment @property def dimensions(self) -> tuple[str, ...]: return self._dimensions @property def out_name(self) -> str: return self._out_name @property def typ(self) -> type: return self._typ @property def positive(self) -> str: return self._positive @property def valid_min(self) -> float: return self._valid_min @property def valid_max(self) -> float: return self._valid_max @property def ok_min_mean_abs(self) -> float: return self._ok_min_mean_abs @property def ok_max_mean_abs(self) -> float: return self._ok_max_mean_abs @property def table_name(self) -> Optional[str]: return self._table_name @property def attrs(self) -> dict: return { "standard_name": self.standard_name, "long_name": self.long_name, "units": self.units, "cell_methods": self.cell_methods, "cell_measures": self.cell_measures, "_FillValue": getattr(self, "_FillValue", None), "missing_value": getattr(self, "missing_value", None), }
[docs] def global_attrs(self, override_dict: dict = None) -> dict: """Return a dictionary of global attributes for a CMIP6 variable Parameters ---------- override_dict : dict A dictionary of attributes to override the default values """ override_dict = override_dict or {} # FIXME: This needs to come from the CVs somehow rdict = { "Conventions": None, "activity_id": None, "creation_date": None, "data_specs_version": None, "experiment": None, "experiment_id": None, "forcing_index": None, "frequency": None, "further_info_url": None, "grid": None, "grid_label": None, "initialization_index": None, "institution": None, "institution_id": None, "license": None, "mip_era": None, "nominal_resolution": None, "physics_index": None, "product": None, "realization_index": None, "realm": None, "source": None, "source_id": None, "source_type": None, "sub_experiment": None, "sub_experiment_id": None, "table_id": None, "tracking_id": None, "variable_id": None, "variant_label": None, } rdict.update(override_dict) return rdict
[docs] def clone(self) -> "CMIP6DataRequestVariable": clone = copy.deepcopy(self) return clone
[docs] class CMIP6JSONDataRequestVariable(CMIP6DataRequestVariable):
[docs] @classmethod def from_json_file(cls, jfile: str, varname: str) -> "CMIP6DataRequestVariable": with open(jfile, "r") as f: data = json.load(f) header = data["Header"] table_name = header["table_id"].replace("Table ", "") var_data = data["variable_entry"][varname] var_data["table_name"] = table_name return cls.from_dict(var_data)
[docs] @dataclass class CMIP7DataRequestVariable(DataRequestVariable): # Attributes without defaults _frequency: str _modeling_realm: str _standard_name: str _units: str _cell_methods: str _cell_measures: str _long_name: str _comment: str _dimensions: tuple[str, ...] _out_name: str _typ: type _positive: str _spatial_shape: str _temporal_shape: str _cmip6_cmor_table: str _name: str _table_name: Optional[str] = None
[docs] @classmethod def from_dict(cls, data): extracted_data = dict( _name=data["out_name"], _frequency=data["frequency"], _modeling_realm=data["modeling_realm"], # FIXME(PG): Not all variables appear to have standard_name _standard_name=data.get("standard_name"), _units=data["units"], _cell_methods=data["cell_methods"], _cell_measures=data["cell_measures"], _long_name=data["long_name"], _comment=data["comment"], _dimensions=tuple(data["dimensions"].split(" ")), _out_name=data["out_name"], _typ=cls._type_strings[data["type"]], _positive=data["positive"], _spatial_shape=data["spatial_shape"], _temporal_shape=data["temporal_shape"], _cmip6_cmor_table=data["cmip6_cmor_table"], _table_name=data["cmip6_cmor_table"], ) return cls(**extracted_data)
[docs] @classmethod def from_all_var_info_json(cls, var_name: str, table_name: str): _all_var_info = files("pycmor.data.cmip7").joinpath("all_var_info.json") all_var_info = json.load(open(_all_var_info, "r")) key = f"{table_name}.{var_name}" data = all_var_info["Compound Name"][key] data["out_name"] = var_name data["cmip6_cmor_table"] = table_name return cls.from_dict(data)
@property def attrs(self) -> dict: raise NotImplementedError("CMI7 attributes are not yet finalized") @property def cell_measures(self) -> str: raise NotImplementedError("CMIP7 does not have cell measures") @property def cell_methods(self) -> str: return self._cell_methods @property def comment(self) -> str: return self._comment @property def dimensions(self) -> tuple[str, ...]: return self._dimensions @property def frequency(self) -> str: return self._frequency @property def global_attrs(self, override_dict: dict = None) -> dict: raise NotImplementedError("CMIP7 global attributes not yet finalized") @property def long_name(self) -> str: # FIXME(PG): I'm not sure about this one return self._standard_name @property def modeling_realm(self) -> str: return self._modeling_realm @property def name(self) -> str: return self._name @property def ok_max_mean_abs(self) -> float: raise NotImplementedError("Not yet figured out") @property def ok_min_mean_abs(self) -> float: raise NotImplementedError("Not yet figured out") @property def out_name(self) -> str: return self._out_name @property def positive(self) -> str: return self._positive @property def standard_name(self) -> str: return self._standard_name @property def table_name(self) -> Optional[str]: if self._table_name is None: raise ValueError("Table name not set") return self._table_name @property def typ(self) -> type: return self._typ @property def units(self) -> str: return self._units @property def valid_max(self) -> float: raise NotImplementedError("Not yet figured out") @property def valid_min(self) -> float: raise NotImplementedError("Not yet figured out")
[docs] def clone(self) -> "CMIP7DataRequestVariable": clone = copy.deepcopy(self) return clone