Source code for pycmor.data_request.collection

import json
import pathlib
from abc import abstractmethod
from enum import Enum
from importlib.resources import files
from typing import Dict

import deprecation

from ..core.factory import MetaFactory
from ..core.utils import download_json_tables_from_url, list_files_in_directory
from .table import CMIP6DataRequestTable, CMIP7DataRequestTable, DataRequestTable
from .variable import CMIP7DataRequestVariable


class DataRequest(metaclass=MetaFactory):
    """Abstract interface shared by all data request collections.

    The class only declares alternate constructors; every one of them raises
    ``NotImplementedError`` here and is expected to be overridden by a
    concrete subclass. ``MetaFactory`` is used as the metaclass.
    """

    @classmethod
    @abstractmethod
    def from_tables(cls, tables: Dict[str, DataRequestTable]) -> "DataRequest":
        """Build a data request from already constructed tables.

        Parameters
        ----------
        tables : Dict[str, DataRequestTable]
            Mapping of table objects to wrap.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def from_directory(cls, directory: str) -> "DataRequest":
        """Build a data request from a directory containing table files.

        Parameters
        ----------
        directory : str
            Path of the directory to read.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def from_git(cls, url: str, branch: str = "master") -> "DataRequest":
        """Build a data request from a git repository.

        Parameters
        ----------
        url : str
            Location of the repository.
        branch : str, optional
            Branch to read from. Default is ``"master"``.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError

    @classmethod
    @deprecation.deprecated(details="Use from_directory instead.")
    @abstractmethod
    def from_tables_dir(cls, directory: str) -> "DataRequest":
        """Deprecated spelling of :meth:`from_directory`.

        Parameters
        ----------
        directory : str
            Path of the directory to read.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError
class CMIP7DataRequest(DataRequest):
    """Data request built from the CMIP7 "all_var_info" JSON structure."""

    GIT_URL = "https://github.com/CMIP-Data-Request/CMIP7_DReq_Software/"
    """str: The URL of the CMIP7 data request repository."""

    def __init__(
        self,
        tables: Dict[str, DataRequestTable],
        variables: Dict[str, CMIP7DataRequestVariable] = None,
    ):
        """
        Create a CMIP7DataRequest instance.

        Parameters
        ----------
        tables : Dict[str, DataRequestTable]
            A dictionary of tables.
        variables : Dict[str, CMIP7DataRequestVariable], optional
            A dictionary of variables. Stored as given; remains ``None``
            when not supplied.
        """
        self.tables = tables
        self.variables = variables

    @classmethod
    def from_json_file(cls, jfile: str) -> "CMIP7DataRequest":
        """Create a CMIP7DataRequest instance from a single JSON file.

        Parameters
        ----------
        jfile : str
            Path of the JSON file to load.

        Returns
        -------
        CMIP7DataRequest
        """
        # At the moment, we assume that this file is the "all_vars_info" file
        with open(jfile, "r", encoding="utf-8") as f:
            data = json.load(f)
        return cls.from_all_var_info(data)

    @classmethod
    def from_vendored_json(cls):
        """Create a CMIP7DataRequest from the packaged ``all_var_info.json``.

        The file is looked up as a resource of the ``pycmor.data.cmip7``
        package.
        """
        resource = files("pycmor.data.cmip7").joinpath("all_var_info.json")
        # Open through the resource object inside a context manager so the
        # handle is always closed and non-filesystem resources keep working.
        with resource.open("r", encoding="utf-8") as f:
            all_var_info = json.load(f)
        return cls.from_all_var_info(all_var_info)

    @classmethod
    def from_all_var_info(cls, data):
        """Create a CMIP7DataRequest from a loaded "all_var_info" mapping.

        Parameters
        ----------
        data : dict
            Must contain a ``"Compound Name"`` mapping. The part of each of
            its keys before the first ``"."`` is used as the table id.

        Returns
        -------
        CMIP7DataRequest
            Variables are keyed by ``variable_id``; if the same id occurs in
            several tables, the table processed last wins.
        """
        # Sorted so that tables are always processed in the same order
        # (iteration order of a set of strings varies between runs).
        table_ids = sorted({key.split(".")[0] for key in data["Compound Name"]})
        tables = {}
        variables = {}
        for table_id in table_ids:
            table = CMIP7DataRequestTable.from_all_var_info(table_id, data)
            tables[table_id] = table
            for variable in table.variables:
                variable.table_header = table.header
                variables[variable.variable_id] = variable
        return cls(tables, variables)

    @classmethod
    def from_tables(cls, tables: Dict[str, DataRequestTable]) -> "CMIP7DataRequest":
        """Create a CMIP7DataRequest from a dictionary of tables.

        Parameters
        ----------
        tables : Dict[str, DataRequestTable]
            A dictionary of tables.

        Raises
        ------
        ValueError
            If any value is not a ``DataRequestTable`` instance.
        """
        for table in tables.values():
            if not isinstance(table, DataRequestTable):
                raise ValueError("All tables must be instances of DataRequestTable.")
        return cls(tables)

    @classmethod
    def from_directory(cls, directory: str) -> "CMIP7DataRequest":
        """Create the CMIP7 data request from a directory.

        Parameters
        ----------
        directory : str
            Directory expected to hold the "all_vars_info" JSON file.

        Raises
        ------
        FileNotFoundError
            If the directory contains no ``.json`` file.
        """
        directory = pathlib.Path(directory)
        # We assume that the directory contains only 1 JSON file, the
        # "all_vars_info" file. Sorting makes the choice deterministic should
        # there be more than one (iterdir yields entries in arbitrary order).
        for file in sorted(directory.iterdir()):
            if file.is_file() and file.suffix == ".json":
                return cls.from_json_file(file)
        # Fail loudly instead of implicitly returning None.
        raise FileNotFoundError(f"No JSON file found in directory: {directory}")

    @classmethod
    @deprecation.deprecated(details="Use from_directory instead.")
    def from_tables_dir(cls, directory: str) -> "CMIP7DataRequest":
        """Deprecated alias that forwards to :meth:`from_directory`."""
        return cls.from_directory(directory)
class CMIP6DataRequest(DataRequest):
    """Data request built from CMIP6 CMOR table JSON files."""

    GIT_URL = "https://github.com/PCMDI/cmip6-cmor-tables/"
    """str: The URL of the CMIP6 data request repository."""

    _IGNORE_TABLE_FILES = [
        "CMIP6_CV_test.json",
        "CMIP6_coordinate.json",
        "CMIP6_CV.json",
        "CMIP6_formula_terms.json",
        "CMIP6_grids.json",
        "CMIP6_input_example.json",
    ]
    """List[str]: Table files to ignore when reading from a directory."""

    def __init__(
        self,
        tables: Dict[str, CMIP6DataRequestTable],
        flattable_variables: bool = True,
        include_table_headers_in_variables: bool = True,
    ):
        """
        Create a CMIP6DataRequest instance.

        Parameters
        ----------
        tables : Dict[str, DataRequestTable]
            A dictionary of tables.
        flattable_variables: bool, optional
            Whether or not to "flatten" tables by key, generating a unique key
            for each variable. This is composed of the table_id and
            variable_id. When False the bare variable_id is used as key.
            Default is True.
        include_table_headers_in_variables: bool, optional
            Whether or not to include the table header in the variable object.
            Default is True.
        """
        self.tables = tables
        self.variables = {}
        for table in tables.values():
            for variable in table.variables:
                if flattable_variables:
                    var_key = f"{table.table_id}.{variable.variable_id}"
                else:
                    var_key = variable.variable_id
                if include_table_headers_in_variables:
                    variable.table_header = table.header
                self.variables[var_key] = variable

    @classmethod
    def from_tables(cls, tables: Dict[str, DataRequestTable]) -> "CMIP6DataRequest":
        """Create a CMIP6DataRequest from a dictionary of tables.

        Parameters
        ----------
        tables : Dict[str, DataRequestTable]
            A dictionary of tables.

        Raises
        ------
        ValueError
            If any value is not a ``DataRequestTable`` instance.
        """
        for table in tables.values():
            if not isinstance(table, DataRequestTable):
                raise ValueError("All tables must be instances of DataRequestTable.")
        return cls(tables)

    @classmethod
    def from_directory(cls, directory: str) -> "CMIP6DataRequest":
        """Create a CMIP6DataRequest from a directory of JSON table files.

        Every regular ``.json`` file in ``directory`` is loaded as a table,
        except those named in ``_IGNORE_TABLE_FILES``. Tables are keyed by
        their ``table_id``.

        Parameters
        ----------
        directory : str
            Directory containing the table files.
        """
        tables = {}
        directory = pathlib.Path(directory)
        # Sorted so tables are loaded in a reproducible order (iterdir yields
        # entries in arbitrary order).
        for file in sorted(directory.iterdir()):
            if not file.is_file() or file.suffix != ".json":
                continue
            if file.name in cls._IGNORE_TABLE_FILES:
                continue
            table = CMIP6DataRequestTable.from_json_file(file)
            tables[table.table_id] = table
        # Ignored files are skipped by name above, so no second filtering
        # pass over the loaded tables is needed.
        return cls(tables)

    @classmethod
    def from_git(cls, url: str = None, branch: str = "main") -> "CMIP6DataRequest":
        """Create a CMIP6DataRequest from the ``Tables`` folder of a git repository.

        Parameters
        ----------
        url : str, optional
            Repository URL. Defaults to ``GIT_URL`` when None.
        branch : str, optional
            Branch to read from. Default is ``"main"``.
        """
        if url is None:
            url = cls.GIT_URL
        # Drop trailing slashes first: GIT_URL ends with "/", which would
        # otherwise yield ".../cmip6-cmor-tables//main/Tables".
        raw_url = f"{url.rstrip('/')}/{branch}/Tables".replace(
            "github.com", "raw.githubusercontent.com"
        )
        # Something for parsing the tables at the URL
        tables = list_files_in_directory(url, "Tables", branch=branch)
        # Something for downloading; the result is used as a directory path
        tables_dir = download_json_tables_from_url(raw_url, tables)
        return cls.from_directory(tables_dir)

    @classmethod
    @deprecation.deprecated(details="Use from_directory instead.")
    def from_tables_dir(cls, directory: str) -> "CMIP6DataRequest":
        """Deprecated alias that forwards to :meth:`from_directory`."""
        return cls.from_directory(directory)

    @classmethod
    def from_variables(cls, variables: Dict[str, Dict[str, str]]) -> "CMIP6DataRequest":
        """Create a CMIP6DataRequest that holds only the given variables.

        The instance is created with an empty table dictionary and its
        ``variables`` attribute is then set to ``variables`` as given.

        Parameters
        ----------
        variables : Dict[str, Dict[str, str]]
            Mapping to expose as ``variables``.
        """
        tables = {}
        instance = cls(tables)
        instance.variables = variables
        return instance
class CMIP6IgnoreTableFiles(Enum):
    """Table files to ignore when reading from a directory."""

    CV_TEST = "CMIP6_CV_test.json"
    COORDINATE = "CMIP6_coordinate.json"
    CV = "CMIP6_CV.json"
    FORMULA_TERMS = "CMIP6_formula_terms.json"
    GRIDS = "CMIP6_grids.json"
    INPUT_EXAMPLE = "CMIP6_input_example.json"

    @classmethod
    def values(cls):
        """Return the file name of every member, in definition order."""
        file_names = []
        for member in cls:
            file_names.append(member.value)
        return file_names