Source code for pycmor.core.controlled_vocabularies

"""
Controlled vocabularies for CMIP6
"""

import glob
import json
import os
import re
from pathlib import Path

import requests

from .factory import MetaFactory


class ControlledVocabularies(dict, metaclass=MetaFactory):
    """Dictionary-based base class for controlled vocabularies.

    The class is a ``dict`` subclass created through the ``MetaFactory``
    metaclass. All three loaders defined here are stubs that raise
    ``NotImplementedError``; subclasses are expected to override them.
    """

    @classmethod
    def from_directory(cls, directory: str) -> "ControlledVocabularies":
        """Build the vocabularies from a directory of CV files.

        Parameters
        ----------
        directory : str
            Directory holding the controlled-vocabulary files.

        Raises
        ------
        NotImplementedError
            Always; this base implementation is a stub.
        """
        raise NotImplementedError()

    @classmethod
    def load_from_git(cls, tag: str) -> "ControlledVocabularies":
        """Build the vocabularies from the git repository.

        Parameters
        ----------
        tag : str
            Git tag identifying the revision to load.

        Raises
        ------
        NotImplementedError
            Always; this base implementation is a stub.
        """
        raise NotImplementedError()

    @classmethod
    def load(cls, table_dir: str) -> "ControlledVocabularies":
        """Build the vocabularies using the default loading method.

        Parameters
        ----------
        table_dir : str
            Directory passed to the default loader.

        Raises
        ------
        NotImplementedError
            Always; this base implementation is a stub.
        """
        raise NotImplementedError()
class CMIP6ControlledVocabularies(ControlledVocabularies):
    """Controlled vocabularies for CMIP6"""

    def __init__(self, json_files):
        """Create a new ControlledVocabularies object from a list of json files

        The files are merged in the given order with ``dict.update``, so
        when two files define the same top-level key, the later file wins.

        Parameters
        ----------
        json_files : list
            List of json files to load
        """
        super().__init__()
        for json_file in json_files:
            self.update(self.dict_from_json_file(json_file))

    @classmethod
    def load(cls, table_dir=None):
        """Load the controlled vocabularies from the CMIP6_CVs directory

        Parameters
        ----------
        table_dir : str or os.PathLike
            Directory containing the CV json files. Required in practice,
            even though the signature carries a ``None`` default.

        Raises
        ------
        TypeError
            If ``table_dir`` is ``None``.
        """
        if table_dir is None:
            # Path(None) would raise an opaque TypeError; keep the exception
            # type but say what is actually missing.
            raise TypeError(
                "table_dir must be the path to a directory of CV json files, "
                "not None"
            )
        return cls.from_directory(Path(table_dir))

    @classmethod
    def from_directory(cls, directory):
        """Create a new ControlledVocabularies object from a directory of json files

        Parameters
        ----------
        directory : str
            Path to the directory containing the json files
        """
        # glob.glob returns matches in arbitrary order. Sorting makes the
        # merge order (and therefore the winner of duplicated top-level
        # keys) reproducible across platforms and filesystems.
        json_files = sorted(glob.glob(os.path.join(directory, "*.json")))
        return cls(json_files)

    def print_experiment_ids(self):
        """Print experiment ids with start and end years and parent experiment ids"""
        for name, info in self["experiment_id"].items():
            print(
                f"{name} {info['start_year']}-{info['end_year']} parent:{', '.join(info['parent_experiment_id'])}"
            )

    @staticmethod
    def dict_from_json_file(path):
        """Load a json file into a dictionary object

        Parameters
        ----------
        path : str
            Path to the json file to load

        Raises
        ------
        ValueError
            If the file content is not valid JSON
        """
        try:
            # Read as UTF-8 explicitly instead of relying on the platform's
            # default text encoding.
            with open(path, "r", encoding="utf-8") as file:
                return json.load(file)
        except json.JSONDecodeError as e:
            # Chain the original error so line/column details stay visible.
            raise ValueError(f"file {path}: {e.msg}") from e

    @classmethod
    def load_from_git(cls, tag: str = "6.2.58.64", *, timeout: float = 30.0):
        """Load the controlled vocabularies from the git repository

        Parameters
        ----------
        tag : str
            The git tag to use. Default is 6.2.58.64
            If tag is None, the main branch is used.
        timeout : float
            Seconds to wait for each HTTP request before giving up.
            Default is 30.

        Returns
        -------
        ControlledVocabularies
            A new ControlledVocabularies object, behaves like a dictionary.

        Raises
        ------
        requests.HTTPError
            If any of the files cannot be downloaded.
        """
        ref = "refs/heads/main" if tag is None else "refs/tags/" + tag
        url = f"https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/{ref}"
        filenames = (
            "CMIP6_DRS.json",
            "CMIP6_activity_id.json",
            "CMIP6_experiment_id.json",
            "CMIP6_frequency.json",
            "CMIP6_grid_label.json",
            "CMIP6_institution_id.json",
            "CMIP6_license.json",
            "CMIP6_nominal_resolution.json",
            "CMIP6_realm.json",
            "CMIP6_required_global_attributes.json",
            "CMIP6_source_id.json",
            "CMIP6_source_type.json",
            "CMIP6_sub_experiment_id.json",
            "CMIP6_table_id.json",
            "mip_era.json",
        )
        # Strips the optional "CMIP6_" prefix and the ".json" suffix; the
        # remainder is used as the top-level key of the file.
        name_pattern = re.compile(r"^(?:CMIP6_)?(?P<name>[^\.]+)\.json$").match
        data = {}
        for fname in filenames:
            name = name_pattern(fname).groupdict().get("name")
            fpath = "/".join([url, fname])
            # Without a timeout a stalled connection would block forever.
            response = requests.get(fpath, timeout=timeout)
            response.raise_for_status()
            content = json.loads(response.content.decode())
            # Keep only the section named after the file; other top-level
            # keys in the downloaded document are dropped.
            data[name] = content.get(name)
        obj = cls([])
        obj.update(data)
        return obj
class CMIP7ControlledVocabularies(ControlledVocabularies):
    """Placeholder for CMIP7 controlled vocabularies.

    Adds nothing of its own; every attribute and method comes from
    ``ControlledVocabularies``.
    """