Source code for pycmor.core.validate

"""
Provides validation of user configuration files by checking against a schema.
"""

import glob
import importlib
import pathlib

from cerberus import Validator


class DirectoryAwareValidator(Validator):
    """A Validator that adds an ``is_directory`` rule for path-like fields."""

    def _validate_is_directory(self, is_directory, field, value):
        """Check that ``value`` is a glob-free string usable as a path.

        The check is purely syntactic: the value must not contain glob
        characters and must be accepted by ``pathlib.Path`` (including
        ``~`` expansion and resolution). The path is *not* required to
        exist on disk. Every problem is reported through ``self._error``
        rather than raised, so that a bad configuration value produces a
        validation error instead of a traceback.

        Parameters
        ----------
        is_directory : bool
            The rule's argument from the schema; the check only runs when
            it is truthy.
        field : str
            Name of the field being validated (used for error reporting).
        value : object
            The value found in the document for ``field``.

        The rule's arguments are validated against this schema:
        {'type': 'boolean'}
        """
        # NOTE: Cerberus parses the text after the "validated against this
        # schema:" marker in the docstring above, so the dict literal must
        # remain the last thing in that docstring.
        if not is_directory:
            return

        try:
            if glob.has_magic(value):
                self._error(field, "Must not contain glob characters")
        except TypeError as e:
            # Not a str/bytes value: report it and skip the path check,
            # which would only fail for the same reason.
            self._error(field, f"{e.args[0]}. Must be a string")
            return

        try:
            pathlib.Path(value).expanduser().resolve()
        except TypeError as e:
            self._error(field, f"{e.args[0]}. Must be a string")
        except (RuntimeError, OSError, ValueError) as e:
            # expanduser() raises RuntimeError for an unknown "~user";
            # resolve() may raise OSError/ValueError (e.g. embedded NUL).
            self._error(field, f"{e}. Must be a resolvable path")
class GeneralSectionValidator(DirectoryAwareValidator):
    """Validator applied to the ``general`` section of the configuration file."""
class PipelineSectionValidator(Validator):
    """
    Validator for pipeline configuration.

    See Also
    --------
    * https://cerberus-sanhe.readthedocs.io/customize.html#class-based-custom-validators
    """

    def _validate_is_qualname_or_script(self, is_qualname, field, value):
        """Test if a string is an importable qualname or a script reference.

        Two forms are accepted:

        * ``script://<path>[:<name>]`` -- ``<path>`` (everything up to the
          last ``:``) must point to an existing file after ``~`` expansion.
        * ``package.module.attr`` -- the module part must be importable and
          must expose ``attr``.

        All problems are reported through ``self._error``; nothing is
        raised for malformed input.

        Parameters
        ----------
        is_qualname : bool
            The rule's argument from the schema; the check only runs when
            it is truthy.
        field : str
            Name of the field being validated (used for error reporting).
        value : object
            The value found in the document for ``field``.

        The rule's arguments are validated against this schema:
        {'type': 'boolean'}
        """
        # NOTE: Cerberus parses the text after the "validated against this
        # schema:" marker in the docstring above, so the dict literal must
        # remain the last thing in that docstring.
        if not is_qualname:
            return
        if not isinstance(value, str):
            self._error(field, "Must be a string")
            # Stop here: the string methods below would raise on non-strings.
            return

        script_prefix = "script://"
        if value.startswith(script_prefix):
            # Strip only the leading prefix, then drop the optional
            # ":<name>" suffix after the last colon.
            script_path = value[len(script_prefix):].rsplit(":", 1)[0]
            try:
                is_file = pathlib.Path(script_path).expanduser().resolve().is_file()
            except (RuntimeError, OSError, ValueError):
                # Unknown "~user", unreadable path, embedded NUL, ...
                is_file = False
            if not is_file:
                self._error(field, "Must be a valid file path")
            return

        parts = value.split(".")
        # A qualname needs at least "module.attr" and no empty components;
        # otherwise import_module() would raise ValueError/TypeError
        # (empty or relative module name) instead of ImportError.
        if len(parts) < 2 or not all(parts):
            self._error(field, "Must be a valid Python qualname")
            return

        module_name, attr_name = ".".join(parts[:-1]), parts[-1]
        try:
            module = importlib.import_module(module_name)
        except ImportError:  # also covers ModuleNotFoundError
            self._error(field, "Must be a valid Python qualname")
            return
        if not hasattr(module, attr_name):
            self._error(field, "Must be a valid Python qualname")

    def _validate(self, document):
        """Run the parent ``_validate`` and require ``steps`` or ``uses``.

        Parameters
        ----------
        document : dict
            The document being validated; an error is recorded under the
            ``"document"`` field when it has neither a ``"steps"`` nor a
            ``"uses"`` key.
        """
        # NOTE(review): confirm that the Cerberus base class actually
        # defines and invokes ``_validate``; if it does not, this hook is
        # never reached and the steps/uses requirement is not enforced.
        super()._validate(document)
        if "steps" not in document and "uses" not in document:
            self._error(
                "document", 'At least one of "steps" or "uses" must be specified'
            )
class RuleSectionValidator(DirectoryAwareValidator):
    """Validator applied to the ``rules`` section of the configuration."""
GENERAL_SCHEMA = {
    "general": {
        "type": "dict",
        "allow_unknown": True,
        "schema": {
            "cmor_version": {
                "type": "string",
                "required": True,
                "allowed": [
                    "CMIP6",
                    "CMIP7",
                ],
            },
            "CV_Dir": {
                "type": "string",
                "required": True,
                "is_directory": True,
            },
            "CMIP_Tables_Dir": {
                "type": "string",
                "required": True,
                "is_directory": True,
            },
        },
    },
}
"""dict : Schema for validating general configuration."""

GENERAL_VALIDATOR = GeneralSectionValidator(GENERAL_SCHEMA)
"""Validator : Validator for general configuration."""

PIPELINES_SCHEMA = {
    "pipelines": {
        "type": "list",
        "schema": {
            "type": "dict",
            "schema": {
                "name": {"type": "string", "required": False},
                # "uses" and "steps" are mutually exclusive.
                "uses": {"type": "string", "excludes": "steps"},
                "steps": {
                    "type": "list",
                    "excludes": "uses",
                    "schema": {"type": "string", "is_qualname_or_script": True},
                },
            },
        },
    },
}
"""dict : Schema for validating pipelines configuration."""

PIPELINES_VALIDATOR = PipelineSectionValidator(PIPELINES_SCHEMA)
"""Validator : Validator for pipelines configuration."""

RULES_SCHEMA = {
    "rules": {
        "type": "list",
        "schema": {
            "type": "dict",
            "allow_unknown": True,
            "schema": {
                "name": {"type": "string", "required": False},
                "cmor_variable": {"type": "string", "required": True},
                "model_variable": {"type": "string", "required": False},
                "input_type": {
                    "type": "string",
                    "required": False,
                    "allowed": [
                        "xr.DataArray",
                        "xr.Dataset",
                    ],
                },
                "input_source": {
                    "type": "string",
                    "required": False,
                    "allowed": [
                        "xr_tutorial",
                    ],
                },
                "inputs": {
                    "type": "list",
                    "schema": {
                        "type": "dict",  # Each item in the list must be a dictionary
                        "schema": {  # Define the required keys in the dictionary
                            "path": {"type": "string", "required": True},
                            "pattern": {"type": "string", "required": True},
                            # Add more keys and their types as needed
                        },
                    },
                    "required": True,
                },
                "enabled": {"type": "boolean", "required": False},
                "description": {"type": "string", "required": False},
                "pipelines": {
                    "type": "list",
                    # FIXME(PG): Should cross-check with pipelines.
                    "schema": {"type": "string"},
                },
                "cmor_unit": {"type": "string", "required": False},
                "model_unit": {"type": "string", "required": False},
                "file_timespan": {"type": "string", "required": False},
                "variant_label": {
                    "type": "string",
                    "required": True,
                    "regex": r"^r\d+i\d+p\d+f\d+$",
                },
                "source_id": {"type": "string", "required": True},
                "output_directory": {
                    "type": "string",
                    "required": True,
                    "is_directory": True,
                },
                # "instition_id" is a historical misspelling; it is kept so
                # existing configurations validate exactly as before. The
                # correctly spelled key is checked as well.
                "instition_id": {"type": "string", "required": False},
                "institution_id": {"type": "string", "required": False},
                "experiment_id": {"type": "string", "required": True},
                "adjust_timestamp": {"type": "string", "required": False},
                "further_info_url": {"type": "string", "required": False},
                # "model_component" examples:
                # aerosol, atmos, land, landIce, ocnBgchem, ocean, seaIce
                "model_component": {"type": "string", "required": True},
                "grid_label": {"type": "string", "required": True},
                "array_order": {"type": "list", "required": False},
                "time_units": {
                    "type": "string",
                    "required": False,
                    # "<unit> since YYYY-MM-DD[ HH:MM:SS[.fraction]]"; the dot
                    # before the fractional seconds is escaped so that only a
                    # literal "." is accepted there.
                    "regex": (
                        r"^\s*(days|hours|minutes|seconds|milliseconds|microseconds|nanoseconds)"
                        r"\s+since\s+\d{4}-\d{2}-\d{2}(\s+\d{2}:\d{2}:\d{2}(\.\d+)?)?\s*$"
                    ),
                },
                "time_calendar": {
                    "type": "string",
                    "required": False,
                    "allowed": [
                        "standard",
                        "gregorian",
                        "proleptic_gregorian",
                        "noleap",
                        "365_day",
                        "all_leap",
                        "366_day",
                        "360_day",
                        "julian",
                        "none",
                    ],
                },
            },
        },
    },
}
"""dict : Schema for validating rules configuration."""

RULES_VALIDATOR = RuleSectionValidator(RULES_SCHEMA)
"""Validator : Validator for rules configuration."""