"""
This module defines the configuration hierarchy for the pycmor application, using
``everett``'s :class:`~everett.manager.ConfigManager`. The configuration hierarchy is as follows (lowest to highest
priority):
1. Hardcoded defaults
2. User configuration file
3. Run-specific configuration
4. Environment variables
5. Command-line switches
The configuration hierarchy is defined in the ``from_pycmor_cfg`` class method, and
cannot be modified outside the class. You should initialize a ``PycmorConfigManager``
object (probably in your ``CMORizer``) and grab config values from it by calling with the
config key as an argument.
User Configuration File
-----------------------
You can define global configuration options in a user configuration file. The files found at these
locations will be used, in highest to lowest priority order:
1. ``${PYCMOR_CONFIG_FILE}``
2. ``${XDG_CONFIG_HOME}/pycmor.yaml``
3. ``${XDG_CONFIG_HOME}/pycmor/pycmor.yaml``
4. ``~/.pycmor.yaml``
Note that the ``${XDG_CONFIG_HOME}`` environment variable defaults to ``~/.config`` if it is not set.
Configuration Options
---------------------
You can configure the following:
.. autocomponentconfig:: pycmor.core.config.PycmorConfig
:case: upper
:show-table:
:namespace: pycmor
Usage
-----
Here are some examples of how to use the configuration manager::
>>> pycmor_cfg = {}
>>> config = PycmorConfigManager.from_pycmor_cfg(pycmor_cfg)
>>> engine = config("xarray_open_mfdataset_engine")
>>> print(f"Using xarray backend: {engine}")
Using xarray backend: netcdf4
>>> parallel = config("parallel")
>>> print(f"Running in parallel: {parallel}")
Running in parallel: True
You can define a user file at ``${XDG_CONFIG_HOME}/pycmor/pycmor.yaml``::
>>> import pathlib
>>> import yaml
>>> cfg_file = pathlib.Path("~/.config/pycmor/pycmor.yaml").expanduser()
>>> cfg_file.parent.mkdir(parents=True, exist_ok=True)
>>> cfg_to_dump = {"xarray_open_mfdataset_engine": "zarr"}
>>> with open(cfg_file, "w") as f:
...     yaml.dump(cfg_to_dump, f)
>>> config = PycmorConfigManager.from_pycmor_cfg()
>>> engine = config("xarray_open_mfdataset_engine")
>>> print(f"Using xarray backend: {engine}")
Using xarray backend: zarr
See Also
--------
- `Everett Documentation <https://everett.readthedocs.io/en/latest/>`_
"""
import os
import pathlib
from importlib.resources import files
from everett import InvalidKeyError
from everett.ext.yamlfile import ConfigYamlEnv
from everett.manager import (
ChoiceOf,
ConfigDictEnv,
ConfigManager,
ConfigOSEnv,
Option,
_get_component_name,
parse_bool,
)
# Packaged resource ``dimensionless_mappings.yaml`` inside ``pycmor.data``; used
# below as the default of the ``dimensionless_mapping_table`` option.
DIMENSIONLESS_MAPPING_TABLE = files("pycmor.data") / "dimensionless_mappings.yaml"
[docs]
def _parse_bool(value):
if isinstance(value, bool):
return value
return parse_bool(value)
[docs]
def _bool_option(default, doc):
    """Build an ``Option`` whose value is parsed with ``_parse_bool``."""
    return Option(default=default, doc=doc, parser=_parse_bool)


def _choice_option(default, doc, choices):
    """Build a string ``Option`` whose value must be one of ``choices``."""
    return Option(default=default, doc=doc, parser=ChoiceOf(str, choices=choices))


class PycmorConfig:
    """
    Declaration of every pycmor configuration option.

    The options live on the nested ``Config`` class; each one carries its
    default value, its help text and the parser applied to the configured value.
    """

    class Config:
        """Option declarations for ``PycmorConfig``."""

        # [FIXME] Keep the list of all options alphabetical!
        dask_cluster = _choice_option(
            default="local",
            doc="Dask cluster to use. See: https://docs.dask.org/en/stable/deploying.html",
            choices=["local", "slurm"],
        )
        dask_cluster_scaling_fixed_jobs = Option(
            default=5,
            doc="Number of jobs to create for Jobqueue-backed Dask Cluster",
            parser=int,
        )
        dask_cluster_scaling_maximum_jobs = Option(
            default=10,
            doc="Maximum number of jobs to create for Jobqueue-backed Dask Clusters (adaptive)",
            parser=int,
        )
        dask_cluster_scaling_minimum_jobs = Option(
            default=1,
            doc="Minimum number of jobs to create for Jobqueue-backed Dask Clusters (adaptive)",
            parser=int,
        )
        dask_cluster_scaling_mode = _choice_option(
            default="adapt",
            doc="Flexible dask cluster scaling",
            choices=["adapt", "fixed"],
        )
        dimensionless_mapping_table = Option(
            default=DIMENSIONLESS_MAPPING_TABLE,
            doc="Where the dimensionless unit mapping table is defined.",
            parser=str,
        )
        enable_dask = _bool_option(
            default="yes",
            doc="Whether to enable Dask-based processing",
        )
        enable_flox = _bool_option(
            default="yes",
            doc="Whether to enable flox for group-by operation. See: https://flox.readthedocs.io/en/latest/",
        )
        enable_output_subdirs = _bool_option(
            default="no",
            doc="Whether to create subdirectories under output_dir when saving data-sets.",
        )
        file_timespan = Option(
            default="1YS",
            # The line breaks (including the trailing one) are part of the help text.
            doc=(
                "Default timespan for grouping output files together.\n"
                "Use the special flag ``'file_native'`` to use the same grouping as in the input\n"
                "files. Otherwise, use a ``pandas``-flavoured string, see: https://tinyurl.com/38wxf8px\n"
            ),
            parser=str,
        )
        parallel = _bool_option(
            default="yes",
            doc="Whether to run in parallel.",
        )
        # No explicit parser here: ``Option``'s own default parser applies.
        parallel_backend = Option(
            default="dask",
            doc="Which parallel backend to use.",
        )
        pipeline_workflow_orchestrator = _choice_option(
            default="prefect",
            doc="Which workflow orchestrator to use for running pipelines",
            choices=["native", "prefect"],
        )
        prefect_task_runner = _choice_option(
            default="thread_pool",
            doc="Which runner to use for Prefect flows.",
            choices=["thread_pool", "dask"],
        )
        quiet = _bool_option(
            default=False,
            doc="Whether to suppress output.",
        )
        raise_on_no_rule = _bool_option(
            default="no",
            doc="Whether or not to raise an error if no rule is found for every single DataRequestVariable",
        )
        warn_on_no_rule = _bool_option(
            default="yes",
            doc="Whether or not to issue a warning if no rule is found for every single DataRequestVariable",
        )
        xarray_default_missing_value = Option(
            default=1.0e30,
            doc="Which missing value to use for xarray. Default is 1e30.",
            parser=float,
        )
        xarray_open_mfdataset_engine = _choice_option(
            default="netcdf4",
            doc="Which engine to use for xarray.open_mfdataset().",
            choices=["netcdf4", "h5netcdf", "zarr"],
        )
        xarray_open_mfdataset_parallel = _bool_option(
            default="yes",
            doc=(
                "Whether to use parallel processing when opening multiple files "
                "with xarray.open_mfdataset(). Default is True."
            ),
        )
        xarray_skip_unit_attr_from_drv = _bool_option(
            default="yes",
            doc="Whether to skip setting the unit attribute from the DataRequestVariable, this can be handled via Pint",
        )
        xarray_time_dtype = _choice_option(
            default="float64",
            doc="The dtype to use for time axis in xarray.",
            choices=["float64", "datetime64[ns]"],
        )
        xarray_time_enable_set_axis = _bool_option(
            default="yes",
            doc="Whether to enable setting the axis for the time axis in xarray.",
        )
        xarray_time_remove_fill_value_attr = _bool_option(
            default="yes",
            doc="Whether to remove the fill_value attribute from the time axis in xarray.",
        )
        xarray_time_set_long_name = _bool_option(
            default="yes",
            doc="Whether to set the long name for the time axis in xarray.",
        )
        xarray_time_set_standard_name = _bool_option(
            default="yes",
            doc="Whether to set the standard name for the time axis in xarray.",
        )
        xarray_time_taxis_str = Option(
            default="T",
            doc="Which axis to set for the time axis in xarray.",
            parser=str,
        )
        xarray_time_unlimited = _bool_option(
            default="yes",
            doc="Whether the time axis is unlimited in xarray.",
        )
[docs]
class PycmorConfigManager(ConfigManager):
    """
    Custom ConfigManager for Pycmor, with a predefined hierarchy and
    support for injecting run-specific configuration.
    """

    # An unset *or empty* ``XDG_CONFIG_HOME`` falls back to ``~/.config``; an
    # empty value would otherwise produce paths such as ``/pycmor.yaml``.
    _XDG_CONFIG_HOME = os.environ.get("XDG_CONFIG_HOME") or "~/.config"
    """str : The XDG configuration directory."""

    # NOTE: the candidate list must stay the comprehension's iterable. Only the
    # iterable is evaluated in class scope, where ``_XDG_CONFIG_HOME`` is visible.
    _CONFIG_FILES = [
        str(f)
        for f in [
            # Prefer new env var, fall back to legacy
            os.environ.get("PYCMOR_CONFIG_FILE") or os.environ.get("PYMOR_CONFIG_FILE"),
            # Prefer new locations
            pathlib.Path(f"{_XDG_CONFIG_HOME}/pycmor.yaml").expanduser(),
            pathlib.Path(f"{_XDG_CONFIG_HOME}/pycmor/pycmor.yaml").expanduser(),
            pathlib.Path("~/.pycmor.yaml").expanduser(),
            # Legacy fallbacks
            pathlib.Path(f"{_XDG_CONFIG_HOME}/pymor.yaml").expanduser(),
            pathlib.Path(f"{_XDG_CONFIG_HOME}/pymor/pymor.yaml").expanduser(),
            pathlib.Path("~/.pymor.yaml").expanduser(),
        ]
        # Drops the first entry when neither environment variable is set.
        if f
    ]
    """List[str] : The list of configuration files to check for user configuration."""

    @classmethod
    def from_pycmor_cfg(cls, run_specific_cfg=None):
        """
        Create a PycmorConfigManager with the appropriate hierarchy.

        Sources are consulted from highest to lowest priority: environment
        variables, the run-specific configuration, the user configuration
        file, and finally the defaults declared on ``PycmorConfig``.

        Parameters
        ----------
        run_specific_cfg : dict
            Optional. Overrides specific values for this run.

        Returns
        -------
        PycmorConfigManager
            The manager returned by ``with_options(PycmorConfig)``.
        """
        # Configuration hierarchy (highest to lowest priority):
        # 5. Command-line switches
        #    Not implemented here
        # 4. Environment variables
        env_vars = ConfigOSEnv()
        # 3. Run-specific configuration
        run_specific = ConfigDictEnv(run_specific_cfg or {})
        # 2. User config file
        user_file = ConfigYamlEnv(cls._CONFIG_FILES)
        # 1. Hardcoded defaults
        #    Handled by ``manager.with_options`` below

        # everett looks through ``environments`` in list order and uses the
        # first one that provides a value, so the list must run from highest
        # to lowest priority.
        manager = cls(
            environments=[env_vars, run_specific, user_file],
        )
        return manager.with_options(PycmorConfig)

    # NOTE(PG): Need to override this method, the original implementation in the parent class
    # explicitly uses ConfigManager (not cls) to create the clone instance.
    def clone(self):
        """
        Return a copy of this manager that is itself a ``PycmorConfigManager``.

        The environment list and namespace are copied into new lists; the
        bound component, its options and the original manager are shared with
        ``self``. The clone's ``bound_component_prefix`` starts out empty.
        """
        my_clone = PycmorConfigManager(
            environments=list(self.envs),
            doc=self.doc,
            msg_builder=self.msg_builder,
            with_override=self.with_override,
        )
        my_clone.namespace = list(self.namespace)
        my_clone.bound_component = self.bound_component
        my_clone.bound_component_prefix = []
        my_clone.bound_component_options = self.bound_component_options
        my_clone.original_manager = self.original_manager
        return my_clone

    def __repr__(self) -> str:
        """Show the class name, the bound component (if any) and the namespace."""
        if not self.bound_component:
            return f"<PycmorConfigManager: namespace:{self.get_namespace()}>"
        name = _get_component_name(self.bound_component)
        return f"<PycmorConfigManager({name}): namespace:{self.get_namespace()}>"

    def get(self, key, default=None, parser=None):
        """
        Get a configuration value by key, with a default value.

        Parameters
        ----------
        key : str
            The configuration key to get.
        default : Any
            The default value to return if the key is not found.
        parser : Callable
            Optional. A callable to parse the configuration value.

        Returns
        -------
        Any
            The configuration value, or ``default`` when looking up ``key``
            raises ``InvalidKeyError``. Any other exception propagates.
        """
        try:
            return self(key, parser=parser)
        except InvalidKeyError:
            return default
# ---------------------------------------------------------------------------
# Backward compatibility aliases (to be removed in a future release)
# ---------------------------------------------------------------------------
# The legacy ``Pymor*`` names are the very same class objects, not subclasses.
PymorConfig = PycmorConfig
PymorConfigManager = PycmorConfigManager

# Legacy constructor compatibility: ``from_pymor_cfg`` forwards its single
# optional argument to ``from_pycmor_cfg`` on whichever class it is called on.
PycmorConfigManager.from_pymor_cfg = classmethod(
    lambda cls, run_specific_cfg=None: cls.from_pycmor_cfg(run_specific_cfg)
)