Source code for pycmor.std_lib.setgrid

"""
Set grid information on the data file.

xarray does not have a built-in `setgrid` operator unlike `cdo`.  Using
`xarray.merge` directly to merge grid with data may or may not produce the
desired result all the time.

Some guiding rules to set the grid information:

1. At least one dimension size in both data file and grid file should match.
2. If the dimension sizes match but the dimension names do not, then the dimension
   name in the data file is renamed to match the dimension name in the grid file.
3. The matching dimension size must be one of the coordinate variables in both data
   file and grid file.
4. If all above conditions are met, then the data file is merged with the grid file.
5. The coordinate variables and boundary variables (lat_bnds, lon_bnds) from the grid file
   are kept, while other data variables in grid file are dropped.
6. The result of the merge is always an xarray.Dataset.

Note: Rule 5 is not strict and may go away if it is not desired.
"""

from typing import Union

import xarray as xr

from ..core.logging import logger
from ..core.rule import Rule


def setgrid(
    da: Union[xr.Dataset, xr.DataArray], rule: Rule
) -> Union[xr.Dataset, xr.DataArray]:
    """
    Appends grid information to data file if necessary coordinate dimensions exist in data file.
    Renames dimensions in data file to match the dimension names in grid file if necessary.

    Parameters
    ----------
    da : xr.Dataset or xr.DataArray
        The input dataarray or dataset.
    rule : Rule
        Rule object; the grid file path is read with ``rule.get("grid_file")``.

    Returns
    -------
    xr.Dataset or xr.DataArray
        The grid coordinates (plus ``lat_bnds``/``lon_bnds`` when present in the
        grid file) merged with the data when at least one grid dimension could be
        matched. Otherwise the input is returned unchanged and a warning is logged.

    Raises
    ------
    ValueError
        If ``grid_file`` is not set in the rule, or if a dimension that exists
        under the same name in both grid and data has different sizes.
    """
    logger.info("[SetGrid] Starting grid merge operation")
    gridfile = rule.get("grid_file")
    logger.info(f" → Grid File : {gridfile}")
    if gridfile is None:
        raise ValueError("Missing grid file. Please set 'grid_file' in the rule.")

    # NOTE(review): the dataset handle is not closed here; the merged result may
    # still reference lazily-loaded grid variables -- confirm before adding close().
    grid = xr.open_dataset(gridfile)

    # Every dimension used by any coordinate variable of the grid file.
    required_dims = {dim for coord in grid.coords.values() for dim in coord.dims}
    logger.info(f" → Required Dimensions: {sorted(required_dims)}")

    to_rename = {}
    can_merge = False
    # Iterate in sorted order so that size-based matching is deterministic
    # (plain set iteration order can differ between interpreter runs).
    for dim in sorted(required_dims):
        dimsize = grid.sizes[dim]
        if dim in da.sizes:
            if da.sizes[dim] != dimsize:
                raise ValueError(
                    f"Mismatch dimension sizes {dim} {dimsize} (grid) {da.sizes[dim]} (data)"
                )
            can_merge = True
            logger.info(f" → Dimension '{dim}' : ✅ Found (size={dimsize})")
            continue

        logger.info(
            f" → Dimension '{dim}' : ❌ Not found, checking for size matches..."
        )
        # Only consider data dimensions that are still free to be renamed:
        #  * not themselves a grid dimension name (those are matched by name and
        #    renaming them would break an already-correct dimension), and
        #  * not already assigned to another grid dimension.
        candidates = [
            name
            for name, _size in da.sizes.items()
            if _size == dimsize
            and name not in required_dims
            and name not in to_rename
        ]
        if not candidates:
            continue

        # Mapping several data dimensions onto one grid dimension would give
        # ``rename`` duplicate target names, so keep only the first candidate.
        chosen, *ambiguous = candidates
        can_merge = True
        to_rename[chosen] = dim
        logger.info(
            f" • Found size match : '{chosen}' ({da.sizes[chosen]}) → '{dim}' ({dimsize})"
        )
        if ambiguous:
            logger.warning(
                f" • Ambiguous size match for '{dim}': using '{chosen}', ignoring {ambiguous}"
            )

    logger.info(
        f" → Merge Status : {'✅ Possible' if can_merge else '❌ Not possible'}"
    )

    if can_merge:
        if to_rename:
            logger.info(f" → Renaming Dims : {dict(to_rename)}")
            da = da.rename(to_rename)

        # Keep coordinate variables and boundary variables (lat_bnds, lon_bnds)
        required_vars = list(grid.coords.keys())  # Always include coordinate variables
        logger.info(f" → Coordinate Vars : {sorted(required_vars)}")

        # Add boundary variables if they exist
        boundary_vars = ["lat_bnds", "lon_bnds"]
        boundary_found = [var for var in boundary_vars if var in grid.variables]
        required_vars.extend(boundary_found)
        if boundary_found:
            logger.info(f" → Boundary Vars : {sorted(boundary_found)}")
        else:
            logger.info(" → Boundary Vars : None found")

        # Other data variables of the grid file are intentionally dropped.
        new_grid = grid[required_vars]
        da = new_grid.merge(da)
        logger.info(" → Grid Merge : ✅ Completed")
    else:
        logger.warning(" → Warning : ❌ No compatible dimensions found!")
        logger.warning(" Check grid and data dimension compatibility.")
    logger.info("-" * 50)
    return da