Source code for pycmor.core.infer_freq

import warnings
from collections import namedtuple

import numpy as np
import pandas as pd
import xarray as xr
from xarray.core.extensions import (
    register_dataarray_accessor,
    register_dataset_accessor,
)

from .logging import logger
from .time_utils import get_time_label

# Immutable record returned by the frequency-inference helpers when metadata
# is requested. Fields, in positional order:
#   frequency  -- str or None: inferred frequency string (e.g., 'M', '2D')
#   delta_days -- float or None: median time delta in days
#   step       -- int or None: step multiplier for the frequency
#   is_exact   -- bool: whether the time series has exact regular spacing
#   status     -- str: status message ('valid', 'irregular', 'no_match', etc.)
FrequencyResult = namedtuple(
    "FrequencyResult",
    ("frequency", "delta_days", "step", "is_exact", "status"),
)


def _convert_cftime_to_ordinals(times_values):
    """
    Convert cftime-like objects to fractional day ordinals.

    Each value is expressed as the elapsed time since the first element
    (in days, including the sub-day part) plus the first element's own
    ordinal and time of day.

    Parameters
    ----------
    times_values : sequence
        Non-empty sequence of datetime-like objects that support subtraction
        (yielding a ``timedelta``) and expose ``hour``/``minute``/``second``.

    Returns
    -------
    np.ndarray
        Fractional day ordinals, one per input element.
    """
    ref_date = times_values[0]

    # Use the full elapsed time. ``timedelta.days`` alone is floored, so mixing
    # it with each element's own time of day gives wrong (even non-monotonic)
    # values whenever the reference is not at midnight.
    days_since_ref = np.array(
        [(t - ref_date).total_seconds() / 86400 for t in times_values]
    )

    try:
        ref_ordinal = ref_date.toordinal()
    except (AttributeError, ValueError):
        # If toordinal fails, fall back to a coarse approximation built from
        # the calendar fields (average year and month lengths).
        return np.array(
            [
                t.year * 365.25
                + t.month * 30.4375
                + t.day
                + t.hour / 24
                + t.minute / 1440
                + t.second / 86400
                for t in times_values
            ]
        )

    # Time of day of the reference, so the result is absolute rather than
    # relative to the reference's midnight.
    ref_day_fraction = (
        ref_date.hour / 24
        + ref_date.minute / 1440
        + ref_date.second / 86400
        + getattr(ref_date, "microsecond", 0) / 86_400_000_000
    )
    return days_since_ref + ref_day_fraction + ref_ordinal
def _convert_standard_datetime_to_ordinals(times_values):
    """
    Convert standard datetime objects to fractional day ordinals.

    The integer part comes from ``toordinal()``; hours, minutes and seconds
    are added as fractions of a day.
    """
    fractional_ordinals = []
    for stamp in times_values:
        # Accumulate in the same left-to-right order as a single sum so the
        # floating-point result is unchanged.
        value = stamp.toordinal() + stamp.hour / 24
        value = value + stamp.minute / 1440
        value = value + stamp.second / 86400
        fractional_ordinals.append(value)
    return np.array(fractional_ordinals)
def _convert_numeric_timestamps_to_ordinals(times_values):
    """
    Convert numeric timestamps (e.g., numpy.datetime64) to Julian dates.

    Every element is wrapped in ``pd.Timestamp`` and converted with
    ``to_julian_date()``.
    """
    julian_dates = []
    for raw_stamp in times_values:
        julian_dates.append(pd.Timestamp(raw_stamp).to_julian_date())
    return np.array(julian_dates)
def _convert_times_to_ordinals(times_values):
    """
    Convert various datetime types to ordinal values for frequency analysis.

    The converter is chosen from the first element only:

    1. no ``toordinal`` method -> treated as numeric timestamps
       (numpy.datetime64, etc.)
    2. ``toordinal`` and a ``calendar`` attribute -> treated as cftime objects
    3. ``toordinal`` only -> treated as standard datetime objects

    Parameters
    ----------
    times_values : array-like
        Non-empty array of datetime-like objects

    Returns
    -------
    np.ndarray
        Array of ordinal values representing the datetime objects
    """
    if not hasattr(times_values[0], "toordinal"):
        # Assume numeric timestamps (e.g., numpy.datetime64)
        return _convert_numeric_timestamps_to_ordinals(times_values)

    if hasattr(times_values[0], "calendar"):
        # cftime objects - convert to days since a reference date
        return _convert_cftime_to_ordinals(times_values)

    # Standard datetime objects
    return _convert_standard_datetime_to_ordinals(times_values)
# Core frequency inference
def _match_base_frequency(median_delta, base_freqs, rel_tol, max_step=12):
    """Return the first ``(freq, step)`` within ``rel_tol`` of ``median_delta``.

    Candidates are tried in ``base_freqs`` insertion order, and for each one
    with steps ``1..max_step``; ``(None, None)`` is returned when none match.
    """
    for freq, base_days in base_freqs.items():
        for step in range(1, max_step + 1):
            test_delta = base_days * step
            if abs(median_delta - test_delta) <= rel_tol * test_delta:
                return freq, step
    return None, None


def _infer_frequency_core(
    times, tol=0.05, return_metadata=False, strict=False, calendar="standard", log=False
):
    """
    Infer time frequency from datetime-like array, returning pandas-style
    frequency strings.

    The median spacing (in days) is compared against multiples (1-12) of a
    small set of base frequencies. The first candidate within ``tol`` wins;
    if none matches, the search is repeated with a relaxed tolerance of 0.5.

    Parameters
    ----------
    times : array-like
        List of datetime-like objects (cftime or datetime64).
    tol : float, optional
        Relative tolerance (fraction of the candidate interval) used when
        comparing deltas. Defaults to 0.05.
    return_metadata : bool, optional
        If True, returns a ``FrequencyResult``
        (frequency, delta_days, step, is_exact, status) instead of just the
        frequency string. Defaults to False.
    strict : bool, optional
        If True, additionally flags series whose individual deltas deviate
        from the median ("irregular") or whose span implies a different
        number of steps ("missing_steps"). Defaults to False.
    calendar : str, optional
        Calendar name used to pick the year length. Unknown names use
        365.25 days. Defaults to "standard".
    log : bool, optional
        If True, logs the results of the frequency check. Defaults to False.

    Returns
    -------
    str or None or FrequencyResult
        Inferred frequency string (e.g., 'M'), or None when nothing could be
        inferred; a ``FrequencyResult`` if return_metadata=True.
    """
    if len(times) < 2:
        if log:
            log_frequency_check(
                "Time Series", None, None, None, False, "too_short", strict
            )
        return (
            FrequencyResult(None, None, None, False, "too_short")
            if return_metadata
            else None
        )

    # Handle both pandas-like objects (with .values) and plain lists/arrays
    try:
        times_values = times.values if hasattr(times, "values") else times
        ordinals = _convert_times_to_ordinals(times_values)
    except (AttributeError, TypeError, ValueError) as e:
        error_status = f"invalid_input: {str(e)}"
        if log:
            log_frequency_check(
                "Time Series", None, None, None, False, error_status, strict
            )
        if return_metadata:
            return FrequencyResult(None, None, None, False, error_status)
        return None

    deltas = np.diff(ordinals)
    median_delta = np.median(deltas)
    std_delta = np.std(deltas)

    # Year length per calendar. "365_day" / "366_day" are the CF aliases of
    # "noleap" / "all_leap"; anything unknown is treated like "standard".
    days_in_calendar_year = {
        "standard": 365.25,
        "gregorian": 365.25,
        "proleptic_gregorian": 365.25,
        "julian": 365.25,
        "noleap": 365.0,
        "365_day": 365.0,
        "all_leap": 366.0,
        "366_day": 366.0,
        "360_day": 360.0,
    }.get(calendar, 365.25)

    # Insertion order matters: the first matching candidate wins.
    base_freqs = {
        "H": 1 / 24,
        "D": 1,
        "W": 7,
        "M": days_in_calendar_year / 12,
        "Q": days_in_calendar_year / 4,
        "A": days_in_calendar_year,
        "10A": days_in_calendar_year * 10,
    }

    matched_freq, matched_step = _match_base_frequency(median_delta, base_freqs, tol)

    if matched_freq is None:
        # For irregular time series, try to find the closest match with a
        # much more relaxed tolerance.
        relaxed_tol = 0.5
        matched_freq, matched_step = _match_base_frequency(
            median_delta, base_freqs, relaxed_tol
        )

    if matched_freq is None:
        if log:
            log_frequency_check(
                "Time Series", None, median_delta, None, False, "no_match", strict
            )
        return (
            FrequencyResult(None, median_delta, None, False, "no_match")
            if return_metadata
            else None
        )

    matched_interval = base_freqs[matched_freq] * matched_step
    is_exact = std_delta < tol * matched_interval
    status = "valid" if is_exact else "irregular"

    if strict:
        expected_steps = (ordinals[-1] - ordinals[0]) / matched_interval
        actual_steps = len(times) - 1

        if not np.all(np.abs(deltas - median_delta) <= tol * median_delta):
            status = "irregular"
            is_exact = False  # keep is_exact consistent with the status
        if abs(expected_steps - actual_steps) >= 1:
            status = "missing_steps"
            is_exact = False  # keep is_exact consistent with the status

    freq_str = f"{matched_step}{matched_freq}" if matched_step > 1 else matched_freq

    # Log the results if requested
    if log:
        log_frequency_check(
            "Time Series",
            freq_str,
            median_delta,
            matched_step,
            is_exact,
            status,
            strict,
        )

    return (
        FrequencyResult(freq_str, median_delta, matched_step, is_exact, status)
        if return_metadata
        else freq_str
    )
# Public entry point: try xarray's built-in inference first, then fall back to the core heuristic
def infer_frequency(
    times, return_metadata=False, strict=False, calendar="standard", log=False
):
    """
    Infer time frequency from datetime-like array, returning pandas-style
    frequency strings.

    ``xr.infer_freq`` is tried first. If it raises or returns None, the
    heuristic in ``_infer_frequency_core`` is used instead.

    Parameters
    ----------
    times : array-like
        List of datetime-like objects (cftime or datetime64). Objects with a
        ``.values`` attribute are unwrapped first.
    return_metadata : bool, optional
        If True, returns a ``FrequencyResult``
        (frequency, delta_days, step, is_exact, status) instead of just the
        frequency string. Defaults to False.
    strict : bool, optional
        If True, performs additional checks for irregular time series and
        returns a status message. Only affects the fallback path.
        Defaults to False.
    calendar : str, optional
        Calendar type to use for cftime objects. Only affects the fallback
        path. Defaults to "standard".
    log : bool, optional
        If True, logs the results of the frequency check. Defaults to False.

    Returns
    -------
    str or None or FrequencyResult
        Inferred frequency string (e.g., 'M'), or a ``FrequencyResult`` if
        return_metadata=True.
    """
    # Extract values from xarray objects if needed
    times_values = times.values if hasattr(times, "values") else times

    # Deliberate best-effort: input xarray cannot handle must not abort
    # inference, so any failure here simply selects the fallback below.
    try:
        freq = xr.infer_freq(times_values)
    except Exception:
        freq = None

    if freq is None:
        return _infer_frequency_core(
            times_values,
            return_metadata=return_metadata,
            strict=strict,
            calendar=calendar,
            log=log,
        )

    if not (return_metadata or log):
        return freq

    # Fill in the metadata instead of reporting delta_days=None / step=1:
    # callers such as is_resolution_fine_enough treat a missing delta as
    # "cannot be validated".
    import re

    leading_digits = re.match(r"\d+", freq)
    step = int(leading_digits.group()) if leading_digits else 1

    try:
        delta_days = float(np.median(np.diff(_convert_times_to_ordinals(times_values))))
    except (AttributeError, TypeError, ValueError):
        # The frequency itself is still valid; only the delta is unknown.
        delta_days = None

    if log:
        log_frequency_check("Time Series", freq, delta_days, step, True, "valid", strict)

    return FrequencyResult(freq, delta_days, step, True, "valid") if return_metadata else freq
# Logger
def log_frequency_check(name, freq, delta, step, exact, status, strict=False):
    """
    Log the results of the frequency check.

    Emits one info-level line per field (name, inferred frequency, step,
    median delta, regularity flag, strict flag, status), followed by a
    separator line. Falsy ``freq`` / ``step`` and a ``None`` delta are
    rendered as the text ``None``.
    """
    emit = logger.info

    emit(f"[Freq Check] {name}")
    emit(f" → Inferred Frequency : {freq or 'None'}")
    emit(f" → Step Multiple : {step or 'None'}")

    # Handle None delta values safely
    delta_text = "None" if delta is None else f"{delta:.2f}"
    emit(f" → Median Δ (days) : {delta_text}")

    exact_mark = "✅" if exact else "❌"
    emit(f" → Regular Spacing : {exact_mark}")

    strict_mark = "✅" if strict else "❌"
    emit(f" → Strict Mode : {strict_mark}")

    emit(f" → Status : {status}")
    emit("-" * 40)
def approx_interval_to_frequency_str(approx_interval, tolerance=0.1):
    """
    Convert an approximate interval in days to a pandas-style frequency string.

    The interval is classified into sub-daily, daily, weekly, monthly or
    yearly ranges. Within a range, common multiples are matched using the
    relative ``tolerance``; otherwise the value is rounded to the nearest
    whole unit of that range.

    Parameters
    ----------
    approx_interval : float
        Approximate interval in days
    tolerance : float, optional
        Relative tolerance for matching standard frequencies,
        by default 0.1 (10%)

    Returns
    -------
    str or None
        Pandas-style frequency string (e.g., 'D', 'M', '3M', 'Y'), or None
        for time-invariant data (0.0 days) and for intervals that round to
        zero seconds.

    Examples
    --------
    >>> approx_interval_to_frequency_str(1.0)  # Daily
    'D'
    >>> approx_interval_to_frequency_str(30.0)  # Monthly
    'M'
    >>> approx_interval_to_frequency_str(91.3)  # 3-Monthly (approx)
    '3M'
    >>> approx_interval_to_frequency_str(365.0)  # Yearly
    'Y'
    >>> approx_interval_to_frequency_str(0.041667)  # Hourly
    'H'
    """
    # Handle special case: time-invariant/fixed data
    if approx_interval == 0.0:
        return None

    # Standard reference intervals for common frequencies
    SECONDS_PER_DAY = 24 * 60 * 60
    MINUTES_PER_DAY = 24 * 60
    HOURS_PER_DAY = 24
    DAYS_PER_WEEK = 7
    DAYS_PER_MONTH = 30.0  # CMIP6 standard
    DAYS_PER_YEAR = 365.0  # CMIP6 standard

    def is_close(value, target):
        """Check if value is within the relative tolerance of target."""
        return abs(value - target) / target <= tolerance

    # 1. Sub-daily frequencies (< 1 day)
    if approx_interval < 1.0:
        hours = approx_interval * HOURS_PER_DAY
        for h in (1, 2, 3, 4, 6, 8, 12):
            if is_close(hours, h):
                return f"{h}H" if h > 1 else "H"

        minutes = approx_interval * MINUTES_PER_DAY
        for m in (15, 20, 25, 30, 45):
            if is_close(minutes, m):
                return f"{m}T"

        # Fall back to rounded hours, then minutes
        if hours >= 1:
            return f"{int(round(hours))}H"
        whole_minutes = int(round(minutes))
        if whole_minutes != 0:
            return f"{whole_minutes}T"

        # Sub-minute interval: "0T" is not a usable frequency, so express it
        # in seconds; give up (None) if even that rounds to zero.
        whole_seconds = int(round(approx_interval * SECONDS_PER_DAY))
        return f"{whole_seconds}S" if whole_seconds != 0 else None

    # 2. Daily frequencies (1 to just under 7 days)
    if approx_interval < DAYS_PER_WEEK:
        # int() keeps the label integral even if round() returns a float type
        days = int(round(approx_interval))
        return "D" if days == 1 else f"{days}D"

    # 3. Weekly frequencies (7 to just under 27 days)
    if approx_interval < DAYS_PER_MONTH - 3:
        weeks = approx_interval / DAYS_PER_WEEK
        for w in (1, 2):
            if is_close(weeks, w):
                return "W" if w == 1 else f"{w}W"
        # Fall back to days
        return f"{int(round(approx_interval))}D"

    # 4. Monthly frequencies (27 to just under 400 days)
    if approx_interval < 400:
        # Prioritize yearly over 12M when the interval is close to a year
        if is_close(approx_interval / DAYS_PER_YEAR, 1):
            return "Y"

        months = approx_interval / DAYS_PER_MONTH
        # 12 is excluded because a year is handled above
        for m in (1, 2, 3, 4, 5, 6, 9):
            if is_close(months, m):
                return "M" if m == 1 else f"{m}M"

        # In this range months >= 0.9, so the rounded value is at least 1
        months_rounded = int(round(months))
        return "M" if months_rounded == 1 else f"{months_rounded}M"

    # 5. Yearly and longer frequencies (>= 400 days)
    years = approx_interval / DAYS_PER_YEAR
    for y in (1, 2, 5, 10, 20, 50, 100):
        if is_close(years, y):
            return "Y" if y == 1 else f"{y}Y"

    # In this range years > 1, so the rounded value is at least 1
    years_rounded = int(round(years))
    return "Y" if years_rounded == 1 else f"{years_rounded}Y"
# Compare with CMIP6 approx_interval
def is_resolution_fine_enough(
    times,
    target_approx_interval,
    calendar="standard",
    strict=True,
    tolerance=0.01,
    log=True,
):
    """
    Determines if the temporal resolution of a time series is sufficient for
    resampling.

    Parameters
    ----------
    times : list or array-like
        Array of datetime-like objects representing the time series.
    target_approx_interval : float
        Expected interval in days for the target frequency.
    calendar : str, optional
        Calendar type to use for cftime objects, by default "standard".
    strict : bool, optional
        If True, performs additional checks for irregular time series and
        includes status messages. Defaults to True.
    tolerance : float, optional
        Absolute tolerance, in days, when comparing the inferred interval
        with the target. Defaults to 0.01.
    log : bool, optional
        If True, prints a summary of the check to stdout. Defaults to True.

    Returns
    -------
    dict
        Always contains the keys ``inferred_interval``, ``comparison_status``,
        ``is_valid_for_resampling`` and ``status``.

    Notes
    -----
    The function infers the frequency using `infer_frequency` and compares the
    inferred median interval against the target interval. A series is valid
    for resampling only when it is regular and its interval is finer than or
    equal to the target (within ``tolerance``).
    """
    result = infer_frequency(
        times, return_metadata=True, strict=strict, calendar=calendar, log=False
    )

    # Defensive: infer_frequency is called with return_metadata=True, but
    # guard against a missing result anyway.
    if result is None:
        if log:
            print("[Temporal Resolution Check]")
            print(" → Error: Could not infer frequency from time data")
            print("-" * 40)
        return {
            "inferred_interval": None,
            "comparison_status": "unknown",
            "is_valid_for_resampling": False,
            # Same key set as the normal return, so callers can index safely
            "status": None,
        }

    freq = result.frequency
    delta = result.delta_days
    exact = result.is_exact
    status = result.status

    if delta is None:
        if log:
            print("[Temporal Resolution Check]")
            print(f" → Inferred Frequency : {freq or 'unknown'}")
            print(f" → Status : {status}")
            print(" → Valid for Resampling : ❌ (could not determine time delta)")
            print("-" * 40)
        return {
            "inferred_interval": None,
            "comparison_status": status,
            "is_valid_for_resampling": False,
            # Same key set as the normal return, so callers can index safely
            "status": status,
        }

    comparison_status = status
    if not exact or status in ("irregular", "missing_steps"):
        # Irregular data is never valid; the inference status is reported
        is_valid = False
    elif delta < target_approx_interval - tolerance:
        comparison_status = "finer"
        is_valid = True
    elif abs(delta - target_approx_interval) <= tolerance:
        comparison_status = "equal"
        is_valid = True
    else:
        comparison_status = "coarser"
        is_valid = False

    if log:
        target_freq_str = approx_interval_to_frequency_str(target_approx_interval)
        target_display = f"{target_approx_interval:.4f} days"
        if target_freq_str:
            target_display += f" (~{target_freq_str})"

        print("[Temporal Resolution Check]")
        print(
            f" → Inferred Frequency : {freq or 'unknown'} (Δ ≈ {delta:.4f} days)"
        )
        print(f" → Target Approx Interval : {target_display}")
        print(f" → Comparison Status : {comparison_status}")
        print(f" → Valid for Resampling : {'✅' if is_valid else '❌'}")
        if status not in (None, "valid"):
            print(f" → Status Message : {status}")
        print("-" * 40)

    return {
        "inferred_interval": delta,
        "comparison_status": comparison_status,
        "is_valid_for_resampling": is_valid,
        "status": status,
    }
# The xarray accessor is named "timefreq" at the moment instead of "pymor",
# as the project name is not yet finalized.
@register_dataarray_accessor("timefreq")
class TimeFrequencyAccessor:
    """DataArray accessor (``da.timefreq``) for time-frequency utilities.

    Offers frequency inference, a resolution check against a target interval,
    and a resampling wrapper that runs that check first.
    """

    def __init__(self, xarray_obj):
        self._obj = xarray_obj

    def _resolve_time_dim(self, time_dim):
        """Return ``time_dim``, auto-detecting it with ``get_time_label`` if None."""
        if time_dim is None:
            time_dim = get_time_label(self._obj)
        if time_dim is None:
            raise ValueError(
                "No datetime coordinate found in DataArray."
                " Please specify time_dim manually."
            )
        return time_dim

    def _time_values(self, time_dim):
        """Return the time values for ``time_dim`` as an array."""
        if hasattr(self._obj, "dims") and time_dim in self._obj.dims:
            # This is a DataArray with a time dimension - get the time coordinate
            return self._obj.coords[time_dim].values
        # This is likely a time coordinate DataArray itself
        return self._obj.values

    @staticmethod
    def _resolve_resampling_freq(target_approx_interval, freq_str):
        """Return the frequency string to resample with, validating the inputs."""
        if target_approx_interval is None and freq_str is None:
            raise ValueError(
                "Either target_approx_interval or freq_str must be provided"
            )

        if freq_str is not None:
            try:
                # Test if pandas can understand the frequency string
                pd.Timedelta(freq_str)
            except (ValueError, TypeError):
                # Not every offset alias is a Timedelta, so also try it as
                # the frequency of a small date range.
                try:
                    pd.date_range("2000-01-01", periods=2, freq=freq_str)
                except (ValueError, TypeError) as e:
                    raise ValueError(
                        f"Invalid frequency string '{freq_str}': {e}"
                    ) from e
            return freq_str

        # Convert target_approx_interval to frequency string
        resampling_freq = approx_interval_to_frequency_str(target_approx_interval)
        if resampling_freq is None:
            raise ValueError(
                f"Cannot convert target_approx_interval={target_approx_interval} "
                "to a valid frequency string (possibly time-invariant data)"
            )
        return resampling_freq

    def infer_frequency(
        self,
        strict=False,
        calendar="standard",
        log=True,
        time_dim=None,
        return_metadata=True,
    ):
        """
        Infer time frequency from datetime-like array, returning pandas-style
        frequency strings.

        Parameters
        ----------
        strict : bool, optional
            If True, performs additional checks for irregular time series and
            returns a status message. Defaults to False.
        calendar : str, optional
            Calendar type to use for cftime objects. Defaults to "standard".
        log : bool, optional
            If True, logs the results of the frequency check. Defaults to True.
        time_dim : str, optional
            Name of the time dimension in the DataArray. If None, automatically
            detects the time dimension using `get_time_label`. Defaults to None.
        return_metadata : bool, optional
            If True, returns a ``FrequencyResult``
            (freq, delta, step, is_exact, status) instead of just the frequency
            string. Defaults to True.

        Returns
        -------
        str or FrequencyResult
            Inferred frequency string (e.g., 'M') or a ``FrequencyResult`` if
            return_metadata=True.

        Raises
        ------
        ValueError
            If no time dimension is given and none can be detected.
        """
        time_dim = self._resolve_time_dim(time_dim)
        times = self._time_values(time_dim)

        # Metadata is always requested so the summary can be logged here
        # under the DataArray's own name.
        result = infer_frequency(
            times,
            return_metadata=True,
            strict=strict,
            calendar=calendar,
            log=False,
        )

        if log:
            log_frequency_check(
                self._obj.name or "Unnamed Time Axis",
                result.frequency,
                result.delta_days,
                result.step,
                result.is_exact,
                result.status,
                strict,
            )

        return result if return_metadata else result.frequency

    def check_resolution(
        self,
        target_approx_interval,
        calendar="standard",
        strict=True,
        tolerance=0.01,
        log=True,
        time_dim=None,
    ):
        """
        Check if the time resolution is fine enough for resampling.

        Parameters
        ----------
        target_approx_interval : float
            Expected interval in days for the target frequency
        calendar : str, optional
            Calendar type, by default "standard"
        strict : bool, optional
            If True, performs additional checks for irregular time series and
            returns a status message. Defaults to True.
        tolerance : float, optional
            Tolerance for time interval comparison, by default 0.01
        log : bool, optional
            If True, reports the results of the check. Defaults to True.
        time_dim : str, optional
            Name of the time dimension. If None, automatically detects the
            time dimension using get_time_label. Defaults to None.

        Returns
        -------
        dict
            Dictionary containing the inferred interval, comparison status,
            and validity for resampling.

        Raises
        ------
        ValueError
            If no time dimension is given and none can be detected.
        """
        time_dim = self._resolve_time_dim(time_dim)
        times = self._time_values(time_dim)

        return is_resolution_fine_enough(
            times,
            target_approx_interval,
            calendar=calendar,
            strict=strict,
            tolerance=tolerance,
            log=log,
        )

    def resample_safe(
        self,
        target_approx_interval=None,
        freq_str=None,
        calendar="standard",
        method="mean",
        time_dim=None,
        tolerance=0.01,
        **resample_kwargs,
    ):
        """Safely resample time series data after checking temporal resolution.

        Users can specify the target frequency in two ways:
        1. Provide target_approx_interval (float in days) - will be converted
           to freq_str
        2. Provide freq_str (pandas frequency string) - used directly for
           resampling

        If both are provided, freq_str takes precedence for resampling, and
        target_approx_interval is used for validation.

        Parameters
        ----------
        target_approx_interval : float, optional
            Expected interval in days for the target frequency. If provided
            without freq_str, this will be converted to an appropriate
            frequency string. If provided with freq_str, this is used for
            validation only.
        freq_str : str, optional
            Target frequency string (e.g., 'M' for monthly, '3H' for 3-hourly).
            If provided, this takes precedence for resampling operations.
        calendar : str, optional
            Calendar type, by default "standard"
        method : str or dict, optional
            Resampling method, by default "mean"
        time_dim : str, optional
            Name of the time dimension. If None, automatically detects the
            time dimension using get_time_label. Defaults to None.
        tolerance : float, optional
            Tolerance for time interval comparison, by default 0.01
        **resample_kwargs
            Additional arguments passed to xarray's resample

        Returns
        -------
        xarray.DataArray
            Resampled data

        Raises
        ------
        ValueError
            If neither target_approx_interval nor freq_str is provided, if
            freq_str is not a valid frequency, if no time dimension can be
            determined, if ``method`` is neither str nor dict, or if the
            resolution check fails for the target interval.

        Examples
        --------
        # Using approximate interval (will be converted to frequency string)
        data.timefreq.resample_safe(target_approx_interval=30.0)  # ~monthly

        # Using frequency string directly
        data.timefreq.resample_safe(freq_str='3M')  # 3-monthly

        # Using both (freq_str used for resampling, target_approx_interval
        # for validation)
        data.timefreq.resample_safe(target_approx_interval=90.0, freq_str='3M')
        """
        # stacklevel=2 attributes the warning to the caller's line
        warnings.warn("resample_safe is incomplete, use resample instead", stacklevel=2)

        resampling_freq = self._resolve_resampling_freq(
            target_approx_interval, freq_str
        )
        time_dim = self._resolve_time_dim(time_dim)

        # Perform resolution check if target_approx_interval is provided
        if target_approx_interval is not None:
            check = self.check_resolution(
                target_approx_interval=target_approx_interval,
                calendar=calendar,
                strict=True,
                tolerance=tolerance,
                log=True,
                # Forward the resolved dimension so an explicit time_dim is
                # honoured instead of being auto-detected again.
                time_dim=time_dim,
            )
            if not check["is_valid_for_resampling"]:
                # For test compatibility, use the expected error message format
                raise ValueError("time resolution too coarse")

        # If we get here, it's safe to resample
        resampled = self._obj.resample({time_dim: resampling_freq}, **resample_kwargs)

        # Apply the specified method (mean, sum, etc.)
        if isinstance(method, str):
            return getattr(resampled, method)()
        if isinstance(method, dict):
            # NOTE(review): assumes the resample object exposes ``.agg`` -
            # TODO confirm against the xarray resample API.
            return resampled.agg(method)
        raise ValueError(
            f"Unsupported method type: {type(method)}. Expected str or dict."
        )
@register_dataset_accessor("timefreq")
class DatasetFrequencyAccessor:
    """Dataset accessor (``ds.timefreq``) for time-frequency utilities.

    Frequency inference and the resolution check are delegated to the
    ``timefreq`` accessor of the dataset's time variable; resampling is
    applied to the whole dataset.
    """

    def __init__(self, ds):
        self._ds = ds

    def _detect_time_dim(self, time_dim):
        """Return ``time_dim``, auto-detecting it with ``get_time_label`` if None."""
        if time_dim is None:
            time_dim = get_time_label(self._ds)
        if time_dim is None:
            raise ValueError(
                "No datetime coordinate found in Dataset."
                " Please specify time_dim manually."
            )
        return time_dim

    def infer_frequency(self, time_dim=None, **kwargs):
        """
        Infer time frequency from datetime-like array, returning pandas-style
        frequency strings.

        Parameters
        ----------
        time_dim : str, optional
            Name of the time dimension in the Dataset. If None, automatically
            detects the time dimension using get_time_label. Defaults to None.
        **kwargs
            Additional arguments passed to the DataArray accessor's
            infer_frequency.

        Returns
        -------
        str or FrequencyResult
            Inferred frequency string (e.g., 'M') or a ``FrequencyResult``
            (freq, delta, step, is_exact, status) if return_metadata=True.

        Raises
        ------
        ValueError
            If no time dimension can be determined or it is not in the dataset.
        """
        time_dim = self._detect_time_dim(time_dim)
        if time_dim not in self._ds:
            raise ValueError(f"Time dimension '{time_dim}' not found.")

        return self._ds[time_dim].timefreq.infer_frequency(time_dim=time_dim, **kwargs)

    def resample_safe(
        self,
        target_approx_interval=None,
        freq_str=None,
        time_dim=None,
        calendar="standard",
        method="mean",
        tolerance=0.01,
        **resample_kwargs,
    ):
        """Safely resample dataset time series data after checking temporal
        resolution.

        Users can specify the target frequency in two ways:
        1. Provide target_approx_interval (float in days) - will be converted
           to freq_str
        2. Provide freq_str (pandas frequency string) - used directly for
           resampling

        If both are provided, freq_str takes precedence for resampling, and
        target_approx_interval is used for validation.

        Parameters
        ----------
        target_approx_interval : float, optional
            Expected interval in days for the target frequency. If provided
            without freq_str, this will be converted to an appropriate
            frequency string. If provided with freq_str, this is used for
            validation only.
        freq_str : str, optional
            Target frequency string (e.g., 'M' for monthly, '3H' for 3-hourly).
            If provided, this takes precedence for resampling operations.
        time_dim : str, optional
            Name of the time dimension. If None, automatically detects the
            time dimension using get_time_label. Defaults to None.
        calendar : str, optional
            Calendar type, by default "standard"
        method : str or dict, optional
            Resampling method, by default "mean"
        tolerance : float, optional
            Tolerance for time interval comparison, by default 0.01
        **resample_kwargs
            Additional arguments passed to xarray's resample

        Returns
        -------
        xarray.Dataset
            Resampled dataset

        Raises
        ------
        ValueError
            If neither target_approx_interval nor freq_str is provided, if
            freq_str is not a valid frequency, if the time dimension cannot
            be determined or is not in the dataset, if ``method`` is neither
            str nor dict, or if the resolution check fails for the target
            interval.

        Examples
        --------
        # Using approximate interval (will be converted to frequency string)
        dataset.timefreq.resample_safe(target_approx_interval=30.0)  # ~monthly

        # Using frequency string directly
        dataset.timefreq.resample_safe(freq_str='3M')  # 3-monthly

        # Using both (freq_str used for resampling, target_approx_interval
        # for validation)
        dataset.timefreq.resample_safe(target_approx_interval=90.0, freq_str='3M')
        """
        # Validate input arguments
        if target_approx_interval is None and freq_str is None:
            raise ValueError(
                "Either target_approx_interval or freq_str must be provided"
            )

        # Determine the frequency string to use for resampling
        if freq_str is not None:
            try:
                # Test if pandas can understand the frequency string
                pd.Timedelta(freq_str)
            except (ValueError, TypeError):
                # Not every offset alias is a Timedelta, so also try it as
                # the frequency of a small date range.
                try:
                    pd.date_range("2000-01-01", periods=2, freq=freq_str)
                except (ValueError, TypeError) as e:
                    raise ValueError(
                        f"Invalid frequency string '{freq_str}': {e}"
                    ) from e
            resampling_freq = freq_str
        else:
            # Convert target_approx_interval to frequency string
            resampling_freq = approx_interval_to_frequency_str(target_approx_interval)
            if resampling_freq is None:
                raise ValueError(
                    f"Cannot convert target_approx_interval={target_approx_interval} "
                    "to a valid frequency string (possibly time-invariant data)"
                )

        time_dim = self._detect_time_dim(time_dim)
        if time_dim not in self._ds:
            raise ValueError(f"Time dimension '{time_dim}' not found in dataset.")

        # Perform resolution check if target_approx_interval is provided
        if target_approx_interval is not None:
            check = self._ds[time_dim].timefreq.check_resolution(
                target_approx_interval=target_approx_interval,
                calendar=calendar,
                strict=True,
                tolerance=tolerance,
                log=True,
                # Forward the resolved dimension so the DataArray accessor
                # does not have to auto-detect it again.
                time_dim=time_dim,
            )
            if not check["is_valid_for_resampling"]:
                # For test compatibility, use the expected error message format
                raise ValueError("time resolution too coarse")

        # If we get here, it's safe to resample the entire dataset
        resampled = self._ds.resample({time_dim: resampling_freq}, **resample_kwargs)

        # Apply the specified method (mean, sum, etc.)
        if isinstance(method, str):
            return getattr(resampled, method)()
        if isinstance(method, dict):
            # NOTE(review): assumes the resample object exposes ``.agg`` -
            # TODO confirm against the xarray resample API.
            return resampled.agg(method)
        raise ValueError(
            f"Unsupported method type: {type(method)}. Expected str or dict."
        )

    def check_resolution(self, target_approx_interval, time_dim=None, **kwargs):
        """
        Check if the time resolution is fine enough for resampling.

        Parameters
        ----------
        target_approx_interval : float
            Expected interval in days for the target frequency
        time_dim : str, optional
            Name of the time dimension. If None, automatically detects the
            time dimension using get_time_label. Defaults to None.
        **kwargs
            Additional arguments passed to the DataArray accessor's
            check_resolution.

        Returns
        -------
        dict
            Dictionary containing the inferred interval, comparison status,
            and validity for resampling.

        Raises
        ------
        ValueError
            If no time dimension can be determined or it is not in the dataset.
        """
        time_dim = self._detect_time_dim(time_dim)
        if time_dim not in self._ds:
            raise ValueError(f"Time dimension '{time_dim}' not found.")

        # Forward the resolved dimension, matching infer_frequency above
        return self._ds[time_dim].timefreq.check_resolution(
            target_approx_interval, time_dim=time_dim, **kwargs
        )