"""
Contain the ClimateVariable class and its related functions.
A climate variable is a structure that contains all the pre-processed input variables
needed to compute a climate index.
A climate index may require one or more climate variables to be computed.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
import numpy as np
import xarray
from icclim._core.constants import REFERENCE_PERIOD_INDEX, UNITS_KEY
from icclim._core.generic.threshold.percentile import PercentileThreshold
from icclim._core.input_parsing import (
DEFAULT_INPUT_FREQUENCY,
build_reference_da,
build_studied_data,
guess_standard_variable,
read_dataset,
)
from icclim.exception import InvalidIcclimArgumentError
from icclim.frequency import Frequency, FrequencyRegistry
from icclim.threshold.factory import build_threshold
if TYPE_CHECKING:
from collections.abc import Sequence
from datetime import datetime
import jinja2
from xarray.core.dataarray import DataArray
from icclim._core.model.global_metadata import GlobalMetadata
from icclim._core.model.icclim_types import InFileBaseType
from icclim._core.model.in_file_dictionary import InFileDictionary
from icclim._core.model.standard_index import StandardIndex
from icclim._core.model.standard_variable import StandardVariable
from icclim._core.model.threshold import Threshold
@dataclass
class ClimateVariable:
    """
    Group the pre-processed input of a climate index for a single variable.

    It bundles the input variable (``studied_data``), its associated metadata
    (``standard_var``) if any, and the threshold it must be compared to.

    Attributes
    ----------
    name: str
        Name of the variable.
    standard_var: StandardVariable | None
        CF metadata bound to the standard variable used for this
        ClimateVariable, if any.
    studied_data: DataArray
        The variable studied.
    global_metadata: GlobalMetadata
        Metadata carried over from the input dataset. The builders of this
        module fill it with the "history", "source" and "time_encoding" keys.
    source_frequency: Frequency
        Time frequency of the input data.
    threshold: Threshold | None
        Threshold for this variable, if any.
    is_reference: bool
        True when the variable is the reference variable built from the base
        period rather than a studied variable.
    """

    # `name` is documented above and passed by keyword by every builder of this
    # module; it must be declared for the generated __init__ to accept it.
    name: str
    standard_var: StandardVariable | None
    studied_data: DataArray
    global_metadata: GlobalMetadata
    source_frequency: Frequency
    threshold: Threshold | None = None
    is_reference: bool = False
[docs]
def build_climate_vars(
    climate_vars_dict: dict[str, InFileDictionary],
    ignore_Feb29th: bool,  # noqa: N803
    time_range: Sequence[datetime | str] | None,
    base_period: Sequence[str] | None,
    standard_index: StandardIndex | None,
    is_compared_to_reference: bool,
) -> list[ClimateVariable]:
    """
    Turn a dictionary of input files into a list of ClimateVariable.

    Parameters
    ----------
    climate_vars_dict: dict of str, InFileDictionary
        The input files, keyed by variable name.
    ignore_Feb29th: bool
        Whether to ignore February 29th.
    time_range: Sequence of datetime | str | None
        The time range to consider.
    base_period: Sequence of str | None
        The base period to consider, used to build a reference variable for
        indices such as anomaly.
    standard_index: StandardIndex | None
        The standard index to compute.
    is_compared_to_reference: bool
        Whether the studied data is compared to a reference period. When it is,
        and either no standard index is given or the standard index is
        qualified as a reference period index, a reference variable is appended
        to the result.

    Returns
    -------
    list of ClimateVariable that will be used to compute the climate index.

    Raises
    ------
    InvalidIcclimArgumentError
        If the standard index needs more input variables than were provided.
    """
    has_standard_index = standard_index is not None
    if has_standard_index and len(standard_index.input_variables) > len(
        climate_vars_dict
    ):
        msg = (
            f"Index {standard_index.short_name} needs"
            f" {len(standard_index.input_variables)} variables."
            f" Please provide them with an xarray.Dataset, netCDF file(s) or a"
            f" zarr store."
        )
        raise InvalidIcclimArgumentError(msg)

    # Inputs are matched with the index's standard variables by position.
    climate_vars = [
        build_climate_var(
            var_name,
            var_input,
            ignore_Feb29th,
            time_range,
            standard_var=(
                standard_index.input_variables[position]
                if has_standard_index
                else None
            ),
        )
        for position, (var_name, var_input) in enumerate(climate_vars_dict.items())
    ]

    needs_reference = _standard_index_needs_ref(
        standard_index,
        is_compared_to_reference,
    ) or _generic_index_needs_ref(standard_index, is_compared_to_reference)
    if needs_reference:
        # The reference variable is derived from the first input variable.
        reference_standard_var = (
            standard_index.input_variables[0] if has_standard_index else None
        )
        climate_vars.append(
            _build_reference_variable(
                base_period,
                climate_vars_dict,
                standard_var=reference_standard_var,
            )
        )
    return climate_vars
[docs]
def build_climate_var(
    climate_var_name: str,
    climate_var_data: InFileDictionary | InFileBaseType,
    ignore_Feb29th: bool,  # noqa: N803
    time_range: Sequence[datetime | str] | None,
    standard_var: StandardVariable | None,
) -> ClimateVariable:
    """
    Build a ClimateVariable object.

    Parameters
    ----------
    climate_var_name : str
        The name of the climate variable.
    climate_var_data : InFileDictionary | InFileBaseType
        The input data for the climate variable. Either a dictionary with a
        'study' key and an optional 'thresholds' key, or the study input itself.
    ignore_Feb29th : bool
        Flag indicating whether to ignore February 29th in the time range.
    time_range : Sequence[datetime | str] | None
        The time range to consider for the climate variable, or None.
    standard_var : StandardVariable | None
        The standard variable to use for the climate variable. If None, it is
        guessed from the input data.

    Returns
    -------
    ClimateVariable
        The built ClimateVariable object.

    Notes
    -----
    The steps are, in order: read the study dataset, guess the standard
    variable when none is given, build the studied data, then build the
    threshold if one was provided.
    The studied data is converted with the default units of the standard
    variable when there is one.
    The threshold unit is taken from the units attribute of the studied data.
    The "history", "source" and time encoding of the study dataset are kept as
    global metadata.

    Examples
    --------
    >>> climate_var_name = "tas"
    >>> climate_var_data = {"study": "/path/to/data.nc", "thresholds": ">= 27 degC"}
    >>> ignore_Feb29th = False
    >>> time_range = ["2000-01-01", "2010-12-31"]
    >>> standard_var = StandardVariableRegistry.TAS
    >>> climate_var = build_climate_var(
    ...     climate_var_name, climate_var_data, ignore_Feb29th, time_range, standard_var
    ... )
    """
    # Split the input into the study source and the optional raw threshold.
    if isinstance(climate_var_data, dict):
        study_source = climate_var_data["study"]
        raw_threshold = climate_var_data.get("thresholds", None)
    else:
        study_source = climate_var_data
        raw_threshold = None
    study_ds = read_dataset(study_source, standard_var, climate_var_name)
    source_da = study_ds[climate_var_name]

    if standard_var is None:
        standard_var = guess_standard_variable(source_da)
    target_units = standard_var.default_units if standard_var else None
    studied_data = build_studied_data(
        source_da,
        time_range,
        ignore_Feb29th,
        target_units,
    )

    threshold = None
    if raw_threshold is not None:
        threshold = _build_threshold(
            climate_var_thresh=raw_threshold,
            original_data=source_da,
            conversion_unit=studied_data.attrs[UNITS_KEY],
        )

    metadata = {
        "history": study_ds.attrs.get("history", None),
        "source": study_ds.attrs.get("source", None),
        "time_encoding": study_ds.time.encoding,
    }
    # Fall back on the default input frequency when none can be inferred.
    frequency = FrequencyRegistry.lookup(
        xarray.infer_freq(studied_data.time) or DEFAULT_INPUT_FREQUENCY,
    )
    return ClimateVariable(
        name=climate_var_name,
        standard_var=standard_var,
        studied_data=studied_data,
        threshold=threshold,
        global_metadata=metadata,
        source_frequency=frequency,
    )
[docs]
def must_run_bootstrap(da: DataArray, threshold: Threshold | None) -> bool:
    """
    Tell whether the bootstrap method must be run.

    Parameters
    ----------
    da : DataArray
        The studied data.
    threshold : Threshold | None
        The threshold that contains the reference period.

    Returns
    -------
    bool
        Whether to run the bootstrap method.

    Notes
    -----
    Bootstrapping is only considered for a PercentileThreshold flagged as
    ``is_doy_per_threshold``.
    It is then skipped when no year, a single year or every year of the
    studied data `da` overlaps the reference period defined by the threshold.
    """
    # TODO @bzah: Don't run bootstrap when not on extreme percentile
    #             (run only below 20? 10? and above 80? 90?)
    #             https://github.com/cerfacs-globc/icclim/issues/289
    if threshold is None:
        return False
    if not isinstance(threshold, PercentileThreshold):
        return False
    if not threshold.is_doy_per_threshold:
        return False

    reference = threshold.value
    all_years = np.unique(da.indexes.get("time").year)
    data_in_reference = da.sel(time=_get_ref_period_slice(reference))
    shared_years = np.unique(data_in_reference.indexes.get("time").year)
    # Strictly more than one overlapping year, but not all the studied years.
    return 1 < len(shared_years) < len(all_years)
def _standard_index_needs_ref(
    standard_index: StandardIndex | None, is_compared_to_reference: bool
) -> bool:
    """
    Tell whether a standard index requires a reference variable.

    Parameters
    ----------
    standard_index : StandardIndex | None
        The standard index to compute, if any.
    is_compared_to_reference : bool
        Whether the studied data is compared to a reference period.

    Returns
    -------
    bool
        True only when a standard index is given, its qualifiers include
        REFERENCE_PERIOD_INDEX and `is_compared_to_reference` is truthy.
    """
    # bool() keeps the declared return type: a bare `and` chain would leak
    # None or an empty qualifiers container to the caller.
    return bool(
        standard_index
        and standard_index.qualifiers
        and REFERENCE_PERIOD_INDEX in standard_index.qualifiers
        and is_compared_to_reference
    )
def _generic_index_needs_ref(
    standard_index: StandardIndex, is_compared_to_reference: bool
) -> bool:
    """
    Tell whether a generic index requires a reference variable.

    An index is treated as generic when no standard index is given
    (`standard_index` is None). In that case the answer is
    `is_compared_to_reference`; otherwise it is False.
    """
    if standard_index is not None:
        return False
    return is_compared_to_reference
[docs]
def _build_reference_variable(
    reference_period: Sequence[str] | None,
    in_files: dict[str, InFileDictionary],
    standard_var: StandardVariable | None,
) -> ClimateVariable:
    """
    Add a secondary variable for indices such as anomaly.

    This kind of indices require exactly two variables, but the second variable
    can just be a subset of the first one.

    Parameters
    ----------
    reference_period : Sequence[str] | None
        The period used to build the reference data. It must not be None.
    in_files : dict of str, InFileDictionary
        The input files, keyed by variable name. Only the first entry is used.
        Its value may be a dictionary with a 'study' key or the study input
        itself.
    standard_var : StandardVariable | None
        The standard variable of the first input variable, if any.

    Returns
    -------
    ClimateVariable
        The reference variable, named after the first input variable with a
        "_reference" suffix and flagged with ``is_reference=True``.

    Raises
    ------
    InvalidIcclimArgumentError
        If `reference_period` is None.
    """
    if reference_period is None:
        msg = "Can't build a reference variable without a `base_period_time_range`"
        raise InvalidIcclimArgumentError(msg)
    var_name, var_input = next(iter(in_files.items()))
    # The check must target the entry itself, not the enclosing dictionary:
    # an entry may be a bare study input, as accepted by build_climate_var.
    study_source = var_input["study"] if isinstance(var_input, dict) else var_input
    study_ds = read_dataset(
        study_source,
        standard_var=standard_var,
        var_name=var_name,
    )
    studied_data = build_reference_da(
        study_ds[var_name],
        reference_period,
        only_leap_years=False,
        percentile_min_value=None,
    )
    return ClimateVariable(
        name=var_name + "_reference",
        standard_var=standard_var,
        studied_data=studied_data,
        threshold=None,
        global_metadata={
            "history": study_ds.attrs.get("history", None),
            "source": study_ds.attrs.get("source", None),
            "time_encoding": study_ds.time.encoding,
        },
        # Fall back on the default input frequency when none can be inferred.
        source_frequency=FrequencyRegistry.lookup(
            xarray.infer_freq(studied_data.time) or DEFAULT_INPUT_FREQUENCY,
        ),
        is_reference=True,
    )
def _build_threshold(
    climate_var_thresh: str | Threshold,
    original_data: DataArray,
    conversion_unit: str,
) -> Threshold:
    """
    Return a threshold that is prepared and expressed in `conversion_unit`.

    Parameters
    ----------
    climate_var_thresh : str | Threshold
        The threshold, either already built or as a string handed to
        `build_threshold`.
    original_data : DataArray
        The data passed to the threshold's `prepare` when it has one and is
        not ready yet.
    conversion_unit : str
        The unit assigned to the threshold.

    Returns
    -------
    Threshold
        The threshold; when a Threshold instance was given, that same instance
        is updated and returned.
    """
    threshold = (
        build_threshold(climate_var_thresh)
        if isinstance(climate_var_thresh, str)
        else climate_var_thresh
    )
    needs_preparation = threshold.prepare is not None and not threshold.is_ready
    if needs_preparation:
        threshold.prepare(original_data)
    threshold.unit = conversion_unit
    return threshold
def _get_ref_period_slice(da: DataArray) -> slice:
    """
    Build the time slice covering the reference period of `da`.

    The "climatology_bounds" attribute is used when present. Otherwise the
    slice goes from the first to the last time value of `da`, both formatted
    as "%Y-%m-%d".
    """
    bounds = da.attrs.get("climatology_bounds", None)
    if bounds is not None:
        return slice(*bounds)
    # Stepping by (length - 1) from position 0 keeps the first and last values.
    # NOTE(review): a one-element time axis gives a step of 0 here - confirm
    # callers never pass one.
    last_position = len(da.time) - 1
    first_and_last = da.time[0::last_position]
    labels = first_and_last.dt.strftime("%Y-%m-%d").to_numpy()
    return slice(*labels)