diff --git a/src/qa4sm_reader/globals.py b/src/qa4sm_reader/globals.py index db4efe6..7ced79f 100644 --- a/src/qa4sm_reader/globals.py +++ b/src/qa4sm_reader/globals.py @@ -28,9 +28,8 @@ # === boxplot_basic defaults === boxplot_printnumbers = True # Print 'median', 'nObs', 'stdDev' to the boxplot_basic. -boxplot_figsize = [6.30, 4.68] # size of the output figure in inches. NO MORE USED. -boxplot_height = 4.68 -boxplot_width = 1.7 # times (n+1), where n is the number of boxes. +boxplot_height = 6 +boxplot_width = 2 # times (n+1), where n is the number of boxes. boxplot_title_len = 8 * boxplot_width # times the number of boxes. maximum length of plot title in chars. # === watermark defaults === @@ -59,19 +58,33 @@ } # 0=common metrics, 2=paired metrics (2 datasets), 3=triple metrics (TC, 3 datasets) -metric_groups = {0: ['n_obs'], - 2: ['R', 'p_R', 'rho','p_rho', 'RMSD', 'BIAS', - 'urmsd', 'mse', 'mse_corr', 'mse_bias', 'mse_var', - 'RSS', 'tau', 'p_tau'], - 3: ['snr', 'err_std', 'beta']} +metric_groups = { + 0: ['n_obs'], + 2: ['R', 'p_R', 'rho','p_rho', 'RMSD', 'BIAS', + 'urmsd', 'mse', 'mse_corr', 'mse_bias', 'mse_var', + 'RSS', 'tau', 'p_tau' + ], + 3: ['snr', 'err_std', 'beta'] +} + # === variable template === # how the metric is separated from the rest -var_name_metric_sep = {0: "{metric}", 2: "{metric}_between_", - 3: "{metric}_{mds_id:d}-{mds}_between_"} +var_name_metric_sep = { + 0: "{metric}", + 2: "{metric}_between_", + 3: "{metric}_{mds_id:d}-{mds}_between_" +} +var_name_CI = { + 0: "{metric}_ci_{bound}_between_", + 2: "{metric}_ci_{bound}_between_", + 3: "{metric}_ci_{bound}_{mds_id:d}-{mds}_between_" +} # how two datasets are separated, ids must be marked as numbers with :d! -var_name_ds_sep = {0: None, 2: "{ref_id:d}-{ref_ds}_and_{sat_id0:d}-{sat_ds0}", - 3: "{ref_id:d}-{ref_ds}_and_{sat_id0:d}-{sat_ds0}_and_{sat_id1:d}-{sat_ds1}"} +var_name_ds_sep = { + 0: None, 2: "{ref_id:d}-{ref_ds}_and_{sat_id0:d}-{sat_ds0}", + 3: "{ref_id:d}-{ref_ds}_and_{sat_id0:d}-{sat_ds0}_and_{sat_id1:d}-{sat_ds1}" +} # === metadata tempplates === _ref_ds_attr = 'val_ref' # global meta values variable that links to the reference dc @@ -80,8 +93,11 @@ _version_short_name_attr = 'val_dc_version{:d}' # attribute convention for other datasets _version_pretty_name_attr = 'val_dc_version_pretty_name{:d}' # attribute convention for other datasets -_variable_pretty_name = {0: "{}", 2: "{} of {} \n with {} as reference", - 3: "{} of {} \n against {}, {}"} # format should have (metric, ds, ref, other ds) +# format should have (metric, ds, ref, other ds) +_variable_pretty_name = { + 0: "{}", 2: "{} of {} \n with {} as reference", + 3: "{} of {} \n against {}, {}" +} _colormaps = { # from /qa4sm/validator/validation/graphics.py 'R': _cclasses['div_better'], @@ -179,17 +195,17 @@ # units for all datasets _metric_units = { # from /qa4sm/validator/validation/graphics.py - 'ISMN': r'm^3 m^{-3}', - 'C3S': r'm^3 m^{-3}', - 'GLDAS': r'm^3 m^{-3}', + 'ISMN': r'm³/m³', + 'C3S': r'm³/m³', + 'GLDAS': r'm³/m³', 'ASCAT': r'percentage of saturation', - 'SMAP': r'm^3 m^{-3}', - 'ERA5': r'm^3 m^{-3}', - 'ERA5_LAND': r'm^3 m^{-3}', + 'SMAP': r'm³/m³', + 'ERA5': r'm³/m³', + 'ERA5_LAND': r'm³/m³', 'ESA_CCI_SM_active': r'percentage of saturation', - 'ESA_CCI_SM_combined': r'm^3 m^{-3}', - 'ESA_CCI_SM_passive': r'm^3 m^{-3}', - 'SMOS': r'm^3 m^{-3}', + 'ESA_CCI_SM_combined': r'm³/m³', + 'ESA_CCI_SM_passive': r'm³/m³', + 'SMOS': r'm³/m³', } # label name for all metrics diff --git a/src/qa4sm_reader/handlers.py b/src/qa4sm_reader/handlers.py index 8c2f877..dab4a1a 100644 --- a/src/qa4sm_reader/handlers.py +++ b/src/qa4sm_reader/handlers.py @@ -105,12 +105,12 @@ def _dc_names(self, dc:int) -> dict: @property def ref_id(self): """Id of the reference dataset as in the variable names""" - return self._ref_dc() - self._offset_id_dc + return self._ref_dc() - self.offset @property def others_id(self): """Id of the other datasets as in the variable names""" - return [dc - self._offset_id_dc for dc in self._dcs().keys()] + return [dc - self.offset for dc in self._dcs().keys()] def _id2dc(self, id:int) -> int: """ @@ -217,7 +217,9 @@ def __init__(self, varname, global_attrs, values=None): if self.g: self.Metric = QA4SMMetric(self.metric) self.ref_ds, self.metric_ds, self.other_ds = self.get_varmeta() - self.pretty_name = self._pretty_name() + # if this is a CI variable, get whether it's the upper or lower bound + if self.is_CI: + self.bound = self.parts["bound"] @property def isempty(self) -> bool: @@ -237,19 +239,32 @@ def id(self): else: return self.ref_ds[0] - def _pretty_name(self): + @property + def is_CI(self): + """True if the Variable is the confidence interval of a metric""" + if self.g: + return "bound" in self.parts.keys() + else: + return False + + @property + def pretty_name(self): """Create a nice name for the variable""" - name = globals._variable_pretty_name[self.g] + template = globals._variable_pretty_name[self.g] if self.g == 0: - return name.format(self.metric) + name = template.format(self.metric) elif self.g == 2: - return name.format(self.Metric.pretty_name, self.metric_ds[1]['pretty_title'], + name = template.format(self.Metric.pretty_name, self.metric_ds[1]['pretty_title'], self.ref_ds[1]['pretty_title']) elif self.g == 3: - return name.format(self.Metric.pretty_name, self.metric_ds[1]['pretty_title'], + name = template.format(self.Metric.pretty_name, self.metric_ds[1]['pretty_title'], self.ref_ds[1]['pretty_title'], self.other_ds[1]['pretty_title']) + if self.is_CI: + name = "Confidence Interval of " + name + + return name def _parse_varname(self) -> (str, int, dict): """ @@ -276,6 +291,12 @@ def _parse_varname(self) -> (str, int, dict): if parts is not None and parts['metric'] in globals.metric_groups[g]: return parts['metric'], g, parts.named + # perhaps it's a CI variable + else: + pattern = '{}{}'.format(globals.var_name_CI[g], template) + parts = parse(pattern, self.varname) + if parts is not None and parts['metric'] in globals.metric_groups[g]: + return parts['metric'], g, parts.named return None, None, None @@ -348,3 +369,13 @@ def _get_attribute(self, attr:str): return value + @property + def has_CIs(self): + """Boolean property for metrics with or without confidence intervals""" + it_does = False + for n, Var in enumerate(self.variables): + if Var.is_CI(): + it_does = True + break + + return it_does diff --git a/src/qa4sm_reader/img.py b/src/qa4sm_reader/img.py index b273e32..aa54c34 100644 --- a/src/qa4sm_reader/img.py +++ b/src/qa4sm_reader/img.py @@ -98,6 +98,16 @@ def _open_ds(self, extent=None, period=None): else: return ds + @property + def has_CIs(self): + """True if the validation result contains confidence intervals""" + cis = False + # check if there is any CI Var + for Var in self._iter_vars(): + if Var.is_CI: + cis = True + return cis + def create_image_name(self) -> str: """Create a unique name for the QA4SMImage from the netCDF file""" ref = self.datasets.ref['pretty_title'] @@ -202,8 +212,13 @@ def _iter_vars(self, only_metrics=False, **filter_parms) -> iter: continue if filter_parms: for key, val in filter_parms.items(): - if getattr(Var, key) == val: - yield Var + if getattr(Var, key) == val: # check all attribute individually + check = True + else: + check = False # does not match requirements + break + if check == True: + yield Var else: yield Var @@ -379,7 +394,7 @@ def _metric_stats(self, metric, id=None) -> list: globals._metric_units_HTML[ds_name['short_name']]), Var.g]) # put the separate variable statistics in the same list metric_stats.append(var_stats) - + return metric_stats def stats_df(self) -> pd.DataFrame: diff --git a/src/qa4sm_reader/plot_utils.py b/src/qa4sm_reader/plot_utils.py index 53dd81d..0e53352 100644 --- a/src/qa4sm_reader/plot_utils.py +++ b/src/qa4sm_reader/plot_utils.py @@ -11,6 +11,7 @@ import matplotlib.pyplot as plt import matplotlib.ticker as mticker import matplotlib.gridspec as gridspec +from matplotlib.patches import PathPatch, Patch from cartopy import config as cconfig import cartopy.feature as cfeature from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER @@ -304,7 +305,7 @@ def style_map(ax, plot_extent, add_grid=True, map_resolution=globals.naturaleart add_topo=False, add_coastline=True, add_land=True, add_borders=True, add_us_states=False): ax.set_extent(plot_extent, crs=globals.data_crs) - ax.outline_patch.set_linewidth(0.4) + ax.spines["geo"].set_linewidth(0.4) if add_grid: # add gridlines. Bcs a bug in cartopy, draw girdlines first and then grid labels. # https://github.com/SciTools/cartopy/issues/1342 @@ -362,7 +363,7 @@ def style_map(ax, plot_extent, add_grid=True, map_resolution=globals.naturaleart return ax -def make_watermark(fig, placement=globals.watermark_pos, for_map=False, offset=0.02): #todo: adjust space of watermark +def make_watermark(fig, placement=globals.watermark_pos, for_map=False, offset=0.02): """ Adds a watermark to fig and adjusts the current axis to make sure there is enough padding around the watermarks. @@ -402,7 +403,6 @@ def make_watermark(fig, placement=globals.watermark_pos, for_map=False, offset=0 else: raise NotImplementedError - def _make_cbar(fig, im, cax, ref_short:str, metric:str, label=None): """ Make colorbar to use in plots @@ -439,7 +439,64 @@ def _make_cbar(fig, im, cax, ref_short:str, metric:str, label=None): return fig, im, cax -def boxplot(df, label=None, figsize=None, dpi=100, **kwargs): +def _CI_difference(fig, ax, ci): + """ + Insert the median value of the upper and lower CI difference + + Parameters + ---------- + fig: matplotlib.figure.Figure + figure with CIs + ci: list + list of upper and lower ci dataframes + """ + lower_pos = [] + for ax in fig.axes: + n = 0 + # iterating through axes artists: + for c in ax.get_children(): + # searching for PathPatches + if isinstance(c, PathPatch): + # different width whether it's the metric or the CIs + if n in np.arange(0, 100, 3): + # getting current width of box: + p = c.get_path() + verts = p.vertices + verts_sub = verts[:-1] + xmin = np.min(verts_sub[:, 0]) + lower_pos.append(xmin) + n += 1 + for ci_df, xmin in zip(ci, lower_pos): + diff = ci_df["upper"] - ci_df["lower"] + ci_range = float(diff.mean()) + ypos = float(ci_df["lower"].min()) + ax.annotate( + "Mean CI\nRange:\n {:.2g}".format(ci_range), + xy = (xmin - 0.2, ypos), + horizontalalignment="center" + ) + +def patch_styling( + box_dict, + facecolor +) -> None: + """Define style of the boxplots""" + for n, (patch, median) in enumerate(zip(box_dict["boxes"], box_dict["medians"])): + patch.set(color="grey", facecolor=facecolor, linewidth=1.6, alpha=0.7) + median.set(color="grey", linewidth=1.6) + for (whis, caps) in zip(box_dict["whiskers"], box_dict["caps"]): + whis.set(color="grey", linewidth=1.6) + caps.set(color="grey", linewidth=1.6) + +def boxplot( + df, + ci=None, + label=None, + figsize=None, + dpi=100, + spacing=0.35, + **kwargs +): """ Create a boxplot_basic from the variables in df. The box shows the quartiles of the dataset while the whiskers extend @@ -451,15 +508,16 @@ def boxplot(df, label=None, figsize=None, dpi=100, **kwargs): ---------- df : pandas.DataFrame DataFrame containing 'lat', 'lon' and (multiple) 'var' Series. - title : str, optional (default: None) - Title of the plot. If None, no title is added. + ci: list + list of Dataframes containing "upper" and "lower" CIs label : str, optional - Label of the y axis, describing the metric. If None, a label is autogenerated from metadata. - The default is None. + Label of the y axis, describing the metric. The default is None. figsize : tuple, optional Figure size in inches. The default is globals.map_figsize. dpi : int, optional Resolution for raster graphic output. The default is globals.dpi. + spacing: float, optional. + Space between the central boxplot and the CIs. Default is 0.3 Returns ------- @@ -467,27 +525,78 @@ def boxplot(df, label=None, figsize=None, dpi=100, **kwargs): the boxplot ax : matplotlib.axes.Axes """ - df = df.copy() + values = df.copy() # make plot - sns.set_style("whitegrid") fig, ax = plt.subplots(figsize=figsize, dpi=dpi) - ax = sns.boxplot(data=df, - ax=ax, - width=0.15, - showfliers=False, - color='white', - **kwargs) - sns.despine() # remove ugly spines (=border around plot) right and top. - + center_pos = np.arange(len(values.columns))*2 + # styling + ticklabels = values.columns + if kwargs is None: + kwargs = {} + kwargs.update(patch_artist=True, return_type="dict") + # changes necessary to have confidence intervals in the plot + if ci: + upper, lower = [], [] + for n, intervals in enumerate(ci): + lower.append(intervals["lower"]) + upper.append(intervals["upper"]) + lower = pd.concat(lower, ignore_index=True, axis=1) + upper = pd.concat(upper, ignore_index=True, axis=1) + low = lower.boxplot( + positions=center_pos - spacing, + showfliers=False, + widths=0.15, + **kwargs + ) + up = upper.boxplot( + positions=center_pos + spacing, + showfliers=False, + widths=0.15, + **kwargs + ) + patch_styling(low, 'skyblue') + patch_styling(up, 'tomato') + # create plot + cen = values.boxplot( + positions=center_pos, + showfliers=False, + widths=0.3, + **kwargs + ) + patch_styling(cen, 'white') + plt.xticks(center_pos, ticklabels) + if ci: + low_ci = Patch(color='skyblue', alpha=0.7, label='Lower CI') + up_ci = Patch(color='tomato', alpha=0.7, label='Upper CI') + #_CI_difference(fig, ax, ci) + plt.legend( + handles=[low_ci, up_ci], + fontsize=8, + loc="best" + ) + # provide y label if label is not None: - ax.set_ylabel(label, weight='normal') + plt.ylabel(label, weight='normal') + plt.grid(axis='x') + ax.spines['right'].set_visible(False) + ax.spines['top'].set_visible(False) return fig, ax -def mapplot(df, metric, ref_short, ref_grid_stepsize=None, - plot_extent=None, colormap=None, projection=None, - add_cbar=True, label=None, figsize=globals.map_figsize, - dpi=globals.dpi, diff_range=None, **style_kwargs): +def mapplot( + df, metric, + ref_short, + ref_grid_stepsize=None, + plot_extent=None, + colormap=None, + projection=None, + add_cbar=True, + label=None, + figsize=globals.map_figsize, + dpi=globals.dpi, + diff_range=None, + **style_kwargs +): """ Create an overview map from df using values as color. Plots a scatterplot for ISMN and an image plot for other input values. diff --git a/src/qa4sm_reader/plotter.py b/src/qa4sm_reader/plotter.py index 72d2156..2a08d5e 100644 --- a/src/qa4sm_reader/plotter.py +++ b/src/qa4sm_reader/plotter.py @@ -101,7 +101,7 @@ def _box_stats(ds:pd.Series, med:bool=True, iqr:bool=True, count:bool=True) -> s met_str = [] if med: - met_str.append('median: {:.3g}'.format(ds.median())) + met_str.append('Median: {:.3g}'.format(ds.median())) if iqr: met_str.append('IQR: {:.3g}'.format(iqr)) if count: @@ -294,7 +294,6 @@ def _yield_values(self, metric:str, tc:bool=False) -> tuple: for n, Var in enumerate(Vars): values = Var.values[Var.varname] - # changes if it's a common-type Var if Var.g == 0: box_cap_ds = 'All datasets' @@ -311,12 +310,15 @@ def _yield_values(self, metric:str, tc:bool=False) -> tuple: yield df, Var - def _boxplot_definition(self, metric:str, - df:pd.DataFrame, - type:str, - offset=0.08, - Var=None, - **kwargs) -> tuple: + def _boxplot_definition( + self, metric:str, + df:pd.DataFrame, + type:str, + ci=None, + offset=0.07, + Var=None, + **kwargs + ) -> tuple: """ Define parameters of plot @@ -326,6 +328,11 @@ def _boxplot_definition(self, metric:str, dataframe to plot type: str one of _titles_lut + ci: dict + Dict of dataframes with the lower and upper confidence intervals + shape: {"upper"/"lower": [CIs]} + xticks: list + caption to each boxplot (or triplet thereof) offset: float offset of boxplots Var: QA4SMMetricVariable, optional. Default is None @@ -339,7 +346,13 @@ def _boxplot_definition(self, metric:str, # generate plot figwidth = globals.boxplot_width * (len(df.columns) + 1) figsize = [figwidth, globals.boxplot_height] - fig, ax = boxplot(df=df, label=label, figsize=figsize, dpi=globals.dpi) + fig, ax = boxplot( + df=df, + ci=ci, + label=label, + figsize=figsize, + dpi=globals.dpi + ) if not Var: # when we only need reference dataset from variables (i.e. is the same): for Var in self.img._iter_vars(**{'metric':metric}): @@ -348,8 +361,10 @@ def _boxplot_definition(self, metric:str, title = self.create_title(Var, type=type) ax.set_title(title, pad=globals.title_pad) # add watermark + if self.img.has_CIs: + offset = 0.06 # offset smaller as CI variables have a larger caption if Var.g == 0: - offset = 0.03 # offset smaller as common metrics have a shorter caption + offset = 0.02 # offset larger as common metrics have a shorter caption if globals.watermark_pos not in [None, False]: make_watermark(fig, offset=offset) @@ -383,11 +398,13 @@ def _save_plot(self, out_name:str, out_types:str='png') -> list: return fnames - def boxplot_basic(self, metric:str, - out_name:str=None, - out_types:str='png', - save_files:bool=False, - **plotting_kwargs) -> list: + def boxplot_basic( + self, metric:str, + out_name:str=None, + out_types:str='png', + save_files:bool=False, + **plotting_kwargs + ) -> list: """ Creates a boxplot for common and double metrics. Saves a figure and returns Matplotlib fig and ax objects for further processing. @@ -411,19 +428,41 @@ def boxplot_basic(self, metric:str, list of file names with all the extensions """ fnames, values = [], [] + ci = [] # we take the last iterated value for Var and use it for the file name for df, Var in self._yield_values(metric=metric): - values.append(df) - + if not Var.is_CI: + # concat upper and lower CI bounds of Variable, if present + bounds = [] + for ci_df, ci_Var in self._yield_values(metric=metric): + # make sure they refer to the right variable + if ci_Var.is_CI and (ci_Var.metric_ds == Var.metric_ds): + ci_df.columns = [ci_Var.bound] + bounds.append(ci_df) + if bounds: # could be that variable doesn't have CIs + bounds = pd.concat(bounds, axis=1) + # get the mean CI range + diff = bounds["upper"] - bounds["lower"] + ci_range = float(diff.mean()) + df.columns = [ + df.columns[0] + "\nMean CI range:" + " {:.3g}".format(ci_range) + ] + ci.append(bounds) + values.append(df) + # put all Variables in the same dataframe values = pd.concat(values) # values are all Nan or NaNf - not plotted if df.isnull().values.all(): return None # create plot - fig, ax = self._boxplot_definition(metric=metric, - df=values, - type='boxplot_basic', - **plotting_kwargs) + fig, ax = self._boxplot_definition( + metric=metric, + df=values, + type='boxplot_basic', + ci=ci, + **plotting_kwargs + ) if not out_name: out_name = self.create_filename(Var, type='boxplot_basic') # save or return plotting objects @@ -436,11 +475,13 @@ def boxplot_basic(self, metric:str, else: return fig, ax - def boxplot_tc(self, metric:str, - out_name:str=None, - out_types:str='png', - save_files:bool=False, - **plotting_kwargs) -> list: + def boxplot_tc( # todo: set limits to show confidence intervals + self, metric:str, + out_name:str=None, + out_types:str='png', + save_files:bool=False, + **plotting_kwargs + ) -> list: """ Creates a boxplot for TC metrics. Saves a figure and returns Matplotlib fig and ax objects for further processing. @@ -464,26 +505,57 @@ def boxplot_tc(self, metric:str, list of file names with all the extensions """ fnames = [] - metric_tc = {} # group Vars relative to the same dataset + # group Vars and CIs relative to the same dataset + metric_tc, ci = {}, {} for df, Var in self._yield_values(metric=metric, tc=True): - ref_meta, mds_meta, other_meta = Var.get_varmeta() - id, names = mds_meta - if id in metric_tc.keys(): - metric_tc[id][0].append(df) - else: - metric_tc[id] = [df], Var - - for dfs, Var in metric_tc.values(): + if not Var.is_CI: + id, names = Var.metric_ds + bounds = [] + for ci_df, ci_Var in self._yield_values(metric=metric): + # make sure they refer to the right variable + if ci_Var.is_CI and \ + (ci_Var.metric_ds == Var.metric_ds) and \ + (ci_Var.other_dss == Var.other_dss): + ci_df.columns = [ci_Var.bound] + bounds.append(ci_df) + if bounds: # could be that variable doesn't have CIs + bounds = pd.concat(bounds, axis=1) + # get the mean CI range + diff = bounds["upper"] - bounds["lower"] + ci_range = diff.mean() + df.columns = [ + df.columns[0] + "\nMean CI range:" + " {:.3g}".format(ci_range) + ] + if id in ci.keys(): + ci[id].append(bounds) + else: + ci[id] = [bounds] + if id in metric_tc.keys(): + metric_tc[id][0].append(df) + else: + metric_tc[id] = [df], Var + + for id, values in metric_tc.items(): + dfs, Var = values df = pd.concat(dfs) # values are all Nan or NaNf - not plotted if df.isnull().values.all(): continue + # necessary if statement to prevent key error when no CIs are in the netCDF + if ci: + bounds = ci[id] + else: + bounds = ci # create plot - fig, ax = self._boxplot_definition(metric=metric, - df=df, - type='boxplot_tc', - Var=Var, - **plotting_kwargs) + fig, ax = self._boxplot_definition( + metric=metric, + df=df, + ci=bounds, + type='boxplot_tc', + Var=Var, + **plotting_kwargs + ) # save. Below workaround to avoid same names if not out_name: save_name = self.create_filename(Var, type='boxplot_tc') @@ -498,11 +570,13 @@ def boxplot_tc(self, metric:str, if save_files: return fnames - def mapplot_var(self, Var, - out_name:str=None, - out_types:str='png', - save_files:bool=False, - **plotting_kwargs) -> list: + def mapplot_var( + self, Var, + out_name:str=None, + out_types:str='png', + save_files:bool=False, + **plotting_kwargs + ) -> list: """ Plots values to a map, using the values as color. Plots a scatterplot for ISMN and a image plot for other input values. @@ -550,7 +624,7 @@ def mapplot_var(self, Var, # use title for plot, make watermark ax.set_title(title, pad=globals.title_pad) if globals.watermark_pos not in [None, False]: - make_watermark(fig, globals.watermark_pos, for_map=True) + make_watermark(fig, globals.watermark_pos, for_map=True, offset=0.04) # save file or just return the image if save_files: @@ -561,10 +635,12 @@ def mapplot_var(self, Var, else: return fig, ax - def mapplot_metric(self, metric:str, - out_types:str='png', - save_files:bool=False, - **plotting_kwargs) -> list: + def mapplot_metric( + self, metric:str, + out_types:str='png', + save_files:bool=False, + **plotting_kwargs + ) -> list: """ Mapplot for all variables for a given metric in the loaded file. @@ -587,7 +663,7 @@ def mapplot_metric(self, metric:str, """ fnames = [] for Var in self.img._iter_vars(**{'metric':metric}): - if not Var.values.isnull().values.all(): + if not (Var.values.isnull().values.all() or Var.is_CI): fns = self.mapplot_var(Var, out_name=None, out_types=out_types, @@ -603,7 +679,12 @@ def mapplot_metric(self, metric:str, if fnames: return fnames - def plot_metric(self, metric:str, out_types:str='png', save_all:bool=True, **plotting_kwargs) -> tuple: + def plot_metric( + self, metric:str, + out_types:str='png', + save_all:bool=True, + **plotting_kwargs + ) -> tuple: """ Plot and save boxplot and mapplot for a certain metric diff --git a/tests/test_attr.py b/tests/test_attr.py index bd5a47b..f85e5bd 100644 --- a/tests/test_attr.py +++ b/tests/test_attr.py @@ -18,6 +18,11 @@ def test_tc_attributes(): ds = xr.open_dataset(testfile) return ds.attrs +def test_CI_attributes(): + testfile = os.path.join(os.path.dirname(__file__), 'test_data', 'tc', + "0-ERA5.swvl1_with_1-ESA_CCI_SM_combined.sm_with_2-ESA_CCI_SM_combined.sm_with_3-ESA_CCI_SM_combined.sm_with_4-ESA_CCI_SM_combined.sm.CI.nc") + ds = xr.open_dataset(testfile) + return ds.attrs def test_grid_stepsize(): testfile = os.path.join(os.path.dirname(__file__), 'test_data', 'basic', diff --git a/tests/test_data/comparing/.DS_Store b/tests/test_data/comparing/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/tests/test_data/comparing/.DS_Store differ diff --git a/tests/test_data/comparing/0-ISMN.soil moisture_with_1-C3S.sm-nonoverlap.nc b/tests/test_data/comparing/0-ISMN.soil moisture_with_1-C3S.sm-nonoverlap.nc new file mode 100644 index 0000000..048dc40 Binary files /dev/null and b/tests/test_data/comparing/0-ISMN.soil moisture_with_1-C3S.sm-nonoverlap.nc differ diff --git a/tests/test_data/comparing/0-ISMN.soil moisture_with_1-C3S.sm-overlap.nc b/tests/test_data/comparing/0-ISMN.soil moisture_with_1-C3S.sm-overlap.nc new file mode 100644 index 0000000..9a185ca Binary files /dev/null and b/tests/test_data/comparing/0-ISMN.soil moisture_with_1-C3S.sm-overlap.nc differ diff --git a/tests/test_data/comparing/0-ISMN.soil moisture_with_1-C3S.sm.nc b/tests/test_data/comparing/0-ISMN.soil moisture_with_1-C3S.sm.nc new file mode 100644 index 0000000..ea3978e Binary files /dev/null and b/tests/test_data/comparing/0-ISMN.soil moisture_with_1-C3S.sm.nc differ diff --git a/tests/test_data/tc/0-ERA5.swvl1_with_1-ESA_CCI_SM_combined.sm_with_2-ESA_CCI_SM_combined.sm_with_3-ESA_CCI_SM_combined.sm_with_4-ESA_CCI_SM_combined.sm.CI.nc b/tests/test_data/tc/0-ERA5.swvl1_with_1-ESA_CCI_SM_combined.sm_with_2-ESA_CCI_SM_combined.sm_with_3-ESA_CCI_SM_combined.sm_with_4-ESA_CCI_SM_combined.sm.CI.nc new file mode 100644 index 0000000..0d651ec Binary files /dev/null and b/tests/test_data/tc/0-ERA5.swvl1_with_1-ESA_CCI_SM_combined.sm_with_2-ESA_CCI_SM_combined.sm_with_3-ESA_CCI_SM_combined.sm_with_4-ESA_CCI_SM_combined.sm.CI.nc differ diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 4f1a849..bdbcfb1 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -4,7 +4,7 @@ from qa4sm_reader.handlers import QA4SMDatasets, QA4SMMetricVariable, QA4SMMetric -from tests.test_attr import test_attributes, test_tc_attributes +from tests.test_attr import test_attributes, test_tc_attributes, test_CI_attributes class TestQA4SMDatasets(unittest.TestCase): @@ -72,8 +72,7 @@ def test_properties(self): assert self.beta.ismetric def test_pretty_name(self): - print(self.beta._pretty_name()) - assert self.beta._pretty_name() == "TC scaling coefficient of C3S (v201812) \n against ERA5-Land (ERA5-Land test), H-SAF ASCAT SSM CDR (H113)" + assert self.beta.pretty_name == "TC scaling coefficient of C3S (v201812) \n against ERA5-Land (ERA5-Land test), H-SAF ASCAT SSM CDR (H113)" def test_parse_varname(self): for var in [self.beta, self.r, self.n_obs]: @@ -191,5 +190,22 @@ def test_get_attribute(self): assert self.R.g == self.r1.g == self.r2.g +class TestMetricVariableCI(unittest.TestCase): # todo: update with correct CI .nc file + """Test variables in image with confidence intervals""" + def setUp(self) -> None: + attrs = test_CI_attributes() + self.CI_Var = QA4SMMetricVariable( + "RMSD_ci_upper_between_0-ERA5_and_2-ESA_CCI_SM_combined", + attrs + ) + + def test_CI_var(self): + assert self.CI_Var.ismetric + assert self.CI_Var.is_CI + print(self.CI_Var.pretty_name) + assert self.CI_Var.pretty_name == "Confidence Interval of Root-mean-square deviation of ESA CCI " \ + "SM combined (v05.2) \n with ERA5 (v20190613) as reference" + assert self.CI_Var.bound == "upper" + if __name__ == '__main__': unittest.main() diff --git a/tests/test_image.py b/tests/test_image.py index 91ac300..b19a782 100644 --- a/tests/test_image.py +++ b/tests/test_image.py @@ -170,5 +170,35 @@ def test_stats_df(self): assert tot_stats == len(df) +class TestQA4SMImgWithCI(unittest.TestCase): # todo: update with correct CI .nc file + """Test image where some of the variables are confidence intervals""" + + def setUp(self) -> None: + self.testfile = "0-ERA5.swvl1_with_1-ESA_CCI_SM_combined.sm_with_2-ESA_CCI_SM_combined.sm_with_3-ESA_CCI_SM_combined.sm_with_4-ESA_CCI_SM_combined.sm.CI.nc" + self.testfile_path = os.path.join(os.path.dirname(__file__), '..','tests', + 'test_data', 'tc', self.testfile) + self.img = QA4SMImg(self.testfile_path, ignore_empty=False) + + def test_testfile(self): + someCIs = [ + "RMSD_ci_lower_between_0-ERA5_and_1-ESA_CCI_SM_combined", + "RMSD_ci_upper_between_0-ERA5_and_1-ESA_CCI_SM_combined" + ] + for CI_varname in someCIs: + assert CI_varname in self.img.varnames + + def test_CIs(self): + assert self.img.has_CIs + + def test_CI_in_Vars(self): + """Test that CI Variables are correctly assigned to a metric""" + for CI_varname in self.img._iter_vars(**{ + "metric":"RMSD", + "metric_ds":"2-ESA_CCI_SM_combined"}): + assert CI_varname in [ + "RMSD_ci_lower_between_0-ERA5_and_2-ESA_CCI_SM_combined", + "RMSD_ci_upper_between_0-ERA5_and_2-ESA_CCI_SM_combined" + ] + if __name__ == '__main__': unittest.main() diff --git a/tests/test_plotter.py b/tests/test_plotter.py index 4446a2e..3652106 100644 --- a/tests/test_plotter.py +++ b/tests/test_plotter.py @@ -201,5 +201,32 @@ def test_grid_creation(self): assert zz.count() != 0 assert origin == 'upper' + +class TestQA4SMMetaImgWithCIPlotter(unittest.TestCase): + """Test plotter works with confidence intervals. We test whether boxplots are working, but not whether CIs are + actually plotted correctly inside them. Current CI file includes bootstrapped intervals""" + + def setUp(self) -> None: + self.testfile = "0-ERA5.swvl1_with_1-ESA_CCI_SM_combined.sm_with_2-ESA_CCI_SM_combined.sm_with_3-ESA_CCI_SM_combined.sm_with_4-ESA_CCI_SM_combined.sm.CI.nc" + self.testfile_path = os.path.join(os.path.dirname(__file__), '..', 'tests', + 'test_data', 'tc', self.testfile) + self.plotdir = tempfile.mkdtemp() + self.img = QA4SMImg(self.testfile_path) + self.plotter = QA4SMPlotter(self.img, self.plotdir) + + def test_boxplot_basic(self): + bias_files = self.plotter.boxplot_basic('BIAS', out_types='png', save_files=True) # should be 1 + assert len(os.listdir(self.plotdir)) == 1 + assert len(list(bias_files)) == 1 + + shutil.rmtree(self.plotdir) + + def test_boxplot_tc(self): + snr_files = self.plotter.boxplot_tc('snr', out_types='svg', save_files=True) # should be 4 + assert len(os.listdir(self.plotdir)) == 4 + assert len(list(snr_files)) == 4 + + shutil.rmtree(self.plotdir) + if __name__ == '__main__': unittest.main()