Merge pull request #20 from pstradio/confidence_intervals_issue-483

Confidence intervals issue 483
awst-austria · Jun 4, 2021 · 56f7a95 · 56f7a95
2 parents 08c15bf + 9a97ad6
commit 56f7a95
Show file tree

Hide file tree

Showing 14 changed files with 442 additions and 112 deletions.
diff --git a/src/qa4sm_reader/globals.py b/src/qa4sm_reader/globals.py
@@ -28,9 +28,8 @@
 
 # === boxplot_basic defaults ===
 boxplot_printnumbers = True  # Print 'median', 'nObs', 'stdDev' to the boxplot_basic.
-boxplot_figsize = [6.30, 4.68]  # size of the output figure in inches. NO MORE USED.
-boxplot_height = 4.68
-boxplot_width = 1.7  # times (n+1), where n is the number of boxes.
+boxplot_height = 6
+boxplot_width = 2 # times (n+1), where n is the number of boxes.
 boxplot_title_len = 8 * boxplot_width  # times the number of boxes. maximum length of plot title in chars.
 
 # === watermark defaults ===
@@ -59,19 +58,33 @@
 }
 
 # 0=common metrics, 2=paired metrics (2 datasets), 3=triple metrics (TC, 3 datasets)
-metric_groups = {0: ['n_obs'],
-                 2: ['R', 'p_R', 'rho','p_rho', 'RMSD', 'BIAS',
-                     'urmsd', 'mse', 'mse_corr', 'mse_bias', 'mse_var',
-                     'RSS', 'tau', 'p_tau'],
-                 3: ['snr', 'err_std', 'beta']}
+metric_groups = {
+    0: ['n_obs'],
+    2: ['R', 'p_R', 'rho','p_rho', 'RMSD', 'BIAS',
+        'urmsd', 'mse', 'mse_corr', 'mse_bias', 'mse_var',
+        'RSS', 'tau', 'p_tau'
+        ],
+    3: ['snr', 'err_std', 'beta']
+}
+
 
 # === variable template ===
 # how the metric is separated from the rest
-var_name_metric_sep = {0: "{metric}", 2: "{metric}_between_",
-                       3: "{metric}_{mds_id:d}-{mds}_between_"}
+var_name_metric_sep = {
+    0: "{metric}",
+    2: "{metric}_between_",
+    3: "{metric}_{mds_id:d}-{mds}_between_"
+}
+var_name_CI = {
+    0: "{metric}_ci_{bound}_between_",
+    2: "{metric}_ci_{bound}_between_",
+    3: "{metric}_ci_{bound}_{mds_id:d}-{mds}_between_"
+}
 # how two datasets are separated, ids must be marked as numbers with :d!
-var_name_ds_sep = {0: None, 2: "{ref_id:d}-{ref_ds}_and_{sat_id0:d}-{sat_ds0}",
-                   3: "{ref_id:d}-{ref_ds}_and_{sat_id0:d}-{sat_ds0}_and_{sat_id1:d}-{sat_ds1}"}
+var_name_ds_sep = {
+    0: None, 2: "{ref_id:d}-{ref_ds}_and_{sat_id0:d}-{sat_ds0}",
+    3: "{ref_id:d}-{ref_ds}_and_{sat_id0:d}-{sat_ds0}_and_{sat_id1:d}-{sat_ds1}"
+}
 
 # === metadata tempplates ===
 _ref_ds_attr = 'val_ref' # global meta values variable that links to the reference dc
@@ -80,8 +93,11 @@
 _version_short_name_attr = 'val_dc_version{:d}' # attribute convention for other datasets
 _version_pretty_name_attr = 'val_dc_version_pretty_name{:d}' # attribute convention for other datasets
 
-_variable_pretty_name = {0: "{}", 2: "{} of {} \n with {} as reference",
-                         3: "{} of {} \n against {}, {}"}  # format should have (metric, ds, ref, other ds)
+# format should have (metric, ds, ref, other ds)
+_variable_pretty_name = {
+    0: "{}", 2: "{} of {} \n with {} as reference",
+    3: "{} of {} \n against {}, {}"
+}
 
 _colormaps = {  # from /qa4sm/validator/validation/graphics.py
     'R': _cclasses['div_better'],
@@ -179,17 +195,17 @@
 
 # units for all datasets
 _metric_units = {  # from /qa4sm/validator/validation/graphics.py
-    'ISMN': r'm^3 m^{-3}',
-    'C3S': r'm^3 m^{-3}',
-    'GLDAS': r'm^3 m^{-3}',
+    'ISMN': r'm³/m³',
+    'C3S': r'm³/m³',
+    'GLDAS': r'm³/m³',
     'ASCAT': r'percentage of saturation',
-    'SMAP': r'm^3 m^{-3}',
-    'ERA5': r'm^3 m^{-3}',
-    'ERA5_LAND': r'm^3 m^{-3}',
+    'SMAP': r'm³/m³',
+    'ERA5': r'm³/m³',
+    'ERA5_LAND': r'm³/m³',
     'ESA_CCI_SM_active': r'percentage of saturation',
-    'ESA_CCI_SM_combined': r'm^3 m^{-3}',
-    'ESA_CCI_SM_passive': r'm^3 m^{-3}',
-    'SMOS': r'm^3 m^{-3}',
+    'ESA_CCI_SM_combined': r'm³/m³',
+    'ESA_CCI_SM_passive': r'm³/m³',
+    'SMOS': r'm³/m³',
 }
 
 # label name for all metrics

diff --git a/src/qa4sm_reader/handlers.py b/src/qa4sm_reader/handlers.py
@@ -105,12 +105,12 @@ def _dc_names(self, dc:int) -> dict:
     @property
     def ref_id(self):
         """Id of the reference dataset as in the variable names"""
-        return self._ref_dc() - self._offset_id_dc
+        return self._ref_dc() - self.offset
 
     @property
     def others_id(self):
         """Id of the other datasets as in the variable names"""
-        return [dc - self._offset_id_dc for dc in self._dcs().keys()]
+        return [dc - self.offset for dc in self._dcs().keys()]
 
     def _id2dc(self, id:int) -> int:
         """
@@ -217,7 +217,9 @@ def __init__(self, varname, global_attrs, values=None):
         if self.g:
             self.Metric = QA4SMMetric(self.metric)
             self.ref_ds, self.metric_ds, self.other_ds = self.get_varmeta()
-            self.pretty_name = self._pretty_name()
+            # if this is a CI variable, get whether it's the upper or lower bound
+            if self.is_CI:
+                self.bound = self.parts["bound"]
 
     @property
     def isempty(self) -> bool:
@@ -237,19 +239,32 @@ def id(self):
             else:
                 return self.ref_ds[0]
 
-    def _pretty_name(self):
+    @property
+    def is_CI(self):
+        """
+        True if the Variable is the confidence interval of a metric, i.e. the
+        parsed name parts hold a "bound" entry; always False if self.g is falsy
+        """
+        return bool(self.g) and "bound" in self.parts
+
+    @property
+    def pretty_name(self):
         """Create a nice name for the variable"""
-        name = globals._variable_pretty_name[self.g]
+        template = globals._variable_pretty_name[self.g]
 
         if self.g == 0:
-            return name.format(self.metric)
+            name = template.format(self.metric)
 
         elif self.g == 2:
-            return name.format(self.Metric.pretty_name, self.metric_ds[1]['pretty_title'],
+            name = template.format(self.Metric.pretty_name, self.metric_ds[1]['pretty_title'],
                                self.ref_ds[1]['pretty_title'])
         elif self.g == 3:
-            return name.format(self.Metric.pretty_name, self.metric_ds[1]['pretty_title'],
+            name = template.format(self.Metric.pretty_name, self.metric_ds[1]['pretty_title'],
                                self.ref_ds[1]['pretty_title'], self.other_ds[1]['pretty_title'])
+        if self.is_CI:
+            name = "Confidence Interval of " + name
+
+        return name
 
     def _parse_varname(self) -> (str, int, dict):
         """
@@ -276,6 +291,12 @@ def _parse_varname(self) -> (str, int, dict):
 
             if parts is not None and parts['metric'] in globals.metric_groups[g]:
                 return parts['metric'], g, parts.named
+            # perhaps it's a CI variable
+            else:
+                pattern = '{}{}'.format(globals.var_name_CI[g], template)
+                parts = parse(pattern, self.varname)
+                if parts is not None and parts['metric'] in globals.metric_groups[g]:
+                    return parts['metric'], g, parts.named
 
         return None, None, None
 
@@ -348,3 +369,13 @@ def _get_attribute(self, attr:str):
 
         return value
 
+    @property
+    def has_CIs(self):
+        """
+        True if at least one variable of this metric is a confidence interval.
+
+        ``is_CI`` is declared as a property in this module, so it is read as
+        an attribute; calling it would fail on the boolean it returns.
+        """
+        # any() stops at the first CI variable, like the former early break
+        return any(Var.is_CI for Var in self.variables)
diff --git a/src/qa4sm_reader/img.py b/src/qa4sm_reader/img.py
@@ -98,6 +98,16 @@ def _open_ds(self, extent=None, period=None):
         else:
             return ds
 
+    @property
+    def has_CIs(self):
+        """
+        True if the validation result contains confidence intervals, i.e. if
+        any variable yielded by ``_iter_vars()`` has ``is_CI`` set.
+        """
+        # any() stops iterating at the first CI variable instead of
+        # scanning every remaining variable once the answer is known
+        return any(Var.is_CI for Var in self._iter_vars())
+
     def create_image_name(self) -> str:
         """Create a unique name for the QA4SMImage from the netCDF file"""
         ref = self.datasets.ref['pretty_title']
@@ -202,8 +212,13 @@ def _iter_vars(self, only_metrics=False, **filter_parms) -> iter:
                     continue
             if filter_parms:
                 for key, val in filter_parms.items():
-                    if getattr(Var, key) == val:
-                        yield Var
+                    if getattr(Var, key) == val:  # check all attribute individually
+                        check = True
+                    else:
+                        check = False  # does not match requirements
+                        break
+                if check == True:
+                    yield Var
             else:
                 yield Var
 
@@ -379,7 +394,7 @@ def _metric_stats(self, metric, id=None)  -> list:
                     globals._metric_units_HTML[ds_name['short_name']]), Var.g])
             # put the separate variable statistics in the same list
             metric_stats.append(var_stats)
-        
+
         return metric_stats
 
     def stats_df(self) -> pd.DataFrame: