add external predictions

kili-technology · Jun 15, 2022 · 1163c0f · 1163c0f
1 parent 71cfe64
commit 1163c0f
Show file tree

Hide file tree

Showing 3 changed files with 62 additions and 30 deletions.
diff --git a/kiliautoml/utils/mapper/create.py b/kiliautoml/utils/mapper/create.py
@@ -128,7 +128,7 @@ def __init__(
         else:
             raise NotImplementedError
 
-    def create_mapper(self):
+    def create_mapper(self, graph_name: str):
         # Compute embeddings
         kili_print("Computing embeddings")
         embeddings = self._get_embeddings()
@@ -189,7 +189,7 @@ def create_mapper(self):
             topic_list = ["topic_" + str(i) for i in range(10)]
             self.lens_names = self.lens_names + topic_list
 
-        temp = gudhi_to_KM(Mapper_kili)
+        temp = gudhi_to_KM(Mapper_kili, self.cat2id)
         mapper = km.KeplerMapper(verbose=2)
         _ = mapper.visualize(
             temp,
@@ -198,7 +198,7 @@ def create_mapper(self):
             custom_tooltips=tooltip_s,
             color_values=self.lens,
             color_function_name=self.lens_names,
-            title="Mapper_" + self.job_name,
+            title=graph_name,
             path_html="Mapper_" + self.job_name + ".html",
         )
         return Mapper_kili
@@ -236,20 +236,20 @@ def _get_assignments_and_lens_with_labels(self):
         # Create lens for statistic displayed in Mapper
         self.lens = np.column_stack(
             (
-                prediction_true_class,
                 label_id_array,
-                np.max(self.predictions, axis=1),
+                prediction_true_class,
                 predicted_class,
+                np.max(self.predictions, axis=1),
                 predicted_class == label_id_array,
             )
         )
 
         self.lens_names = [
-            "confidence_C",
-            "correct_class",
-            "confidence_PC",
-            "predicted_class",
-            "prediction_error",
+            "Correct class (CC)",
+            "Probability CC",
+            "Predicted class (PC)",
+            "Probability PC",
+            "Accuracy",
         ]
 
     def _get_assignments_and_lens_without_labels(self):
@@ -260,12 +260,12 @@ def _get_assignments_and_lens_without_labels(self):
 
         # Create lens for statistic displayed in Mapper
         self.lens = np.column_stack(
-            (np.max(self.predictions, axis=1), predicted_class, predicted_order[:, -2])
+            (predicted_class, np.max(self.predictions, axis=1), predicted_order[:, -2])
         )
         self.lens_names = [
-            "confidence_PC",
-            "predicted_class",
-            "alt_predicted_class",
+            "Predicted class (PC)",
+            "Probability PC",
+            "Alternate PC",
         ]
 
     def _get_custom_tooltip(self):

diff --git a/kiliautoml/utils/mapper/gudhi_mapper.py b/kiliautoml/utils/mapper/gudhi_mapper.py
@@ -670,7 +670,7 @@ def display_pic_from_mapper_node(
         plt.show()
 
 
-def gudhi_to_KM(mapper_cover_complex):
+def gudhi_to_KM(mapper_cover_complex, cat2id):
     """Convert mapper_cover_complex from Gudhi_mapper to a Kepler Mapper ready to use for visualization
     Args:
         mapper_cover_complex (dict): Cover complex computed with gudhi_mapper
@@ -681,12 +681,12 @@ def gudhi_to_KM(mapper_cover_complex):
 
     # extract metadata
     out["meta_data"] = {
+        "filter": "Model predictions",
+        "cover": "Confusion cover",
+        "input space": "embeddings (computed with Img2Vec efficient_b7 model)",
         "clustering": mapper_cover_complex.get_params()["clustering"],
-        "resolutions": mapper_cover_complex.get_params()["resolutions"],
-        "gains": mapper_cover_complex.get_params()["gains"],
-        "cover_name": mapper_cover_complex.get_params()["cover_name"],
-        "input_name": mapper_cover_complex.get_params()["input_name"],
     }
+    out["meta_data"].update(cat2id)
 
     # extract edges / links
     out["links"] = defaultdict(list)

diff --git a/mapper.py b/mapper.py
@@ -6,12 +6,7 @@
 from kili.client import Kili
 from tabulate import tabulate
 
-from commands.common_args import (
-    LabelErrorOptions,
-    Options,
-    PredictOptions,
-    TrainOptions,
-)
+from commands.common_args import Options, PredictOptions, TrainOptions
 from commands.predict import predict_one_job
 from kiliautoml.models import PyTorchVisionImageClassificationModel
 from kiliautoml.utils.constants import ModelFrameworkT, ModelNameT, ModelRepositoryT
@@ -41,7 +36,6 @@
     help="Asset repository (eg. /content/assets/)",
 )
 @click.option("--predictions-path", required=True, default=None, help="csv file with predictions")
-@LabelErrorOptions.cv_folds
 @click.option(
     "--focus-class",
     default=None,
@@ -50,6 +44,11 @@
 )
 @PredictOptions.from_model
 @PredictOptions.from_project
+@click.option(
+    "--graph-name",
+    default="Mapper",
+    help="Name to be displayed in the KMapper html page",
+)
 def main(
     api_endpoint: str,
     api_key: str,
@@ -66,10 +65,10 @@ def main(
     predictions_path: Optional[str],
     batch_size: int,
     epochs: int,
-    cv_folds: int,
     focus_class: Optional[List[str]],
     from_model: Optional[ModelFrameworkT],
     from_project: Optional[str],
+    graph_name: str,
 ):
     """
     Main method for creating mapper
@@ -131,7 +130,7 @@ def main(
                     verbose=4,
                 )
 
-                training_losses = [job_name, training_loss]
+                training_losses = [[job_name, training_loss]]
                 print(tabulate(training_losses, headers=["job_name", "training_loss"]))
 
                 job_predictions = predict_one_job(
@@ -157,7 +156,40 @@ def main(
 
                 predictions = job_predictions.predictions_probability
             else:
-                predictions = list(pd.read_csv(predictions_path))
+                with open("/content/predictions.csv", "r") as csv:
+                    first_line = csv.readline()
+                    next_lines = csv.readlines()
+                    ncol = first_line.count(",") + 1
+                    nrows = len(next_lines) + 1
+
+                if ncol == len(job["content"]["categories"]):
+                    index_col = None
+                elif ncol == len(job["content"]["categories"]):
+                    index_col = None
+                else:
+                    raise ValueError(
+                        "Number of columns in predictions should be either "
+                        "the number of categories or the number of categories + 1 for the external id"
+                    )
+
+                if nrows == len(assets):
+                    header = None
+                elif ncol == len(assets) + 1:
+                    header = 0
+                else:
+                    raise ValueError(
+                        "Number of rows in predictions should be either "
+                        "the number of assets or the number of assets + 1 for the header"
+                    )
+
+                predictions_df = pd.read_csv(predictions_path, index_col=index_col, header=header)
+
+                if index_col is None:
+                    predictions = list(predictions_df.to_numpy())
+                else:
+                    predictions = []
+                    for asset in assets:
+                        predictions.append(predictions_df.loc[asset["externalId"]].to_numpy())
 
             mapper_image_classification = MapperClassification(
                 api_key=api_key,
@@ -171,7 +203,7 @@ def main(
                 focus_class=focus_class,
             )
 
-            _ = mapper_image_classification.create_mapper(cv_folds)
+            _ = mapper_image_classification.create_mapper(graph_name)
 
         else:
             raise NotImplementedError