diff --git a/kiliautoml/utils/mapper/create.py b/kiliautoml/utils/mapper/create.py index 7adad842..f3fd683e 100644 --- a/kiliautoml/utils/mapper/create.py +++ b/kiliautoml/utils/mapper/create.py @@ -128,7 +128,7 @@ def __init__( else: raise NotImplementedError - def create_mapper(self): + def create_mapper(self, graph_name: str): # Compute embeddings kili_print("Computing embeddings") embeddings = self._get_embeddings() @@ -189,7 +189,7 @@ def create_mapper(self): topic_list = ["topic_" + str(i) for i in range(10)] self.lens_names = self.lens_names + topic_list - temp = gudhi_to_KM(Mapper_kili) + temp = gudhi_to_KM(Mapper_kili, self.cat2id) mapper = km.KeplerMapper(verbose=2) _ = mapper.visualize( temp, @@ -198,7 +198,7 @@ def create_mapper(self): custom_tooltips=tooltip_s, color_values=self.lens, color_function_name=self.lens_names, - title="Mapper_" + self.job_name, + title=graph_name, path_html="Mapper_" + self.job_name + ".html", ) return Mapper_kili @@ -236,20 +236,20 @@ def _get_assignments_and_lens_with_labels(self): # Create lens for statistic displayed in Mapper self.lens = np.column_stack( ( - prediction_true_class, label_id_array, - np.max(self.predictions, axis=1), + prediction_true_class, predicted_class, + np.max(self.predictions, axis=1), predicted_class == label_id_array, ) ) self.lens_names = [ - "confidence_C", - "correct_class", - "confidence_PC", - "predicted_class", - "prediction_error", + "Correct class (CC)", + "Probability CC", + "Predicted class (PC)", + "Probability PC", + "Accuracy", ] def _get_assignments_and_lens_without_labels(self): @@ -260,12 +260,12 @@ def _get_assignments_and_lens_without_labels(self): # Create lens for statistic displayed in Mapper self.lens = np.column_stack( - (np.max(self.predictions, axis=1), predicted_class, predicted_order[:, -2]) + (predicted_class, np.max(self.predictions, axis=1), predicted_order[:, -2]) ) self.lens_names = [ - "confidence_PC", - "predicted_class", - "alt_predicted_class", + "Predicted 
class (PC)", + "Probability PC", + "Alternate PC", ] def _get_custom_tooltip(self): diff --git a/kiliautoml/utils/mapper/gudhi_mapper.py b/kiliautoml/utils/mapper/gudhi_mapper.py index fe4e3d2f..d90ba2f9 100644 --- a/kiliautoml/utils/mapper/gudhi_mapper.py +++ b/kiliautoml/utils/mapper/gudhi_mapper.py @@ -670,7 +670,7 @@ def display_pic_from_mapper_node( plt.show() -def gudhi_to_KM(mapper_cover_complex): +def gudhi_to_KM(mapper_cover_complex, cat2id): """Convert mapper_cover_complex from Gudhi_mapper to a Kepler Mapper ready to use for visualization Args: mapper_cover_complex (dict): Cover complex computed with gudhi_mapper @@ -681,12 +681,12 @@ def gudhi_to_KM(mapper_cover_complex): # extract metadata out["meta_data"] = { + "filter": "Model predictions", + "cover": "Confusion cover", + "input space": "embeddings (computed with Img2Vec efficient_b7 model)", "clustering": mapper_cover_complex.get_params()["clustering"], - "resolutions": mapper_cover_complex.get_params()["resolutions"], - "gains": mapper_cover_complex.get_params()["gains"], - "cover_name": mapper_cover_complex.get_params()["cover_name"], - "input_name": mapper_cover_complex.get_params()["input_name"], } + out["meta_data"].update(cat2id) # extract edges / links out["links"] = defaultdict(list) diff --git a/mapper.py b/mapper.py index 0cc6992f..767f7a81 100644 --- a/mapper.py +++ b/mapper.py @@ -6,12 +6,7 @@ from kili.client import Kili from tabulate import tabulate -from commands.common_args import ( - LabelErrorOptions, - Options, - PredictOptions, - TrainOptions, -) +from commands.common_args import Options, PredictOptions, TrainOptions from commands.predict import predict_one_job from kiliautoml.models import PyTorchVisionImageClassificationModel from kiliautoml.utils.constants import ModelFrameworkT, ModelNameT, ModelRepositoryT @@ -41,7 +36,6 @@ help="Asset repository (eg. 
/content/assets/)", ) @click.option("--predictions-path", required=True, default=None, help="csv file with predictions") -@LabelErrorOptions.cv_folds @click.option( "--focus-class", default=None, @@ -50,6 +44,11 @@ ) @PredictOptions.from_model @PredictOptions.from_project +@click.option( + "--graph-name", + default="Mapper", + help="Name to be displayed in the KMapper html page", +) def main( api_endpoint: str, api_key: str, @@ -66,10 +65,10 @@ def main( predictions_path: Optional[str], batch_size: int, epochs: int, - cv_folds: int, focus_class: Optional[List[str]], from_model: Optional[ModelFrameworkT], from_project: Optional[str], + graph_name: str, ): """ Main method for creating mapper @@ -131,7 +130,7 @@ def main( verbose=4, ) - training_losses = [job_name, training_loss] + training_losses = [[job_name, training_loss]] print(tabulate(training_losses, headers=["job_name", "training_loss"])) job_predictions = predict_one_job( @@ -157,7 +156,40 @@ def main( predictions = job_predictions.predictions_probability else: - predictions = list(pd.read_csv(predictions_path)) + with open("/content/predictions.csv", "r") as csv: + first_line = csv.readline() + next_lines = csv.readlines() + ncol = first_line.count(",") + 1 + nrows = len(next_lines) + 1 + + if ncol == len(job["content"]["categories"]): + index_col = None + elif ncol == len(job["content"]["categories"]): + index_col = None + else: + raise ValueError( + "Number of columns in predictions should be either " + "the number of categories or the number of categories + 1 for the external id" + ) + + if nrows == len(assets): + header = None + elif ncol == len(assets) + 1: + header = 0 + else: + raise ValueError( + "Number of rows in predictions should be either " + "the number of assets or the number of assets + 1 for the header" + ) + + predictions_df = pd.read_csv(predictions_path, index_col=index_col, header=header) + + if index_col is None: + predictions = list(predictions_df.to_numpy()) + else: + predictions = [] 
for asset in assets: + predictions.append(predictions_df.loc[asset["externalId"]].to_numpy()) mapper_image_classification = MapperClassification( api_key=api_key, @@ -171,7 +203,7 @@ def main( focus_class=focus_class, ) - _ = mapper_image_classification.create_mapper(cv_folds) + _ = mapper_image_classification.create_mapper(graph_name) else: raise NotImplementedError