Skip to content

Commit

Permalink
add external predictions
Browse files: browse the repository at this point in the history
  • Loading branch information
FlorianLeRoyKili committed Jun 15, 2022
1 parent 71cfe64 commit 1163c0f
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 30 deletions.
28 changes: 14 additions & 14 deletions kiliautoml/utils/mapper/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def __init__(
else:
raise NotImplementedError

def create_mapper(self):
def create_mapper(self, graph_name: str):
# Compute embeddings
kili_print("Computing embeddings")
embeddings = self._get_embeddings()
Expand Down Expand Up @@ -189,7 +189,7 @@ def create_mapper(self):
topic_list = ["topic_" + str(i) for i in range(10)]
self.lens_names = self.lens_names + topic_list

temp = gudhi_to_KM(Mapper_kili)
temp = gudhi_to_KM(Mapper_kili, self.cat2id)
mapper = km.KeplerMapper(verbose=2)
_ = mapper.visualize(
temp,
Expand All @@ -198,7 +198,7 @@ def create_mapper(self):
custom_tooltips=tooltip_s,
color_values=self.lens,
color_function_name=self.lens_names,
title="Mapper_" + self.job_name,
title=graph_name,
path_html="Mapper_" + self.job_name + ".html",
)
return Mapper_kili
Expand Down Expand Up @@ -236,20 +236,20 @@ def _get_assignments_and_lens_with_labels(self):
# Create lens for statistic displayed in Mapper
self.lens = np.column_stack(
(
prediction_true_class,
label_id_array,
np.max(self.predictions, axis=1),
prediction_true_class,
predicted_class,
np.max(self.predictions, axis=1),
predicted_class == label_id_array,
)
)

self.lens_names = [
"confidence_C",
"correct_class",
"confidence_PC",
"predicted_class",
"prediction_error",
"Correct class (CC)",
"Probability CC",
"Predicted class (PC)",
"Probability PC",
"Accuracy",
]

def _get_assignments_and_lens_without_labels(self):
Expand All @@ -260,12 +260,12 @@ def _get_assignments_and_lens_without_labels(self):

# Create lens for statistic displayed in Mapper
self.lens = np.column_stack(
(np.max(self.predictions, axis=1), predicted_class, predicted_order[:, -2])
(predicted_class, np.max(self.predictions, axis=1), predicted_order[:, -2])
)
self.lens_names = [
"confidence_PC",
"predicted_class",
"alt_predicted_class",
"Predicted class (PC)",
"Probability PC",
"Alternate PC",
]

def _get_custom_tooltip(self):
Expand Down
10 changes: 5 additions & 5 deletions kiliautoml/utils/mapper/gudhi_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,7 +670,7 @@ def display_pic_from_mapper_node(
plt.show()


def gudhi_to_KM(mapper_cover_complex):
def gudhi_to_KM(mapper_cover_complex, cat2id):
"""Convert mapper_cover_complex from Gudhi_mapper to a Kepler Mapper ready to use for visualization
Args:
mapper_cover_complex (dict): Cover complex computed with gudhi_mapper
Expand All @@ -681,12 +681,12 @@ def gudhi_to_KM(mapper_cover_complex):

# extract metadata
out["meta_data"] = {
"filter": "Model predictions",
"cover": "Confusion cover",
"input space": "embeddings (computed with Img2Vec efficient_b7 model",
"clustering": mapper_cover_complex.get_params()["clustering"],
"resolutions": mapper_cover_complex.get_params()["resolutions"],
"gains": mapper_cover_complex.get_params()["gains"],
"cover_name": mapper_cover_complex.get_params()["cover_name"],
"input_name": mapper_cover_complex.get_params()["input_name"],
}
out["meta_data"].update(cat2id)

# extract edges / links
out["links"] = defaultdict(list)
Expand Down
54 changes: 43 additions & 11 deletions mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,7 @@
from kili.client import Kili
from tabulate import tabulate

from commands.common_args import (
LabelErrorOptions,
Options,
PredictOptions,
TrainOptions,
)
from commands.common_args import Options, PredictOptions, TrainOptions
from commands.predict import predict_one_job
from kiliautoml.models import PyTorchVisionImageClassificationModel
from kiliautoml.utils.constants import ModelFrameworkT, ModelNameT, ModelRepositoryT
Expand Down Expand Up @@ -41,7 +36,6 @@
help="Asset repository (eg. /content/assets/)",
)
@click.option("--predictions-path", required=True, default=None, help="csv file with predictions")
@LabelErrorOptions.cv_folds
@click.option(
"--focus-class",
default=None,
Expand All @@ -50,6 +44,11 @@
)
@PredictOptions.from_model
@PredictOptions.from_project
@click.option(
"--graph-name",
default="Mapper",
help="Name to de displayed in the KMapper html page",
)
def main(
api_endpoint: str,
api_key: str,
Expand All @@ -66,10 +65,10 @@ def main(
predictions_path: Optional[str],
batch_size: int,
epochs: int,
cv_folds: int,
focus_class: Optional[List[str]],
from_model: Optional[ModelFrameworkT],
from_project: Optional[str],
graph_name: str,
):
"""
Main method for creating mapper
Expand Down Expand Up @@ -131,7 +130,7 @@ def main(
verbose=4,
)

training_losses = [job_name, training_loss]
training_losses = [[job_name, training_loss]]
print(tabulate(training_losses, headers=["job_name", "training_loss"]))

job_predictions = predict_one_job(
Expand All @@ -157,7 +156,40 @@ def main(

predictions = job_predictions.predictions_probability
else:
predictions = list(pd.read_csv(predictions_path))
with open("/content/predictions.csv", "r") as csv:
first_line = csv.readline()
next_lines = csv.readlines()
ncol = first_line.count(",") + 1
nrows = len(next_lines) + 1

if ncol == len(job["content"]["categories"]):
index_col = None
elif ncol == len(job["content"]["categories"]):
index_col = None
else:
raise ValueError(
"Number of column in predictions should be either "
"the number of category of the number of category + 1 for the external id"
)

if nrows == len(assets):
header = None
elif ncol == len(assets) + 1:
header = 0
else:
raise ValueError(
"Number of rows in predictions should be either "
"the number of assets of the number of assets + 1 for the header"
)

predictions_df = pd.read_csv(predictions_path, index_col=index_col, header=header)

if index_col is None:
predictions = list(predictions_df.to_numpy())
else:
predictions = []
for asset in assets:
predictions.append(predictions_df.loc[asset["externalId"]].to_numpy())

mapper_image_classification = MapperClassification(
api_key=api_key,
Expand All @@ -171,7 +203,7 @@ def main(
focus_class=focus_class,
)

_ = mapper_image_classification.create_mapper(cv_folds)
_ = mapper_image_classification.create_mapper(graph_name)

else:
raise NotImplementedError
Expand Down

0 comments on commit 1163c0f

Please sign in to comment.