From d04c96320cf93d9385fe652d06153f254a5f74b4 Mon Sep 17 00:00:00 2001 From: Florian_Le_Roy Date: Fri, 22 Jul 2022 12:03:00 +0200 Subject: [PATCH] rebase Mapper --- mapper.py => commands/mapper.py | 31 ++-- .../utils/mapper/Tuto_Mapper_AutoML.ipynb | 144 ------------------ kiliautoml/utils/mapper/create.py | 3 +- main.py | 3 +- notebooks/Tuto_Mapper.ipynb | 2 +- 5 files changed, 25 insertions(+), 158 deletions(-) rename mapper.py => commands/mapper.py (92%) delete mode 100644 kiliautoml/utils/mapper/Tuto_Mapper_AutoML.ipynb diff --git a/mapper.py b/commands/mapper.py similarity index 92% rename from mapper.py rename to commands/mapper.py index 8bbc5e7f..1fd0c5fc 100644 --- a/mapper.py +++ b/commands/mapper.py @@ -10,10 +10,22 @@ from commands.common_args import Options, PredictOptions, TrainOptions from commands.predict import predict_one_job from kiliautoml.models import PyTorchVisionImageClassificationModel -from kiliautoml.utils.constants import ModelFrameworkT, ModelNameT, ModelRepositoryT -from kiliautoml.utils.helpers import get_assets, get_label, get_project, kili_print +from kiliautoml.utils.helpers import ( + _get_label, + get_assets, + get_content_input_from_job, + get_project, + kili_print, +) from kiliautoml.utils.mapper.create import MapperClassification -from kiliautoml.utils.type import AssetStatusT, LabelMergeStrategyT +from kiliautoml.utils.type import ( + AssetStatusT, + LabelMergeStrategyT, + ModelFrameworkT, + ModelNameT, + ModelRepositoryT, + ProjectIdT, +) @click.command() @@ -53,7 +65,7 @@ def main( api_endpoint: str, api_key: str, - project_id: str, + project_id: ProjectIdT, clear_dataset_cache: bool, target_job: List[str], model_framework: ModelFrameworkT, @@ -68,7 +80,7 @@ def main( epochs: int, focus_class: Optional[List[str]], from_model: Optional[ModelFrameworkT], - from_project: Optional[str], + from_project: Optional[ProjectIdT], graph_name: str, ): """ @@ -87,7 +99,7 @@ def main( kili_print(f"Create Mapper for job: {job_name}") - content_input = job.get("content", {}).get("input") + content_input = get_content_input_from_job(job) ml_task = job.get("mlTask") tools = job.get("tools") if content_input == "radio" and ml_task == "CLASSIFICATION" and input_type == "IMAGE": @@ -101,13 +113,13 @@ def main( labeled_assets = [] labels = [] for asset in assets: - label = get_label(asset, label_merge_strategy) + label = _get_label(asset, job_name, label_merge_strategy) if (label is None) or (job_name not in label["jsonResponse"]): asset_id = asset["id"] warnings.warn(f"${asset_id}: No annotation for job ${job_name}") else: labeled_assets.append(asset) - labels.append(label["jsonResponse"][job_name]["categories"][0]["name"]) + labels.append(asset.get_annotations_classification(job_name)) if predictions_path is None: @@ -122,7 +134,6 @@ def main( training_loss = image_classification_model.train( assets=labeled_assets, - label_merge_strategy=label_merge_strategy, batch_size=batch_size, epochs=epochs, clear_dataset_cache=clear_dataset_cache, @@ -155,7 +166,7 @@ def main( clear_dataset_cache=clear_dataset_cache, ) - predictions = job_predictions.predictions_probability + predictions = job_predictions.predictions_probability # type: ignore else: with open("/content/predictions.csv", "r") as csv: first_line = csv.readline() diff --git a/kiliautoml/utils/mapper/Tuto_Mapper_AutoML.ipynb b/kiliautoml/utils/mapper/Tuto_Mapper_AutoML.ipynb deleted file mode 100644 index 24af8d11..00000000 --- a/kiliautoml/utils/mapper/Tuto_Mapper_AutoML.ipynb +++ /dev/null @@ -1,144 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "L9_fNq8zqAH_" - }, - "source": [ - "# Create Mapper from Kili Dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aKejop6ok_ne" - }, - "source": [ - "## Import requirements" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "850KFigkX0v9", - "outputId": "e18c0bfe-d128-4110-ff8f-d899fe84b491" - }, - "outputs": [], - "source": [ - "!pip install img2vec_pytorch\n", - "!pip install kili\n", - "!pip install gudhi\n", - "!pip install kmapper\n", - "!pip install datasets\n", - "!pip install transformers\n", - "import os\n", - "import sys\n", - "from kili.client import Kili\n", - "\n", - "git_token = getpass('Github token: ')\n", - "os.environ['GITHUB_TOKEN'] = git_token\n", - "!git clone --branch feature/ml-345-mapper https://$GITHUB_TOKEN@github.com/kili-technology/automl.git\n", - "\n", - "kili_api_key = getpass('Kili API Key: ')\n", - "os.environ[\"KILI_API_KEY\"] = kili_api_key\n", - "api_key = os.environ[\"KILI_API_KEY\"]\n", - "\n", - "kili = Kili(api_key=api_key)\n", - "\n", - "%cd /content/automl" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AvCWOIzvXNtb" - }, - "source": [ - "## Create Mapper" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "HJHIvtiAic0I" - }, - "outputs": [], - "source": [ - "# Image Classification - Roman Number - project id = cl2yugghg0iw10m0i8wh05o00\n", - "# Text Classification - Tweet Emotions - project_id = cl1uwuqvt5exk0mtha6lchugl" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IW53iJtMj0Bk" - }, - "outputs": [], - "source": [ - "\"\"\"\n", - "Mapper argument: \n", - "--api-endpoint, --api-key, --project-id , --target-job, --max-assets : \n", - "--assets-repository : Required, where to store downloaded assets\n", - "--asset-status-in: if None or TODO or ONGOING included, mapper.py will not use labels in the asset assignment\n", - "--cv-folds: Number of cv-folds used to compute predictions. Used only if not (None or TODO or ONGOING included in asset-status-in). \n", - "--focus-class: only use assets with labels or predictions inside focus-class\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "zt4hKCPohk0t", - "outputId": "8788e04b-5819-4b9c-8ebe-536d4fdce967" - }, - "outputs": [], - "source": [ - "!python mapper.py --project-id cl2yugghg0iw10m0i8wh05o00 --assets-repository /content/assets --asset-status-in labeled,reviewed --focus-class ix,x" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "XB1PvyRQaIIR", - "outputId": "27d1b412-afa7-475e-a2d7-85d48abe1b89" - }, - "outputs": [], - "source": [ - "!python mapper.py --project-id cl1uwuqvt5exk0mtha6lchugl --assets-repository /content/assets --asset-status-in labeled,reviewed" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "machine_shape": "hm", - "name": "Tuto_Mapper_AutoML.ipynb", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/kiliautoml/utils/mapper/create.py b/kiliautoml/utils/mapper/create.py index e089c3f2..c4a8e9fc 100644 --- a/kiliautoml/utils/mapper/create.py +++ b/kiliautoml/utils/mapper/create.py @@ -13,7 +13,6 @@ from tqdm import tqdm from transformers import AutoModel, AutoTokenizer # type: ignore -from kiliautoml.utils.constants import InputTypeT from kiliautoml.utils.download_assets import ( download_project_images, download_project_text, @@ -29,7 +28,7 @@ gudhi_to_KM, topic_score, ) -from kiliautoml.utils.type import JobT +from kiliautoml.utils.type import InputTypeT, JobT def embeddings_text(list_text: List[str]): diff --git a/main.py b/main.py index a2fef2f1..0bd2d8b5 100644 --- a/main.py +++ b/main.py @@ -22,6 +22,7 @@ sys.excepthook = ultratb.FormattedTB(mode="Verbose", color_scheme="Linux", call_pdb=False) from commands.label_errors import main as label_errors +from commands.mapper import main as mapper from commands.predict import main as predict from commands.prioritize import main as prioritize from commands.train import main as train @@ -36,7 +37,7 @@ def kiliautoml(): kiliautoml.add_command(predict, name="predict") kiliautoml.add_command(label_errors, name="label_errors") kiliautoml.add_command(prioritize, name="prioritize") - +kiliautoml.add_command(mapper, name="mapper") if __name__ == "__main__": kiliautoml() diff --git a/notebooks/Tuto_Mapper.ipynb b/notebooks/Tuto_Mapper.ipynb index 78b43f82..41e44bf4 100644 --- a/notebooks/Tuto_Mapper.ipynb +++ b/notebooks/Tuto_Mapper.ipynb @@ -432,7 +432,7 @@ }, "outputs": [], "source": [ - "!python automl/mapper.py --project-id $project_id --assets-repository /content/assets --predictions-path /content/predictions.csv" + "!PYTHONPATH=$(pwd) kiliautoml --project-id $project_id --assets-repository /content/assets --predictions-path /content/predictions.csv" ] } ],