diff --git a/.ci/keywords.json b/.ci/keywords.json
new file mode 100644
index 00000000000..77ebfa3b4aa
--- /dev/null
+++ b/.ci/keywords.json
@@ -0,0 +1,27 @@
+{
+    "tags": {
+        "GPU": ["device_name = \"GPU\"", "device_name=\"GPU\""],
+        "Auto device": ["device_name=\"AUTO\"", "device_name = \"AUTO\""],
+
+        "Dynamic Shape": [".partial_shape", "Dimension("],
+        "Reshape Model": ["model.reshape("],
+        "Async Inference": [".start_async("],
+
+        "Download Model": ["omz_downloader"],
+        "Convert Model": ["omz_converter"],
+        "Optimize Model": ["import openvino.tools.mo", "from openvino.tools.mo", "!mo "],
+        "Benchmark Model": ["benchmark_app"],
+        "OMZ Info Dumper": ["omz_info_dumper"],
+
+        "Paddle": ["import paddle", "from paddle"],
+        "Torchvision": ["import torchvision", "from torchvision"],
+        "Compression": ["import compression", "from compression"],
+        "Pytorch": ["import torch", "from torch"],
+        "NNCF": ["import nncf", "from nncf"],
+        "Transformers": ["import transformers", "from transformers"],
+        "Tensorflow": ["import tensorflow", "from tensorflow"],
+
+        "ONNX": [".onnx"],
+        "Train Model": ["model.fit(", "model.train()"]
+    }
+}
\ No newline at end of file
diff --git a/.ci/tagger.py b/.ci/tagger.py
new file mode 100644
index 00000000000..acf5084de43
--- /dev/null
+++ b/.ci/tagger.py
@@ -0,0 +1,62 @@
+"""Generate keyword-based tags for notebooks and print them as JSON."""
+import glob
+import json
+import mmap
+import os
+
+
+def get_notebooks(path: str):
+    """Return paths of notebooks matching ``{path}/*/[0-9]*.ipynb``."""
+    return glob.glob(f"{path}/*/[0-9]*.ipynb")
+
+
+def get_tags(path: str):
+    """Load and return the JSON document stored at ``path``."""
+    # Context manager so the file handle is closed deterministically.
+    with open(path) as file:
+        return json.load(file)
+
+
+def find_tags_for_notebook(notebook_path: str, tags: dict):
+    """Return the tags whose keywords occur in the raw notebook file.
+
+    ``tags`` maps a tag name to a list of keyword strings. A tag is
+    reported once, as soon as any of its keywords is found in the file
+    bytes. Tags are returned in ``tags`` iteration order.
+    """
+    with open(notebook_path, "rb") as file:
+        if os.fstat(file.fileno()).st_size == 0:
+            # mmap raises ValueError for a zero-length file.
+            return []
+        with mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) as mapped:
+            return [
+                tag
+                for tag, keywords in tags.items()
+                if any(
+                    mapped.find(keyword.encode("utf-8")) != -1
+                    for keyword in keywords
+                )
+            ]
+
+
+def find_tags_for_all_notebooks(notebooks: list, tags: dict):
+    """Map each notebook's base name to its sorted, non-empty tag list.
+
+    Notebooks for which no tag matches are left out of the result.
+    """
+    notebooks_tags = {}
+    for notebook in notebooks:
+        nb_tags = sorted(find_tags_for_notebook(notebook, tags))
+        if nb_tags:
+            # os.path handles the platform's separator and keeps dots
+            # that are part of the name (only the extension is dropped).
+            name = os.path.splitext(os.path.basename(notebook))[0]
+            notebooks_tags[name] = nb_tags
+    return notebooks_tags
+
+
+notebooks_paths = sorted(get_notebooks("notebooks"))
+tags = get_tags(".ci/keywords.json")['tags']
+all_notebooks_tags = find_tags_for_all_notebooks(notebooks_paths, tags)
+
+print(json.dumps(all_notebooks_tags, indent=4))
diff --git a/.github/workflows/generate_tags.yml b/.github/workflows/generate_tags.yml
new file mode 100644
index 00000000000..b9982258e75
--- /dev/null
+++ b/.github/workflows/generate_tags.yml
@@ -0,0 +1,38 @@
+# Generate tags for each notebook
+
+name: Generate tags
+on:
+  workflow_dispatch:
+  pull_request:
+    branches:
+      - 'main'
+    paths:
+      - 'notebooks/**.ipynb'
+      - '.ci/keywords.json'
+      - '.ci/tagger.py'
+
+jobs:
+  build_codecheck:
+    strategy:
+      fail-fast: false
+    runs-on: ubuntu-20.04  # change cachepip step when changing this
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v2
+      - name: Dotenv Action
+        id: dotenv
+        uses: xom9ikk/dotenv@v1.0.2
+        with:
+          path: ./.github/workflows
+      - name: Set up Python
+        uses: actions/setup-python@v1
+        with:
+          python-version: 3.8
+      - name: Run tagger and store results in file
+        run: |
+          python .ci/tagger.py > notebook-tags-${{ github.sha }}.json
+      - name: Archive notebook tags
+        uses: actions/upload-artifact@v2
+        with:
+          name: notebook-tags
+          path: notebook-tags-${{ github.sha }}.json
\ No newline at end of file