Skip to content

Commit

Permalink
Add tagging for notebooks (openvinotoolkit#550)
Browse files Browse the repository at this point in the history
* Add tagging

* Apply suggestions from code review

Co-authored-by: Adrian Boguszewski <[email protected]>

* Add missing apostrophes

Co-authored-by: Adrian Boguszewski <[email protected]>
  • Loading branch information
Jakub Debski and adrianboguszewski authored May 11, 2022
1 parent b716ee7 commit 42ee0a3
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 0 deletions.
27 changes: 27 additions & 0 deletions .ci/keywords.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"tags": {
"GPU": ["device_name = \"GPU\"", "device_name=\"GPU\""],
"Auto device": ["device_name=\"AUTO\"", "device_name = \"AUTO\""],

"Dynamic Shape": [".partial_shape", "Dimension("],
"Reshape Model": ["model.reshape("],
"Async Inference": [".start_async("],

"Download Model": ["omz_downloader"],
"Convert Model": ["omz_converter"],
"Optimize Model": ["import openvino.tools.mo", "from openvino.tools.mo", "!mo "],
"Benchmark Model": ["benchmark_app"],
"OMZ Info Dumper": ["omz_info_dumper"],

"Paddle": ["import paddle", "from paddle"],
"Torchvision": ["import torchvision", "from torchvision"],
"Compression": ["import compression", "from compression"],
"Pytorch": ["import torch", "from torch"],
"NNCF": ["import nncf", "from nncf"],
"Transformers": ["import transformers", "from transformers"],
"Tensorflow": ["import tensorflow", "from tensorflow"],

"ONNX": [".onnx"],
"Train Model": ["model.fit(", "model.train()"]
}
}
35 changes: 35 additions & 0 deletions .ci/tagger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import glob
import json
import mmap
import os


def get_notebooks(path: str):
    """Collect notebook files located one directory level below *path*.

    Only ``.ipynb`` files whose name starts with a digit are matched.

    Args:
        path: Directory whose immediate sub-directories are searched.

    Returns:
        A list of matching paths, in the order ``glob`` yields them.
    """
    pattern = f"{path}/*/[0-9]*.ipynb"
    return glob.glob(pattern)

def get_tags(path: str):
    """Load the keyword configuration from a JSON file.

    Args:
        path: Location of the JSON file to parse.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # The context manager closes the handle deterministically, and the
    # explicit encoding avoids depending on the platform's locale default.
    with open(path, encoding="utf-8") as config_file:
        return json.load(config_file)

def find_tags_for_notebook(notebook_path: str, tags: dict):
    """Return the tags whose keywords occur in a notebook file.

    The file is scanned as raw bytes; it is not parsed. A tag is reported
    once, as soon as any one of its keywords is found.

    Each keyword is searched for both literally and in its JSON-escaped
    spelling. A notebook file is a JSON document, so a keyword containing
    a double quote (such as ``device_name="GPU"``) is stored on disk as
    ``device_name=\\"GPU\\"`` and would never match literally.

    Args:
        notebook_path: Path of the notebook file to scan.
        tags: Mapping of tag name to a list of keyword strings.

    Returns:
        The matching tag names, in the iteration order of ``tags``.
        An empty file yields an empty list.
    """
    nb_tags = []
    with open(notebook_path, "rb") as file:
        try:
            contents = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)
        except ValueError:
            # mmap refuses to map a zero-length file; nothing can match in it.
            return nb_tags
        # Using the map as a context manager releases it when scanning is done.
        with contents:
            for tag, keywords in tags.items():
                for keyword in keywords:
                    needles = {
                        keyword.encode("utf-8"),
                        # Same keyword as it appears inside a JSON string
                        # (surrounding quotes from json.dumps stripped).
                        json.dumps(keyword, ensure_ascii=False)[1:-1].encode("utf-8"),
                    }
                    if any(contents.find(needle) != -1 for needle in needles):
                        nb_tags.append(tag)
                        break
    return nb_tags

def find_tags_for_all_notebooks(notebooks: list, tags: dict):
    """Build a mapping from notebook name to the sorted tags found in it.

    Args:
        notebooks: Paths of the notebook files to scan.
        tags: Mapping of tag name to a list of keyword strings, passed
            through to ``find_tags_for_notebook``.

    Returns:
        A dict keyed by notebook name: the file name up to its first ``.``.
        Each value is the alphabetically sorted list of matching tags.
        Notebooks with no matching tag are omitted.
    """
    notebooks_tags = {}
    for notebook in notebooks:
        nb_tags = sorted(find_tags_for_notebook(notebook, tags))
        if not nb_tags:
            continue
        # os.path.basename honours the platform's separators; glob returns
        # backslash-joined paths on Windows, which split('/') would not cut.
        name = os.path.basename(notebook).split('.')[0]
        notebooks_tags[name] = nb_tags
    return notebooks_tags

# Script driver: scan every notebook under "notebooks" against the keyword
# configuration and print the resulting name -> tags mapping as JSON.
notebooks_paths = get_notebooks("notebooks")
notebooks_paths.sort()  # fixed scan order regardless of how glob lists files

keyword_config = get_tags(".ci/keywords.json")
tags = keyword_config['tags']

all_notebooks_tags = find_tags_for_all_notebooks(notebooks_paths, tags)

report = json.dumps(all_notebooks_tags, indent=4)
print(report)
38 changes: 38 additions & 0 deletions .github/workflows/generate_tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generate tags for each notebook
#
# Runs .ci/tagger.py and uploads the JSON it prints as a build artifact.

name: Generate tags
on:
  workflow_dispatch:
  pull_request:
    branches:
      - 'main'
    # Only run when a notebook or the tagging tooling itself changes.
    paths:
      - 'notebooks/**.ipynb'
      - '.ci/keywords.json'
      - '.ci/tagger.py'

jobs:
  build_codecheck:
    strategy:
      fail-fast: false
    # NOTE(review): no step named "cachepip" exists in this workflow; the
    # trailing comment below may be left over from another workflow - confirm.
    runs-on: ubuntu-20.04  # change cachepip step when changing this
    steps:
      - name: Checkout repository
        uses: actions/checkout@v2
      - name: Dotenv Action
        id: dotenv
        # NOTE(review): the action reference was garbled by e-mail obfuscation
        # in the source; restored as dotenv@v1.0.2 - confirm the pinned version.
        # None of the later steps in this workflow read this step's outputs.
        uses: xom9ikk/dotenv@v1.0.2
        with:
          path: ./.github/workflows
      - name: Set up Python
        uses: actions/setup-python@v1
        with:
          # Quoted so YAML keeps the version as a string (e.g. 3.10 would
          # otherwise be read as the number 3.1).
          python-version: '3.8'
      - name: Run tagger and store results in file
        run: |
          python .ci/tagger.py > notebook-tags-${{ github.sha }}.json
      - name: Archive notebook tags
        uses: actions/upload-artifact@v2
        with:
          name: notebook-tags
          path: notebook-tags-${{ github.sha }}.json

0 comments on commit 42ee0a3

Please sign in to comment.