Update formatting (#271)
echarlaix authored Apr 6, 2023
1 parent c17b879 commit bd7ae23
Showing 49 changed files with 343 additions and 341 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/check_code_quality.yml
@@ -48,7 +48,7 @@ jobs:
run: |
source venv/bin/activate
black --check .
- name: Check style with isort
- name: Check style with ruff
run: |
source venv/bin/activate
isort --check .
ruff .
4 changes: 2 additions & 2 deletions Makefile
@@ -22,11 +22,11 @@ REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL))
# Run code quality checks
style_check:
black --check .
isort --check .
ruff .

style:
black .
isort .
ruff . --fix

# Run tests for the library
test:
8 changes: 4 additions & 4 deletions docs/source/inference.mdx
@@ -70,7 +70,7 @@ When fixing the shapes with the `reshape()` method, inference cannot be performe
from datasets import load_dataset
from transformers import AutoTokenizer, pipeline
from evaluate import evaluator
from optimum.intel.openvino import OVModelForQuestionAnswering
from optimum.intel import OVModelForQuestionAnswering

model_id = "distilbert-base-cased-distilled-squad"
model = OVModelForQuestionAnswering.from_pretrained(model_id, export=True)
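tokenizer = AutoTokenizer.from_pretrained(model_id)

# Editor's sketch of how this truncated example typically continues; the dataset
# name, split and slice size are assumptions, not taken from this diff.
qa_pipe = pipeline("question-answering", model=model, tokenizer=tokenizer)
eval_dataset = load_dataset("squad", split="validation").select(range(64))
task_evaluator = evaluator("question-answering")
metric = task_evaluator.compute(model_or_pipeline=qa_pipe, data=eval_dataset, metric="squad")
print(metric)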
@@ -92,7 +92,7 @@ metric = task_evaluator.compute(model_or_pipeline=qa_pipe, data=eval_dataset, me
By default the model will be compiled when instantiating our `OVModel`. If the model is then reshaped, moved to another device, or switched to FP16 precision, it will need to be recompiled, which by default happens before the first inference (inflating the latency of that first inference). To avoid an unnecessary compilation, you can disable the first compilation by setting `compile=False`. The model can then be compiled before the first inference with `model.compile()`.

```python
from optimum.intel.openvino import OVModelForSequenceClassification
from optimum.intel import OVModelForSequenceClassification

model_id = "distilbert-base-uncased-finetuned-sst-2-english"
# Load the model and disable the model compilation
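# Editor's sketch of the pattern described above (compile=False, then explicit
# compilation); the static shape values are illustrative assumptions.
model = OVModelForSequenceClassification.from_pretrained(model_id, export=True, compile=False)
# Apply any reshaping or precision change while the model is still uncompiled
model.reshape(1, 128)
# Compile once, explicitly, before the first inference
model.compile()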
@@ -111,7 +111,7 @@ Here is an example on how you can run inference for a translation task using an

```python
from transformers import AutoTokenizer, pipeline
from optimum.intel.openvino import OVModelForSeq2SeqLM
from optimum.intel import OVModelForSeq2SeqLM

model_id = "t5-small"
model = OVModelForSeq2SeqLM.from_pretrained(model_id, export=True)
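tokenizer = AutoTokenizer.from_pretrained(model_id)

# Editor's sketch of running the exported model through a standard transformers
# pipeline; the translation direction and sample sentence are assumptions.
translator = pipeline("translation_en_to_fr", model=model, tokenizer=tokenizer)
print(translator("He never went out without a book under his arm.")[0]["translation_text"])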
@@ -147,7 +147,7 @@ pip install diffusers
Here is an example of how you can load an OpenVINO Stable Diffusion model and run inference using OpenVINO Runtime:

```python
from optimum.intel.openvino import OVStableDiffusionPipeline
from optimum.intel import OVStableDiffusionPipeline

model_id = "echarlaix/stable-diffusion-v1-5-openvino"
stable_diffusion = OVStableDiffusionPipeline.from_pretrained(model_id)
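# Editor's sketch of generating an image with the pipeline loaded above; the
# prompt and output filename are illustrative assumptions.
prompt = "sailing ship in storm by Rembrandt"
image = stable_diffusion(prompt).images[0]
image.save("generated.png")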
10 changes: 5 additions & 5 deletions docs/source/optimization_ov.mdx
@@ -16,7 +16,7 @@ limitations under the License.

# Optimization

🤗 Optimum Intel provides an `optimum.openvino` package that enables you to apply a variety of model compression methods such as quantization, pruning, on many models hosted on the 🤗 hub using the [NNCF](https://docs.openvino.ai/2022.1/docs_nncf_introduction.html) framework.
🤗 Optimum Intel provides an `openvino` package that enables you to apply a variety of model compression methods such as quantization, pruning, on many models hosted on the 🤗 hub using the [NNCF](https://docs.openvino.ai/2022.1/docs_nncf_introduction.html) framework.


## Post-training optimization
@@ -27,7 +27,7 @@ Here is how to apply static quantization on a fine-tuned DistilBERT:
```python
from functools import partial
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from optimum.intel.openvino import OVConfig, OVQuantizer
from optimum.intel import OVConfig, OVQuantizer

model_id = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(model_id)
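tokenizer = AutoTokenizer.from_pretrained(model_id)

# Editor's sketch of a typical OVQuantizer run; the calibration dataset,
# preprocessing function, sample count and save directory are assumptions,
# not taken from this diff.
def preprocess_fn(examples, tokenizer):
    return tokenizer(examples["sentence"], padding=True, truncation=True, max_length=128)

quantizer = OVQuantizer.from_pretrained(model)
calibration_dataset = quantizer.get_calibration_dataset(
    "glue",
    dataset_config_name="sst2",
    preprocess_function=partial(preprocess_fn, tokenizer=tokenizer),
    num_samples=300,
    dataset_split="train",
)
# Apply static post-training quantization and save the resulting OpenVINO model
quantizer.quantize(calibration_dataset=calibration_dataset, save_directory="ov_int8")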
@@ -83,7 +83,7 @@ from transformers import (
)
from datasets import load_dataset
- from transformers import Trainer
+ from optimum.intel.openvino import OVConfig, OVTrainer, OVModelForSequenceClassification
+ from optimum.intel import OVConfig, OVTrainer, OVModelForSequenceClassification

model_id = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(model_id)
@@ -168,7 +168,7 @@ Once we have the config ready, we can start develop the training pipeline like t

```diff
- from transformers import Trainer, TrainingArguments
+ from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments
+ from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments

# Load teacher model
+ teacher_model = AutoModelForSequenceClassification.from_pretrained(teacher_model_or_path)
@@ -210,7 +210,7 @@ After applying quantization on our model, we can then easily load it with our `O

```python
from transformers import pipeline
from optimum.intel.openvino import OVModelForSequenceClassification
from optimum.intel import OVModelForSequenceClassification

model_id = "helenai/distilbert-base-uncased-finetuned-sst-2-english-ov-int8"
ov_model = OVModelForSequenceClassification.from_pretrained(model_id)
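from transformers import AutoTokenizer

# Editor's sketch: serve the quantized model through a standard transformers
# pipeline; the example input is an illustrative assumption.
tokenizer = AutoTokenizer.from_pretrained(model_id)
cls_pipe = pipeline("text-classification", model=ov_model, tokenizer=tokenizer)
print(cls_pipe("This movie was absolutely wonderful."))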
16 changes: 8 additions & 8 deletions examples/neural_compressor/language-modeling/run_clm.py
@@ -29,9 +29,16 @@
from typing import Optional

import datasets
import evaluate
import torch
import transformers
from datasets import load_dataset
from neural_compressor import (
DistillationConfig,
PostTrainingQuantConfig,
QuantizationAwareTrainingConfig,
WeightPruningConfig,
)
from transformers import (
CONFIG_MAPPING,
MODEL_FOR_CAUSAL_LM_MAPPING,
@@ -49,13 +56,6 @@
from transformers.utils import check_min_version
from transformers.utils.versions import require_version

import evaluate
from neural_compressor import (
DistillationConfig,
PostTrainingQuantConfig,
QuantizationAwareTrainingConfig,
WeightPruningConfig,
)
from optimum.intel.neural_compressor import INCModelForCausalLM, INCQuantizer, INCTrainer


@@ -441,7 +441,7 @@ def main():
)
else:
model = AutoModelForCausalLM.from_config(config)
n_params = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values())
n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values())
logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params")

# We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch
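The `n_params` rewrite above keeps the original de-duplication trick, just expressed as a dict comprehension: keying on `p.data_ptr()` counts each underlying tensor once even when several parameters share storage. A small self-contained illustration of why that matters (a hypothetical toy module with tied weights, not part of this commit):

```python
import torch

emb = torch.nn.Embedding(10, 4)
lm_head = torch.nn.Linear(4, 10, bias=False)
lm_head.weight = emb.weight  # tie the weights, as many language models do

params = list(emb.parameters()) + list(lm_head.parameters())
naive = sum(p.numel() for p in params)  # 80: the shared tensor is counted twice
deduped = sum({p.data_ptr(): p.numel() for p in params}.values())  # 40: counted once
print(naive, deduped)
```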
14 changes: 7 additions & 7 deletions examples/neural_compressor/language-modeling/run_mlm.py
@@ -29,9 +29,16 @@
from typing import Optional

import datasets
import evaluate
import torch
import transformers
from datasets import load_dataset
from neural_compressor import (
DistillationConfig,
PostTrainingQuantConfig,
QuantizationAwareTrainingConfig,
WeightPruningConfig,
)
from transformers import (
CONFIG_MAPPING,
MODEL_FOR_MASKED_LM_MAPPING,
@@ -48,13 +55,6 @@
from transformers.utils import check_min_version
from transformers.utils.versions import require_version

import evaluate
from neural_compressor import (
DistillationConfig,
PostTrainingQuantConfig,
QuantizationAwareTrainingConfig,
WeightPruningConfig,
)
from optimum.intel.neural_compressor import INCModelForMaskedLM, INCQuantizer, INCTrainer


19 changes: 9 additions & 10 deletions examples/neural_compressor/multiple-choice/run_swag.py
@@ -29,12 +29,12 @@
import torch
import transformers
from datasets import load_dataset
from neural_compressor import DistillationConfig, QuantizationAwareTrainingConfig, WeightPruningConfig
from transformers import (
AutoConfig,
AutoModelForMultipleChoice,
AutoTokenizer,
HfArgumentParser,
PreTrainedModel,
TrainingArguments,
default_data_collator,
set_seed,
@@ -45,7 +45,6 @@
from transformers.utils import check_min_version
from transformers.utils.versions import require_version

from neural_compressor import DistillationConfig, QuantizationAwareTrainingConfig, WeightPruningConfig
from optimum.intel.neural_compressor import INCModelForMultipleChoice, INCTrainer


@@ -548,14 +547,14 @@ def compute_metrics(eval_predictions):
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

kwargs = dict(
finetuned_from=model_args.model_name_or_path,
tasks="multiple-choice",
dataset_tags="swag",
dataset_args="regular",
dataset="SWAG",
language="en",
)
kwargs = {
"finetuned_from": model_args.model_name_or_path,
"tasks": "multiple-choice",
"dataset_tags": "swag",
"dataset_args": "regular",
"dataset": "SWAG",
"language": "en",
}

if training_args.push_to_hub:
trainer.push_to_hub(**kwargs)
@@ -25,17 +25,18 @@
from typing import Optional, Union

import datasets
import numpy as np
import evaluate
import torch
import transformers
from accelerate import Accelerator
from datasets import load_dataset
from neural_compressor import PostTrainingQuantConfig
from torch.utils.data import DataLoader
from transformers import (
AutoConfig,
AutoModelForMultipleChoice,
AutoTokenizer,
HfArgumentParser,
PreTrainedModel,
TrainingArguments,
default_data_collator,
set_seed,
@@ -45,9 +46,6 @@
from transformers.utils import check_min_version
from transformers.utils.versions import require_version

import evaluate
from accelerate import Accelerator
from neural_compressor import PostTrainingQuantConfig
from optimum.intel.neural_compressor import INCModelForMultipleChoice, INCQuantizer


@@ -5,16 +5,16 @@
from dataclasses import dataclass, field
from typing import Optional

import evaluate
import pandas as pd
import torch
import transformers
from neural_compressor import PostTrainingQuantConfig
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from transformers import HfArgumentParser, TrainingArguments, TrOCRProcessor, VisionEncoderDecoderModel
from transformers.utils import check_min_version

import evaluate
from neural_compressor import PostTrainingQuantConfig
from optimum.intel.neural_compressor import INCModelForVision2Seq, INCQuantizer


@@ -284,7 +284,7 @@ def eval_func(model, iters=None):
if result_loaded_model != result_optimized_model:
logger.error("The quantized model was not successfully loaded.")
else:
logger.info(f"The quantized model was successfully loaded.")
logger.info("The quantized model was successfully loaded.")


def _mp_fn(index):
42 changes: 34 additions & 8 deletions examples/neural_compressor/question-answering/run_qa.py
@@ -25,20 +25,23 @@
from typing import Optional

import datasets
import evaluate
import numpy as np
import torch
import transformers
from accelerate import Accelerator
from datasets import load_dataset
from neural_compressor import DistillationConfig, QuantizationAwareTrainingConfig, WeightPruningConfig
from torch.utils.data.dataloader import DataLoader
from tqdm.auto import tqdm
from trainer_qa import QuestionAnsweringINCTrainer
from transformers import (
AutoConfig,
AutoModelForQuestionAnswering,
AutoTokenizer,
DataCollatorWithPadding,
EvalPrediction,
HfArgumentParser,
PreTrainedModel,
PreTrainedTokenizerFast,
TrainingArguments,
default_data_collator,
@@ -47,13 +50,9 @@
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version
from transformers.utils.versions import require_version
from utils_qa import postprocess_qa_predictions

import evaluate
from accelerate import Accelerator
from neural_compressor import DistillationConfig, QuantizationAwareTrainingConfig, WeightPruningConfig
from optimum.intel.neural_compressor import INCModelForQuestionAnswering
from trainer_qa import QuestionAnsweringINCTrainer
from utils_qa import postprocess_qa_predictions


# Will be removed when neural-compressor next release is out
@@ -553,7 +552,10 @@ def move_input_to_device(input, device):
)
teacher_model_qa = QAModel(teacher_model)
teacher_model_qa = accelerator.prepare(teacher_model_qa)
num_param = lambda model: sum(p.numel() for p in model.parameters())

def num_param(model):
return sum(p.numel() for p in model.parameters())

logger.info(
"***** Number of teacher model parameters: {:.2f}M *****".format(num_param(teacher_model_qa) / 10**6)
)
@@ -662,9 +664,33 @@ def prepare_validation_features(examples):
load_from_cache_file=not data_args.overwrite_cache,
desc="Running tokenizer on validation dataset",
)

if data_args.max_eval_samples is not None:
# During Feature creation dataset samples might increase, we will select required samples again
eval_dataset = eval_dataset.select(range(data_args.max_eval_samples))
max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
eval_dataset = eval_dataset.select(range(max_eval_samples))

if training_args.do_predict:
if "test" not in raw_datasets:
raise ValueError("--do_predict requires a test dataset")
predict_examples = raw_datasets["test"]
if data_args.max_predict_samples is not None:
# We will select sample from whole data
predict_examples = predict_examples.select(range(data_args.max_predict_samples))
# Predict Feature Creation
with training_args.main_process_first(desc="prediction dataset map pre-processing"):
predict_dataset = predict_examples.map(
prepare_validation_features,
batched=True,
num_proc=data_args.preprocessing_num_workers,
remove_columns=column_names,
load_from_cache_file=not data_args.overwrite_cache,
desc="Running tokenizer on prediction dataset",
)
if data_args.max_predict_samples is not None:
# During Feature creation dataset samples might increase, we will select required samples again
max_predict_samples = min(len(predict_dataset), data_args.max_predict_samples)
predict_dataset = predict_dataset.select(range(max_predict_samples))

# Post-processing:
def post_processing_function(examples, features, predictions, stage="eval"):
(diff truncated; remaining changed files not shown)