Commit

Setup package (#1)
* Add tests

* Add intel neural compressor examples

* Rename file

* Update transformers version for INC
echarlaix authored Jun 1, 2022
1 parent 567d474 commit ba475a7
Showing 38 changed files with 7,804 additions and 1 deletion.
22 changes: 22 additions & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,22 @@
# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet though.
Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflects the extent of your awesome contribution.
Then, please replace this with a description of the change and which issue is fixed (if applicable). Please also include relevant motivation and context. List any dependencies (if any) that are required for this change.
Once you're done, someone will review your PR shortly (see the section "Who can review?" below to tag some potential reviewers). They may suggest changes to make the code even better. If no one reviewed your PR after a week has passed, don't hesitate to post a new comment @-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Did you make sure to update the documentation with your changes?
- [ ] Did you write any new necessary tests?

54 changes: 54 additions & 0 deletions .github/workflows/check_code_quality.yml
@@ -0,0 +1,54 @@
name: check_code_quality

on:
  push:
    branches: [ main ]
    paths:
      - "optimum/**.py"
      - "tests/**.py"
      - "examples/**.py"

  pull_request:
    branches: [ main ]
    paths:
      - "optimum/**.py"
      - "tests/**.py"
      - "examples/**.py"

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  build:
    strategy:
      fail-fast: false
      matrix:
        python-version: [3.8]
        os: [ubuntu-20.04]

    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v2
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Create and start a virtual environment
        run: |
          python -m venv venv
          source venv/bin/activate
      - name: Install dependencies
        run: |
          source venv/bin/activate
          pip install --upgrade pip
          pip install isort
          pip install black
      - name: Check style with black
        run: |
          source venv/bin/activate
          black --check .
      - name: Check style with isort
        run: |
          source venv/bin/activate
          isort --check .
37 changes: 37 additions & 0 deletions .github/workflows/test_general.yml
@@ -0,0 +1,37 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: Neural Compressor / Python - Test

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  build:
    strategy:
      fail-fast: false
      matrix:
        python-version: [3.8, 3.9]
        os: [ubuntu-18.04]

    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v2
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install .[tests]
          pip install torch==1.9.1
      - name: Test with Pytest
        run: |
          pytest tests/
37 changes: 37 additions & 0 deletions examples/config/prune.yml
@@ -0,0 +1,37 @@
#
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

version: 1.0

model:
  name: bert_prune
  framework: pytorch

pruning:
  approach:
    weight_compression:
      initial_sparsity: 0.0
      target_sparsity: 0.1            # targeted sparsity of 10%
      start_epoch: 0
      end_epoch: 1
      pruners:
        - !Pruner
            prune_type: basic_magnitude

tuning:
  accuracy_criterion:
    relative: 0.1                     # only verifying workflow, accuracy loss percentage: 10%
  exit_policy:
    timeout: 0                        # tuning timeout (seconds)
  random_seed: 9527                   # random seed
33 changes: 33 additions & 0 deletions examples/config/quantization.yml
@@ -0,0 +1,33 @@
#
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

version: 1.0

model:                                # mandatory.
  name: bert
  framework: pytorch                  # mandatory. possible values are pytorch and pytorch_fx.

device: cpu

quantization:                         # optional.
  approach: post_training_dynamic_quant

tuning:
  accuracy_criterion:
    relative: 0.03                    # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 3%.
  exit_policy:
    timeout: 0                        # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit.
    max_trials: 30
  random_seed: 9527                   # optional. random seed for deterministic tuning.
82 changes: 82 additions & 0 deletions examples/language-modeling/README.md
@@ -0,0 +1,82 @@
<!---
Copyright 2020 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

# Language modeling training

The scripts [`run_clm.py`](https://github.com/huggingface/optimum/blob/main/examples/language-modeling/run_clm.py)
and [`run_mlm.py`](https://github.com/huggingface/optimum/blob/main/examples/language-modeling/run_mlm.py)
allow us to apply different quantization approaches (such as dynamic, static and quantization-aware training) as well as pruning
using the [Intel Neural Compressor (INC)](https://github.com/intel/neural-compressor) library for language modeling tasks.


GPT and GPT-2 are trained or fine-tuned using a causal language modeling (CLM) loss. ALBERT, BERT, DistilBERT and
RoBERTa are trained or fine-tuned using a masked language modeling (MLM) loss. More information about the differences
between those objectives can be found in our [model summary](https://huggingface.co/transformers/model_summary.html).


### GPT-2/GPT and causal language modeling

The following example fine-tunes GPT-Neo on WikiText-2, first applying magnitude pruning and then quantization-aware training.
We're using the raw WikiText-2 (no tokens were replaced before tokenization). The loss here is that of causal language modeling (CLM).

```bash
python run_clm.py \
--model_name_or_path EleutherAI/gpt-neo-125M \
--dataset_name wikitext \
--dataset_config_name wikitext-2-raw-v1 \
--quantize \
--quantization_approach aware_training \
--prune \
--target_sparsity 0.02 \
--perf_tol 0.5 \
--do_train \
--do_eval \
--verify_loading \
--output_dir /tmp/clm_output
```

### RoBERTa/BERT/DistilBERT and masked language modeling

The following example fine-tunes BERT on WikiText-2 while applying quantization-aware training and magnitude pruning. We're using the raw
WikiText-2. The loss is different here because BERT/RoBERTa have a bidirectional mechanism; we therefore use the same loss
that was used during their pre-training: the masked language modeling (MLM) loss.

```bash
python run_mlm.py \
--model_name_or_path bert-base-uncased \
--dataset_name wikitext \
--dataset_config_name wikitext-2-raw-v1 \
--quantize \
--quantization_approach aware_training \
--prune \
--target_sparsity 0.1 \
--perf_tol 0.5 \
--do_train \
--do_eval \
--verify_loading \
--output_dir /tmp/mlm_output
```

To apply dynamic quantization, static quantization or quantization-aware training, set `quantization_approach` to
`dynamic`, `static` or `aware_training`, respectively.
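
For instance, here is a minimal sketch of post-training dynamic quantization with the same script, reusing only the flags from the examples above (it assumes that, as is usual for post-training approaches, no training pass is needed, so `--do_train` is omitted):

```bash
# Sketch: post-training dynamic quantization of GPT-Neo with run_clm.py.
# Assumes dynamic quantization only requires an evaluation pass; all flags
# are reused from the examples above.
python run_clm.py \
    --model_name_or_path EleutherAI/gpt-neo-125M \
    --dataset_name wikitext \
    --dataset_config_name wikitext-2-raw-v1 \
    --quantize \
    --quantization_approach dynamic \
    --perf_tol 0.5 \
    --do_eval \
    --verify_loading \
    --output_dir /tmp/clm_dynamic_output
```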

The configuration files containing all the information related to the model quantization and pruning objectives can be
specified using `quantization_config` and `pruning_config`, respectively. If not specified, the default
[quantization](https://github.com/huggingface/optimum/blob/main/examples/config/quantization.yml)
and [pruning](https://github.com/huggingface/optimum/blob/main/examples/config/prune.yml)
configuration files will be used.
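
As a sketch under stated assumptions (the `--quantization_config` and `--pruning_config` flag spellings and the config file paths below are hypothetical, not confirmed by this diff), custom configuration files could be passed along like this:

```bash
# Hypothetical invocation: the --quantization_config / --pruning_config flag
# names and the config paths are assumptions for illustration only.
python run_mlm.py \
    --model_name_or_path bert-base-uncased \
    --dataset_name wikitext \
    --dataset_config_name wikitext-2-raw-v1 \
    --quantize \
    --quantization_approach static \
    --quantization_config path/to/quantization.yml \
    --prune \
    --pruning_config path/to/prune.yml \
    --do_train \
    --do_eval \
    --output_dir /tmp/mlm_output
```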

The flag `--verify_loading` can be passed along to verify that the resulting quantized model can be loaded correctly.
33 changes: 33 additions & 0 deletions examples/language-modeling/config/inc/quantization.yml
@@ -0,0 +1,33 @@
#
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

version: 1.0

model:                                # mandatory.
  name: bert
  framework: pytorch                  # mandatory. possible values are pytorch and pytorch_fx.

device: cpu

quantization:                         # optional.
  approach: post_training_dynamic_quant

tuning:
  accuracy_criterion:
    absolute: 2                       # optional. default value is relative, other value is absolute. this example allows absolute accuracy loss of 2.
  exit_policy:
    timeout: 0                        # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit.
    max_trials: 30
  random_seed: 9527                   # optional.
5 changes: 5 additions & 0 deletions examples/language-modeling/requirements.txt
@@ -0,0 +1,5 @@
accelerate
torch >= 1.9
datasets >= 1.8.0
sentencepiece != 0.1.92
protobuf