Add ruff to pre-commit config (#121)
* Add ruff to pre-commit config

Switch from flake8 and ufmt to ruff

* Remove flake8 github action
jamt9000 authored Jan 16, 2025
1 parent 03ace8a commit 89b4471
Showing 8 changed files with 29 additions and 85 deletions.
30 changes: 0 additions & 30 deletions .github/workflows/lint.yml

This file was deleted.

17 changes: 5 additions & 12 deletions .pre-commit-config.yaml
@@ -34,14 +34,6 @@ repos:
     hooks:
       - id: yesqa
 
-  - repo: https://github.com/omnilib/ufmt
-    rev: v2.7.0
-    hooks:
-      - id: ufmt
-        additional_dependencies:
-          - black == 22.6
-          - usort == 1.0.2
-
   # TODO
   #- repo: https://github.com/executablebooks/mdformat
   #  rev: 0.7.7
@@ -52,8 +44,9 @@ repos:
   #        - mdformat-black
   #        - mdformat_frontmatter
 
-  - repo: https://github.com/PyCQA/flake8
-    rev: 7.1.0
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.3.3
     hooks:
-      - id: flake8
-        name: PEP8
+      - id: ruff
+        args: [--fix]
+      - id: ruff-format
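With ruff standing in for both ufmt and flake8, lint autofixes and formatting now come from one tool: running `pre-commit run ruff --all-files` and `pre-commit run ruff-format --all-files` (assuming pre-commit is installed locally, as the existing hooks imply) would apply them across the whole repository.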
5 changes: 3 additions & 2 deletions detoxify/detoxify.py
@@ -119,8 +119,9 @@ def predict(self, text):
         results = {}
         for i, cla in enumerate(self.class_names):
             results[cla] = (
-                # If the input is a single text, squeezing will remove the dimensionality from the tensor - so `.tolist()` will return a number instead. Otherwise, we'll get the list of scores of that class.
-                scores[:,i].squeeze().tolist()
+                # If the input is a single text, squeezing will remove the dimensionality from the tensor -
+                # so `.tolist()` will return a number instead. Otherwise, we'll get the list of scores of that class.
+                scores[:, i].squeeze().tolist()
             )
         return results
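The comment being re-wrapped above describes real squeeze/tolist behaviour; a minimal numpy sketch (hypothetical arrays, not code from this repository) shows the difference between batched and single input:

    import numpy as np

    # Hypothetical sigmoid scores shaped (n_texts, n_classes).
    batch_scores = np.array([[0.9, 0.1], [0.2, 0.8]])  # two input texts
    single_scores = np.array([[0.9, 0.1]])             # one input text

    print(batch_scores[:, 0].squeeze().tolist())   # [0.9, 0.2] -- one score per text
    print(single_scores[:, 0].squeeze().tolist())  # 0.9 -- squeeze() leaves a 0-d array, so tolist() returns a float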
1 change: 0 additions & 1 deletion model_eval/evaluate.py
@@ -15,7 +15,6 @@
 
 
 def test_classifier(config, dataset, checkpoint_path, device="cuda:0"):
-
     model = ToxicClassifier(config)
     checkpoint = torch.load(checkpoint_path, map_location=device)
     model.load_state_dict(checkpoint["state_dict"])
24 changes: 15 additions & 9 deletions pyproject.toml
@@ -36,18 +36,24 @@ dev = [
     "numpy>=2"
 ]
 
-[tool.black]
-# https://github.com/psf/black
+[tool.ruff]
 line-length = 120
-exclude = "(.eggs|.git|.hg|.mypy_cache|.venv|_build|buck-out|build|dist)"
+target-version = "py39"
 
-[tool.isort]
-known_first_party = [
-    "detoxify",
+[tool.ruff.lint]
+select = ["E", "F", "W"]
+ignore = [
+    "E731", # Do not assign a lambda expression, use a def
+    "E402", # Module level import not at top of file
+    "F401", # Module imported but unused
+    "F841", # Local variable name is assigned to but never used
 ]
-skip_glob = []
-profile = "black"
-line_length = 120
+
+[tool.ruff.format]
+quote-style = "double"
+indent-style = "space"
+skip-magic-trailing-comma = false
+line-ending = "auto"
 
 [tool.pytest.ini_options]
 norecursedirs = [
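To make the ignore list above concrete, here is a short illustrative snippet (not taken from the repository) of the patterns those rules would otherwise flag:

    import os  # F401: module imported but unused

    square = lambda x: x * x  # E731: lambda assigned to a name instead of using a def


    def demo():
        leftover = square(3)  # F841: local variable assigned but never used

E402 similarly permits module-level imports that do not sit at the top of the file.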
22 changes: 0 additions & 22 deletions setup.cfg

This file was deleted.

3 changes: 0 additions & 3 deletions src/data_loaders.py
@@ -81,7 +81,6 @@ def __init__(
         add_test_labels=True,
         classes=["toxic"],
     ):
-
         super().__init__(
             train_csv_file=train_csv_file,
             test_csv_file=test_csv_file,
@@ -120,7 +119,6 @@ def __init__(
         identity_classes=["female"],
         soft_labels=False,
     ):
-
         self.classes = classes
         self.soft_labels = soft_labels
         self.identity_classes = identity_classes
@@ -195,7 +193,6 @@ def __init__(
         train=True,
         classes=["toxic"],
     ):
-
         self.classes = classes
         super().__init__(train_csv_file=train_csv_file, test_csv_file=test_csv_file, train=train)
12 changes: 6 additions & 6 deletions tests/test_trainer.py
@@ -51,12 +51,12 @@ def get_instance(module, name, config, *args, **kwargs):
 
 def test_trainer():
     CONFIG = json.load(open("configs/Toxic_comment_classification_BERT.json"))
-    CONFIG["dataset"]["args"][
-        "train_csv_file"
-    ] = "tests/dummy_data/jigsaw-toxic-comment-classification-challenge/train.csv"
-    CONFIG["dataset"]["args"][
-        "test_csv_file"
-    ] = "tests/dummy_data/jigsaw-toxic-comment-classification-challenge/test.csv"
+    CONFIG["dataset"]["args"]["train_csv_file"] = (
+        "tests/dummy_data/jigsaw-toxic-comment-classification-challenge/train.csv"
+    )
+    CONFIG["dataset"]["args"]["test_csv_file"] = (
+        "tests/dummy_data/jigsaw-toxic-comment-classification-challenge/test.csv"
+    )
     CONFIG["batch_size"] = 2
 
     results = initialize_trainer(CONFIG)
