Skip to content

Commit

Permalink
Merge pull request #2614 from ASFHyP3/ruff-extensions
Browse files Browse the repository at this point in the history
Uncomment ruff extensions and fix errors
  • Loading branch information
jtherrmann authored Feb 21, 2025
2 parents f540e71 + 3fa1a07 commit 35a51c1
Show file tree
Hide file tree
Showing 19 changed files with 89 additions and 72 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- The reserved `bucket_prefix` job spec parameter has been renamed to `job_id` and can be referenced as `Ref::job_id` within each step's `command` field.
- The `job_id` parameter of the `ARIA_RAIDER` job type has been renamed to `gunw_job_id`.
- The `AUTORIFT_ITS_LIVE` job type now accepts Sentinel-1 burst products.
- `ruff` now checks for incorrect docstrings (missing docstrings are still allowed), incomplete type annotations (missing annotations are still allowed), and opportunities to use `pathlib`.

## [9.4.0]

Expand Down
3 changes: 2 additions & 1 deletion apps/api/src/hyp3_api/auth.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import time
from os import environ
from typing import Any

import jwt


def decode_token(token) -> dict | None:
def decode_token(token: Any) -> dict | None:
try:
return jwt.decode(token, environ['AUTH_PUBLIC_KEY'], algorithms=environ['AUTH_ALGORITHM'])
except (jwt.ExpiredSignatureError, jwt.DecodeError):
Expand Down
24 changes: 16 additions & 8 deletions apps/api/src/hyp3_api/handlers.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
from http.client import responses

import requests
from flask import abort, jsonify, request
from flask import Response, abort, jsonify, request

import dynamo
from dynamo.exceptions import AccessCodeError, InsufficientCreditsError, UnexpectedApplicationStatusError
from hyp3_api import util
from hyp3_api.validation import BoundsValidationError, GranuleValidationError, validate_jobs


def problem_format(status: int, message: str) -> Response:
    """Build an RFC 7807 "problem+json" error response.

    Args:
        status: HTTP status code to set on the response.
        message: Human-readable error detail.

    Returns:
        A Flask response whose Content-Type is application/problem+json.
    """
    response = jsonify({'status': status, 'detail': message, 'title': responses[status], 'type': 'about:blank'})
    response.headers['Content-Type'] = 'application/problem+json'
    response.status_code = status
    return response


def post_jobs(body, user):
def post_jobs(body: dict, user: str) -> dict:
print(body)

try:
Expand All @@ -27,15 +27,23 @@ def post_jobs(body, user):
abort(problem_format(400, str(e)))

try:
body['jobs'] = dynamo.jobs.put_jobs(user, body['jobs'], dry_run=body.get('validate_only'))
body['jobs'] = dynamo.jobs.put_jobs(user, body['jobs'], dry_run=bool(body.get('validate_only')))
except UnexpectedApplicationStatusError as e:
abort(problem_format(403, str(e)))
except InsufficientCreditsError as e:
abort(problem_format(400, str(e)))
return body


def get_jobs(user, start=None, end=None, status_code=None, name=None, job_type=None, start_token=None):
def get_jobs(
user: str,
start: str | None = None,
end: str | None = None,
status_code: str | None = None,
name: str | None = None,
job_type: str | None = None,
start_token: str | None = None,
) -> dict:
try:
start_key = util.deserialize(start_token) if start_token else None
except util.TokenDeserializeError:
Expand All @@ -50,7 +58,7 @@ def get_jobs(user, start=None, end=None, status_code=None, name=None, job_type=N
return payload


def get_job_by_id(job_id):
def get_job_by_id(job_id: str) -> dict:
job = dynamo.jobs.get_job(job_id)
if job is None:
abort(problem_format(404, f'job_id does not exist: {job_id}'))
Expand All @@ -68,7 +76,7 @@ def patch_user(body: dict, user: str, edl_access_token: str) -> dict:
return _user_response(user_record)


def get_user(user: str) -> dict:
    """Fetch (creating if necessary) the user's record and format it for the API."""
    user_record = dynamo.user.get_or_create_user(user)
    return _user_response(user_record)

Expand All @@ -81,7 +89,7 @@ def _user_response(user_record: dict) -> dict:
return payload


def _get_names_for_user(user):
def _get_names_for_user(user: str) -> list[str]:
jobs, next_key = dynamo.jobs.query_jobs(user)
while next_key is not None:
new_jobs, next_key = dynamo.jobs.query_jobs(user, start_key=next_key)
Expand Down
2 changes: 1 addition & 1 deletion apps/api/src/hyp3_api/openapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import prance


def get_spec_yaml(path_to_spec: Path) -> dict:
    """Load and fully resolve an OpenAPI spec file.

    Args:
        path_to_spec: Path to the OpenAPI YAML document.

    Returns:
        The resolved specification as a plain dictionary.
    """
    parser = prance.ResolvingParser(str(path_to_spec.resolve()))
    parser.parse()
    return parser.specification
18 changes: 9 additions & 9 deletions apps/api/src/hyp3_api/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import Any

import yaml
from flask import abort, g, jsonify, make_response, redirect, render_template, request
from flask import Response, abort, g, jsonify, make_response, redirect, render_template, request
from flask.json.provider import JSONProvider
from flask_cors import CORS
from openapi_core import OpenAPI
Expand Down Expand Up @@ -99,10 +99,10 @@ def default(self, o):


class CustomJSONProvider(JSONProvider):
def dumps(self, obj: Any, **kwargs) -> str:
def dumps(self, obj: Any, **kwargs: Any) -> str:
return json.dumps(obj, cls=CustomEncoder)

def loads(self, s: str | bytes, **kwargs) -> Any:
def loads(self, s: str | bytes, **kwargs: Any) -> Any:
return json.loads(s)


Expand All @@ -126,19 +126,19 @@ def __call__(self, errors):


@app.route('/costs', methods=['GET'])
def costs_get() -> Response:
    """Return the credit-cost table for all job types."""
    return jsonify(dynamo.jobs.COSTS)


@app.route('/jobs', methods=['POST'])
@openapi
def jobs_post() -> Response:
    """Submit new jobs on behalf of the authenticated user."""
    return jsonify(handlers.post_jobs(request.get_json(), g.user))


@app.route('/jobs', methods=['GET'])
@openapi
def jobs_get():
def jobs_get() -> Response:
parameters = request.openapi.parameters.query # type: ignore[attr-defined]
start = parameters.get('start')
end = parameters.get('end')
Expand All @@ -157,17 +157,17 @@ def jobs_get():

@app.route('/jobs/<job_id>', methods=['GET'])
@openapi
def jobs_get_by_job_id(job_id: str) -> Response:
    """Look up a single job by its ID."""
    return jsonify(handlers.get_job_by_id(job_id))


@app.route('/user', methods=['PATCH'])
@openapi
def user_patch() -> Response:
    """Update the authenticated user's record from the request body."""
    return jsonify(handlers.patch_user(request.get_json(), g.user, g.edl_access_token))


@app.route('/user', methods=['GET'])
@openapi
def user_get() -> Response:
    """Return the authenticated user's record."""
    return jsonify(handlers.get_user(g.user))
7 changes: 4 additions & 3 deletions apps/api/src/hyp3_api/util.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import binascii
import json
from base64 import b64decode, b64encode
from typing import Any
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse


class TokenDeserializeError(Exception):
    """Raised when paging results and `start_token` fails to deserialize."""


def get_granules(jobs: list[dict]) -> set[str]:
Expand All @@ -17,13 +18,13 @@ def get_granules(jobs: list[dict]) -> set[str]:
}


def serialize(payload: dict) -> str:
    """Encode a payload as an opaque paging token (JSON, then base64).

    Inverse of `deserialize`.
    """
    string_version = json.dumps(payload)
    base_64 = b64encode(string_version.encode())
    return base_64.decode()


def deserialize(token: str):
def deserialize(token: str) -> Any:
try:
string_version = b64decode(token.encode())
return json.loads(string_version)
Expand Down
39 changes: 19 additions & 20 deletions apps/api/src/hyp3_api/validation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import os
import sys
from collections.abc import Iterable
from copy import deepcopy
from pathlib import Path

Expand All @@ -23,19 +23,19 @@ class BoundsValidationError(Exception):
pass


with open(Path(__file__).parent / 'job_validation_map.yml') as job_validation_map_file:
with (Path(__file__).parent / 'job_validation_map.yml').open() as job_validation_map_file:
JOB_VALIDATION_MAP = yaml.safe_load(job_validation_map_file.read())


def _has_sufficient_coverage(granule: Polygon) -> bool:
    """Check whether a granule footprint intersects the DEM coverage map.

    Lazily loads the COP30 coverage multipolygon into the module-level
    DEM_COVERAGE cache on first use.
    """
    global DEM_COVERAGE
    if DEM_COVERAGE is None:
        DEM_COVERAGE = _get_multipolygon_from_geojson('dem_coverage_map_cop30.geojson')

    return granule.intersects(DEM_COVERAGE)


def _get_cmr_metadata(granules):
def _get_cmr_metadata(granules: Iterable[str]) -> list[dict]:
cmr_parameters = {
'granule_ur': [f'{granule}*' for granule in granules],
'options[granule_ur][pattern]': 'true',
Expand All @@ -55,35 +55,34 @@ def _get_cmr_metadata(granules):
}
response = requests.post(CMR_URL, data=cmr_parameters)
response.raise_for_status()
granules = [
return [
{
'name': entry.get('producer_granule_id', entry.get('title')),
'polygon': Polygon(_format_points(entry['polygons'][0][0])),
}
for entry in response.json()['feed']['entry']
]
return granules


def _is_third_party_granule(granule):
def _is_third_party_granule(granule: str) -> bool:
return granule.startswith('S2') or granule.startswith('L')


def _make_sure_granules_exist(granules: Iterable[str], granule_metadata: list[dict]) -> None:
    """Raise GranuleValidationError if any requested granule is missing from the CMR results.

    Granules matching `_is_third_party_granule` are exempt from the check.
    """
    found_granules = [granule['name'] for granule in granule_metadata]
    not_found_granules = set(granules) - set(found_granules)
    not_found_granules = {granule for granule in not_found_granules if not _is_third_party_granule(granule)}
    if not_found_granules:
        raise GranuleValidationError(f'Some requested scenes could not be found: {", ".join(not_found_granules)}')


def check_dem_coverage(_, granule_metadata: list[dict]) -> None:
    """Raise GranuleValidationError if any granule footprint lacks DEM coverage."""
    bad_granules = [g['name'] for g in granule_metadata if not _has_sufficient_coverage(g['polygon'])]
    if bad_granules:
        raise GranuleValidationError(f'Some requested scenes do not have DEM coverage: {", ".join(bad_granules)}')


def check_same_burst_ids(job, _):
def check_same_burst_ids(job: dict, _) -> None:
refs = job['job_parameters']['reference']
secs = job['job_parameters']['secondary']
ref_ids = ['_'.join(ref.split('_')[1:3]) for ref in refs]
Expand All @@ -103,7 +102,7 @@ def check_same_burst_ids(job, _):
)


def check_valid_polarizations(job, _):
def check_valid_polarizations(job: dict, _) -> None:
polarizations = set(granule.split('_')[4] for granule in get_granules([job]))
if len(polarizations) > 1:
raise GranuleValidationError(
Expand All @@ -115,7 +114,7 @@ def check_valid_polarizations(job, _):
)


def check_not_antimeridian(_, granule_metadata):
def check_not_antimeridian(_, granule_metadata: list[dict]) -> None:
for granule in granule_metadata:
bbox = granule['polygon'].bounds
if abs(bbox[0] - bbox[2]) > 180.0 and bbox[0] * bbox[2] < 0.0:
Expand All @@ -126,21 +125,21 @@ def check_not_antimeridian(_, granule_metadata):
raise GranuleValidationError(msg)


def _format_points(point_string):
def _format_points(point_string: str) -> list:
converted_to_float = [float(x) for x in point_string.split(' ')]
points = [list(t) for t in zip(converted_to_float[1::2], converted_to_float[::2])]
return points


def _get_multipolygon_from_geojson(input_file: str) -> MultiPolygon:
    """Load a GeoJSON file located next to this module as a MultiPolygon.

    Reads the first feature's geometry; `.buffer(0)` is applied to repair
    any invalid geometry before assembling the result.
    """
    dem_file = Path(__file__).parent / input_file
    # dem_file is already a Path; no need to re-wrap it
    with dem_file.open() as f:
        shp = json.load(f)['features'][0]['geometry']
    polygons = [x.buffer(0) for x in shape(shp).buffer(0).geoms]  # type: ignore[attr-defined]
    return MultiPolygon(polygons)


def check_bounds_formatting(job, _):
def check_bounds_formatting(job: dict, _) -> None:
bounds = job['job_parameters']['bounds']
if bounds == [0.0, 0.0, 0.0, 0.0]:
raise BoundsValidationError('Invalid bounds. Bounds cannot be [0, 0, 0, 0].')
Expand All @@ -162,7 +161,7 @@ def bad_lon(lon):
)


def check_granules_intersecting_bounds(job, granule_metadata):
def check_granules_intersecting_bounds(job: dict, granule_metadata: list[dict]) -> None:
bounds = job['job_parameters']['bounds']
if bounds == [0.0, 0.0, 0.0, 0.0]:
raise BoundsValidationError('Invalid bounds. Bounds cannot be [0, 0, 0, 0].')
Expand All @@ -177,7 +176,7 @@ def check_granules_intersecting_bounds(job, granule_metadata):
raise GranuleValidationError(f'The following granules do not intersect the provided bounds: {bad_granules}.')


def check_same_relative_orbits(_, granule_metadata):
def check_same_relative_orbits(_, granule_metadata: list[dict]) -> None:
previous_relative_orbit = None
for granule in granule_metadata:
name_split = granule['name'].split('_')
Expand All @@ -204,7 +203,7 @@ def _convert_single_burst_jobs(jobs: list[dict]) -> list[dict]:
return jobs


def check_bounding_box_size(job: dict, _, max_bounds_area: float = 4.5):
def check_bounding_box_size(job: dict, _, max_bounds_area: float = 4.5) -> None:
bounds = job['job_parameters']['bounds']

bounds_area = (bounds[3] - bounds[1]) * (bounds[2] - bounds[0])
Expand Down
2 changes: 1 addition & 1 deletion apps/check-processing-time/src/check_processing_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ def get_time_from_result(result: list | dict) -> list | float:
return (result['StoppedAt'] - result['StartedAt']) / 1000


def lambda_handler(event: dict, _) -> list | float:
    """Compute processing time(s) from a job's processing results.

    Results are ordered by their sorted keys before being passed to
    `get_time_from_result`.
    """
    processing_results = event['processing_results']
    result_list = [processing_results[key] for key in sorted(processing_results.keys())]
    return get_time_from_result(result_list)
3 changes: 1 addition & 2 deletions apps/get-files/src/get_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import urllib.parse
from datetime import datetime
from os import environ
from os.path import basename
from pathlib import Path

import boto3
Expand Down Expand Up @@ -70,7 +69,7 @@ def organize_files(files_dict, bucket):
'download_url': download_url,
'file_type': file_type,
'size': item['Size'],
'filename': basename(item['Key']),
'filename': Path(item['Key']).name,
's3': {
'bucket': bucket,
'key': item['Key'],
Expand Down
4 changes: 2 additions & 2 deletions apps/handle-batch-event/src/handle_batch_event.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import dynamo


def validate_field(actual: object, expected: object, field_name: str) -> None:
    """Raise ValueError if `actual` does not equal `expected` for the named field.

    Args:
        actual: Value observed in the event.
        expected: Value the event is required to have.
        field_name: Name used in the error message.

    Raises:
        ValueError: If the values differ.
    """
    if actual != expected:
        raise ValueError(f"Expected {field_name} '{expected}' but got '{actual}'.")


def lambda_handler(event, context):
def lambda_handler(event: dict, _) -> None:
validate_field(event['source'], 'aws.batch', 'source')
validate_field(event['detail-type'], 'Batch Job State Change', 'detail-type')
validate_field(event['detail']['status'], 'RUNNING', 'status')
Expand Down
Loading

0 comments on commit 35a51c1

Please sign in to comment.