Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable S rule #1857

Merged
merged 7 commits into from
Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .ruff.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
select = ["A", "ASYNC", "I", "E", "F", "B", "C4", "T10", "T20"]
select = ["A", "ASYNC", "I", "E", "F", "B", "C4", "T10", "T20", "S"]
ignore = ["E501"]

# Allow autofix for all enabled rules (when `--fix` is provided).
Expand All @@ -16,7 +16,8 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
target-version = "py310"

[per-file-ignores]
"tests/*" = ["B017"]
"tests/*" = ["B017", "S101", "S"]
"connectors/*" = ["S608"]

[isort]
known-first-party=["connectors", "tests"]
23 changes: 22 additions & 1 deletion connectors/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from connectors.preflight_check import PreflightCheck
from connectors.services import get_services
from connectors.source import get_source_klass, get_source_klasses
from connectors.utils import get_event_loop

__all__ = ["main"]

Expand Down Expand Up @@ -128,6 +127,28 @@ async def _start_service(actions, config, loop):
return await multiservice.run()


def get_event_loop(uvloop=False):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of curiosity, why did get_event_loop have to be moved?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because it had a try/except whose `except` clause did nothing. I've added a logger statement there and moved the function to the only place where it is used.

if uvloop:
# activate uvloop if lib is present
try:
import uvloop

asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
except Exception:
logger.warning(
"Unable to enable uvloop: {e}. Running with default event loop"
)
pass
try:
loop = asyncio.get_running_loop()
except RuntimeError:
loop = asyncio.get_event_loop_policy().get_event_loop()
if loop is None:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
return loop


def run(args):
"""Loads the config file, sets the logger and executes an action.

Expand Down
5 changes: 2 additions & 3 deletions connectors/sources/directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,14 @@
Demo of a standalone source
"""
import functools
import hashlib
import os
from datetime import datetime, timezone
from pathlib import Path

import aiofiles

from connectors.source import BaseDataSource
from connectors.utils import TIKA_SUPPORTED_FILETYPES, get_base64_value
from connectors.utils import TIKA_SUPPORTED_FILETYPES, get_base64_value, hash_id

DEFAULT_DIR = os.environ.get("SYSTEM_DIR", os.path.dirname(__file__))

Expand Down Expand Up @@ -57,7 +56,7 @@ async def changed(self):
return True

def get_id(self, path):
    """Return the document id for ``path``.

    The id is produced by passing the string form of ``path`` to the
    shared ``hash_id`` helper imported from ``connectors.utils``.
    """
    return hash_id(str(path))

async def _download(self, path, timestamp=None, doit=None):
if not (doit and os.path.splitext(path)[-1] in TIKA_SUPPORTED_FILETYPES):
Expand Down
2 changes: 1 addition & 1 deletion connectors/sources/dropbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@


class EndpointName(Enum):
ACCESS_TOKEN = "access_token"
ACCESS_TOKEN = "access_token" # noqa S105
PING = "ping"
CHECK_PATH = "check_path"
FILES_FOLDERS = "files_folders"
Expand Down
2 changes: 1 addition & 1 deletion connectors/sources/microsoft_teams.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
GRAPH_ACQUIRE_TOKEN_URL = override_url
else:
GRAPH_API_AUTH_URL = "https://login.microsoftonline.com"
GRAPH_ACQUIRE_TOKEN_URL = "https://graph.microsoft.com/.default"
GRAPH_ACQUIRE_TOKEN_URL = "https://graph.microsoft.com/.default" # noqa S105
BASE_URL = "https://graph.microsoft.com/v1.0"

SCOPE = [
Expand Down
2 changes: 1 addition & 1 deletion connectors/sources/outlook.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,7 @@ async def get_mails(self, account):
folder_object = (
account.root / "Top of Information Store" / "Archive"
)
except Exception:
except Exception: # noqa S112
continue
else:
folder_object = getattr(account, mail_type["folder"])
Expand Down
4 changes: 2 additions & 2 deletions connectors/sources/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import os
from contextlib import AsyncExitStack
from functools import partial
from hashlib import md5

import aioboto3
from aiobotocore.config import AioConfig
Expand All @@ -17,6 +16,7 @@

from connectors.logger import logger, set_extra_logger
from connectors.source import BaseDataSource
from connectors.utils import hash_id

DEFAULT_PAGE_SIZE = 100
DEFAULT_MAX_RETRY_ATTEMPTS = 5
Expand Down Expand Up @@ -190,7 +190,7 @@ async def format_document(self, bucket_name, bucket_object):
document: Modified document.
"""

doc_id = md5(f"{bucket_name}/{bucket_object.key}".encode("utf8")).hexdigest()
doc_id = hash_id(f"{bucket_name}/{bucket_object.key}")
owner = await bucket_object.owner
document = {
"_id": doc_id,
Expand Down
2 changes: 1 addition & 1 deletion connectors/sources/salesforce.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

BASE_URL = "https://<domain>.my.salesforce.com"
API_VERSION = "v58.0"
TOKEN_ENDPOINT = "/services/oauth2/token"
TOKEN_ENDPOINT = "/services/oauth2/token" # noqa S105
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wild that it thinks this is a password 🤷🏻

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I think the word "TOKEN" triggered it

QUERY_ENDPOINT = f"/services/data/{API_VERSION}/query"
DESCRIBE_ENDPOINT = f"/services/data/{API_VERSION}/sobjects"
DESCRIBE_SOBJECT_ENDPOINT = f"/services/data/{API_VERSION}/sobjects/<sobject>/describe"
Expand Down
32 changes: 8 additions & 24 deletions connectors/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,14 +210,15 @@ def convert_to_b64(source, target=None, overwrite=False):
version = int(platform.mac_ver()[0].split(".")[0])
# MacOS 13 has changed base64 util
if version >= 13:
cmd = f"{_BASE64} -i {source} -o {temp_target}"
cmd = [_BASE64, "-i", source, "-o", temp_target]
else:
cmd = f"{_BASE64} {source} > {temp_target}"
cmd = [_BASE64, source, ">", temp_target]
else:
# In Linuces, avoid line wrapping
cmd = f"{_BASE64} -w 0 {source} > {temp_target}"
cmd = [_BASE64, "-w", "0", source, ">", temp_target]
logger.debug(f"Calling {cmd}")
subprocess.check_call(cmd, shell=True)
# TODO: make it more robust
subprocess.check_call(cmd) # noqa S603
else:
# Pure Python version
with open(source, "rb") as sf, open(temp_target, "wb") as tf:
Expand Down Expand Up @@ -408,25 +409,6 @@ def cancel(self):
task.cancel()


def get_event_loop(uvloop=False):
    """Return an asyncio event loop, optionally activating uvloop first.

    Args:
        uvloop: When truthy, try to import ``uvloop`` and install its event
            loop policy before looking up the loop. A failure is logged and
            the default event loop policy is kept.

    Returns:
        The currently running loop if there is one; otherwise the loop
        provided by the current event loop policy, or a newly created loop
        (also set as the current one) if the policy returns ``None``.
    """
    if uvloop:
        # activate uvloop if lib is present
        try:
            # NOTE: this import rebinds the local name ``uvloop`` (the flag)
            # to the module, which is fine because the flag was already read.
            import uvloop

            asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
        except Exception as e:
            # Best effort: keep going with the default loop, but say why.
            logger.warning(
                f"Unable to enable uvloop: {e}. Running with default event loop"
            )
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread; ask the policy for one.
        loop = asyncio.get_event_loop_policy().get_event_loop()
        if loop is None:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
    return loop


class RetryStrategy(Enum):
CONSTANT = 0
LINEAR_BACKOFF = 1
Expand Down Expand Up @@ -638,7 +620,9 @@ def get_pem_format(key, postfix="-----END CERTIFICATE-----"):

def hash_id(_id):
    """Return the hexadecimal MD5 digest of ``_id`` encoded as UTF-8.

    The digest is not used for anything security-related, only to generate
    pseudo-ids for documents. ``usedforsecurity=False`` states that in code,
    so no lint suppression for the insecure-hash rule is needed.

    Args:
        _id: The string to hash.

    Returns:
        A 32-character lowercase hexadecimal string.
    """
    # Collision probability: 1.47*10^-29
    return hashlib.md5(_id.encode("utf8"), usedforsecurity=False).hexdigest()


def truncate_id(_id):
Expand Down
Loading