diff --git a/Makefile b/Makefile index abac0bd..9837690 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,8 @@ SHELL=/bin/bash short_ver = 2.1.3 long_ver = $(shell git describe --long 2>/dev/null || echo $(short_ver)-0-unknown-g`git describe --always`) +USE_RE2=${USE_RE2} + all: py-egg PYTHON ?= python3 diff --git a/journalpump/journalpump.py b/journalpump/journalpump.py index abbd9c3..c8622e5 100644 --- a/journalpump/journalpump.py +++ b/journalpump/journalpump.py @@ -17,10 +17,15 @@ import fnmatch import json import logging -import re import select import time import uuid +import os + +if os.environ.get("USE_RE2"): + import re2 as re +else: + import re _5_MB = 5 * 1024 * 1024 CHUNK_SIZE = 5000 diff --git a/journalpump/senders/base.py b/journalpump/senders/base.py index 4c7d458..1b04caf 100644 --- a/journalpump/senders/base.py +++ b/journalpump/senders/base.py @@ -3,9 +3,15 @@ import logging import random -import re import sys import time +import os + +if os.environ.get("USE_RE2"): + import re2 as re +else: + import re + KAFKA_COMPRESSED_MESSAGE_OVERHEAD = 30 MAX_KAFKA_MESSAGE_SIZE = 1024**2 # 1 MiB diff --git a/journalpump/senders/elasticsearch_opensearch_sender.py b/journalpump/senders/elasticsearch_opensearch_sender.py index d377901..e68a1cb 100644 --- a/journalpump/senders/elasticsearch_opensearch_sender.py +++ b/journalpump/senders/elasticsearch_opensearch_sender.py @@ -10,7 +10,6 @@ import enum import json -import re import time @@ -72,8 +71,6 @@ def create(*, sender_type: SenderType, config: Dict[str, Any]) -> "Config": class _EsOsLogSenderBase(LogSender): _DEFAULT_MAX_SENDER_INTERVAL = 10.0 - _INDICIES_URL_REDACTION_REGEXP = r"(\w*?://[A-Za-z0-9\-._~%!$&'()*+,;=]*)(:)([A-Za-z0-9\-._~%!$&'()*+,;=]*)(@)" - _ONE_HOUR_LAST_INDEX_CHECK = 3600 _SUCCESS_HTTP_STATUSES = {HTTPStatus.OK, HTTPStatus.CREATED} @@ -173,8 +170,7 @@ def send_messages(self, *, messages, cursor) -> bool: try: es_available = self._load_indices() if not es_available: - redacted_url = re.sub(self._INDICIES_URL_REDACTION_REGEXP, r"\1\2[REDACTED]\4", self._indices_url) - self.log.warning("Waiting for connection to %s for %s", redacted_url, self.name) + self.log.warning("Waiting for connection for %s", self.name) self._backoff() return False for msg in messages: diff --git a/requirements.txt b/requirements.txt index f175b49..29dcecf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,4 @@ google-auth geoip2 https://github.com/systemd/python-systemd/zipball/master typing-extensions +google-re2 diff --git a/systest/test_rsyslog.py b/systest/test_rsyslog.py index b3da4eb..e741755 100644 --- a/systest/test_rsyslog.py +++ b/systest/test_rsyslog.py @@ -13,11 +13,16 @@ import logging.handlers import os import random -import re import socket import string import threading +if os.environ.get("USE_RE2"): + import re2 as re +else: + import re + + RSYSLOGD = "/usr/sbin/rsyslogd" RSYSLOGD_TCP_CONF = """ diff --git a/test/test_journalpump.py b/test/test_journalpump.py index f0df82f..57a2da8 100644 --- a/test/test_journalpump.py +++ b/test/test_journalpump.py @@ -30,9 +30,13 @@ import botocore.session import json import pytest -import re import responses +import os +if os.environ.get("USE_RE2"): + import re2 as re +else: + import re def test_journalpump_init(tmpdir): # pylint: disable=too-many-statements # Logplex sender