From 89a885df4f0b71fd52991c79a40a9f966e4733d5 Mon Sep 17 00:00:00 2001 From: msm-cert <156842376+msm-cert@users.noreply.github.com> Date: Tue, 10 Sep 2024 09:28:23 +0000 Subject: [PATCH] Implement mongodb query syntax for task filters (#258) --- docs/advanced_concepts.rst | 73 +++++++ docs/task_headers_payloads.rst | 81 ++++++-- karton/core/karton.py | 4 + karton/core/query.py | 350 +++++++++++++++++++++++++++++++++ karton/core/task.py | 73 +------ karton/system/system.py | 8 +- tests/test_core.py | 2 +- tests/test_task_filters.py | 261 ++++++++++++++++++++++++ 8 files changed, 764 insertions(+), 88 deletions(-) create mode 100644 karton/core/query.py diff --git a/docs/advanced_concepts.rst b/docs/advanced_concepts.rst index 3d75953d..a5e6ed42 100644 --- a/docs/advanced_concepts.rst +++ b/docs/advanced_concepts.rst @@ -246,3 +246,76 @@ You can enable it by setting: - :code:`KARTON_KARTON_DEBUG` environment value to "1" - :code:`debug` parameter to `1` in the :code:`[karton]` config section - :code:`--debug` command-line parameter + + +Negated filter patterns +----------------------- + +.. versionadded:: 5.4.1 + +There is one more pattern syntax, not documented in the :code:`Filter Patterns` section anymore. +It is possible to define a negated filter, and they are handled in a special way. For example let's consider following filters: + +.. code-block:: python + + # Special ("old style") negation + [ + {"foo": "bar", "platform": "!linux"}, + {"foo": "bar", "platform": "!windows"}, + ] + +Depending on how you think this should work, this may have a surprising behavior. In particular this is **not** equivalent to: + +.. code-block:: python + + # Regular ("new style") negation (this is intentionally WRONG, see below) + [ + {"foo": "bar", "platform": {"$not": "linux"}}, + {"foo": "bar", "platform": {"$not": "windows"}}, + ] + +That's because negated "old style" filters are handled in a very special way, but :code:`$not` is not. 
Let's use the following task as an example: + +.. code-block:: python + + { + "foo": "bar", + "platform": "linux" + } + +Recall that filters are checked top to bottom, and if at least one pattern matches, the task will be accepted by a consumer. +Using regular ("new style") patterns, the matching will proceed as follows: + +- Check against the first filter: :code:`foo` matches, but the filter explicitly rejects tasks with :code:`platform: linux`. +- Check against the second filter: :code:`foo` matches, and the platform - :code:`linux` - is not equal to :code:`windows`, so the task is accepted. + +Whoops! This is probably not what the programmer intended. In comparison, "old style" filters will always reject a task if it matches at least one negated filter. +This sounds nice, but, like every special case, it may cause unpleasant surprises. This is especially true when combining "old style" and "new style" patterns. +That's why it's currently recommended to only use "new style" filters - they do everything "old style" filters can, and much more. + +In this case, the proper way to get the desired behavior with "new-style" filters is: + +.. code-block:: python + + # Regular ("new style") negation + [ + { + "foo": "bar", + "platform": {"$not": {"$or": ["linux", "windows"]}}, + } + ] + +It's a bit more verbose, but at least it should be very clear what is happening: We want :code:`foo` equal to :code:`bar`, and :code:`platform` **not** equal to either :code:`windows` or :code:`linux`. +In this case there are no special cases, and matching checks every filter top to bottom independently, as usual. + +.. warning:: + + "Old style" negations are only supported at the top-level! Combining them with "new style" filters will not work. Exclamation mark is not considered a special character in this case. + + In fact, we're not even sure how :code:`{"$or": ["!windows", "!linux"]}` *should* behave. + +..
note:: + + Since "new style" patterns were introduced in Karton version 5.4.1, "old style" negations are not recommended and should be considered deprecated. + + Nevertheless, Karton still supports them and they will keep working indefinitely. So don't worry, there are no breaking changes here. diff --git a/docs/task_headers_payloads.rst b/docs/task_headers_payloads.rst index 38fbe862..a79813c9 100644 --- a/docs/task_headers_payloads.rst +++ b/docs/task_headers_payloads.rst @@ -88,12 +88,10 @@ Starting from 5.0.0, consumer filters support basic wildcards and exclusions. Pattern Meaning ------------------------ ------------------------------------------------------------------------------ ``{"foo": "bar"}`` matches 'bar' value of 'foo' header -``{"foo": "!bar"}`` matches any value other than 'bar' in 'foo' header ``{"foo": "ba?"}`` matches 'ba' value followed by any character ``{"foo": "ba*"}`` matches 'ba' value followed by any substring (including empty) ``{"foo": "ba[rz]"}`` matches 'ba' value followed by 'r' or 'z' character ``{"foo": "ba[!rz]"}`` matches 'ba' value followed by any character other than 'r' or 'z' -``{"foo": "!ba[!rz]"}`` matches any value of 'foo' header that doesn't match to the "bar[!rz]" pattern ======================== ============================================================================== Filter logic can be used to fulfill specific use-cases: @@ -104,27 +102,78 @@ Filter logic can be used to fulfill specific use-cases: ``[]`` matches no tasks (no headers allowed). Can be used to turn off queue and consume tasks left. ``[{}]`` matches any task (no header conditions). Can be used to intercept all tasks incoming to Karton. ``[{"foo": "bar"}, {"foo": "baz"}]`` 'foo' header is required and must have 'bar' or 'baz' value. -``[{"foo": "!*"}]`` 'foo' header must be not defined. 
==================================== ============================================================================== -Excluding (negated) filters come with specific corner-cases. Regular filters require specific value to be defined in header, while -negated filters are accepting all possible values except specified in filter. +.. versionadded:: 5.4.1 -================================================================================== ============================================================================================================================================= - ``filters`` value Meaning ----------------------------------------------------------------------------------- --------------------------------------------------------------------------------------------------------------------------------------------- -``[{"type": "sample", "stage": "!*"}]`` matches only tasks that have type 'sample' but no 'stage' key -``[{"platform": "!linux"}, {"platform": "!windows"}]`` matches **all** tasks (even with no headers) but not these with platform 'linux' or 'windows' -``[{"foo": "bar", "platform": "!linux"}, {"foo": "bar", "platform": "!windows"}]`` 'foo' header is required and must have 'bar' value, but platform can't be 'linux' or 'windows' -``[{"foo": "bar", "platform": "!linux"}, {"foo": "baz", "platform": "!windows"}]`` 'foo' header is required and must have 'bar' value and no 'linux' in platform key, or foo must be 'baz', but then platform can't be 'windows' -================================================================================== ============================================================================================================================================= +Sometimes a more flexible behavior is necessary. This should be done with caution, as Karton can handle quite complex +workflows without resorting to this. The need to use complex task filtering rules may mean that one is doing something not in the "spirit" of Karton. 
+ +The advanced filter syntax is based on MongoDB syntax. See `MongoDB documentation <https://www.mongodb.com/docs/manual/reference/operator/query/>`_ +for a detailed explanation. + +In case of Karton, the following operators are allowed: + +- Comparison: :code:`$eq`, :code:`$ne`, :code:`$gt`, :code:`$gte`, :code:`$lt`, :code:`$lte` +- Logical: :code:`$and`, :code:`$or`, :code:`$not`, :code:`$nor` +- Array: :code:`$in`, :code:`$nin`, :code:`$all`, :code:`$elemMatch`, :code:`$size` +- Miscellaneous: :code:`$type`, :code:`$mod`, :code:`$regex`, :code:`$exists` + +For some concrete examples, consider these filters: + +.. code-block:: python + + filters = [ + { # checks if `version` header is a number greater than 3 + "type": "sample", + "version": {"$gt": 3}, + }, + { # checks if `tags` header contains both "emotet" and "dump" + "type": "sample", + "tags": {"$all": ["emotet", "dump"]}, + }, + { # checks if `platform` header is either "win32" or "linux" + "type": "sample", + "platform": {"$in": ["win32", "linux"]}, + }, + { # checks if `respect` header contains a prime number of letters "f" + "type": "sample", + "respect": {"$not": {"$regex": r"^f?$|^(ff+?)\1+$"}} + }, + ] .. warning:: - It's recommended to use only strings in filter and header values + Filter styles don't mix well, and wildcard patterns only work at the top level. + For example, the following won't work as expected: + + .. code-block:: python + + filters = [ + { "version": {"$or": ["win*", "linux*"]} }, + ] + + Instead you have to use regex explicitly: + + .. code-block:: python + + filters = [{ + "version": { + "$or": [ + {"$regex": "^win.*"}, + {"$regex": "^linux.*"}, + ], + } + }] + + Or just: + + .. code-block:: python + + filters = [ + { "version": {"$regex": "^win.*|^linux.*"} }, + ] - Although some of non-string types are allowed, they will be converted to string for comparison - which may lead to unexpected results.
Task payload ------------ diff --git a/karton/core/karton.py b/karton/core/karton.py index a6d7e38f..5d819b9c 100644 --- a/karton/core/karton.py +++ b/karton/core/karton.py @@ -8,6 +8,7 @@ import traceback from typing import Any, Callable, Dict, List, Optional, Tuple, cast +from . import query from .__version__ import __version__ from .backend import KartonBackend, KartonBind, KartonMetrics from .base import KartonBase, KartonServiceBase @@ -122,6 +123,9 @@ def __init__( if self.filters is None: raise ValueError("Cannot bind consumer on Empty binds") + # Dummy conversion to make sure the filters are well-formed. + query.convert(self.filters) + self.persistent = ( self.config.getboolean("karton", "persistent", self.persistent) and not self.debug diff --git a/karton/core/query.py b/karton/core/query.py new file mode 100644 index 00000000..253f8e1a --- /dev/null +++ b/karton/core/query.py @@ -0,0 +1,350 @@ +import fnmatch +import re +from collections.abc import Mapping, Sequence +from typing import Dict, Type + +# Source code adopted from https://github.com/kapouille/mongoquery +# Original licenced under "The Unlicense" license. 
+ + +class QueryError(Exception): + """Query error exception""" + + pass + + +class _Undefined(object): + pass + + +def is_non_string_sequence(entry): + """Returns True if entry is a Python sequence iterable, and not a string""" + return isinstance(entry, Sequence) and not isinstance(entry, str) + + +class Query(object): + """The Query class is used to match an object against a MongoDB-like query""" + + def __init__(self, definition): + self._definition = definition + + def match(self, entry): + """Matches the entry object against the query specified on instanciation""" + return self._match(self._definition, entry) + + def _match(self, condition, entry): + if isinstance(condition, Mapping): + return all( + self._process_condition(sub_operator, sub_condition, entry) + for sub_operator, sub_condition in condition.items() + ) + if is_non_string_sequence(entry): + return condition in entry + return condition == entry + + def _extract(self, entry, path): + if not path: + return entry + if entry is None: + return entry + if is_non_string_sequence(entry): + try: + index = int(path[0]) + return self._extract(entry[index], path[1:]) + except ValueError: + return [self._extract(item, path) for item in entry] + elif isinstance(entry, Mapping) and path[0] in entry: + return self._extract(entry[path[0]], path[1:]) + else: + return _Undefined() + + def _path_exists(self, operator, condition, entry): + keys_list = list(operator.split(".")) + for i, k in enumerate(keys_list): + if isinstance(entry, Sequence) and not k.isdigit(): + for elem in entry: + operator = ".".join(keys_list[i:]) + if self._path_exists(operator, condition, elem) == condition: + return condition + return not condition + elif isinstance(entry, Sequence): + k = int(k) + try: + entry = entry[k] + except (TypeError, IndexError, KeyError): + return not condition + return condition + + def _process_condition(self, operator, condition, entry): + if isinstance(condition, Mapping) and "$exists" in condition: + if 
isinstance(operator, str) and operator.find(".") != -1: + return self._path_exists(operator, condition["$exists"], entry) + elif condition["$exists"] != (operator in entry): + return False + elif tuple(condition.keys()) == ("$exists",): + return True + if isinstance(operator, str): + if operator.startswith("$"): + try: + return getattr(self, "_" + operator[1:])(condition, entry) + except AttributeError: + raise QueryError(f"{operator} operator isn't supported") + else: + try: + extracted_data = self._extract(entry, operator.split(".")) + except IndexError: + extracted_data = _Undefined() + else: + if operator not in entry: + return False + extracted_data = entry[operator] + return self._match(condition, extracted_data) + + @staticmethod + def _not_implemented(*_): + raise NotImplementedError + + @staticmethod + def _noop(*_): + return True + + @staticmethod + def _eq(condition, entry): + try: + return entry == condition + except TypeError: + return False + + @staticmethod + def _gt(condition, entry): + try: + return entry > condition + except TypeError: + return False + + @staticmethod + def _gte(condition, entry): + try: + return entry >= condition + except TypeError: + return False + + @staticmethod + def _in(condition, entry): + if is_non_string_sequence(condition): + for elem in condition: + if is_non_string_sequence(entry) and elem in entry: + return True + elif not is_non_string_sequence(entry) and elem == entry: + return True + return False + else: + raise TypeError("condition must be a list") + + @staticmethod + def _lt(condition, entry): + try: + return entry < condition + except TypeError: + return False + + @staticmethod + def _lte(condition, entry): + try: + return entry <= condition + except TypeError: + return False + + @staticmethod + def _ne(condition, entry): + return entry != condition + + def _nin(self, condition, entry): + return not self._in(condition, entry) + + def _and(self, condition, entry): + if isinstance(condition, Sequence): + return 
all(self._match(sub_condition, entry) for sub_condition in condition) + raise QueryError(f"$and has been attributed incorrect argument {condition}") + + def _nor(self, condition, entry): + if isinstance(condition, Sequence): + return all( + not self._match(sub_condition, entry) for sub_condition in condition + ) + raise QueryError(f"$nor has been attributed incorrect argument {condition}") + + def _not(self, condition, entry): + return not self._match(condition, entry) + + def _or(self, condition, entry): + if isinstance(condition, Sequence): + return any(self._match(sub_condition, entry) for sub_condition in condition) + raise QueryError(f"$or has been attributed incorrect argument {condition}") + + @staticmethod + def _type(condition, entry): + bson_type: Dict[int, Type] = { + 1: float, + 2: str, + 3: Mapping, + 4: Sequence, + 5: bytearray, + 7: str, # object id (uuid) + 8: bool, + 9: str, # date (UTC datetime) + 10: type(None), + 11: re.Pattern, # regex, + 13: str, # Javascript + 15: str, # JavaScript (with scope) + 16: int, # 32-bit integer + 17: int, # Timestamp + 18: int, # 64-bit integer + } + bson_alias = { + "double": 1, + "string": 2, + "object": 3, + "array": 4, + "binData": 5, + "objectId": 7, + "bool": 8, + "date": 9, + "null": 10, + "regex": 11, + "javascript": 13, + "javascriptWithScope": 15, + "int": 16, + "timestamp": 17, + "long": 18, + } + + if condition == "number": + return any( + [ + isinstance(entry, bson_type[bson_alias[alias]]) + for alias in ["double", "int", "long"] + ] + ) + + # resolves bson alias, or keeps original condition value + condition = bson_alias.get(condition, condition) + + if condition not in bson_type: + raise QueryError(f"$type has been used with unknown type {condition}") + + return isinstance(entry, bson_type[condition]) + + _exists = _noop + + @staticmethod + def _mod(condition, entry): + return entry % condition[0] == condition[1] + + @staticmethod + def _regex(condition, entry): + if not isinstance(entry, str): + 
return False + # If the caller has supplied a compiled regex, assume options are already + # included. + if isinstance(condition, re.Pattern): + return bool(re.search(condition, entry)) + + try: + regex = re.match(r"\A/(.+)/([imsx]{,4})\Z", condition, flags=re.DOTALL) + except TypeError: + raise QueryError( + f"{condition} is not a regular expression and should be a string" + ) + + flags = 0 + if regex: + options = regex.group(2) + for option in options: + flags |= getattr(re, option.upper()) + exp = regex.group(1) + else: + exp = condition + + try: + match = re.search(exp, entry, flags=flags) + except Exception as error: + raise QueryError(f"{condition} failed to execute with error {error!r}") + return bool(match) + + _options = _text = _where = _not_implemented + + def _all(self, condition, entry): + return all(self._match(item, entry) for item in condition) + + def _elemMatch(self, condition, entry): + if not isinstance(entry, Sequence): + return False + return any( + all( + self._process_condition(sub_operator, sub_condition, element) + for sub_operator, sub_condition in condition.items() + ) + for element in entry + ) + + @staticmethod + def _size(condition, entry): + if not isinstance(condition, int): + raise QueryError( + f"$size has been attributed incorrect argument {condition}" + ) + + if is_non_string_sequence(entry): + return len(entry) == condition + + return False + + def __repr__(self): + return f"" + + +def toregex(wildcard): + if not isinstance(wildcard, str): + raise QueryError(f"Unexpected value in the regex conversion: {wildcard}") + # If is not neessary, but we avoid unnecessary regular expressions. + if any(c in wildcard for c in "?*[]!"): + return {"$regex": fnmatch.translate(wildcard)} + return wildcard + + +def convert(filters): + """Convert filters to the mongo query syntax. + A special care is taken to handle old-style negative filters correctly + """ + # Negative_filters are old-style negative assertions, and behave differently. 
+ # See issue #246 for the original bug report. + # + # For a short example: + # [{"platform": "!win32"}, {"platform": "!linux"}] + # will match all non-linux non-windows samples, but: + # [{"platform": {"$not": "win32"}}, {"platform": {"$not": "linux"}}] + # means `platform != "win32" or "platform != "linux"` and will match everything. + # To get equivalent behaviour with mongo syntax, you should use: + # [{"platform": {"$not": {"$or": ["win32", "linux"]}}}] + regular_filter, negative_filter = [], [] + for rule in filters: + positive_checks, negative_checks = [], [] + for key, value in rule.items(): + if isinstance(value, str): + if value and value[0] == "!": # negative check + negative_checks.append({key: toregex(value[1:])}) + else: + positive_checks.append({key: toregex(value)}) + else: + positive_checks.append({key: value}) + regular_filter.append({"$and": positive_checks}) + negative_filter.append({"$and": positive_checks + [{"$or": negative_checks}]}) + return Query( + { + "$and": [ + {"$not": {"$or": negative_filter}}, + {"$or": regular_filter}, + ] + } + ) diff --git a/karton/core/task.py b/karton/core/task.py index 0c072d83..4d8c34bb 100644 --- a/karton/core/task.py +++ b/karton/core/task.py @@ -1,5 +1,4 @@ import enum -import fnmatch import json import time import uuid @@ -16,6 +15,7 @@ Union, ) +from . import query from .resource import RemoteResource, ResourceBase from .utils import recursive_iter, recursive_iter_with_keys, recursive_map @@ -223,75 +223,8 @@ def process(self, task: Task) -> None: return new_task def matches_filters(self, filters: List[Dict[str, Any]]) -> bool: - """ - Checks whether provided task headers match filters - - :param filters: Task header filters - :return: True if task headers match specific filters - - :meta private: - """ - - def test_filter(headers: Dict[str, Any], filter: Dict[str, Any]) -> int: - """ - Filter match follows AND logic, but it's non-boolean because filters may be - negated (task:!platform). 
- - Result values are as follows: - - 1 - positive match, no mismatched values in headers - (all matched) - - 0 - no match, found value that doesn't match to the filter - (some are not matched) - - -1 - negative match, found value that matches negated filter value - (all matched but found negative matches) - """ - matches = 1 - for filter_key, filter_value in filter.items(): - # Coerce filter value to string - filter_value_str = str(filter_value) - negated = False - if filter_value_str.startswith("!"): - negated = True - filter_value_str = filter_value_str[1:] - - # If expected key doesn't exist in headers - if filter_key not in headers: - # Negated filter ignores non-existent values - if negated: - continue - # But positive filter doesn't - return 0 - - # Coerce header value to string - header_value_str = str(headers[filter_key]) - # fnmatch is great for handling simple wildcard patterns (?, *, [abc]) - match = fnmatch.fnmatchcase(header_value_str, filter_value_str) - # If matches, but it's negated: it's negative match - if match and negated: - matches = -1 - # If doesn't match but filter is not negated: it's not a match - if not match and not negated: - return 0 - # If there are no mismatched values: filter is matched - return matches - - # List of filter matches follow OR logic, but -1 is special - # If there is any -1, result is False - # (any matched, but it's negative match) - # If there is any 1, but no -1's: result is True - # (any matched, no negative match) - # If there are only 0's: result is False - # (none matched) - matches = False - for task_filter in filters: - match_result = test_filter(self.headers, task_filter) - if match_result == -1: - # Any negative match results in False - return False - if match_result == 1: - # Any positive match but without negative matches results in True - matches = True - return matches + """Check if a task matches the given filters""" + return query.convert(filters).match(self.headers) def set_task_parent(self, parent: 
"Task"): """ diff --git a/karton/system/system.py b/karton/system/system.py index 0947d0e8..3d6b14b2 100644 --- a/karton/system/system.py +++ b/karton/system/system.py @@ -3,6 +3,7 @@ import time from typing import List, Optional +from karton.core import query from karton.core.__version__ import __version__ from karton.core.backend import ( KARTON_OPERATIONS_QUEUE, @@ -175,7 +176,12 @@ def route_task(self, task: Task, binds: List[KartonBind]) -> None: pipe = self.backend.make_pipeline() for bind in binds: identity = bind.identity - if task.matches_filters(bind.filters): + try: + is_match = task.matches_filters(bind.filters) + except query.QueryError: + self.log.error("Task matching failed - invalid filters?") + continue + if is_match: routed_task = task.fork_task() routed_task.status = TaskState.SPAWNED routed_task.last_update = time.time() diff --git a/tests/test_core.py b/tests/test_core.py index c560e290..831c534d 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -38,7 +38,7 @@ def test_missing_config_file(self, mock_isfile, mock_parser): """ Test missing config file """ mock_isfile.return_value = False with self.assertRaises(IOError): - cfg = Config("this_file_doesnt_exist") + Config("this_file_doesnt_exist") @patch('os.path.isfile', lambda path: True) @patch('builtins.open', mock_open(read_data=MOCK_CONFIG)) diff --git a/tests/test_task_filters.py b/tests/test_task_filters.py index 2bd8599e..0a722870 100644 --- a/tests/test_task_filters.py +++ b/tests/test_task_filters.py @@ -261,3 +261,264 @@ def test_negated_filter_for_different_type(self): "platform": "win64" }) self.assertFalse(task_sample_win64.matches_filters(filters)) + + def test_list_contains(self): + filters = [ + { + "type": "sample", + "platform": {"$in": ["win32", "linux"]}, + }, + ] + + task_sample = Task(headers={ + "type": "sample", + "platform": "win32" + }) + self.assertTrue(task_sample.matches_filters(filters)) + + task_different_win32 = Task(headers={ + "type": "sample", + 
"platform": "linux" + }) + self.assertTrue(task_different_win32.matches_filters(filters)) + + task_different_win64 = Task(headers={ + "type": "different", + "platform": "win32" + }) + self.assertFalse(task_different_win64.matches_filters(filters)) + + def test_element_is_contained(self): + filters = [ + { + "type": "sample", + "tags": "emotet", + }, + ] + + task_sample = Task(headers={ + "type": "sample", + "tags": ["emotet"], + }) + self.assertTrue(task_sample.matches_filters(filters)) + + task_sample = Task(headers={ + "type": "sample", + "tags": ["emotet", "dump"], + }) + self.assertTrue(task_sample.matches_filters(filters)) + + task_sample = Task(headers={ + "type": "sample", + "tags": ["nymaim", "dump"], + }) + self.assertFalse(task_sample.matches_filters(filters)) + + def test_multiple_elements_are_contained(self): + filters = [ + { + "type": "sample", + "tags": {"$all": ["emotet", "dump"]}, + }, + ] + + task_sample = Task(headers={ + "type": "sample", + "tags": ["emotet"], + }) + self.assertFalse(task_sample.matches_filters(filters)) + + task_sample = Task(headers={ + "type": "sample", + "tags": ["emotet", "dump"], + }) + self.assertTrue(task_sample.matches_filters(filters)) + + task_sample = Task(headers={ + "type": "sample", + "tags": ["emotet", "dump", "needs-inspection"], + }) + self.assertTrue(task_sample.matches_filters(filters)) + + task_sample = Task(headers={ + "type": "sample", + "tags": ["nymaim", "dump"], + }) + self.assertFalse(task_sample.matches_filters(filters)) + + def test_comparison(self): + filters = [ + { + "type": "sample", + "version": {"$gt": 3}, + }, + ] + + task_sample = Task(headers={ + "type": "sample", + "version": 2, + }) + self.assertFalse(task_sample.matches_filters(filters)) + + task_sample = Task(headers={ + "type": "sample", + "version": 4, + }) + self.assertTrue(task_sample.matches_filters(filters)) + + def test_basic_wildcard(self): + filters = [ + { + "type": "sample", + "platform": "win*", + }, + ] + + task_sample = 
Task(headers={ + "type": "sample", + "platform": "linux", + }) + self.assertFalse(task_sample.matches_filters(filters)) + + task_sample = Task(headers={ + "type": "sample", + "platform": "win32", + }) + self.assertTrue(task_sample.matches_filters(filters)) + + task_sample = Task(headers={ + "type": "sample", + "platform": "win", + }) + self.assertTrue(task_sample.matches_filters(filters)) + + def test_regex_match(self): + filters = [ + { + "type": "sample", + "platform": {"$regex": "win.*"} + }, + ] + + task_sample = Task(headers={ + "type": "sample", + "platform": "linux", + }) + self.assertFalse(task_sample.matches_filters(filters)) + + task_sample = Task(headers={ + "type": "sample", + "platform": "win32", + }) + self.assertTrue(task_sample.matches_filters(filters)) + + task_sample = Task(headers={ + "type": "sample", + "platform": "win", + }) + self.assertTrue(task_sample.matches_filters(filters)) + + task_sample = Task(headers={ + "type": "sample", + "platform": "karton keeps on winning", + }) + # no anchors in the regex, so this should actually match + self.assertTrue(task_sample.matches_filters(filters)) + + def test_example_from_convert(self): + # Test for a literal example used in the convert method documentation + oldstyle = [{"platform": "!win32"}, {"platform": "!linux"}] + wrong = [{"platform": {"$not": "win32"}}, {"platform": {"$not": "linux"}}] + good = [{"platform": {"$not": {"$or": ["win32", "linux"]}}}] + + task_linux = Task(headers={ + "type": "sample", + "platform": "linux", + }) + task_win32 = Task(headers={ + "type": "sample", + "platform": "win32", + }) + task_macos = Task(headers={ + "type": "sample", + "platform": "macos", + }) + tasks = [task_linux, task_win32, task_macos] + + def assertExpect(tasks, filters, results): + for task, result in zip(tasks, results): + self.assertEqual(task.matches_filters(filters), result) + + assertExpect(tasks, oldstyle, [False, False, True]) + assertExpect(tasks, wrong, [True, True, True]) + 
assertExpect(tasks, good, [False, False, True]) + + def test_nested_oldstyle(self): + # Old-style wildcards, except negative filters, don't mix + filters = [ + { + "platform": {"$or": ["win*", "linux*"]} + }, + ] + + task_sample = Task(headers={ + "platform": "linux", + }) + self.assertFalse(task_sample.matches_filters(filters)) + + task_sample = Task(headers={ + "platform": "linux*", + }) + self.assertTrue(task_sample.matches_filters(filters)) + + def test_newstyle_flip(self): + # It's not recommended, but mongo syntax is allowed at the top level too + # Pointless example: match platform:win32 or kind:runnable + filters = [ + { + "$or": [{"platform": "win32"}, {"kind": "runnable"}], + }, + ] + + task_sample = Task( + headers={"platform": "linux", "kind": "runnable"} + ) + self.assertTrue(task_sample.matches_filters(filters)) + + task_sample = Task( + headers={"platform": "win32"} + ) + self.assertTrue(task_sample.matches_filters(filters)) + + task_sample = Task( + headers={"platform": "linux"} + ) + self.assertFalse(task_sample.matches_filters(filters)) + + def test_oldstyle_wildcards(self): + # Old-style wildcards, except negative filters, don't mix + filters = [{"foo": "ba[!rz]"}] + + task_sample = Task(headers={ + "foo": "bar", + }) + self.assertFalse(task_sample.matches_filters(filters)) + + task_sample = Task(headers={ + "foo": "bat", + }) + self.assertTrue(task_sample.matches_filters(filters)) + + def test_wildcards_anchored(self): + # Just to make sure matching is anchored at ^ and $. + filters = [{"foo": "bar"}] + + task_sample = Task(headers={ + "foo": "rabarbar", + }) + self.assertFalse(task_sample.matches_filters(filters)) + + task_sample = Task(headers={ + "foo": "bar", + }) + self.assertTrue(task_sample.matches_filters(filters))