From 89a885df4f0b71fd52991c79a40a9f966e4733d5 Mon Sep 17 00:00:00 2001
From: msm-cert <156842376+msm-cert@users.noreply.github.com>
Date: Tue, 10 Sep 2024 09:28:23 +0000
Subject: [PATCH] Implement mongodb query syntax for task filters (#258)
---
docs/advanced_concepts.rst | 73 +++++++
docs/task_headers_payloads.rst | 81 ++++++--
karton/core/karton.py | 4 +
karton/core/query.py | 350 +++++++++++++++++++++++++++++++++
karton/core/task.py | 73 +------
karton/system/system.py | 8 +-
tests/test_core.py | 2 +-
tests/test_task_filters.py | 261 ++++++++++++++++++++++++
8 files changed, 764 insertions(+), 88 deletions(-)
create mode 100644 karton/core/query.py
diff --git a/docs/advanced_concepts.rst b/docs/advanced_concepts.rst
index 3d75953d..a5e6ed42 100644
--- a/docs/advanced_concepts.rst
+++ b/docs/advanced_concepts.rst
@@ -246,3 +246,76 @@ You can enable it by setting:
- :code:`KARTON_KARTON_DEBUG` environment value to "1"
- :code:`debug` parameter to `1` in the :code:`[karton]` config section
- :code:`--debug` command-line parameter
+
+
+Negated filter patterns
+-----------------------
+
+.. versionadded:: 5.4.1
+
+There is one more pattern syntax, not documented in the :code:`Filter Patterns` section anymore.
+It is possible to define negated filters, and they are handled in a special way. For example, let's consider the following filters:
+
+.. code-block:: python
+
+ # Special ("old style") negation
+ [
+ {"foo": "bar", "platform": "!linux"},
+ {"foo": "bar", "platform": "!windows"},
+ ]
+
+Depending on how you think this should work, this may have a surprising behavior. In particular this is **not** equivalent to:
+
+.. code-block:: python
+
+ # Regular ("new style") negation (this is intentionally WRONG, see below)
+ [
+ {"foo": "bar", "platform": {"$not": "linux"}},
+ {"foo": "bar", "platform": {"$not": "windows"}},
+ ]
+
+That's because negated "old style" filters are handled in a very special way, but :code:`$not` is not. Let's use the following task as an example:
+
+.. code-block:: python
+
+ {
+ "foo": "bar",
+ "platform": "linux"
+ }
+
+Recall that filters are checked top to bottom, and if at least one pattern matches, the task will be accepted by a consumer.
+Using regular ("new style") patterns, the matching will proceed as follows:
+
+- Check against the first filter: :code:`foo` matches, but the filter explicitly rejects tasks with :code:`platform: linux`.
+- Check against the second filter: :code:`foo` matches, and the platform - :code:`linux` - is not equal to :code:`windows`, so the task is accepted.
+
+Whoops! This is probably not what the programmer intended. In comparison, "old style" filters will always reject a task if it matches at least one negated filter.
+This sounds nice, but, like every special case, it may cause unpleasant surprises. This is especially true when combining "old style" and "new style" patterns.
+That's why it's currently recommended to only use "new style" filters - they do everything "old style" filters can, and much more.
+
+In this case, the proper way to get the desired behavior with "new-style" filters is:
+
+.. code-block:: python
+
+ # Regular ("new style") negation
+ [
+ {
+ "foo": "bar",
+ "platform": {"$not": {"$or": ["linux", "windows"]}},
+ }
+ ]
+
+It's a bit more verbose, but at least it should be very clear what is happening: We want :code:`foo` equal to :code:`bar`, and :code:`platform` **not** equal to either :code:`windows` or :code:`linux`.
+In this case there are no special cases, and matching checks every filter top to bottom independently, as usual.
+
+.. warning::
+
+ "Old style" negations are only supported at the top-level! Combining them with "new style" filters will not work. Exclamation mark is not considered a special character in this case.
+
+ In fact, we're not even sure how :code:`{"$or": ["!windows", "!linux"]}` *should* behave.
+
+.. note::
+
+ Since "new style" patterns were introduced in Karton version 5.4.1, "old style" negations are not recommended and should be considered deprecated.
+
+ Nevertheless, Karton still supports them and they will keep working indefinitely. So don't worry, there are no breaking changes here.
diff --git a/docs/task_headers_payloads.rst b/docs/task_headers_payloads.rst
index 38fbe862..a79813c9 100644
--- a/docs/task_headers_payloads.rst
+++ b/docs/task_headers_payloads.rst
@@ -88,12 +88,10 @@ Starting from 5.0.0, consumer filters support basic wildcards and exclusions.
Pattern Meaning
------------------------ ------------------------------------------------------------------------------
``{"foo": "bar"}`` matches 'bar' value of 'foo' header
-``{"foo": "!bar"}`` matches any value other than 'bar' in 'foo' header
``{"foo": "ba?"}`` matches 'ba' value followed by any character
``{"foo": "ba*"}`` matches 'ba' value followed by any substring (including empty)
``{"foo": "ba[rz]"}`` matches 'ba' value followed by 'r' or 'z' character
``{"foo": "ba[!rz]"}`` matches 'ba' value followed by any character other than 'r' or 'z'
-``{"foo": "!ba[!rz]"}`` matches any value of 'foo' header that doesn't match to the "bar[!rz]" pattern
======================== ==============================================================================
Filter logic can be used to fulfill specific use-cases:
@@ -104,27 +102,78 @@ Filter logic can be used to fulfill specific use-cases:
``[]`` matches no tasks (no headers allowed). Can be used to turn off queue and consume tasks left.
``[{}]`` matches any task (no header conditions). Can be used to intercept all tasks incoming to Karton.
``[{"foo": "bar"}, {"foo": "baz"}]`` 'foo' header is required and must have 'bar' or 'baz' value.
-``[{"foo": "!*"}]`` 'foo' header must be not defined.
==================================== ==============================================================================
-Excluding (negated) filters come with specific corner-cases. Regular filters require specific value to be defined in header, while
-negated filters are accepting all possible values except specified in filter.
+.. versionadded:: 5.4.1
-================================================================================== =============================================================================================================================================
- ``filters`` value Meaning
----------------------------------------------------------------------------------- ---------------------------------------------------------------------------------------------------------------------------------------------
-``[{"type": "sample", "stage": "!*"}]`` matches only tasks that have type 'sample' but no 'stage' key
-``[{"platform": "!linux"}, {"platform": "!windows"}]`` matches **all** tasks (even with no headers) but not these with platform 'linux' or 'windows'
-``[{"foo": "bar", "platform": "!linux"}, {"foo": "bar", "platform": "!windows"}]`` 'foo' header is required and must have 'bar' value, but platform can't be 'linux' or 'windows'
-``[{"foo": "bar", "platform": "!linux"}, {"foo": "baz", "platform": "!windows"}]`` 'foo' header is required and must have 'bar' value and no 'linux' in platform key, or foo must be 'baz', but then platform can't be 'windows'
-================================================================================== =============================================================================================================================================
+Sometimes a more flexible behavior is necessary. This should be done with caution, as Karton can handle quite complex
+workflows without resorting to this. The need to use complex task filtering rules may mean that one is doing something not in the "spirit" of Karton.
+
+The advanced filter syntax is based on MongoDB syntax. See `MongoDB documentation <https://www.mongodb.com/docs/manual/reference/operator/query/>`_
+for a detailed explanation.
+
+In case of Karton, the following operators are allowed:
+
+- Comparison: :code:`$eq`, :code:`$ne`, :code:`$gt`, :code:`$gte`, :code:`$lt`, :code:`$lte`
+- Logical: :code:`$and`, :code:`$or`, :code:`$not`, :code:`$nor`
+- Array: :code:`$in`, :code:`$nin`, :code:`$all`, :code:`$elemMatch`, :code:`$size`
+- Miscellaneous: :code:`$type`, :code:`$mod`, :code:`$regex`, :code:`$exists`
+
+For some concrete examples, consider these filters:
+
+.. code-block:: python
+
+ filters = [
+ { # checks if `version` header is a number greater than 3
+ "type": "sample",
+ "version": {"$gt": 3},
+ },
+ { # checks if `tags` header contains both "emotet" and "dump"
+ "type": "sample",
+ "tags": {"$all": ["emotet", "dump"]},
+ },
+ { # checks if `platform` header is either "win32" or "linux"
+ "type": "sample",
+ "platform": {"$in": ["win32", "linux"]},
+ },
+ { # checks if `respect` header contains a prime number of letters "f"
+ "type": "sample",
+ "respect": {"$not": {"$regex": r"^f?$|^(ff+?)\1+$"}}
+ },
+ ]
.. warning::
- It's recommended to use only strings in filter and header values
+ Filter styles don't mix well, and wildcard patterns only work at the top level.
+ For example, the following won't work as expected:
+
+ .. code-block:: python
+
+ filters = [
+ { "version": {"$or": ["win*", "linux*"]} },
+ ]
+
+ Instead you have to use regex explicitly:
+
+ .. code-block:: python
+
+ filters = [{
+ "version": {
+ "$or": [
+ {"$regex": "^win.*"},
+ {"$regex": "^linux.*"},
+ ],
+ }
+ }]
+
+ Or just:
+
+ .. code-block:: python
+
+ filters = [
+ { "version": {"$regex": "^(win|linux).*"} },
+ ]
- Although some of non-string types are allowed, they will be converted to string for comparison
- which may lead to unexpected results.
Task payload
------------
diff --git a/karton/core/karton.py b/karton/core/karton.py
index a6d7e38f..5d819b9c 100644
--- a/karton/core/karton.py
+++ b/karton/core/karton.py
@@ -8,6 +8,7 @@
import traceback
from typing import Any, Callable, Dict, List, Optional, Tuple, cast
+from . import query
from .__version__ import __version__
from .backend import KartonBackend, KartonBind, KartonMetrics
from .base import KartonBase, KartonServiceBase
@@ -122,6 +123,9 @@ def __init__(
if self.filters is None:
raise ValueError("Cannot bind consumer on Empty binds")
+ # Dummy conversion to make sure the filters are well-formed.
+ query.convert(self.filters)
+
self.persistent = (
self.config.getboolean("karton", "persistent", self.persistent)
and not self.debug
diff --git a/karton/core/query.py b/karton/core/query.py
new file mode 100644
index 00000000..253f8e1a
--- /dev/null
+++ b/karton/core/query.py
@@ -0,0 +1,350 @@
+import fnmatch
+import re
+from collections.abc import Mapping, Sequence
+from typing import Dict, Type
+
+# Source code adopted from https://github.com/kapouille/mongoquery
+# Original licenced under "The Unlicense" license.
+
+
+class QueryError(Exception):
+ """Query error exception"""
+
+ pass
+
+
+class _Undefined(object):
+ pass
+
+
+def is_non_string_sequence(entry):
+ """Returns True if entry is a Python sequence iterable, and not a string"""
+ return isinstance(entry, Sequence) and not isinstance(entry, str)
+
+
+class Query(object):
+ """The Query class is used to match an object against a MongoDB-like query"""
+
+ def __init__(self, definition):
+ self._definition = definition
+
+ def match(self, entry):
+ """Matches the entry object against the query specified on instantiation"""
+ return self._match(self._definition, entry)
+
+ def _match(self, condition, entry):
+ if isinstance(condition, Mapping):
+ return all(
+ self._process_condition(sub_operator, sub_condition, entry)
+ for sub_operator, sub_condition in condition.items()
+ )
+ if is_non_string_sequence(entry):
+ return condition in entry
+ return condition == entry
+
+ def _extract(self, entry, path):
+ if not path:
+ return entry
+ if entry is None:
+ return entry
+ if is_non_string_sequence(entry):
+ try:
+ index = int(path[0])
+ return self._extract(entry[index], path[1:])
+ except ValueError:
+ return [self._extract(item, path) for item in entry]
+ elif isinstance(entry, Mapping) and path[0] in entry:
+ return self._extract(entry[path[0]], path[1:])
+ else:
+ return _Undefined()
+
+ def _path_exists(self, operator, condition, entry):
+     """Evaluate ``$exists`` for a dotted path.
+
+     :param operator: dotted path, e.g. ``"foo.bar"`` or ``"foo.0.bar"``
+     :param condition: the ``$exists`` argument
+     :param entry: document (or sub-document) to walk
+     :return: ``condition`` if the path exists in ``entry``,
+         ``not condition`` otherwise
+     """
+     keys_list = operator.split(".")
+     for i, k in enumerate(keys_list):
+         # Strings are Sequences too, but they are leaf values here:
+         # iterating one yields one-character strings, which made the
+         # recursion below never terminate.
+         if is_non_string_sequence(entry):
+             if not k.isdigit():
+                 # Non-numeric key on a list: the path exists if it exists
+                 # in at least one element.
+                 remaining = ".".join(keys_list[i:])
+                 for elem in entry:
+                     if self._path_exists(remaining, condition, elem) == condition:
+                         return condition
+                 return not condition
+             k = int(k)
+         try:
+             entry = entry[k]
+         except (TypeError, IndexError, KeyError):
+             return not condition
+     return condition
+
+ def _process_condition(self, operator, condition, entry):
+ if isinstance(condition, Mapping) and "$exists" in condition:
+ if isinstance(operator, str) and operator.find(".") != -1:
+ return self._path_exists(operator, condition["$exists"], entry)
+ elif condition["$exists"] != (operator in entry):
+ return False
+ elif tuple(condition.keys()) == ("$exists",):
+ return True
+ if isinstance(operator, str):
+ if operator.startswith("$"):
+ try:
+ return getattr(self, "_" + operator[1:])(condition, entry)
+ except AttributeError:
+ raise QueryError(f"{operator} operator isn't supported")
+ else:
+ try:
+ extracted_data = self._extract(entry, operator.split("."))
+ except IndexError:
+ extracted_data = _Undefined()
+ else:
+ if operator not in entry:
+ return False
+ extracted_data = entry[operator]
+ return self._match(condition, extracted_data)
+
+ @staticmethod
+ def _not_implemented(*_):
+ raise NotImplementedError
+
+ @staticmethod
+ def _noop(*_):
+ return True
+
+ @staticmethod
+ def _eq(condition, entry):
+ try:
+ return entry == condition
+ except TypeError:
+ return False
+
+ @staticmethod
+ def _gt(condition, entry):
+ try:
+ return entry > condition
+ except TypeError:
+ return False
+
+ @staticmethod
+ def _gte(condition, entry):
+ try:
+ return entry >= condition
+ except TypeError:
+ return False
+
+ @staticmethod
+ def _in(condition, entry):
+ if is_non_string_sequence(condition):
+ for elem in condition:
+ if is_non_string_sequence(entry) and elem in entry:
+ return True
+ elif not is_non_string_sequence(entry) and elem == entry:
+ return True
+ return False
+ else:
+ raise TypeError("condition must be a list")
+
+ @staticmethod
+ def _lt(condition, entry):
+ try:
+ return entry < condition
+ except TypeError:
+ return False
+
+ @staticmethod
+ def _lte(condition, entry):
+ try:
+ return entry <= condition
+ except TypeError:
+ return False
+
+ @staticmethod
+ def _ne(condition, entry):
+ return entry != condition
+
+ def _nin(self, condition, entry):
+ return not self._in(condition, entry)
+
+ def _and(self, condition, entry):
+ if isinstance(condition, Sequence):
+ return all(self._match(sub_condition, entry) for sub_condition in condition)
+ raise QueryError(f"$and has been attributed incorrect argument {condition}")
+
+ def _nor(self, condition, entry):
+ if isinstance(condition, Sequence):
+ return all(
+ not self._match(sub_condition, entry) for sub_condition in condition
+ )
+ raise QueryError(f"$nor has been attributed incorrect argument {condition}")
+
+ def _not(self, condition, entry):
+ return not self._match(condition, entry)
+
+ def _or(self, condition, entry):
+ if isinstance(condition, Sequence):
+ return any(self._match(sub_condition, entry) for sub_condition in condition)
+ raise QueryError(f"$or has been attributed incorrect argument {condition}")
+
+ @staticmethod
+ def _type(condition, entry):
+ bson_type: Dict[int, Type] = {
+ 1: float,
+ 2: str,
+ 3: Mapping,
+ 4: Sequence,
+ 5: bytearray,
+ 7: str, # object id (uuid)
+ 8: bool,
+ 9: str, # date (UTC datetime)
+ 10: type(None),
+ 11: re.Pattern, # regex,
+ 13: str, # Javascript
+ 15: str, # JavaScript (with scope)
+ 16: int, # 32-bit integer
+ 17: int, # Timestamp
+ 18: int, # 64-bit integer
+ }
+ bson_alias = {
+ "double": 1,
+ "string": 2,
+ "object": 3,
+ "array": 4,
+ "binData": 5,
+ "objectId": 7,
+ "bool": 8,
+ "date": 9,
+ "null": 10,
+ "regex": 11,
+ "javascript": 13,
+ "javascriptWithScope": 15,
+ "int": 16,
+ "timestamp": 17,
+ "long": 18,
+ }
+
+ if condition == "number":
+ return any(
+ [
+ isinstance(entry, bson_type[bson_alias[alias]])
+ for alias in ["double", "int", "long"]
+ ]
+ )
+
+ # resolves bson alias, or keeps original condition value
+ condition = bson_alias.get(condition, condition)
+
+ if condition not in bson_type:
+ raise QueryError(f"$type has been used with unknown type {condition}")
+
+ return isinstance(entry, bson_type[condition])
+
+ _exists = _noop
+
+ @staticmethod
+ def _mod(condition, entry):
+     """Implement ``$mod``: true when ``entry % divisor == remainder``.
+
+     :param condition: sequence whose first two items are the divisor and
+         the expected remainder
+     :param entry: value extracted from the matched document
+     :return: result of the comparison; False for values that do not
+         support modulo, like the comparison operators do on TypeError
+     :raises QueryError: if ``condition`` is malformed or the divisor is zero
+     """
+     try:
+         divisor, remainder = condition[0], condition[1]
+     except (TypeError, IndexError, KeyError):
+         raise QueryError(
+             f"$mod has been attributed incorrect argument {condition}"
+         )
+     # On text, `%` is printf-style formatting rather than arithmetic.
+     if isinstance(entry, (str, bytes)):
+         return False
+     try:
+         return entry % divisor == remainder
+     except TypeError:
+         return False
+     except ZeroDivisionError:
+         raise QueryError(
+             f"$mod has been attributed incorrect argument {condition}"
+         )
+
+ @staticmethod
+ def _regex(condition, entry):
+ if not isinstance(entry, str):
+ return False
+ # If the caller has supplied a compiled regex, assume options are already
+ # included.
+ if isinstance(condition, re.Pattern):
+ return bool(re.search(condition, entry))
+
+ try:
+ regex = re.match(r"\A/(.+)/([imsx]{,4})\Z", condition, flags=re.DOTALL)
+ except TypeError:
+ raise QueryError(
+ f"{condition} is not a regular expression and should be a string"
+ )
+
+ flags = 0
+ if regex:
+ options = regex.group(2)
+ for option in options:
+ flags |= getattr(re, option.upper())
+ exp = regex.group(1)
+ else:
+ exp = condition
+
+ try:
+ match = re.search(exp, entry, flags=flags)
+ except Exception as error:
+ raise QueryError(f"{condition} failed to execute with error {error!r}")
+ return bool(match)
+
+ _options = _text = _where = _not_implemented
+
+ def _all(self, condition, entry):
+ return all(self._match(item, entry) for item in condition)
+
+ def _elemMatch(self, condition, entry):
+ if not isinstance(entry, Sequence):
+ return False
+ return any(
+ all(
+ self._process_condition(sub_operator, sub_condition, element)
+ for sub_operator, sub_condition in condition.items()
+ )
+ for element in entry
+ )
+
+ @staticmethod
+ def _size(condition, entry):
+ if not isinstance(condition, int):
+ raise QueryError(
+ f"$size has been attributed incorrect argument {condition}"
+ )
+
+ if is_non_string_sequence(entry):
+ return len(entry) == condition
+
+ return False
+
+ def __repr__(self):
+     """Return a debug representation that includes the query definition."""
+     return f"<Query({self._definition})>"
+
+
+def toregex(wildcard):
+    """Translate an old-style wildcard pattern into a query condition.
+
+    :param wildcard: header pattern, optionally using fnmatch wildcards
+    :return: ``wildcard`` itself when it contains no wildcard characters
+        (it is then compared by equality), otherwise a ``{"$regex": ...}``
+        condition
+    :raises QueryError: if ``wildcard`` is not a string
+    """
+    if not isinstance(wildcard, str):
+        raise QueryError(f"Unexpected value in the regex conversion: {wildcard}")
+    # The check is not strictly necessary, but it avoids building a regular
+    # expression for plain strings.
+    if any(c in wildcard for c in "?*[]!"):
+        # fnmatch.translate() only anchors the end of the pattern and $regex
+        # is evaluated with re.search(), so pin the start as well; otherwise
+        # "win*" would also match "darwin".
+        return {"$regex": r"\A" + fnmatch.translate(wildcard)}
+    return wildcard
+
+
+def convert(filters):
+    """Convert Karton filters to a single mongo-style :class:`Query`.
+
+    Special care is taken to handle old-style negative filters (string values
+    starting with ``!``) correctly: a task that matches the positive checks
+    of a rule together with any of that rule's negated patterns is rejected,
+    regardless of the other rules.
+
+    :param filters: iterable of filter rules (mappings of header name to
+        pattern or mongo-style condition)
+    :return: Query that matches the headers accepted by ``filters``
+    :raises QueryError: if a rule is not a mapping
+    """
+    # Negative filters are old-style negative assertions, and behave differently.
+    # See issue #246 for the original bug report.
+    #
+    # For a short example:
+    #   [{"platform": "!win32"}, {"platform": "!linux"}]
+    # will match all non-linux non-windows samples, but:
+    #   [{"platform": {"$not": "win32"}}, {"platform": {"$not": "linux"}}]
+    # means `platform != "win32" or platform != "linux"` and will match everything.
+    # To get equivalent behaviour with mongo syntax, you should use:
+    #   [{"platform": {"$not": {"$or": ["win32", "linux"]}}}]
+    regular_filter, negative_filter = [], []
+    for rule in filters:
+        # Report malformed rules as QueryError instead of failing on .items().
+        if not isinstance(rule, Mapping):
+            raise QueryError(f"Filter rule must be a mapping, got {rule!r}")
+        positive_checks, negative_checks = [], []
+        for key, value in rule.items():
+            if isinstance(value, str):
+                if value and value[0] == "!":  # negative check
+                    negative_checks.append({key: toregex(value[1:])})
+                else:
+                    positive_checks.append({key: toregex(value)})
+            else:
+                # Non-string values are passed through as mongo-style conditions.
+                positive_checks.append({key: value})
+        regular_filter.append({"$and": positive_checks})
+        # With no negative checks the inner $or is empty and never matches,
+        # so the rule contributes nothing to the rejection list.
+        negative_filter.append({"$and": positive_checks + [{"$or": negative_checks}]})
+    return Query(
+        {
+            "$and": [
+                {"$not": {"$or": negative_filter}},
+                {"$or": regular_filter},
+            ]
+        }
+    )
diff --git a/karton/core/task.py b/karton/core/task.py
index 0c072d83..4d8c34bb 100644
--- a/karton/core/task.py
+++ b/karton/core/task.py
@@ -1,5 +1,4 @@
import enum
-import fnmatch
import json
import time
import uuid
@@ -16,6 +15,7 @@
Union,
)
+from . import query
from .resource import RemoteResource, ResourceBase
from .utils import recursive_iter, recursive_iter_with_keys, recursive_map
@@ -223,75 +223,8 @@ def process(self, task: Task) -> None:
return new_task
def matches_filters(self, filters: List[Dict[str, Any]]) -> bool:
- """
- Checks whether provided task headers match filters
-
- :param filters: Task header filters
- :return: True if task headers match specific filters
-
- :meta private:
- """
-
- def test_filter(headers: Dict[str, Any], filter: Dict[str, Any]) -> int:
- """
- Filter match follows AND logic, but it's non-boolean because filters may be
- negated (task:!platform).
-
- Result values are as follows:
- - 1 - positive match, no mismatched values in headers
- (all matched)
- - 0 - no match, found value that doesn't match to the filter
- (some are not matched)
- - -1 - negative match, found value that matches negated filter value
- (all matched but found negative matches)
- """
- matches = 1
- for filter_key, filter_value in filter.items():
- # Coerce filter value to string
- filter_value_str = str(filter_value)
- negated = False
- if filter_value_str.startswith("!"):
- negated = True
- filter_value_str = filter_value_str[1:]
-
- # If expected key doesn't exist in headers
- if filter_key not in headers:
- # Negated filter ignores non-existent values
- if negated:
- continue
- # But positive filter doesn't
- return 0
-
- # Coerce header value to string
- header_value_str = str(headers[filter_key])
- # fnmatch is great for handling simple wildcard patterns (?, *, [abc])
- match = fnmatch.fnmatchcase(header_value_str, filter_value_str)
- # If matches, but it's negated: it's negative match
- if match and negated:
- matches = -1
- # If doesn't match but filter is not negated: it's not a match
- if not match and not negated:
- return 0
- # If there are no mismatched values: filter is matched
- return matches
-
- # List of filter matches follow OR logic, but -1 is special
- # If there is any -1, result is False
- # (any matched, but it's negative match)
- # If there is any 1, but no -1's: result is True
- # (any matched, no negative match)
- # If there are only 0's: result is False
- # (none matched)
- matches = False
- for task_filter in filters:
- match_result = test_filter(self.headers, task_filter)
- if match_result == -1:
- # Any negative match results in False
- return False
- if match_result == 1:
- # Any positive match but without negative matches results in True
- matches = True
- return matches
+ """Check if a task matches the given filters"""
+ return query.convert(filters).match(self.headers)
def set_task_parent(self, parent: "Task"):
"""
diff --git a/karton/system/system.py b/karton/system/system.py
index 0947d0e8..3d6b14b2 100644
--- a/karton/system/system.py
+++ b/karton/system/system.py
@@ -3,6 +3,7 @@
import time
from typing import List, Optional
+from karton.core import query
from karton.core.__version__ import __version__
from karton.core.backend import (
KARTON_OPERATIONS_QUEUE,
@@ -175,7 +176,12 @@ def route_task(self, task: Task, binds: List[KartonBind]) -> None:
pipe = self.backend.make_pipeline()
for bind in binds:
identity = bind.identity
- if task.matches_filters(bind.filters):
+ try:
+ is_match = task.matches_filters(bind.filters)
+ except query.QueryError:
+ self.log.error("Task matching failed - invalid filters?")
+ continue
+ if is_match:
routed_task = task.fork_task()
routed_task.status = TaskState.SPAWNED
routed_task.last_update = time.time()
diff --git a/tests/test_core.py b/tests/test_core.py
index c560e290..831c534d 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -38,7 +38,7 @@ def test_missing_config_file(self, mock_isfile, mock_parser):
""" Test missing config file """
mock_isfile.return_value = False
with self.assertRaises(IOError):
- cfg = Config("this_file_doesnt_exist")
+ Config("this_file_doesnt_exist")
@patch('os.path.isfile', lambda path: True)
@patch('builtins.open', mock_open(read_data=MOCK_CONFIG))
diff --git a/tests/test_task_filters.py b/tests/test_task_filters.py
index 2bd8599e..0a722870 100644
--- a/tests/test_task_filters.py
+++ b/tests/test_task_filters.py
@@ -261,3 +261,264 @@ def test_negated_filter_for_different_type(self):
"platform": "win64"
})
self.assertFalse(task_sample_win64.matches_filters(filters))
+
+ def test_list_contains(self):
+ filters = [
+ {
+ "type": "sample",
+ "platform": {"$in": ["win32", "linux"]},
+ },
+ ]
+
+ task_sample = Task(headers={
+ "type": "sample",
+ "platform": "win32"
+ })
+ self.assertTrue(task_sample.matches_filters(filters))
+
+ task_different_win32 = Task(headers={
+ "type": "sample",
+ "platform": "linux"
+ })
+ self.assertTrue(task_different_win32.matches_filters(filters))
+
+ task_different_win64 = Task(headers={
+ "type": "different",
+ "platform": "win32"
+ })
+ self.assertFalse(task_different_win64.matches_filters(filters))
+
+ def test_element_is_contained(self):
+ filters = [
+ {
+ "type": "sample",
+ "tags": "emotet",
+ },
+ ]
+
+ task_sample = Task(headers={
+ "type": "sample",
+ "tags": ["emotet"],
+ })
+ self.assertTrue(task_sample.matches_filters(filters))
+
+ task_sample = Task(headers={
+ "type": "sample",
+ "tags": ["emotet", "dump"],
+ })
+ self.assertTrue(task_sample.matches_filters(filters))
+
+ task_sample = Task(headers={
+ "type": "sample",
+ "tags": ["nymaim", "dump"],
+ })
+ self.assertFalse(task_sample.matches_filters(filters))
+
+ def test_multiple_elements_are_contained(self):
+ filters = [
+ {
+ "type": "sample",
+ "tags": {"$all": ["emotet", "dump"]},
+ },
+ ]
+
+ task_sample = Task(headers={
+ "type": "sample",
+ "tags": ["emotet"],
+ })
+ self.assertFalse(task_sample.matches_filters(filters))
+
+ task_sample = Task(headers={
+ "type": "sample",
+ "tags": ["emotet", "dump"],
+ })
+ self.assertTrue(task_sample.matches_filters(filters))
+
+ task_sample = Task(headers={
+ "type": "sample",
+ "tags": ["emotet", "dump", "needs-inspection"],
+ })
+ self.assertTrue(task_sample.matches_filters(filters))
+
+ task_sample = Task(headers={
+ "type": "sample",
+ "tags": ["nymaim", "dump"],
+ })
+ self.assertFalse(task_sample.matches_filters(filters))
+
+ def test_comparison(self):
+ filters = [
+ {
+ "type": "sample",
+ "version": {"$gt": 3},
+ },
+ ]
+
+ task_sample = Task(headers={
+ "type": "sample",
+ "version": 2,
+ })
+ self.assertFalse(task_sample.matches_filters(filters))
+
+ task_sample = Task(headers={
+ "type": "sample",
+ "version": 4,
+ })
+ self.assertTrue(task_sample.matches_filters(filters))
+
+ def test_basic_wildcard(self):
+ filters = [
+ {
+ "type": "sample",
+ "platform": "win*",
+ },
+ ]
+
+ task_sample = Task(headers={
+ "type": "sample",
+ "platform": "linux",
+ })
+ self.assertFalse(task_sample.matches_filters(filters))
+
+ task_sample = Task(headers={
+ "type": "sample",
+ "platform": "win32",
+ })
+ self.assertTrue(task_sample.matches_filters(filters))
+
+ task_sample = Task(headers={
+ "type": "sample",
+ "platform": "win",
+ })
+ self.assertTrue(task_sample.matches_filters(filters))
+
+ def test_regex_match(self):
+ filters = [
+ {
+ "type": "sample",
+ "platform": {"$regex": "win.*"}
+ },
+ ]
+
+ task_sample = Task(headers={
+ "type": "sample",
+ "platform": "linux",
+ })
+ self.assertFalse(task_sample.matches_filters(filters))
+
+ task_sample = Task(headers={
+ "type": "sample",
+ "platform": "win32",
+ })
+ self.assertTrue(task_sample.matches_filters(filters))
+
+ task_sample = Task(headers={
+ "type": "sample",
+ "platform": "win",
+ })
+ self.assertTrue(task_sample.matches_filters(filters))
+
+ task_sample = Task(headers={
+ "type": "sample",
+ "platform": "karton keeps on winning",
+ })
+ # no anchors in the regex, so this should actually match
+ self.assertTrue(task_sample.matches_filters(filters))
+
+ def test_example_from_convert(self):
+ # Test for a literal example used in the convert method documentation
+ oldstyle = [{"platform": "!win32"}, {"platform": "!linux"}]
+ wrong = [{"platform": {"$not": "win32"}}, {"platform": {"$not": "linux"}}]
+ good = [{"platform": {"$not": {"$or": ["win32", "linux"]}}}]
+
+ task_linux = Task(headers={
+ "type": "sample",
+ "platform": "linux",
+ })
+ task_win32 = Task(headers={
+ "type": "sample",
+ "platform": "win32",
+ })
+ task_macos = Task(headers={
+ "type": "sample",
+ "platform": "macos",
+ })
+ tasks = [task_linux, task_win32, task_macos]
+
+ def assertExpect(tasks, filters, results):
+ for task, result in zip(tasks, results):
+ self.assertEqual(task.matches_filters(filters), result)
+
+ assertExpect(tasks, oldstyle, [False, False, True])
+ assertExpect(tasks, wrong, [True, True, True])
+ assertExpect(tasks, good, [False, False, True])
+
+ def test_nested_oldstyle(self):
+ # Old-style wildcards, except negative filters, don't mix
+ filters = [
+ {
+ "platform": {"$or": ["win*", "linux*"]}
+ },
+ ]
+
+ task_sample = Task(headers={
+ "platform": "linux",
+ })
+ self.assertFalse(task_sample.matches_filters(filters))
+
+ task_sample = Task(headers={
+ "platform": "linux*",
+ })
+ self.assertTrue(task_sample.matches_filters(filters))
+
+ def test_newstyle_flip(self):
+ # It's not recommended, but mongo syntax is allowed at the top level too
+ # Pointless example: match platform:win32 or kind:runnable
+ filters = [
+ {
+ "$or": [{"platform": "win32"}, {"kind": "runnable"}],
+ },
+ ]
+
+ task_sample = Task(
+ headers={"platform": "linux", "kind": "runnable"}
+ )
+ self.assertTrue(task_sample.matches_filters(filters))
+
+ task_sample = Task(
+ headers={"platform": "win32"}
+ )
+ self.assertTrue(task_sample.matches_filters(filters))
+
+ task_sample = Task(
+ headers={"platform": "linux"}
+ )
+ self.assertFalse(task_sample.matches_filters(filters))
+
+ def test_oldstyle_wildcards(self):
+ # Old-style wildcards (here a negated character class) keep working at the top level
+ filters = [{"foo": "ba[!rz]"}]
+
+ task_sample = Task(headers={
+ "foo": "bar",
+ })
+ self.assertFalse(task_sample.matches_filters(filters))
+
+ task_sample = Task(headers={
+ "foo": "bat",
+ })
+ self.assertTrue(task_sample.matches_filters(filters))
+
+ def test_wildcards_anchored(self):
+ # Just to make sure matching is anchored at ^ and $.
+ filters = [{"foo": "bar"}]
+
+ task_sample = Task(headers={
+ "foo": "rabarbar",
+ })
+ self.assertFalse(task_sample.matches_filters(filters))
+
+ task_sample = Task(headers={
+ "foo": "bar",
+ })
+ self.assertTrue(task_sample.matches_filters(filters))