From 89a885df4f0b71fd52991c79a40a9f966e4733d5 Mon Sep 17 00:00:00 2001
From: msm-cert <156842376+msm-cert@users.noreply.github.com>
Date: Tue, 10 Sep 2024 09:28:23 +0000
Subject: [PATCH] Implement mongodb query syntax for task filters (#258)

---
 docs/advanced_concepts.rst     |  73 +++++++
 docs/task_headers_payloads.rst |  81 ++++++--
 karton/core/karton.py          |   4 +
 karton/core/query.py           | 350 +++++++++++++++++++++++++++++++++
 karton/core/task.py            |  73 +------
 karton/system/system.py        |   8 +-
 tests/test_core.py             |   2 +-
 tests/test_task_filters.py     | 261 ++++++++++++++++++++++++
 8 files changed, 764 insertions(+), 88 deletions(-)
 create mode 100644 karton/core/query.py

diff --git a/docs/advanced_concepts.rst b/docs/advanced_concepts.rst
index 3d75953d..a5e6ed42 100644
--- a/docs/advanced_concepts.rst
+++ b/docs/advanced_concepts.rst
@@ -246,3 +246,76 @@ You can enable it by setting:
 - :code:`KARTON_KARTON_DEBUG` environment value to "1"
 - :code:`debug` parameter to `1` in the :code:`[karton]` config section
 - :code:`--debug` command-line parameter
+
+
+Negated filter patterns
+-----------------------
+
+.. versionadded:: 5.4.1
+
+There is one more pattern syntax, not documented in the :code:`Filter Patterns` section anymore.
+It is possible to define negated filters, which are handled in a special way. For example, consider the following filters:
+
+.. code-block:: python
+
+    # Special ("old style") negation
+    [
+        {"foo": "bar", "platform": "!linux"},
+        {"foo": "bar", "platform": "!windows"},
+    ]
+
+Depending on how you think this should work, this may have a surprising behavior. In particular this is **not** equivalent to:
+
+.. code-block:: python
+
+    # Regular ("new style") negation (this is intentionally WRONG, see below)
+    [
+        {"foo": "bar", "platform": {"$not": "linux"}},
+        {"foo": "bar", "platform": {"$not": "windows"}},
+    ]
+
+That's because negated "old style" filters are handled in a very special way, but :code:`$not` is not. Let's use the following task as an example:
+
+.. code-block:: python
+
+    {
+        "foo": "bar",
+        "platform": "linux"
+    }
+
+Recall that filters are checked top to bottom, and if at least one pattern matches, the task will be accepted by a consumer.
+Using regular ("new style") patterns, the matching will proceed as follows:
+
+- Check against the first filter: :code:`foo` matches, but the filter explicitly rejects tasks with :code:`platform: linux`. 
+- Check against the second filter: :code:`foo` matches, and the platform - :code:`linux` - is not equal to :code:`windows`, so the task is accepted.
+
+Whoops! This is probably not what the programmer intended. In comparison, "old style" filters will always reject a task if it matches at least one negated filter.
+This sounds nice, but like every special case, it may cause unpleasant surprises. This is especially true when combining "old style" and "new style" patterns.
+That's why it's currently recommended to only use "new style" filters - they do everything "old style" filters can, and much more.
+
+In this case, the proper way to get the desired behavior with "new-style" filters is:
+
+.. code-block:: python
+
+    # Regular ("new style") negation
+    [
+        {
+            "foo": "bar",
+            "platform": {"$not": {"$or": ["linux", "windows"]}},
+        }
+    ]
+
+It's a bit more verbose, but at least it should be very clear what is happening: We want :code:`foo` equal to :code:`bar`, and :code:`platform` **not** equal to either :code:`windows` or :code:`linux`.
+In this case there are no special cases, and matching checks every filter top to bottom independently, as usual.
+
+.. warning::
+
+    "Old style" negations are only supported at the top-level! Combining them with "new style" filters will not work. Exclamation mark is not considered a special character in this case.
+
+    In fact, we're not even sure how :code:`{"$or": ["!windows", "!linux"]}` *should* behave.
+
+.. note::
+    
+    Since "new style" patterns were introduced in Karton version 5.4.1, "old style" negations are not recommended and should be considered deprecated.
+
+    Nevertheless, Karton still supports them and they will keep working indefinitely. So don't worry, there are no breaking changes here.
diff --git a/docs/task_headers_payloads.rst b/docs/task_headers_payloads.rst
index 38fbe862..a79813c9 100644
--- a/docs/task_headers_payloads.rst
+++ b/docs/task_headers_payloads.rst
@@ -88,12 +88,10 @@ Starting from 5.0.0, consumer filters support basic wildcards and exclusions.
        Pattern                                           Meaning
 ------------------------  ------------------------------------------------------------------------------
 ``{"foo": "bar"}``        matches 'bar' value of 'foo' header
-``{"foo": "!bar"}``       matches any value other than 'bar' in 'foo' header
 ``{"foo": "ba?"}``        matches 'ba' value followed by any character
 ``{"foo": "ba*"}``        matches 'ba' value followed by any substring (including empty)
 ``{"foo": "ba[rz]"}``     matches 'ba' value followed by 'r' or 'z' character
 ``{"foo": "ba[!rz]"}``    matches 'ba' value followed by any character other than 'r' or 'z'
-``{"foo": "!ba[!rz]"}``   matches any value of 'foo' header that doesn't match to the "bar[!rz]" pattern
 ========================  ==============================================================================
 
 Filter logic can be used to fulfill specific use-cases:
@@ -104,27 +102,78 @@ Filter logic can be used to fulfill specific use-cases:
 ``[]``                                matches no tasks (no headers allowed). Can be used to turn off queue and consume tasks left.
 ``[{}]``                              matches any task (no header conditions). Can be used to intercept all tasks incoming to Karton.
 ``[{"foo": "bar"}, {"foo": "baz"}]``  'foo' header is required and must have 'bar' or 'baz' value.
-``[{"foo": "!*"}]``                   'foo' header must be not defined.
 ====================================  ==============================================================================
 
-Excluding (negated) filters come with specific corner-cases. Regular filters require specific value to be defined in header, while
-negated filters are accepting all possible values except specified in filter.
+.. versionadded:: 5.4.1
 
-==================================================================================  =============================================================================================================================================
-   ``filters`` value                                                                  Meaning
-----------------------------------------------------------------------------------  ---------------------------------------------------------------------------------------------------------------------------------------------
-``[{"type": "sample", "stage": "!*"}]``                                             matches only tasks that have type 'sample' but no 'stage' key
-``[{"platform": "!linux"}, {"platform": "!windows"}]``                              matches **all** tasks (even with no headers) but not these with platform 'linux' or 'windows'
-``[{"foo": "bar", "platform": "!linux"}, {"foo": "bar", "platform": "!windows"}]``  'foo' header is required and must have 'bar' value, but platform can't be 'linux' or 'windows'
-``[{"foo": "bar", "platform": "!linux"}, {"foo": "baz", "platform": "!windows"}]``  'foo' header is required and must have 'bar' value and no 'linux' in platform key, or foo must be 'baz', but then platform can't be 'windows'
-==================================================================================  =============================================================================================================================================
+Sometimes a more flexible behavior is necessary. This should be done with caution, as Karton can handle quite complex
+workflows without resorting to this. The need to use complex task filtering rules may mean that one is doing something not in the "spirit" of Karton.
+
+The advanced filter syntax is based on MongoDB syntax. See `MongoDB documentation <https://www.mongodb.com/docs/manual/reference/operator/query/>`_
+for a detailed explanation.
+
+In case of Karton, the following operators are allowed:
+
+- Comparison: :code:`$eq`, :code:`$ne`, :code:`$gt`, :code:`$gte`, :code:`$lt`, :code:`$lte`
+- Logical: :code:`$and`, :code:`$or`, :code:`$not`, :code:`$nor`
+- Array: :code:`$in`, :code:`$nin`, :code:`$all`, :code:`$elemMatch`, :code:`$size`
+- Miscellaneous: :code:`$type`, :code:`$mod`, :code:`$regex`, :code:`$exists`
+
+For some concrete examples, consider these filters:
+
+.. code-block:: python
+
+    filters = [
+        {  # checks if `version` header is a number greater than 3
+            "type": "sample",
+            "version": {"$gt": 3},
+        },
+        {  # checks if `tags` header contains both "emotet" and "dump"
+            "type": "sample",
+            "tags": {"$all": ["emotet", "dump"]},
+        },
+        {  # checks if `platform` header is either "win32" or "linux"
+            "type": "sample",
+            "platform": {"$in": ["win32", "linux"]},
+        },
+        {  # checks if `respect` header contains a prime number of letters "f"
+            "type": "sample",
+            "respect": {"$not": {"$regex": r"^f?$|^(ff+?)\1+$"}}
+        },
+    ]
 
 .. warning::
 
-    It's recommended to use only strings in filter and header values
+    Filter styles don't mix well, and wildcard patterns only work at the top level.
+    For example, the following won't work as expected:
+
+    .. code-block:: python
+
+        filters = [
+            { "version": {"$or": ["win*", "linux*"]} },
+        ]
+
+    Instead you have to use regex explicitly: 
+
+    .. code-block:: python
+
+        filters = [{
+            "version": {
+                "$or": [
+                    {"$regex": "^win.*"},
+                    {"$regex": "^linux.*"},
+                ],
+            }
+        }]
+
+    Or just:
+
+    .. code-block:: python
+
+        filters = [
+            { "version": {"$regex": "^(win|linux).*"} },
+        ]
 
-    Although some of non-string types are allowed, they will be converted to string for comparison
-    which may lead to unexpected results.
 
 Task payload
 ------------
diff --git a/karton/core/karton.py b/karton/core/karton.py
index a6d7e38f..5d819b9c 100644
--- a/karton/core/karton.py
+++ b/karton/core/karton.py
@@ -8,6 +8,7 @@
 import traceback
 from typing import Any, Callable, Dict, List, Optional, Tuple, cast
 
+from . import query
 from .__version__ import __version__
 from .backend import KartonBackend, KartonBind, KartonMetrics
 from .base import KartonBase, KartonServiceBase
@@ -122,6 +123,9 @@ def __init__(
         if self.filters is None:
             raise ValueError("Cannot bind consumer on Empty binds")
 
+        # Dummy conversion to make sure the filters are well-formed.
+        query.convert(self.filters)
+
         self.persistent = (
             self.config.getboolean("karton", "persistent", self.persistent)
             and not self.debug
diff --git a/karton/core/query.py b/karton/core/query.py
new file mode 100644
index 00000000..253f8e1a
--- /dev/null
+++ b/karton/core/query.py
@@ -0,0 +1,350 @@
+import fnmatch
+import re
+from collections.abc import Mapping, Sequence
+from typing import Dict, Type
+
+# Source code adopted from https://github.com/kapouille/mongoquery
+# Original licenced under "The Unlicense" license.
+
+
+class QueryError(Exception):
+    """Query error exception"""
+
+    pass
+
+
+class _Undefined(object):
+    pass
+
+
+def is_non_string_sequence(entry):
+    """Returns True if entry is a Python sequence iterable, and not a string"""
+    return isinstance(entry, Sequence) and not isinstance(entry, str)
+
+
+class Query(object):
+    """The Query class is used to match an object against a MongoDB-like query"""
+
+    def __init__(self, definition):
+        self._definition = definition
+
+    def match(self, entry):
+        """Matches the entry object against the query specified on instanciation"""
+        return self._match(self._definition, entry)
+
+    def _match(self, condition, entry):
+        if isinstance(condition, Mapping):
+            return all(
+                self._process_condition(sub_operator, sub_condition, entry)
+                for sub_operator, sub_condition in condition.items()
+            )
+        if is_non_string_sequence(entry):
+            return condition in entry
+        return condition == entry
+
+    def _extract(self, entry, path):
+        if not path:
+            return entry
+        if entry is None:
+            return entry
+        if is_non_string_sequence(entry):
+            try:
+                index = int(path[0])
+                return self._extract(entry[index], path[1:])
+            except ValueError:
+                return [self._extract(item, path) for item in entry]
+        elif isinstance(entry, Mapping) and path[0] in entry:
+            return self._extract(entry[path[0]], path[1:])
+        else:
+            return _Undefined()
+
+    def _path_exists(self, operator, condition, entry):
+        keys_list = list(operator.split("."))
+        for i, k in enumerate(keys_list):
+            if isinstance(entry, Sequence) and not k.isdigit():
+                for elem in entry:
+                    operator = ".".join(keys_list[i:])
+                    if self._path_exists(operator, condition, elem) == condition:
+                        return condition
+                return not condition
+            elif isinstance(entry, Sequence):
+                k = int(k)
+            try:
+                entry = entry[k]
+            except (TypeError, IndexError, KeyError):
+                return not condition
+        return condition
+
+    def _process_condition(self, operator, condition, entry):
+        if isinstance(condition, Mapping) and "$exists" in condition:
+            if isinstance(operator, str) and operator.find(".") != -1:
+                return self._path_exists(operator, condition["$exists"], entry)
+            elif condition["$exists"] != (operator in entry):
+                return False
+            elif tuple(condition.keys()) == ("$exists",):
+                return True
+        if isinstance(operator, str):
+            if operator.startswith("$"):
+                try:
+                    return getattr(self, "_" + operator[1:])(condition, entry)
+                except AttributeError:
+                    raise QueryError(f"{operator} operator isn't supported")
+            else:
+                try:
+                    extracted_data = self._extract(entry, operator.split("."))
+                except IndexError:
+                    extracted_data = _Undefined()
+        else:
+            if operator not in entry:
+                return False
+            extracted_data = entry[operator]
+        return self._match(condition, extracted_data)
+
+    @staticmethod
+    def _not_implemented(*_):
+        raise NotImplementedError
+
+    @staticmethod
+    def _noop(*_):
+        return True
+
+    @staticmethod
+    def _eq(condition, entry):
+        try:
+            return entry == condition
+        except TypeError:
+            return False
+
+    @staticmethod
+    def _gt(condition, entry):
+        try:
+            return entry > condition
+        except TypeError:
+            return False
+
+    @staticmethod
+    def _gte(condition, entry):
+        try:
+            return entry >= condition
+        except TypeError:
+            return False
+
+    @staticmethod
+    def _in(condition, entry):
+        if is_non_string_sequence(condition):
+            for elem in condition:
+                if is_non_string_sequence(entry) and elem in entry:
+                    return True
+                elif not is_non_string_sequence(entry) and elem == entry:
+                    return True
+            return False
+        else:
+            raise TypeError("condition must be a list")
+
+    @staticmethod
+    def _lt(condition, entry):
+        try:
+            return entry < condition
+        except TypeError:
+            return False
+
+    @staticmethod
+    def _lte(condition, entry):
+        try:
+            return entry <= condition
+        except TypeError:
+            return False
+
+    @staticmethod
+    def _ne(condition, entry):
+        return entry != condition
+
+    def _nin(self, condition, entry):
+        return not self._in(condition, entry)
+
+    def _and(self, condition, entry):
+        if isinstance(condition, Sequence):
+            return all(self._match(sub_condition, entry) for sub_condition in condition)
+        raise QueryError(f"$and has been attributed incorrect argument {condition}")
+
+    def _nor(self, condition, entry):
+        if isinstance(condition, Sequence):
+            return all(
+                not self._match(sub_condition, entry) for sub_condition in condition
+            )
+        raise QueryError(f"$nor has been attributed incorrect argument {condition}")
+
+    def _not(self, condition, entry):
+        return not self._match(condition, entry)
+
+    def _or(self, condition, entry):
+        if isinstance(condition, Sequence):
+            return any(self._match(sub_condition, entry) for sub_condition in condition)
+        raise QueryError(f"$or has been attributed incorrect argument {condition}")
+
+    @staticmethod
+    def _type(condition, entry):
+        bson_type: Dict[int, Type] = {
+            1: float,
+            2: str,
+            3: Mapping,
+            4: Sequence,
+            5: bytearray,
+            7: str,  # object id (uuid)
+            8: bool,
+            9: str,  # date (UTC datetime)
+            10: type(None),
+            11: re.Pattern,  # regex,
+            13: str,  # Javascript
+            15: str,  # JavaScript (with scope)
+            16: int,  # 32-bit integer
+            17: int,  # Timestamp
+            18: int,  # 64-bit integer
+        }
+        bson_alias = {
+            "double": 1,
+            "string": 2,
+            "object": 3,
+            "array": 4,
+            "binData": 5,
+            "objectId": 7,
+            "bool": 8,
+            "date": 9,
+            "null": 10,
+            "regex": 11,
+            "javascript": 13,
+            "javascriptWithScope": 15,
+            "int": 16,
+            "timestamp": 17,
+            "long": 18,
+        }
+
+        if condition == "number":
+            return any(
+                [
+                    isinstance(entry, bson_type[bson_alias[alias]])
+                    for alias in ["double", "int", "long"]
+                ]
+            )
+
+        # resolves bson alias, or keeps original condition value
+        condition = bson_alias.get(condition, condition)
+
+        if condition not in bson_type:
+            raise QueryError(f"$type has been used with unknown type {condition}")
+
+        return isinstance(entry, bson_type[condition])
+
+    _exists = _noop
+
+    @staticmethod
+    def _mod(condition, entry):
+        return entry % condition[0] == condition[1]
+
+    @staticmethod
+    def _regex(condition, entry):
+        if not isinstance(entry, str):
+            return False
+        # If the caller has supplied a compiled regex, assume options are already
+        # included.
+        if isinstance(condition, re.Pattern):
+            return bool(re.search(condition, entry))
+
+        try:
+            regex = re.match(r"\A/(.+)/([imsx]{,4})\Z", condition, flags=re.DOTALL)
+        except TypeError:
+            raise QueryError(
+                f"{condition} is not a regular expression and should be a string"
+            )
+
+        flags = 0
+        if regex:
+            options = regex.group(2)
+            for option in options:
+                flags |= getattr(re, option.upper())
+            exp = regex.group(1)
+        else:
+            exp = condition
+
+        try:
+            match = re.search(exp, entry, flags=flags)
+        except Exception as error:
+            raise QueryError(f"{condition} failed to execute with error {error!r}")
+        return bool(match)
+
+    _options = _text = _where = _not_implemented
+
+    def _all(self, condition, entry):
+        return all(self._match(item, entry) for item in condition)
+
+    def _elemMatch(self, condition, entry):
+        if not isinstance(entry, Sequence):
+            return False
+        return any(
+            all(
+                self._process_condition(sub_operator, sub_condition, element)
+                for sub_operator, sub_condition in condition.items()
+            )
+            for element in entry
+        )
+
+    @staticmethod
+    def _size(condition, entry):
+        if not isinstance(condition, int):
+            raise QueryError(
+                f"$size has been attributed incorrect argument {condition}"
+            )
+
+        if is_non_string_sequence(entry):
+            return len(entry) == condition
+
+        return False
+
+    def __repr__(self):
+        return f"<Query({self._definition})>"
+
+
+def toregex(wildcard):
+    """Convert an fnmatch-style wildcard into an equivalent ``$regex`` condition."""
+    if not isinstance(wildcard, str):
+        raise QueryError(f"Unexpected value in the regex conversion: {wildcard}")
+    if any(c in wildcard for c in "?*[]!"):  # plain strings need no regex
+        return {"$regex": r"\A" + fnmatch.translate(wildcard)}  # re.search needs \A
+    return wildcard
+
+
+def convert(filters):
+    """Convert filters to the mongo query syntax.
+    A special care is taken to handle old-style negative filters correctly
+    """
+    # Negative_filters are old-style negative assertions, and behave differently.
+    # See issue #246 for the original bug report.
+    #
+    # For a short example:
+    # [{"platform": "!win32"}, {"platform": "!linux"}]
+    # will match all non-linux non-windows samples, but:
+    # [{"platform": {"$not": "win32"}}, {"platform": {"$not": "linux"}}]
+    # means `platform != "win32" or "platform != "linux"` and will match everything.
+    # To get equivalent behaviour with mongo syntax, you should use:
+    # [{"platform": {"$not": {"$or": ["win32", "linux"]}}}]
+    regular_filter, negative_filter = [], []
+    for rule in filters:
+        positive_checks, negative_checks = [], []
+        for key, value in rule.items():
+            if isinstance(value, str):
+                if value and value[0] == "!":  # negative check
+                    negative_checks.append({key: toregex(value[1:])})
+                else:
+                    positive_checks.append({key: toregex(value)})
+            else:
+                positive_checks.append({key: value})
+        regular_filter.append({"$and": positive_checks})
+        negative_filter.append({"$and": positive_checks + [{"$or": negative_checks}]})
+    return Query(
+        {
+            "$and": [
+                {"$not": {"$or": negative_filter}},
+                {"$or": regular_filter},
+            ]
+        }
+    )
diff --git a/karton/core/task.py b/karton/core/task.py
index 0c072d83..4d8c34bb 100644
--- a/karton/core/task.py
+++ b/karton/core/task.py
@@ -1,5 +1,4 @@
 import enum
-import fnmatch
 import json
 import time
 import uuid
@@ -16,6 +15,7 @@
     Union,
 )
 
+from . import query
 from .resource import RemoteResource, ResourceBase
 from .utils import recursive_iter, recursive_iter_with_keys, recursive_map
 
@@ -223,75 +223,8 @@ def process(self, task: Task) -> None:
         return new_task
 
     def matches_filters(self, filters: List[Dict[str, Any]]) -> bool:
-        """
-        Checks whether provided task headers match filters
-
-        :param filters: Task header filters
-        :return: True if task headers match specific filters
-
-        :meta private:
-        """
-
-        def test_filter(headers: Dict[str, Any], filter: Dict[str, Any]) -> int:
-            """
-            Filter match follows AND logic, but it's non-boolean because filters may be
-            negated (task:!platform).
-
-            Result values are as follows:
-            - 1  - positive match, no mismatched values in headers
-                   (all matched)
-            - 0  - no match, found value that doesn't match to the filter
-                   (some are not matched)
-            - -1 - negative match, found value that matches negated filter value
-                   (all matched but found negative matches)
-            """
-            matches = 1
-            for filter_key, filter_value in filter.items():
-                # Coerce filter value to string
-                filter_value_str = str(filter_value)
-                negated = False
-                if filter_value_str.startswith("!"):
-                    negated = True
-                    filter_value_str = filter_value_str[1:]
-
-                # If expected key doesn't exist in headers
-                if filter_key not in headers:
-                    # Negated filter ignores non-existent values
-                    if negated:
-                        continue
-                    # But positive filter doesn't
-                    return 0
-
-                # Coerce header value to string
-                header_value_str = str(headers[filter_key])
-                # fnmatch is great for handling simple wildcard patterns (?, *, [abc])
-                match = fnmatch.fnmatchcase(header_value_str, filter_value_str)
-                # If matches, but it's negated: it's negative match
-                if match and negated:
-                    matches = -1
-                # If doesn't match but filter is not negated: it's not a match
-                if not match and not negated:
-                    return 0
-            # If there are no mismatched values: filter is matched
-            return matches
-
-        # List of filter matches follow OR logic, but -1 is special
-        # If there is any -1, result is False
-        #   (any matched, but it's negative match)
-        # If there is any 1, but no -1's: result is True
-        #   (any matched, no negative match)
-        # If there are only 0's: result is False
-        #   (none matched)
-        matches = False
-        for task_filter in filters:
-            match_result = test_filter(self.headers, task_filter)
-            if match_result == -1:
-                # Any negative match results in False
-                return False
-            if match_result == 1:
-                # Any positive match but without negative matches results in True
-                matches = True
-        return matches
+        """Check if a task matches the given filters"""
+        return query.convert(filters).match(self.headers)
 
     def set_task_parent(self, parent: "Task"):
         """
diff --git a/karton/system/system.py b/karton/system/system.py
index 0947d0e8..3d6b14b2 100644
--- a/karton/system/system.py
+++ b/karton/system/system.py
@@ -3,6 +3,7 @@
 import time
 from typing import List, Optional
 
+from karton.core import query
 from karton.core.__version__ import __version__
 from karton.core.backend import (
     KARTON_OPERATIONS_QUEUE,
@@ -175,7 +176,12 @@ def route_task(self, task: Task, binds: List[KartonBind]) -> None:
         pipe = self.backend.make_pipeline()
         for bind in binds:
             identity = bind.identity
-            if task.matches_filters(bind.filters):
+            try:
+                is_match = task.matches_filters(bind.filters)
+            except query.QueryError:
+                self.log.error("Task matching failed - invalid filters?")
+                continue
+            if is_match:
                 routed_task = task.fork_task()
                 routed_task.status = TaskState.SPAWNED
                 routed_task.last_update = time.time()
diff --git a/tests/test_core.py b/tests/test_core.py
index c560e290..831c534d 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -38,7 +38,7 @@ def test_missing_config_file(self, mock_isfile, mock_parser):
         """ Test missing config file """
         mock_isfile.return_value = False
         with self.assertRaises(IOError):
-            cfg = Config("this_file_doesnt_exist")
+            Config("this_file_doesnt_exist")
 
     @patch('os.path.isfile', lambda path: True)
     @patch('builtins.open', mock_open(read_data=MOCK_CONFIG))
diff --git a/tests/test_task_filters.py b/tests/test_task_filters.py
index 2bd8599e..0a722870 100644
--- a/tests/test_task_filters.py
+++ b/tests/test_task_filters.py
@@ -261,3 +261,264 @@ def test_negated_filter_for_different_type(self):
             "platform": "win64"
         })
         self.assertFalse(task_sample_win64.matches_filters(filters))
+
+    def test_list_contains(self):
+        filters = [
+            {
+                "type": "sample",
+                "platform": {"$in": ["win32", "linux"]},
+            },
+        ]
+
+        task_sample = Task(headers={
+            "type": "sample",
+            "platform": "win32"
+        })
+        self.assertTrue(task_sample.matches_filters(filters))
+
+        task_different_win32 = Task(headers={
+            "type": "sample",
+            "platform": "linux"
+        })
+        self.assertTrue(task_different_win32.matches_filters(filters))
+
+        task_different_win64 = Task(headers={
+            "type": "different",
+            "platform": "win32"
+        })
+        self.assertFalse(task_different_win64.matches_filters(filters))
+
+    def test_element_is_contained(self):
+        filters = [
+            {
+                "type": "sample",
+                "tags": "emotet",
+            },
+        ]
+
+        task_sample = Task(headers={
+            "type": "sample",
+            "tags": ["emotet"],
+        })
+        self.assertTrue(task_sample.matches_filters(filters))
+
+        task_sample = Task(headers={
+            "type": "sample",
+            "tags": ["emotet", "dump"],
+        })
+        self.assertTrue(task_sample.matches_filters(filters))
+
+        task_sample = Task(headers={
+            "type": "sample",
+            "tags": ["nymaim", "dump"],
+        })
+        self.assertFalse(task_sample.matches_filters(filters))
+
+    def test_multiple_elements_are_contained(self):
+        filters = [
+            {
+                "type": "sample",
+                "tags": {"$all": ["emotet", "dump"]},
+            },
+        ]
+
+        task_sample = Task(headers={
+            "type": "sample",
+            "tags": ["emotet"],
+        })
+        self.assertFalse(task_sample.matches_filters(filters))
+
+        task_sample = Task(headers={
+            "type": "sample",
+            "tags": ["emotet", "dump"],
+        })
+        self.assertTrue(task_sample.matches_filters(filters))
+
+        task_sample = Task(headers={
+            "type": "sample",
+            "tags": ["emotet", "dump", "needs-inspection"],
+        })
+        self.assertTrue(task_sample.matches_filters(filters))
+
+        task_sample = Task(headers={
+            "type": "sample",
+            "tags": ["nymaim", "dump"],
+        })
+        self.assertFalse(task_sample.matches_filters(filters))
+
+    def test_comparison(self):
+        filters = [
+            {
+                "type": "sample",
+                "version": {"$gt": 3},
+            },
+        ]
+
+        task_sample = Task(headers={
+            "type": "sample",
+            "version": 2,
+        })
+        self.assertFalse(task_sample.matches_filters(filters))
+
+        task_sample = Task(headers={
+            "type": "sample",
+            "version": 4,
+        })
+        self.assertTrue(task_sample.matches_filters(filters))
+
+    def test_basic_wildcard(self):
+        filters = [
+            {
+                "type": "sample",
+                "platform": "win*",
+            },
+        ]
+
+        task_sample = Task(headers={
+            "type": "sample",
+            "platform": "linux",
+        })
+        self.assertFalse(task_sample.matches_filters(filters))
+
+        task_sample = Task(headers={
+            "type": "sample",
+            "platform": "win32",
+        })
+        self.assertTrue(task_sample.matches_filters(filters))
+
+        task_sample = Task(headers={
+            "type": "sample",
+            "platform": "win",
+        })
+        self.assertTrue(task_sample.matches_filters(filters))
+
+    def test_regex_match(self):
+        filters = [
+            {
+                "type": "sample",
+                "platform": {"$regex": "win.*"}
+            },
+        ]
+
+        task_sample = Task(headers={
+            "type": "sample",
+            "platform": "linux",
+        })
+        self.assertFalse(task_sample.matches_filters(filters))
+
+        task_sample = Task(headers={
+            "type": "sample",
+            "platform": "win32",
+        })
+        self.assertTrue(task_sample.matches_filters(filters))
+
+        task_sample = Task(headers={
+            "type": "sample",
+            "platform": "win",
+        })
+        self.assertTrue(task_sample.matches_filters(filters))
+
+        task_sample = Task(headers={
+            "type": "sample",
+            "platform": "karton keeps on winning",
+        })
+        # no anchors in the regex, so this should actually match
+        self.assertTrue(task_sample.matches_filters(filters))
+
+    def test_example_from_convert(self):
+        # Test for a literal example used in the convert method documentation
+        oldstyle = [{"platform": "!win32"}, {"platform": "!linux"}]
+        wrong = [{"platform": {"$not": "win32"}}, {"platform": {"$not": "linux"}}]
+        good = [{"platform": {"$not": {"$or": ["win32", "linux"]}}}]
+
+        task_linux = Task(headers={
+            "type": "sample",
+            "platform": "linux",
+        })
+        task_win32 = Task(headers={
+            "type": "sample",
+            "platform": "win32",
+        })
+        task_macos = Task(headers={
+            "type": "sample",
+            "platform": "macos",
+        })
+        tasks = [task_linux, task_win32, task_macos]
+
+        def assertExpect(tasks, filters, results):
+            for task, result in zip(tasks, results):
+                self.assertEqual(task.matches_filters(filters), result)
+
+        assertExpect(tasks, oldstyle, [False, False, True])
+        assertExpect(tasks, wrong, [True, True, True])
+        assertExpect(tasks, good, [False, False, True])
+
+    def test_nested_oldstyle(self):
+        # Old-style wildcards, except negative filters, don't mix with $-operators
+        filters = [
+            {
+                "platform": {"$or": ["win*", "linux*"]}
+            },
+        ]
+
+        task_sample = Task(headers={
+            "platform": "linux",
+        })
+        self.assertFalse(task_sample.matches_filters(filters))
+
+        task_sample = Task(headers={
+            "platform": "linux*",
+        })
+        self.assertTrue(task_sample.matches_filters(filters))
+
+    def test_newstyle_flip(self):
+        # It's not recommended, but mongo syntax is allowed at the top level too
+        # Pointless example: match platform:win32 or kind:runnable
+        filters = [
+            {
+                "$or": [{"platform": "win32"}, {"kind": "runnable"}],
+            },
+        ]
+
+        task_sample = Task(
+            headers={"platform": "linux", "kind": "runnable"}
+        )
+        self.assertTrue(task_sample.matches_filters(filters))
+
+        task_sample = Task(
+            headers={"platform": "win32"}
+        )
+        self.assertTrue(task_sample.matches_filters(filters))
+
+        task_sample = Task(
+            headers={"platform": "linux"}
+        )
+        self.assertFalse(task_sample.matches_filters(filters))
+
+    def test_oldstyle_wildcards(self):
+        # Old-style wildcards support negated character classes such as [!rz]
+        filters = [{"foo": "ba[!rz]"}]
+
+        task_sample = Task(headers={
+            "foo": "bar",
+        })
+        self.assertFalse(task_sample.matches_filters(filters))
+
+        task_sample = Task(headers={
+            "foo": "bat",
+        })
+        self.assertTrue(task_sample.matches_filters(filters))
+
+    def test_wildcards_anchored(self):
+        # Just to make sure matching is anchored at ^ and $.
+        filters = [{"foo": "bar"}]
+
+        task_sample = Task(headers={
+            "foo": "rabarbar",
+        })
+        self.assertFalse(task_sample.matches_filters(filters))
+
+        task_sample = Task(headers={
+            "foo": "bar",
+        })
+        self.assertTrue(task_sample.matches_filters(filters))