From 82687d2d56f08d4eed874756bfe685bae6a9c9eb Mon Sep 17 00:00:00 2001 From: Sean Gillies Date: Mon, 11 Dec 2023 10:05:05 -0700 Subject: [PATCH] Add a module level flag indicating whether os.fork needs a patch Drop use of monkeypatch fixture for os.fork isolation. Add some docs and comments to the two new fixtures and rename the test module. --- tiledb/ctx.py | 42 ++++++++++- tiledb/tests/conftest.py | 26 ++++--- tiledb/tests/test_fork_ctx.py | 96 ++++++++++++++++++++++++ tiledb/tests/test_multiprocessing.py | 108 --------------------------- 4 files changed, 153 insertions(+), 119 deletions(-) create mode 100644 tiledb/tests/test_fork_ctx.py delete mode 100644 tiledb/tests/test_multiprocessing.py diff --git a/tiledb/ctx.py b/tiledb/ctx.py index c159c8477e..cad84b7d4a 100644 --- a/tiledb/ctx.py +++ b/tiledb/ctx.py @@ -10,6 +10,7 @@ _ctx_var = ContextVar("ctx") already_warned = False +_needs_fork_wrapper = sys.platform != "win32" and sys.version_info < (3, 12) class Config(lt.Config): @@ -345,6 +346,16 @@ def __init__(self, config: Config = None): self._set_default_tags() + # The core tiledb library uses threads and it's easy + # to experience deadlocks when forking a process that is using + # tiledb. The project doesn't have a solution for this at the + # moment other than to avoid using fork(), which is the same + # recommendation that Python makes. Python 3.12 warns if you + # fork() when multiple threads are detected and Python 3.14 will + # make it so you never accidentally fork(): multiprocessing will + # default to "spawn" on Linux. + _ensure_os_fork_wrap() + def __repr__(self): return "tiledb.Ctx() [see Ctx.config() for configuration]" @@ -439,7 +450,6 @@ def check_ipykernel_warn_once(): global already_warned if not already_warned: try: - import sys import warnings if "ipykernel" in sys.modules and tuple( @@ -521,7 +531,37 @@ def default_ctx(config: Union["Config", dict] = None) -> "Ctx": ctx = _ctx_var.get() if config is not None: raise tiledb.TileDBError("Global context already initialized!") + _ensure_os_fork_wrap() except LookupError: ctx = tiledb.Ctx(config) _ctx_var.set(ctx) return ctx + + +def _ensure_os_fork_wrap(): + global _needs_fork_wrapper + if _needs_fork_wrapper: + import os + import warnings + from functools import wraps + + def warning_wrapper(func): + @wraps(func) + def wrapper(): + warnings.warn( + "TileDB is a multithreading library and deadlocks " + "are likely if fork() is called after a TileDB " + "context has been created (such as for array " + "access). To safely use TileDB with " + "multiprocessing or concurrent.futures, choose " + "'spawn' as the start method for child processes. " + "For example: " + "multiprocessing.set_start_method('spawn').", + UserWarning, + ) + return func() + + return wrapper + + os.fork = warning_wrapper(os.fork) + _needs_fork_wrapper = False diff --git a/tiledb/tests/conftest.py b/tiledb/tests/conftest.py index 42b2808e39..99909c00f3 100644 --- a/tiledb/tests/conftest.py +++ b/tiledb/tests/conftest.py @@ -102,13 +102,19 @@ def __init__(self, params=None): @pytest.fixture(scope="function", autouse=True) -def isolate_os_fork(monkeypatch): - # Use monkeypatch to set an attribute to itself, what? - # This makes sure that before any test is run, we save the original - # value of os.fork, i.e. , and then - # restore it at the end of every test. Calling Ctx() may patch - # os.fork at runtime. - if sys.platform == "win32": - pass - else: - monkeypatch.setattr(os, "fork", os.fork) +def isolate_os_fork(original_os_fork): + """Guarantee that tests start and finish with no os.fork patch.""" + # Python 3.12 warns about fork() and threads. Tiledb only patches + # os.fork for Pythons 3.8-3.11. + if sys.platform != "win32" and sys.version_info < (3, 12): + tiledb.ctx._needs_fork_wrapper = True + os.fork = original_os_fork + yield + tiledb.ctx._needs_fork_wrapper = True + os.fork = original_os_fork + + +@pytest.fixture(scope="session") +def original_os_fork(): + """Provides the original unpatched os.fork.""" + return os.fork diff --git a/tiledb/tests/test_fork_ctx.py b/tiledb/tests/test_fork_ctx.py new file mode 100644 index 0000000000..b25d81c22d --- /dev/null +++ b/tiledb/tests/test_fork_ctx.py @@ -0,0 +1,96 @@ +"""Tests combining fork with tiledb context threads. + +Background: the core tiledb library uses threads and it's easy to +experience deadlocks when forking a process that is using tiledb. The +project doesn't have a solution for this at the moment other than to +avoid using fork(), which is the same recommendation that Python makes. +Python 3.12 warns if you fork() when multiple threads are detected and +Python 3.14 will make it so you never accidentally fork(): +multiprocessing will default to "spawn" on Linux. +""" + +import multiprocessing +import os +import sys +import warnings + +import pytest + +import tiledb + + +def test_no_warning_fork_without_ctx(): + """Get no warning if no tiledb context exists.""" + with warnings.catch_warnings(): + warnings.simplefilter("error") + pid = os.fork() + if pid == 0: + os._exit(0) + else: + os.wait() + + +@pytest.mark.skipif( + sys.platform == "win32", reason="fork() is not available on Windows" +) +def test_warning_fork_with_ctx(): + """Get a warning if we fork after creating a tiledb context.""" + _ = tiledb.Ctx() + with pytest.warns(UserWarning, match="TileDB is a multithreading library"): + pid = os.fork() + if pid == 0: + os._exit(0) + else: + os.wait() + + +@pytest.mark.skipif( + sys.platform == "win32", reason="fork() is not available on Windows" +) +def test_warning_fork_with_default_ctx(): + """Get a warning if we fork after creating a default context.""" + _ = tiledb.default_ctx() + with pytest.warns(UserWarning, match="TileDB is a multithreading library"): + pid = os.fork() + if pid == 0: + os._exit(0) + else: + os.wait() + + pass + + +def test_no_warning_multiprocessing_without_ctx(): + """Get no warning if no tiledb context exists.""" + with warnings.catch_warnings(): + warnings.simplefilter("error") + mp = multiprocessing.get_context("fork") + p = mp.Process() + p.start() + p.join() + + +@pytest.mark.skipif( + sys.platform == "win32", reason="fork() is not available on Windows" +) +def test_warning_multiprocessing_with_ctx(): + """Get a warning if we fork after creating a tiledb context.""" + _ = tiledb.Ctx() + mp = multiprocessing.get_context("fork") + p = mp.Process() + with pytest.warns(UserWarning, match="TileDB is a multithreading library"): + p.start() + p.join() + + +@pytest.mark.skipif( + sys.platform == "win32", reason="fork() is not available on Windows" +) +def test_warning_multiprocessing_with_default_ctx(): + """Get a warning if we fork after creating a default context.""" + _ = tiledb.default_ctx() + mp = multiprocessing.get_context("fork") + p = mp.Process() + with pytest.warns(UserWarning, match="TileDB is a multithreading library"): + p.start() + p.join() diff --git a/tiledb/tests/test_multiprocessing.py b/tiledb/tests/test_multiprocessing.py deleted file mode 100644 index 6253a66b1b..0000000000 --- a/tiledb/tests/test_multiprocessing.py +++ /dev/null @@ -1,108 +0,0 @@ -"""Tests combining multiprocessing with threads""" - -import multiprocessing -import os -import sys -import warnings -from functools import wraps - -import pytest - - -def func(): - return 0 - - -@pytest.mark.skipif( - sys.platform == "win32", reason="fork() is not available on Windows" -) -def test_os_warn_at_fork(): - """Get a warning if we fork after importing tiledb.""" - # Background: the core tiledb library uses threads and it's easy to - # experience deadlocks when forking a process that is using tiledb. - # The project doesn't have a solution for this at the moment other - # than to avoid using fork(), which is the same recommendation that - # Python makes. Python 3.12 warns if you fork() when multiple - # threads are detected and Python 3.14 will make it so you never - # accidentally fork(): multiprocessing will default to "spawn" on - # Linux. - - def wrap_fork(): - os_fork = os.fork - - @wraps(os_fork) - def wrapper(): - warnings.warn( - "TileDB is a multithreading library and deadlocks are " - "likely if fork() is called after a TileDB context has " - "been created (such as for array access). " - "To safely use TileDB with multiprocessing or " - "concurrent.futures, choose 'spawn' as the start " - "method for child processes. For example: " - "multiprocessing.set_start_method('spawn').", - UserWarning, - ) - return os_fork() - - return wrapper - - import tiledb # noqa - - if sys.version_info < (3, 12): - os.fork = wrap_fork() - - with pytest.warns(UserWarning): - pid = os.fork() - - if pid == 0: - func() - os._exit(0) - else: - os.wait() - - -@pytest.mark.skipif( - sys.platform == "win32", reason="fork() is not available on Windows" -) -def test_multiprocessing_warn_at_fork(): - """Get a warning if we fork after importing tiledb.""" - # Background: the core tiledb library uses threads and it's easy to - # experience deadlocks when forking a process that is using tiledb. - # The project doesn't have a solution for this at the moment other - # than to avoid using fork(), which is the same recommendation that - # Python makes. Python 3.12 warns if you fork() when multiple - # threads are detected and Python 3.14 will make it so you never - # accidentally fork(): multiprocessing will default to "spawn" on - # Linux. - - def wrap_fork(): - os_fork = os.fork - - @wraps(os_fork) - def wrapper(): - warnings.warn( - "TileDB is a multithreading library and deadlocks are " - "likely if fork() is called after a TileDB context has " - "been created (such as for array access). " - "To safely use TileDB with multiprocessing or " - "concurrent.futures, choose 'spawn' as the start " - "method for child processes. For example: " - "multiprocessing.set_start_method('spawn').", - UserWarning, - ) - return os_fork() - - return wrapper - - import tiledb # noqa - - if sys.version_info < (3, 12): - os.fork = wrap_fork() - - ctx = multiprocessing.get_context("fork") - p = ctx.Process(target=func) - - with pytest.warns(UserWarning): - p.start() - - p.join()