From 7d39829088de1e3cdd1486f5e4fc906b85073cc7 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Thu, 12 Dec 2024 21:58:48 +0100 Subject: [PATCH] Walk: enforce `MAX_STAT` during ScanDirectory() As a quick kludge, this commit creates a copy of ScanDirectory() as a coroutine so we can `co_await resume_stat` (we still need the synchronous version for method Start()). The coroutine copy can then enforce `MAX_STAT` to limit the io_uring statx() concurrency; this is necessary because cache directories with millions of cached files would cause the kernel to consume gigabytes of memory and exceed our memory limit. --- debian/changelog | 2 +- src/Walk.cxx | 24 +++++++++++++++++++++++- src/Walk.hxx | 1 + 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/debian/changelog b/debian/changelog index 6a766cc..8fa3b27 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,6 +1,6 @@ cm4all-cash (0.5) unstable; urgency=low - * + * throttle scanning huge directories (reduce memory usage) -- diff --git a/src/Walk.cxx b/src/Walk.cxx index 37d8ddc..7e2fb90 100644 --- a/src/Walk.cxx +++ b/src/Walk.cxx @@ -138,12 +138,34 @@ IsSpecialFilename(const char *s) noexcept inline void Walk::ScanDirectory(WalkDirectory &directory, UniqueFileDescriptor &&fd) +{ + // TODO eliminate this method, use only CoScanDirectory + + DirectoryReader r{std::move(fd)}; + while (const char *name = r.Read()) { + if (IsSpecialFilename(name)) + continue; + + auto *item = new StatItem(*this, directory, name); + stat.push_back(*item); + + item->Start(uring); + } +} + +inline Co::Task +Walk::CoScanDirectory(WalkDirectory &directory, UniqueFileDescriptor &&fd) { DirectoryReader r{std::move(fd)}; while (const char *name = r.Read()) { if (IsSpecialFilename(name)) continue; + /* throttle if there are too many concurrent statx + system calls */ + while (stat.size() > MAX_STAT) [[unlikely]] + co_await resume_stat; + auto *item = new StatItem(*this, directory, name); stat.push_back(*item); @@ -159,7 +181,7 @@ 
try { *new WalkDirectory(uring, parent, co_await Uring::CoOpen(uring, parent.fd, name.c_str(), O_PATH|O_DIRECTORY, 0)), }; - ScanDirectory(*directory, co_await Uring::CoOpen(uring, directory->fd, ".", O_DIRECTORY, 0)); + co_await CoScanDirectory(*directory, co_await Uring::CoOpen(uring, directory->fd, ".", O_DIRECTORY, 0)); } catch (...) { fmt::print(stderr, "Failed to scan directory: {}\n", std::current_exception()); } diff --git a/src/Walk.hxx b/src/Walk.hxx index 9e9c66d..783e155 100644 --- a/src/Walk.hxx +++ b/src/Walk.hxx @@ -75,6 +75,7 @@ private: FileTime atime, uint_least64_t size); void ScanDirectory(WalkDirectory &directory, UniqueFileDescriptor &&fd); + Co::Task CoScanDirectory(WalkDirectory &directory, UniqueFileDescriptor &&fd); void OnStatCompletion(StatItem &item) noexcept; };