-
Notifications
You must be signed in to change notification settings - Fork 64
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
linux-mainline 3.8: backport sync(2) fix from 3.11
Signed-off-by: Koen Kooi <[email protected]>
- Loading branch information
Showing
2 changed files
with
115 additions
and
0 deletions.
There are no files selected for viewing
114 changes: 114 additions & 0 deletions
114
...nux/linux-mainline-3.8/fixes/0001-sync-don-t-block-the-flusher-thread-waiting-on-IO.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
From 4e3f4e9a8f2076743265cb4e76e3d26827325d5a Mon Sep 17 00:00:00 2001 | ||
From: Dave Chinner <[email protected]> | ||
Date: Tue, 2 Jul 2013 22:38:35 +1000 | ||
Subject: [PATCH] sync: don't block the flusher thread waiting on IO | ||
|
||
When sync does it's WB_SYNC_ALL writeback, it issues data Io and | ||
then immediately waits for IO completion. This is done in the | ||
context of the flusher thread, and hence completely ties up the | ||
flusher thread for the backing device until all the dirty inodes | ||
have been synced. On filesystems that are dirtying inodes constantly | ||
and quickly, this means the flusher thread can be tied up for | ||
minutes per sync call and hence badly affect system level write IO | ||
performance as the page cache cannot be cleaned quickly. | ||
|
||
We already have a wait loop for IO completion for sync(2), so cut | ||
this out of the flusher thread and delegate it to wait_sb_inodes(). | ||
Hence we can do rapid IO submission, and then wait for it all to | ||
complete. | ||
|
||
Effect of sync on fsmark before the patch: | ||
|
||
FSUse% Count Size Files/sec App Overhead | ||
..... | ||
0 640000 4096 35154.6 1026984 | ||
0 720000 4096 36740.3 1023844 | ||
0 800000 4096 36184.6 916599 | ||
0 880000 4096 1282.7 1054367 | ||
0 960000 4096 3951.3 918773 | ||
0 1040000 4096 40646.2 996448 | ||
0 1120000 4096 43610.1 895647 | ||
0 1200000 4096 40333.1 921048 | ||
|
||
And a single sync pass took: | ||
|
||
real 0m52.407s | ||
user 0m0.000s | ||
sys 0m0.090s | ||
|
||
After the patch, there is no impact on fsmark results, and each | ||
individual sync(2) operation run concurrently with the same fsmark | ||
workload takes roughly 7s: | ||
|
||
real 0m6.930s | ||
user 0m0.000s | ||
sys 0m0.039s | ||
|
||
IOWs, sync is 7-8x faster on a busy filesystem and does not have an | ||
adverse impact on ongoing async data write operations. | ||
|
||
Signed-off-by: Dave Chinner <[email protected]> | ||
Reviewed-by: Jan Kara <[email protected]> | ||
Signed-off-by: Linus Torvalds <[email protected]> | ||
--- | ||
fs/fs-writeback.c | 9 +++++++-- | ||
include/linux/writeback.h | 1 + | ||
2 files changed, 8 insertions(+), 2 deletions(-) | ||
|
||
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c | ||
index 310972b..c5862a9 100644 | ||
--- a/fs/fs-writeback.c | ||
+++ b/fs/fs-writeback.c | ||
@@ -46,6 +46,7 @@ struct wb_writeback_work { | ||
unsigned int for_kupdate:1; | ||
unsigned int range_cyclic:1; | ||
unsigned int for_background:1; | ||
+ unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ | ||
enum wb_reason reason; /* why was writeback initiated? */ | ||
|
||
struct list_head list; /* pending work list */ | ||
@@ -455,9 +456,11 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | ||
/* | ||
* Make sure to wait on the data before writing out the metadata. | ||
* This is important for filesystems that modify metadata on data | ||
- * I/O completion. | ||
+ * I/O completion. We don't do it for sync(2) writeback because it has a | ||
+ * separate, external IO completion path and ->sync_fs for guaranteeing | ||
+ * inode metadata is written back correctly. | ||
*/ | ||
- if (wbc->sync_mode == WB_SYNC_ALL) { | ||
+ if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) { | ||
int err = filemap_fdatawait(mapping); | ||
if (ret == 0) | ||
ret = err; | ||
@@ -590,6 +593,7 @@ static long writeback_sb_inodes(struct super_block *sb, | ||
.tagged_writepages = work->tagged_writepages, | ||
.for_kupdate = work->for_kupdate, | ||
.for_background = work->for_background, | ||
+ .for_sync = work->for_sync, | ||
.range_cyclic = work->range_cyclic, | ||
.range_start = 0, | ||
.range_end = LLONG_MAX, | ||
@@ -1391,6 +1395,7 @@ void sync_inodes_sb(struct super_block *sb) | ||
.range_cyclic = 0, | ||
.done = &done, | ||
.reason = WB_REASON_SYNC, | ||
+ .for_sync = 1, | ||
}; | ||
|
||
/* Nothing to do? */ | ||
diff --git a/include/linux/writeback.h b/include/linux/writeback.h | ||
index b82a83a..7aa94e5 100644 | ||
--- a/include/linux/writeback.h | ||
+++ b/include/linux/writeback.h | ||
@@ -77,6 +77,7 @@ struct writeback_control { | ||
unsigned tagged_writepages:1; /* tag-and-write to avoid livelock */ | ||
unsigned for_reclaim:1; /* Invoked from the page allocator */ | ||
unsigned range_cyclic:1; /* range_start is cyclic */ | ||
+ unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ | ||
}; | ||
|
||
/* | ||
-- | ||
1.8.2.1 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters