From 0360a6f4596b40da94c2b344a7a22d5dbf0e8ce0 Mon Sep 17 00:00:00 2001 From: Koen Kooi Date: Tue, 3 Sep 2013 11:08:36 +0200 Subject: [PATCH] linux-mainline 3.8: backport sync(2) fix from 3.11 Signed-off-by: Koen Kooi --- ...ock-the-flusher-thread-waiting-on-IO.patch | 114 ++++++++++++++++++ .../linux/linux-mainline_3.8.bb | 1 + 2 files changed, 115 insertions(+) create mode 100644 common-bsp/recipes-kernel/linux/linux-mainline-3.8/fixes/0001-sync-don-t-block-the-flusher-thread-waiting-on-IO.patch diff --git a/common-bsp/recipes-kernel/linux/linux-mainline-3.8/fixes/0001-sync-don-t-block-the-flusher-thread-waiting-on-IO.patch b/common-bsp/recipes-kernel/linux/linux-mainline-3.8/fixes/0001-sync-don-t-block-the-flusher-thread-waiting-on-IO.patch new file mode 100644 index 00000000..0a2cb080 --- /dev/null +++ b/common-bsp/recipes-kernel/linux/linux-mainline-3.8/fixes/0001-sync-don-t-block-the-flusher-thread-waiting-on-IO.patch @@ -0,0 +1,114 @@ +From 4e3f4e9a8f2076743265cb4e76e3d26827325d5a Mon Sep 17 00:00:00 2001 +From: Dave Chinner +Date: Tue, 2 Jul 2013 22:38:35 +1000 +Subject: [PATCH] sync: don't block the flusher thread waiting on IO + +When sync does its WB_SYNC_ALL writeback, it issues data IO and +then immediately waits for IO completion. This is done in the +context of the flusher thread, and hence completely ties up the +flusher thread for the backing device until all the dirty inodes +have been synced. On filesystems that are dirtying inodes constantly +and quickly, this means the flusher thread can be tied up for +minutes per sync call and hence badly affect system level write IO +performance as the page cache cannot be cleaned quickly. + +We already have a wait loop for IO completion for sync(2), so cut +this out of the flusher thread and delegate it to wait_sb_inodes(). +Hence we can do rapid IO submission, and then wait for it all to +complete. + +Effect of sync on fsmark before the patch: + +FSUse% Count Size Files/sec App Overhead +..... 
+ 0 640000 4096 35154.6 1026984 + 0 720000 4096 36740.3 1023844 + 0 800000 4096 36184.6 916599 + 0 880000 4096 1282.7 1054367 + 0 960000 4096 3951.3 918773 + 0 1040000 4096 40646.2 996448 + 0 1120000 4096 43610.1 895647 + 0 1200000 4096 40333.1 921048 + +And a single sync pass took: + + real 0m52.407s + user 0m0.000s + sys 0m0.090s + +After the patch, there is no impact on fsmark results, and each +individual sync(2) operation run concurrently with the same fsmark +workload takes roughly 7s: + + real 0m6.930s + user 0m0.000s + sys 0m0.039s + +IOWs, sync is 7-8x faster on a busy filesystem and does not have an +adverse impact on ongoing async data write operations. + +Signed-off-by: Dave Chinner +Reviewed-by: Jan Kara +Signed-off-by: Linus Torvalds +--- + fs/fs-writeback.c | 9 +++++++-- + include/linux/writeback.h | 1 + + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c +index 310972b..c5862a9 100644 +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -46,6 +46,7 @@ struct wb_writeback_work { + unsigned int for_kupdate:1; + unsigned int range_cyclic:1; + unsigned int for_background:1; ++ unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ + enum wb_reason reason; /* why was writeback initiated? */ + + struct list_head list; /* pending work list */ +@@ -455,9 +456,11 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) + /* + * Make sure to wait on the data before writing out the metadata. + * This is important for filesystems that modify metadata on data +- * I/O completion. ++ * I/O completion. We don't do it for sync(2) writeback because it has a ++ * separate, external IO completion path and ->sync_fs for guaranteeing ++ * inode metadata is written back correctly. 
+ */ +- if (wbc->sync_mode == WB_SYNC_ALL) { ++ if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) { + int err = filemap_fdatawait(mapping); + if (ret == 0) + ret = err; +@@ -590,6 +593,7 @@ static long writeback_sb_inodes(struct super_block *sb, + .tagged_writepages = work->tagged_writepages, + .for_kupdate = work->for_kupdate, + .for_background = work->for_background, ++ .for_sync = work->for_sync, + .range_cyclic = work->range_cyclic, + .range_start = 0, + .range_end = LLONG_MAX, +@@ -1391,6 +1395,7 @@ void sync_inodes_sb(struct super_block *sb) + .range_cyclic = 0, + .done = &done, + .reason = WB_REASON_SYNC, ++ .for_sync = 1, + }; + + /* Nothing to do? */ +diff --git a/include/linux/writeback.h b/include/linux/writeback.h +index b82a83a..7aa94e5 100644 +--- a/include/linux/writeback.h ++++ b/include/linux/writeback.h +@@ -77,6 +77,7 @@ struct writeback_control { + unsigned tagged_writepages:1; /* tag-and-write to avoid livelock */ + unsigned for_reclaim:1; /* Invoked from the page allocator */ + unsigned range_cyclic:1; /* range_start is cyclic */ ++ unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ + }; + + /* +-- +1.8.2.1 + diff --git a/common-bsp/recipes-kernel/linux/linux-mainline_3.8.bb b/common-bsp/recipes-kernel/linux/linux-mainline_3.8.bb index 5f85922d..28c5c07f 100644 --- a/common-bsp/recipes-kernel/linux/linux-mainline_3.8.bb +++ b/common-bsp/recipes-kernel/linux/linux-mainline_3.8.bb @@ -700,6 +700,7 @@ SRC_URI += " \ file://capes/0015-DT-overlay-for-BeBoPr-with-enable-patch-and-BeagleBo.patch \ file://capes/0016-DT-overlay-for-BeBoPr-Bridge-and-BeagleBone-any-colo.patch \ file://capes/0017-Removed-Whitelist-and-Blacklist-Modes-From-HDMI-Devi.patch \ + file://fixes/0001-sync-don-t-block-the-flusher-thread-waiting-on-IO.patch \ file://defconfig \ file://am335x-pm-firmware.bin \ file://logo_linux_clut224.ppm \