From 8756866e4939a3a2132ec35a406d9dc0b17dd717 Mon Sep 17 00:00:00 2001 From: Robert Evans Date: Sat, 20 Jan 2024 18:05:52 -0500 Subject: [PATCH] Use DNODE_FIND_DIRTY for SEEK_HOLE/SEEK_DATA. There is no need to worry about txg syncs anymore, so remove dmu_offset_next_sync and dnode_is_dirty too. This should be transparent to userland except that holes created by compression do not become visible until after the next txg sync. mmap_seek_001_pos is updated to force a sync to match this case. Signed-off-by: Robert Evans --- include/sys/dnode.h | 1 - module/zfs/dmu.c | 49 ++----------------- module/zfs/dnode.c | 28 ----------- tests/zfs-tests/cmd/Makefile.am | 1 + tests/zfs-tests/cmd/mmap_seek.c | 19 ++++++- tests/zfs-tests/include/tunables.cfg | 1 - .../functional/mmap/mmap_seek_001_pos.ksh | 7 +-- 7 files changed, 23 insertions(+), 83 deletions(-) diff --git a/include/sys/dnode.h b/include/sys/dnode.h index 2bdc3e952c1a..22bf44a06c05 100644 --- a/include/sys/dnode.h +++ b/include/sys/dnode.h @@ -435,7 +435,6 @@ boolean_t dnode_add_ref(dnode_t *dn, const void *ref); void dnode_rele(dnode_t *dn, const void *ref); void dnode_rele_and_unlock(dnode_t *dn, const void *tag, boolean_t evicting); int dnode_try_claim(objset_t *os, uint64_t object, int slots); -boolean_t dnode_is_dirty(dnode_t *dn); void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx); void dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, const void *tag); void dnode_sync(dnode_t *dn, dmu_tx_t *tx); diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index d82211e6d4c7..bb73f4a7c12d 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -75,15 +75,6 @@ static int zfs_nopwrite_enabled = 1; */ static uint_t zfs_per_txg_dirty_frees_percent = 30; -/* - * Enable/disable forcing txg sync when dirty checking for holes with lseek(). - * By default this is enabled to ensure accurate hole reporting, it can result - * in a significant performance penalty for lseek(SEEK_HOLE) heavy workloads. 
- * Disabling this option will result in holes never being reported in dirty - * files which is always safe. - */ -static int zfs_dmu_offset_next_sync = 1; - /* * Limit the amount we can prefetch with one call to this amount. This * helps to limit the amount of memory that can be used by prefetching. @@ -2159,45 +2150,16 @@ int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) { dnode_t *dn; - int restarted = 0, err; + int err; -restart: err = dnode_hold(os, object, FTAG, &dn); if (err) return (err); - rw_enter(&dn->dn_struct_rwlock, RW_READER); - - if (dnode_is_dirty(dn)) { - /* - * If the zfs_dmu_offset_next_sync module option is enabled - * then hole reporting has been requested. Dirty dnodes - * must be synced to disk to accurately report holes. - * - * Provided a RL_READER rangelock spanning 0-UINT64_MAX is - * held by the caller only a single restart will be required. - * We tolerate callers which do not hold the rangelock by - * returning EBUSY and not reporting holes after one restart. - */ - if (zfs_dmu_offset_next_sync) { - rw_exit(&dn->dn_struct_rwlock); - dnode_rele(dn, FTAG); + err = dnode_next_offset(dn, + DNODE_FIND_DIRTY | (hole ? DNODE_FIND_HOLE : 0), + off, 1, 1, 0); - if (restarted) - return (SET_ERROR(EBUSY)); - - txg_wait_synced(dmu_objset_pool(os), 0); - restarted = 1; - goto restart; - } - - err = SET_ERROR(EBUSY); - } else { - err = dnode_next_offset(dn, DNODE_FIND_HAVELOCK | - (hole ? 
DNODE_FIND_HOLE : 0), off, 1, 1, 0); - } - - rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); return (err); @@ -2597,9 +2559,6 @@ ZFS_MODULE_PARAM(zfs, zfs_, nopwrite_enabled, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, per_txg_dirty_frees_percent, UINT, ZMOD_RW, "Percentage of dirtied blocks from frees in one TXG"); -ZFS_MODULE_PARAM(zfs, zfs_, dmu_offset_next_sync, INT, ZMOD_RW, - "Enable forcing txg sync to find holes"); - /* CSTYLED */ ZFS_MODULE_PARAM(zfs, , dmu_prefetch_max, UINT, ZMOD_RW, "Limit one prefetch call to this size"); diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index 9f9844cdae51..72fe2bfd22b7 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -1785,34 +1785,6 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots) slots, NULL, NULL)); } -/* - * Checks if the dnode itself is dirty, or is carrying any uncommitted records. - * It is important to check both conditions, as some operations (eg appending - * to a file) can dirty both as a single logical unit, but they are not synced - * out atomically, so checking one and not the other can result in an object - * appearing to be clean mid-way through a commit. - * - * Do not change this lightly! If you get it wrong, dmu_offset_next() can - * detect a hole where there is really data, leading to silent corruption. 
- */ -boolean_t -dnode_is_dirty(dnode_t *dn) -{ - mutex_enter(&dn->dn_mtx); - - for (int i = 0; i < TXG_SIZE; i++) { - if (multilist_link_active(&dn->dn_dirty_link[i]) || - !list_is_empty(&dn->dn_dirty_records[i])) { - mutex_exit(&dn->dn_mtx); - return (B_TRUE); - } - } - - mutex_exit(&dn->dn_mtx); - - return (B_FALSE); -} - void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) { diff --git a/tests/zfs-tests/cmd/Makefile.am b/tests/zfs-tests/cmd/Makefile.am index 23848a82ffbd..3f7b19725ccd 100644 --- a/tests/zfs-tests/cmd/Makefile.am +++ b/tests/zfs-tests/cmd/Makefile.am @@ -66,6 +66,7 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/mkbusy %D%/mkfile %D%/mkfiles %D%/mktree scripts_zfs_tests_bin_PROGRAMS += %D%/mmap_exec %D%/mmap_seek %D%/mmap_sync %D%/mmapwrite %D%/readmmap +%C%_mmap_seek_LDADD = libzfs_core.la %C%_mmapwrite_LDADD = -lpthread if WANT_MMAP_LIBAIO diff --git a/tests/zfs-tests/cmd/mmap_seek.c b/tests/zfs-tests/cmd/mmap_seek.c index 7be92d109565..7a9275436f40 100644 --- a/tests/zfs-tests/cmd/mmap_seek.c +++ b/tests/zfs-tests/cmd/mmap_seek.c @@ -34,6 +34,8 @@ #ifdef __linux__ #include #endif +#include +#include static void seek_data(int fd, off_t offset, off_t expected) @@ -65,12 +67,14 @@ main(int argc, char **argv) char *buf = NULL; int err; - if (argc != 4) { + if (argc != 5) { (void) printf("usage: %s " - "\n", argv[0]); + " \n", argv[0]); exit(1); } + (void) libzfs_core_init(); + int fd = open(file_path, O_RDWR | O_CREAT, 0666); if (fd == -1) { (void) fprintf(stderr, "%s: %s: ", execname, file_path); @@ -80,6 +84,7 @@ main(int argc, char **argv) off_t file_size = atoi(argv[2]); off_t block_size = atoi(argv[3]); + const char *poolname = argv[4]; if (block_size * 2 > file_size) { (void) fprintf(stderr, "file size must be at least " @@ -133,6 +138,16 @@ main(int argc, char **argv) /* Punch a hole (required compression be enabled). 
*/ memset(buf + block_size, 0, block_size); + err = msync(buf, file_size, MS_SYNC); + if (err == -1) { + perror("msync"); + exit(2); + } + err = lzc_sync(poolname, NULL, NULL); + if (err != 0) { + perror("lzc_sync"); + exit(2); + } seek_data(fd, 0, 0); seek_data(fd, block_size, 2 * block_size); seek_hole(fd, 0, block_size); diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg index a619b846dd11..13a41df5f3e2 100644 --- a/tests/zfs-tests/include/tunables.cfg +++ b/tests/zfs-tests/include/tunables.cfg @@ -33,7 +33,6 @@ DEADMAN_FAILMODE deadman.failmode zfs_deadman_failmode DEADMAN_SYNCTIME_MS deadman.synctime_ms zfs_deadman_synctime_ms DEADMAN_ZIOTIME_MS deadman.ziotime_ms zfs_deadman_ziotime_ms DISABLE_IVSET_GUID_CHECK disable_ivset_guid_check zfs_disable_ivset_guid_check -DMU_OFFSET_NEXT_SYNC dmu_offset_next_sync zfs_dmu_offset_next_sync EMBEDDED_SLOG_MIN_MS embedded_slog_min_ms zfs_embedded_slog_min_ms INITIALIZE_CHUNK_SIZE initialize_chunk_size zfs_initialize_chunk_size INITIALIZE_VALUE initialize_value zfs_initialize_value diff --git a/tests/zfs-tests/tests/functional/mmap/mmap_seek_001_pos.ksh b/tests/zfs-tests/tests/functional/mmap/mmap_seek_001_pos.ksh index c09746b4b66a..6e465bd21c0a 100755 --- a/tests/zfs-tests/tests/functional/mmap/mmap_seek_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/mmap/mmap_seek_001_pos.ksh @@ -43,24 +43,19 @@ function cleanup log_must zfs set compression=off $TESTPOOL/$TESTFS log_must zfs set recordsize=128k $TESTPOOL/$TESTFS log_must rm -f $TESTDIR/test-mmap-file - log_must set_tunable64 DMU_OFFSET_NEXT_SYNC $dmu_offset_next_sync } log_assert "lseek() data/holes for an mmap()'d file." log_onexit cleanup -# Enable hole reporting for dirty files. -typeset dmu_offset_next_sync=$(get_tunable DMU_OFFSET_NEXT_SYNC) -log_must set_tunable64 DMU_OFFSET_NEXT_SYNC 1 - # Compression must be enabled to convert zero'd blocks to holes. # This behavior is checked by the mmap_seek test. 
log_must zfs set compression=on $TESTPOOL/$TESTFS for bs in 4096 8192 16384 32768 65536 131072; do log_must zfs set recordsize=$bs $TESTPOOL/$TESTFS - log_must mmap_seek $TESTDIR/test-mmap-file $((1024*1024)) $bs + log_must mmap_seek $TESTDIR/test-mmap-file $((1024*1024)) $bs $TESTPOOL log_must rm $TESTDIR/test-mmap-file done