From 2aa3fbe761a2f38c1fe743467635b1323bebe093 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Tue, 14 Jan 2025 00:33:31 +1100 Subject: [PATCH] zinject: count matches and injections for each handler When building tests with zinject, it can be quite difficult to work out if you're producing the right kind of IO to match the rules you've set up. So, here we extend injection records to count the number of times a handler matched the operation, and how often an error was actually injected (ie after frequency and other exclusions are applied). Then, display those counts in the `zinject` output. Reviewed-by: Tony Hutter Reviewed-by: Alexander Motin Signed-off-by: Rob Norris Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Closes #16938 --- cmd/zinject/zinject.c | 66 ++++---- include/sys/zfs_ioctl.h | 4 +- module/zfs/zio_inject.c | 60 ++++++-- tests/runfiles/common.run | 2 +- tests/zfs-tests/tests/Makefile.am | 1 + .../cli_root/zinject/zinject_counts.ksh | 142 ++++++++++++++++++ 6 files changed, 236 insertions(+), 39 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zinject/zinject_counts.ksh diff --git a/cmd/zinject/zinject.c b/cmd/zinject/zinject.c index ed60cce3dd16..6c856763c958 100644 --- a/cmd/zinject/zinject.c +++ b/cmd/zinject/zinject.c @@ -22,7 +22,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. - * Copyright (c) 2023-2024, Klara Inc. + * Copyright (c) 2023-2025, Klara, Inc. */ /* @@ -404,27 +404,30 @@ print_data_handler(int id, const char *pool, zinject_record_t *record, if (*count == 0) { (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-4s " - "%-15s\n", "ID", "POOL", "OBJSET", "OBJECT", "TYPE", - "LVL", "DVAs", "RANGE"); + "%-15s %-6s %-15s\n", "ID", "POOL", "OBJSET", "OBJECT", + "TYPE", "LVL", "DVAs", "RANGE", "MATCH", "INJECT"); (void) printf("--- --------------- ------ " - "------ -------- --- ---- ---------------\n"); + "------ -------- --- ---- --------------- " + "------ ------\n"); } *count += 1; - (void) printf("%3d %-15s %-6llu %-6llu %-8s %-3d 0x%02x ", - id, pool, (u_longlong_t)record->zi_objset, - (u_longlong_t)record->zi_object, type_to_name(record->zi_type), - record->zi_level, record->zi_dvas); - - - if (record->zi_start == 0 && - record->zi_end == -1ULL) - (void) printf("all\n"); + char rangebuf[32]; + if (record->zi_start == 0 && record->zi_end == -1ULL) + snprintf(rangebuf, sizeof (rangebuf), "all"); else - (void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start, + snprintf(rangebuf, sizeof (rangebuf), "[%llu, %llu]", + (u_longlong_t)record->zi_start, (u_longlong_t)record->zi_end); + + (void) printf("%3d %-15s %-6llu %-6llu %-8s %-3d 0x%02x %-15s " + "%6lu %6lu\n", id, pool, (u_longlong_t)record->zi_objset, + (u_longlong_t)record->zi_object, type_to_name(record->zi_type), + record->zi_level, record->zi_dvas, rangebuf, + record->zi_match_count, record->zi_inject_count); + return (0); } @@ -445,11 +448,14 @@ print_device_handler(int id, const char *pool, zinject_record_t *record, return (0); if (*count == 0) { - (void) printf("%3s %-15s %-16s %-5s %-10s %-9s\n", - "ID", "POOL", "GUID", "TYPE", "ERROR", "FREQ"); + (void) printf("%3s %-15s %-16s %-5s %-10s %-9s " + "%-6s %-6s\n", + "ID", "POOL", "GUID", "TYPE", "ERROR", "FREQ", + "MATCH", "INJECT"); (void) printf( "--- --------------- ---------------- " - "----- ---------- ---------\n"); + "----- ---------- --------- " + "------ ------\n"); } *count += 1; @@ -457,9 +463,10 @@ print_device_handler(int id, const char *pool, zinject_record_t *record, double freq = record->zi_freq == 0 ? 100.0f : (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f; - (void) printf("%3d %-15s %llx %-5s %-10s %8.4f%%\n", id, pool, - (u_longlong_t)record->zi_guid, iotypestr[record->zi_iotype], - err_to_str(record->zi_error), freq); + (void) printf("%3d %-15s %llx %-5s %-10s %8.4f%% " + "%6lu %6lu\n", id, pool, (u_longlong_t)record->zi_guid, + iotypestr[record->zi_iotype], err_to_str(record->zi_error), + freq, record->zi_match_count, record->zi_inject_count); return (0); } @@ -477,18 +484,25 @@ print_delay_handler(int id, const char *pool, zinject_record_t *record, return (0); if (*count == 0) { - (void) printf("%3s %-15s %-15s %-15s %s\n", - "ID", "POOL", "DELAY (ms)", "LANES", "GUID"); - (void) printf("--- --------------- --------------- " - "--------------- ----------------\n"); + (void) printf("%3s %-15s %-16s %-10s %-5s %-9s " + "%-6s %-6s\n", + "ID", "POOL", "GUID", "DELAY (ms)", "LANES", "FREQ", + "MATCH", "INJECT"); + (void) printf("--- --------------- ---------------- " + "---------- ----- --------- " + "------ ------\n"); } *count += 1; - (void) printf("%3d %-15s %-15llu %-15llu %llx\n", id, pool, + double freq = record->zi_freq == 0 ? 100.0f : + (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f; + + (void) printf("%3d %-15s %llx %10llu %5llu %8.4f%% " + "%6lu %6lu\n", id, pool, (u_longlong_t)record->zi_guid, (u_longlong_t)NSEC2MSEC(record->zi_timer), (u_longlong_t)record->zi_nlanes, - (u_longlong_t)record->zi_guid); + freq, record->zi_match_count, record->zi_inject_count); return (0); } diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h index aa20e52a7634..e61d7644764e 100644 --- a/include/sys/zfs_ioctl.h +++ b/include/sys/zfs_ioctl.h @@ -23,7 +23,7 @@ * Copyright (c) 2012, 2024 by Delphix. All rights reserved. * Copyright 2016 RackTop Systems. * Copyright (c) 2017, Intel Corporation. - * Copyright (c) 2024, Klara, Inc. + * Copyright (c) 2024-2025, Klara, Inc. */ #ifndef _SYS_ZFS_IOCTL_H @@ -421,6 +421,8 @@ typedef struct zinject_record { uint64_t zi_nlanes; uint32_t zi_cmd; uint32_t zi_dvas; + uint64_t zi_match_count; /* count of times matched */ + uint64_t zi_inject_count; /* count of times injected */ } zinject_record_t; #define ZINJECT_NULL 0x1 diff --git a/module/zfs/zio_inject.c b/module/zfs/zio_inject.c index 012a0e3c6c17..f972522b6454 100644 --- a/module/zfs/zio_inject.c +++ b/module/zfs/zio_inject.c @@ -22,7 +22,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. - * Copyright (c) 2024, Klara Inc. + * Copyright (c) 2024-2025, Klara, Inc. */ /* @@ -129,6 +129,9 @@ static boolean_t zio_match_handler(const zbookmark_phys_t *zb, uint64_t type, int dva, zinject_record_t *record, int error) { + boolean_t matched = B_FALSE; + boolean_t injected = B_FALSE; + /* * Check for a match against the MOS, which is based on type */ @@ -137,9 +140,8 @@ zio_match_handler(const zbookmark_phys_t *zb, uint64_t type, int dva, record->zi_object == DMU_META_DNODE_OBJECT) { if (record->zi_type == DMU_OT_NONE || type == record->zi_type) - return (freq_triggered(record->zi_freq)); - else - return (B_FALSE); + matched = B_TRUE; + goto done; } /* @@ -153,10 +155,20 @@ zio_match_handler(const zbookmark_phys_t *zb, uint64_t type, int dva, (record->zi_dvas == 0 || (dva != ZI_NO_DVA && (record->zi_dvas & (1ULL << dva)))) && error == record->zi_error) { - return (freq_triggered(record->zi_freq)); + matched = B_TRUE; + goto done; } - return (B_FALSE); +done: + if (matched) { + record->zi_match_count++; + injected = freq_triggered(record->zi_freq); + } + + if (injected) + record->zi_inject_count++; + + return (injected); } /* @@ -177,8 +189,11 @@ zio_handle_panic_injection(spa_t *spa, const char *tag, uint64_t type) continue; if (handler->zi_record.zi_type == type && - strcmp(tag, handler->zi_record.zi_func) == 0) + strcmp(tag, handler->zi_record.zi_func) == 0) { + handler->zi_record.zi_match_count++; + handler->zi_record.zi_inject_count++; panic("Panic requested in function %s\n", tag); + } } rw_exit(&inject_lock); @@ -336,6 +351,8 @@ zio_handle_label_injection(zio_t *zio, int error) if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid && (offset >= start && offset <= end)) { + handler->zi_record.zi_match_count++; + handler->zi_record.zi_inject_count++; ret = error; break; } @@ -400,12 +417,16 @@ zio_handle_device_injection_impl(vdev_t *vd, zio_t *zio, int err1, int err2) if (handler->zi_record.zi_error == err1 || handler->zi_record.zi_error == err2) { + handler->zi_record.zi_match_count++; + /* * limit error injection if requested */ if (!freq_triggered(handler->zi_record.zi_freq)) continue; + handler->zi_record.zi_inject_count++; + /* * For a failed open, pretend like the device * has gone away. @@ -441,6 +462,8 @@ zio_handle_device_injection_impl(vdev_t *vd, zio_t *zio, int err1, int err2) break; } if (handler->zi_record.zi_error == ENXIO) { + handler->zi_record.zi_match_count++; + handler->zi_record.zi_inject_count++; ret = SET_ERROR(EIO); break; } @@ -483,6 +506,8 @@ zio_handle_ignored_writes(zio_t *zio) handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES) continue; + handler->zi_record.zi_match_count++; + /* * Positive duration implies # of seconds, negative * a number of txgs @@ -495,8 +520,10 @@ zio_handle_ignored_writes(zio_t *zio) } /* Have a "problem" writing 60% of the time */ - if (random_in_range(100) < 60) + if (random_in_range(100) < 60) { + handler->zi_record.zi_inject_count++; zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; + } break; } @@ -520,6 +547,9 @@ spa_handle_ignored_writes(spa_t *spa) handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES) continue; + handler->zi_record.zi_match_count++; + handler->zi_record.zi_inject_count++; + if (handler->zi_record.zi_duration > 0) { VERIFY(handler->zi_record.zi_timer == 0 || ddi_time_after64( @@ -601,9 +631,6 @@ zio_handle_io_delay(zio_t *zio) if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO) continue; - if (!freq_triggered(handler->zi_record.zi_freq)) - continue; - if (vd->vdev_guid != handler->zi_record.zi_guid) continue; @@ -628,6 +655,12 @@ zio_handle_io_delay(zio_t *zio) ASSERT3U(handler->zi_record.zi_nlanes, >, handler->zi_next_lane); + handler->zi_record.zi_match_count++; + + /* Limit the use of this handler if requested */ + if (!freq_triggered(handler->zi_record.zi_freq)) + continue; + /* * We want to issue this IO to the lane that will become * idle the soonest, so we compare the soonest this @@ -699,6 +732,9 @@ zio_handle_io_delay(zio_t *zio) */ min_handler->zi_next_lane = (min_handler->zi_next_lane + 1) % min_handler->zi_record.zi_nlanes; + + min_handler->zi_record.zi_inject_count++; + } mutex_exit(&inject_delay_mtx); @@ -721,9 +757,11 @@ zio_handle_pool_delay(spa_t *spa, hrtime_t elapsed, zinject_type_t command) handler = list_next(&inject_handlers, handler)) { ASSERT3P(handler->zi_spa_name, !=, NULL); if (strcmp(spa_name(spa), handler->zi_spa_name) == 0) { + handler->zi_record.zi_match_count++; uint64_t pause = SEC2NSEC(handler->zi_record.zi_duration); if (pause > elapsed) { + handler->zi_record.zi_inject_count++; delay = pause - elapsed; } id = handler->zi_id; diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 3ec052f56b40..9e186de37369 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -159,7 +159,7 @@ tests = ['json_sanity'] tags = ['functional', 'cli_root', 'json'] [tests/functional/cli_root/zinject] -tests = ['zinject_args'] +tests = ['zinject_args', 'zinject_counts'] pre = post = tags = ['functional', 'cli_root', 'zinject'] diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index d62613d30035..044a70d1998b 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -615,6 +615,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/json/setup.ksh \ functional/cli_root/json/json_sanity.ksh \ functional/cli_root/zinject/zinject_args.ksh \ + functional/cli_root/zinject/zinject_counts.ksh \ functional/cli_root/zdb/zdb_002_pos.ksh \ functional/cli_root/zdb/zdb_003_pos.ksh \ functional/cli_root/zdb/zdb_004_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zinject/zinject_counts.ksh b/tests/zfs-tests/tests/functional/cli_root/zinject/zinject_counts.ksh new file mode 100755 index 000000000000..19b223aba46c --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zinject/zinject_counts.ksh @@ -0,0 +1,142 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +# +# This test sets various injections, does some IO to trigger them. and then +# checks the "match" and "inject" counters on the injection records to ensure +# that they're being counted properly. +# +# Note that this is a test of the counters, not injection generally. We're +# usually only looking for the counters moving at all, not caring too much +# about their actual values. + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "global" + +log_assert "Check zinject counts are displayed and advanced as expected." + +DISK1=${DISKS%% *} + +function cleanup +{ + zinject -c all + default_cleanup_noexit +} + +log_onexit cleanup + +default_mirror_setup_noexit $DISKS + +# Call zinject, get the match and inject counts, and make sure they look +# plausible for the requested frequency. +function check_count_freq +{ + typeset -i freq=$1 + + # assuming a single rule, with the match and inject counts in the + # last two columns + typeset rule=$(zinject | grep -m 1 -oE '^ *[0-9].*[0-9]$') + + log_note "check_count_freq: using rule: $rule" + + typeset -a record=($(echo $rule | grep -oE ' [0-9]+ +[0-9]+$')) + typeset -i match=${record[0]} + typeset -i inject=${record[1]} + + log_note "check_count_freq: freq=$freq match=$match inject=$inject" + + # equality check, for 100% frequency, or if we've never matched the rule + if [[ $match -eq 0 || $freq -eq 100 ]] ; then + return [[ $match -eq 0 $inject ]] + fi + + # Compute the expected injection count, and compare. Because we're + # not testing the fine details here, it's considered good-enough for + # the injection account to be within +/- 10% of the expected count. + typeset -i expect=$(($match * $freq / 100)) + typeset -i diff=$((($expect - $inject) / 10)) + return [[ $diff -ge -1 && $diff -le 1 ]] +} + +# Test device IO injections by injecting write errors, doing some writes, +# and making sure the count moved +function test_device_injection +{ + for freq in 100 50 ; do + log_must zinject -d $DISK1 -e io -T write -f $freq $TESTPOOL + + log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=1M count=1 + log_must zpool sync + + log_must check_count_freq $freq + + log_must zinject -c all + done +} + +# Test object injections by writing a file, injecting checksum errors and +# trying to read it back +function test_object_injection +{ + log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=1M count=1 + zpool sync + + for freq in 100 50 ; do + log_must zinject -t data -e checksum -f $freq /$TESTPOOL/file + + cat /tank/file > /dev/null || true + + log_must check_count_freq $freq + + log_must zinject -c all + done +} + +# Test delay injections, by injecting delays and writing +function test_delay_injection +{ + for freq in 100 50 ; do + log_must zinject -d $DISK1 -D 50:1 -f $freq $TESTPOOL + + log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=1M count=1 + zpool sync + + log_must check_count_freq $freq + + log_must zinject -c all + done +} + +# Disable cache, to ensure reads induce IO +log_must zfs set primarycache=none $TESTPOOL + +# Test 'em all. +log_must test_device_injection +log_must test_object_injection +log_must test_delay_injection + +log_pass "zinject counts are displayed and advanced as expected."