Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-16876 tests: test for invalid DTX entry - b26 #15912

Open
wants to merge 1 commit into
base: Nasf-Fan/DAOS-16876_discard_b26
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/dtx/dtx_common.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2019-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -1438,7 +1439,9 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul
D_WARN(DF_UUID": Fail to sync %s commit DTX "DF_DTI": "DF_RC"\n",
DP_UUID(cont->sc_uuid), dlh->dlh_coll ? "collective" : "regular",
DP_DTI(&dth->dth_xid), DP_RC(rc));
if (likely(dtx_batched_ult_max != 0)) {
if (likely(dtx_batched_ult_max != 0 &&
!DAOS_FAIL_CHECK(DAOS_DTX_PARTIAL_COMMIT_P1) &&
!DAOS_FAIL_CHECK(DAOS_DTX_PARTIAL_COMMIT_P2))) {
dth->dth_sync = 0;
goto cache;
}
Expand Down
3 changes: 2 additions & 1 deletion src/dtx/dtx_rpc.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2019-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -411,7 +412,7 @@ dtx_req_list_send(struct dtx_common_args *dca, bool is_reentrance)
dca->dca_drr->drr_result = 0;

if (unlikely(dra->dra_opc == DTX_COMMIT && dca->dca_i == 0 &&
DAOS_FAIL_CHECK(DAOS_DTX_FAIL_COMMIT)))
DAOS_FAIL_CHECK(DAOS_DTX_PARTIAL_COMMIT_P1)))
dtx_req_send(dca->dca_drr, 1);
else
dtx_req_send(dca->dca_drr, dca->dca_epoch);
Expand Down
6 changes: 5 additions & 1 deletion src/dtx/dtx_srv.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2019-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -189,7 +190,7 @@ dtx_handler(crt_rpc_t *rpc)
break;

if (unlikely(din->di_epoch == 1))
D_GOTO(out, rc = -DER_IO);
daos_fail_loc_set(DAOS_DTX_PARTIAL_COMMIT_P2);

while (i < din->di_dtx_array.ca_count) {
if (i + count > din->di_dtx_array.ca_count)
Expand All @@ -205,6 +206,9 @@ dtx_handler(crt_rpc_t *rpc)
i += count;
}

if (unlikely(din->di_epoch == 1))
D_GOTO(out, rc = -DER_IO);

if (din->di_flags.ca_count > 0)
flags = din->di_flags.ca_arrays;

Expand Down
4 changes: 3 additions & 1 deletion src/include/daos/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -848,11 +848,13 @@ enum {
#define DAOS_DTX_RESEND_DELAY1 (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x48)
#define DAOS_DTX_UNCERTAIN (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x49)
#define DAOS_DTX_RESYNC_DELAY (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x4a)
#define DAOS_DTX_FAIL_COMMIT (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x4b)

#define DAOS_OBJ_SYNC_RETRY (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x4c)
#define DAOS_OBJ_COLL_SPARSE (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x4d)

#define DAOS_DTX_PARTIAL_COMMIT_P1 (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x4e)
#define DAOS_DTX_PARTIAL_COMMIT_P2 (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x4f)

#define DAOS_NVME_FAULTY (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x50)
#define DAOS_NVME_WRITE_ERR (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x51)
#define DAOS_NVME_READ_ERR (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x52)
Expand Down
5 changes: 5 additions & 0 deletions src/include/daos_srv/vos_types.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2015-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -57,6 +58,10 @@ enum dtx_entry_flags {
* on all yet, need to be re-committed.
*/
DTE_PARTIAL_COMMITTED = (1 << 5),
/* The DTX epoch is sorted locally. */
DTE_EPOCH_SORTED = (1 << 6),
/* The active DTX entry is redundant, should be discarded. */
DTE_REDUN = (1 << 7),
};

struct dtx_entry {
Expand Down
95 changes: 70 additions & 25 deletions src/tests/suite/daos_base_tx.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2019-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -847,41 +848,85 @@ dtx_20(void **state)
static void
dtx_21(void **state)
{
test_arg_t *arg = *state;
char *update_buf;
const char *dkey = dts_dtx_dkey;
const char *akey = dts_dtx_akey;
daos_obj_id_t oid;
struct ioreq req;
test_arg_t *arg = *state;
char *update_buf;
const char *dkey = dts_dtx_dkey;
const char *akey = dts_dtx_akey;
daos_obj_id_t oid;
struct ioreq req;
daos_pool_info_t info_prep = { 0 };
daos_pool_info_t info_post = { 0 };
int rc;

FAULT_INJECTION_REQUIRED();

print_message("do not abort partially committed DTX\n");
print_message("handle partially committed DTX\n");

if (!test_runable(arg, dts_dtx_replica_cnt))
return;

D_ALLOC(update_buf, dts_dtx_iosize);
assert_non_null(update_buf);
dts_buf_render(update_buf, dts_dtx_iosize);

oid = daos_test_oid_gen(arg->coh, dts_dtx_class, 0, 0, arg->myrank);
ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg);

dtx_set_fail_loc(arg, DAOS_DTX_FAIL_COMMIT);
/*
* The DTX that create the object will trigger synchronous commit. One of
* the replicas will fail commit locally because of DAOS_DTX_FAIL_COMMIT.
* But the other replicas will commit successfully, then related data can
* be accessed.
* The DTX that create the object will trigger synchronous commit. One non-leader
* replica will commit locally but left the active DTX entry on disk, then report
* IO failure, that will cause the DTX on leader to be marked as partially commit.
*
* Then restart the system, such DTX will be re-loaded from disk. The subsequent
* DTX resync will re-commit such partial committed DTX. The logic on non-leader
* should be able to detect and handle such re-committed DTX; otherwise, it will
* trigger assertion.
*
* To simplify the test logic, only perform the test on rank_0.
*/
insert_single(dkey, akey, 0, update_buf, dts_dtx_iosize, DAOS_TX_NONE, &req);
dtx_set_fail_loc(arg, 0);

dtx_check_replicas(dkey, akey, "update_succ", update_buf, dts_dtx_iosize, &req);
par_barrier(PAR_COMM_WORLD);
if (arg->myrank == 0) {
rc = daos_pool_query(arg->pool.poh, NULL, &info_prep, NULL, NULL);
assert_true(rc == 0);

D_FREE(update_buf);
ioreq_fini(&req);
D_ALLOC(update_buf, dts_dtx_iosize);
assert_non_null(update_buf);
dts_buf_render(update_buf, dts_dtx_iosize);

oid = daos_test_oid_gen(arg->coh, OC_RP_3GX, 0, 0, arg->myrank);
ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg);

daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC,
DAOS_DTX_PARTIAL_COMMIT_P1, 0, NULL);

print_message("Generating partially committed DTX ...\n");
insert_single(dkey, akey, 0, update_buf, dts_dtx_iosize, DAOS_TX_NONE, &req);

D_FREE(update_buf);
ioreq_fini(&req);
daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL);
}
par_barrier(PAR_COMM_WORLD);

test_teardown_cont_hdl(arg);
daos_pool_disconnect(arg->pool.poh, NULL);
arg->pool.poh = DAOS_HDL_INVAL;

par_barrier(PAR_COMM_WORLD);
if (arg->myrank == 0) {
print_message("Stopping system ...\n");
dmg_system_stop_rank(dmg_config_file, CRT_NO_RANK, false);

print_message("Starting system ...\n");
dmg_system_start_rank(dmg_config_file, CRT_NO_RANK);

/* Sleep a while for DTX resync. */
sleep(30);

rc = daos_pool_connect(arg->pool.pool_str, arg->group, DAOS_PC_RW,
&arg->pool.poh, NULL, NULL);
assert_true(rc == 0);

rc = daos_pool_query(arg->pool.poh, NULL, &info_post, NULL, NULL);
assert_true(rc == 0);

assert_int_equal(info_prep.pi_ndisabled, info_post.pi_ndisabled);
}
par_barrier(PAR_COMM_WORLD);
}

static int
Expand Down Expand Up @@ -945,7 +990,7 @@ static const struct CMUnitTest dtx_tests[] = {
dtx_19, NULL, test_case_teardown},
{"DTX20: race between DTX refresh and DTX resync",
dtx_20, dtx_base_rf1_setup, rebuild_sub_teardown},
{"DTX21: do not abort partially committed DTX",
{"DTX21: handle partially committed DTX",
dtx_21, dtx_base_rf0_setup, rebuild_sub_teardown},
};

Expand Down
30 changes: 22 additions & 8 deletions src/vos/vos_dtx.c
Original file line number Diff line number Diff line change
Expand Up @@ -662,12 +662,10 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont,
}

if (unlikely(rc == -DER_NONEXIST)) {
struct vos_tls *tls = vos_tls_get(false);

D_WARN("DTX record no longer exists, may indicate some corruption: "
DF_DTI " type %u, discard\n",
DP_DTI(&DAE_XID(dae)), dtx_umoff_flag2type(rec));
d_tm_inc_gauge(tls->vtl_invalid_dtx, 1);
d_tm_inc_gauge(vos_tls_get(cont->vc_pool->vp_sysdb)->vtl_invalid_dtx, 1);
}

return rc;
Expand Down Expand Up @@ -740,6 +738,24 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab
}
}

/*
* Inject fault to simulate the case of DTX records are left on disk after commit.
* Then when container re-open (such as for engine restart), it will be re-loaded
* from disk and then trigger DTX re-commit via DTX resync.
*/
if (unlikely(DAOS_FAIL_CHECK(DAOS_DTX_PARTIAL_COMMIT_P2))) {
rc = umem_tx_add_ptr(umm, &dae_df->dae_flags, sizeof(dae_df->dae_flags));
if (rc != 0)
return rc;

dae_df->dae_flags |= DTE_REDUN;
return 0;
}

/* It is expected to detect some invalid DTX records. Otherwise assert for test. */
if (DAE_REC_CNT(dae) != 0 && unlikely(DAE_FLAGS(dae) & DTE_REDUN))
D_ASSERT(invalid);

if (!UMOFF_IS_NULL(dae_df->dae_rec_off)) {
rc = umem_free(umm, dae_df->dae_rec_off);
if (rc != 0)
Expand Down Expand Up @@ -2259,8 +2275,7 @@ vos_dtx_post_handle(struct vos_container *cont,
}

if (!abort && dces != NULL) {
struct vos_tls *tls = vos_tls_get(false);
int j = 0;
int j = 0;

D_ASSERT(cont->vc_pool->vp_sysdb == false);
for (i = 0; i < count; i++) {
Expand All @@ -2271,7 +2286,7 @@ vos_dtx_post_handle(struct vos_container *cont,
if (j > 0) {
cont->vc_dtx_committed_count += j;
cont->vc_pool->vp_dtx_committed_count += j;
d_tm_inc_gauge(tls->vtl_committed, j);
d_tm_inc_gauge(vos_tls_get(cont->vc_pool->vp_sysdb)->vtl_committed, j);
}
}

Expand Down Expand Up @@ -2588,7 +2603,6 @@ vos_dtx_set_flags(daos_handle_t coh, struct dtx_id dtis[], int count, uint32_t f
int
vos_dtx_aggregate(daos_handle_t coh)
{
struct vos_tls *tls = vos_tls_get(false);
struct vos_container *cont;
struct vos_cont_df *cont_df;
struct umem_instance *umm;
Expand Down Expand Up @@ -2707,7 +2721,7 @@ vos_dtx_aggregate(daos_handle_t coh)

cont->vc_dtx_committed_count -= count;
cont->vc_pool->vp_dtx_committed_count -= count;
d_tm_dec_gauge(tls->vtl_committed, count);
d_tm_dec_gauge(vos_tls_get(cont->vc_pool->vp_sysdb)->vtl_committed, count);
}

DL_CDEBUG(rc != 0, DLOG_ERR, DB_IO, rc,
Expand Down