Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-16876 vos: discard invalid DTX when commit or abort - b26 #15901

Merged
merged 1 commit into from
Feb 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/include/daos_srv/evtree.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2017-2023 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -810,4 +811,16 @@ evt_feats_get(struct evt_root *root)
*/
int evt_feats_set(struct evt_root *root, struct umem_instance *umm, uint64_t feats);

/** Validate the provided evt.
*
* Note: It is designed for catastrophic recovery. Not to perform at run-time.
*
* \param evt[in]
* \param dtx_lid[in] local id of the DTX entry the evt is supposed to belong to
*
* \return true if evt is valid.
**/
bool
evt_desc_is_valid(const struct evt_desc *evt, uint32_t dtx_lid);

#endif /* __DAOS_EV_TREE_H__ */
5 changes: 5 additions & 0 deletions src/tests/ftest/util/telemetry_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""
(C) Copyright 2021-2024 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP
(C) Copyright 2025 Google LLC

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -207,6 +209,8 @@ class TelemetryUtils():
_gen_stats_metrics("engine_io_dtx_committable")
ENGINE_IO_DTX_COMMITTED_METRICS = \
_gen_stats_metrics("engine_io_dtx_committed")
ENGINE_IO_DTX_INVALID_METRICS = \
_gen_stats_metrics("engine_io_dtx_invalid")
ENGINE_IO_LATENCY_FETCH_METRICS = \
_gen_stats_metrics("engine_io_latency_fetch")
ENGINE_IO_LATENCY_BULK_FETCH_METRICS = \
Expand Down Expand Up @@ -310,6 +314,7 @@ class TelemetryUtils():
ENGINE_IO_METRICS = ENGINE_IO_DTX_ASYNC_CMT_LAT_METRICS +\
ENGINE_IO_DTX_COMMITTABLE_METRICS +\
ENGINE_IO_DTX_COMMITTED_METRICS +\
ENGINE_IO_DTX_INVALID_METRICS +\
ENGINE_IO_LATENCY_FETCH_METRICS +\
ENGINE_IO_LATENCY_BULK_FETCH_METRICS +\
ENGINE_IO_LATENCY_VOS_FETCH_METRICS +\
Expand Down
3 changes: 2 additions & 1 deletion src/vos/evt_priv.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
/**
* (C) Copyright 2017-2022 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -115,7 +117,6 @@ struct evt_context {
umem_off2ptr(evt_umm(tcx), offset)

#define EVT_NODE_MAGIC 0xf00d
#define EVT_DESC_MAGIC 0xbeefdead

/** Convert an offset to a evtree node descriptor
* \param[IN] tcx Tree context
Expand Down
10 changes: 10 additions & 0 deletions src/vos/evtree.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2017-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -4086,3 +4087,12 @@ evt_feats_set(struct evt_root *root, struct umem_instance *umm, uint64_t feats)
return rc;
}

bool
evt_desc_is_valid(const struct evt_desc *evt, uint32_t dtx_lid)
{
if (evt == NULL || evt->dc_magic != EVT_DESC_MAGIC) {
return false;
}

return (evt->dc_dtx == dtx_lid);
}
41 changes: 25 additions & 16 deletions src/vos/ilog.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
/**
* (C) Copyright 2019-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -390,16 +392,18 @@ ilog_create(struct umem_instance *umm, struct ilog_df *root)
return rc;
}

#define ILOG_ASSERT_VALID(root_df) \
do { \
struct ilog_root *_root; \
\
_root = (struct ilog_root *)(root_df); \
D_ASSERTF((_root != NULL) && \
ILOG_MAGIC_VALID(_root->lr_magic), \
"Invalid ilog root detected %p magic=%#x\n", \
_root, _root == NULL ? 0 : _root->lr_magic); \
} while (0)
#define ILOG_CHECK_VALID(root_df) \
({ \
struct ilog_root *_root = NULL; \
D_ASSERT((root_df) != NULL); \
_root = (struct ilog_root *)(root_df); \
if (!ILOG_MAGIC_VALID(_root->lr_magic)) { \
D_WARN("Invalid ilog root detected %p magic=%#x\n", _root, \
_root == NULL ? 0 : _root->lr_magic); \
_root = NULL; \
} \
_root != NULL; \
})

int
ilog_open(struct umem_instance *umm, struct ilog_df *root,
Expand All @@ -408,7 +412,8 @@ ilog_open(struct umem_instance *umm, struct ilog_df *root,
struct ilog_context *lctx;
int rc;

ILOG_ASSERT_VALID(root);
if (!ILOG_CHECK_VALID(root))
return -DER_NONEXIST;

rc = ilog_ctx_create(umm, (struct ilog_root *)root, cbs, &lctx);
if (rc != 0)
Expand Down Expand Up @@ -474,7 +479,7 @@ ilog_destroy(struct umem_instance *umm,
int rc = 0;
struct ilog_array_cache cache = {0};

ILOG_ASSERT_VALID(root);
D_ASSERT(ILOG_CHECK_VALID(root));

rc = ilog_tx_begin(&lctx);
if (rc != 0) {
Expand Down Expand Up @@ -984,8 +989,12 @@ ilog_modify(daos_handle_t loh, const struct ilog_id *id_in,
"%s in incarnation log " DF_X64 " status: rc=" DF_RC " tree_version: %d\n",
opc_str[opc], id_in->id_epoch, DP_RC(rc), ilog_mag2ver(lctx->ic_root->lr_magic));

if (rc == 0 && version != ilog_mag2ver(lctx->ic_root->lr_magic) &&
(opc == ILOG_OP_PERSIST || opc == ILOG_OP_ABORT)) {
if (rc == 0 && opc != ILOG_OP_UPDATE) {
if (version == ilog_mag2ver(lctx->ic_root->lr_magic)) {
D_WARN("ilog entry on %s doesn't exist\n", opc_str[opc]);
return -DER_NONEXIST;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't quite follow this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As my understand, when we commit or abort the ilog via dtx_{commit,abort}, if commit/abort succeed, then the version will bump. Here, if the version does not bump, then must be not found. @jolivier23 , is it your expected?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is correct. It means the entry is no longer in the ilog. This code is shared between abort, persist, and update (or insert) but only the former two need to remove the ilog entry from the dtx record

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, if we consider it as an error, I think we'd use D_ERROR instead of D_WARN.

}

/** If we persisted or aborted an entry successfully,
* invoke the callback, if applicable but without
* deregistration
Expand Down Expand Up @@ -1213,7 +1222,7 @@ ilog_fetch(struct umem_instance *umm, struct ilog_df *root_df,
int rc = 0;
bool retry;

ILOG_ASSERT_VALID(root_df);
D_ASSERT(ILOG_CHECK_VALID(root_df));

root = (struct ilog_root *)root_df;

Expand Down Expand Up @@ -1539,7 +1548,7 @@ ilog_aggregate(struct umem_instance *umm, struct ilog_df *ilog,

root = lctx->ic_root;

ILOG_ASSERT_VALID(root);
D_ASSERT(ILOG_CHECK_VALID(root));

D_ASSERT(!ilog_empty(root)); /* ilog_fetch should have failed */

Expand Down
19 changes: 19 additions & 0 deletions src/vos/tests/vts_ilog.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
/**
* (C) Copyright 2019-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -530,6 +532,12 @@ ilog_test_update(void **state)
rc = entries_check(umm, ilog, &ilog_callbacks, NULL, 0, entries);
assert_rc_equal(rc, 0);

/* Test non-existent tx */
id.id_epoch = epoch;
id.id_tx_id = current_tx_id.id_tx_id + 4000;
rc = ilog_persist(loh, &id);
assert_rc_equal(rc, -DER_NONEXIST);

/* Commit the punch ilog. */
id.id_epoch = epoch;
id.id_tx_id = current_tx_id.id_tx_id;
Expand Down Expand Up @@ -668,6 +676,12 @@ ilog_test_abort(void **state)
rc = entries_check(umm, ilog, &ilog_callbacks, NULL, 0, entries);
assert_rc_equal(rc, 0);

/* Test non-existent tx */
id = current_tx_id;
id.id_tx_id += 400;
rc = ilog_abort(loh, &id);
assert_rc_equal(rc, -DER_NONEXIST);

id = current_tx_id;
rc = ilog_abort(loh, &id);
LOG_FAIL(rc, 0, "Failed to abort log entry\n");
Expand Down Expand Up @@ -735,6 +749,11 @@ ilog_test_abort(void **state)
rc = ilog_destroy(umm, &ilog_callbacks, ilog);
assert_rc_equal(rc, 0);

/** Test open of "reallocated" ilog */
memset(ilog, 0xa1, sizeof(*ilog));
rc = ilog_open(umm, ilog, &ilog_callbacks, false, &loh);
assert_rc_equal(rc, -DER_NONEXIST);

assert_true(d_list_empty(&fake_tx_list));
ilog_free_root(umm, ilog);
}
Expand Down
6 changes: 6 additions & 0 deletions src/vos/vos_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,12 @@ vos_tls_init(int tags, int xs_id, int tgt_id)
D_WARN("Failed to create committed cnt sensor: "DF_RC"\n",
DP_RC(rc));

rc = d_tm_add_metric(&tls->vtl_invalid_dtx, D_TM_STATS_GAUGE,
"Number of invalid active DTX", "entries",
"io/dtx/invalid/tgt_%u", tgt_id);
if (rc)
D_WARN("Failed to create invalid DTX cnt sensor: " DF_RC "\n", DP_RC(rc));

rc = d_tm_add_metric(&tls->vtl_obj_cnt, D_TM_GAUGE,
"Number of cached vos object", "entry",
"mem/vos/vos_obj_%u/tgt_%u",
Expand Down
72 changes: 54 additions & 18 deletions src/vos/vos_dtx.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/**
* (C) Copyright 2019-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -573,7 +574,7 @@ dtx_ilog_rec_release(struct umem_instance *umm, struct vos_container *cont,

ilog_close(loh);

if (rc != 0)
if (rc != 0 && rc != -DER_NONEXIST)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it too late to check DER_NONEXIST here? I suppose error is already returned when above ilog_open() failed, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Such log message is no matter, its caller do_dtx_rec_release() will print.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's better to add an error message in ilog_open() or after the ilog_open() call above.

D_ERROR("Failed to release ilog rec for "DF_DTI", abort %s: "DF_RC"\n",
DP_DTI(&DAE_XID(dae)), abort ? "yes" : "no", DP_RC(rc));

Expand All @@ -598,6 +599,12 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont,
struct vos_irec_df *svt;

svt = umem_off2ptr(umm, umem_off2offset(rec));

if (!vos_irec_is_valid(svt, DAE_LID(dae))) {
rc = -DER_NONEXIST;
break;
}

if (abort) {
if (DAE_INDEX(dae) != DTX_INDEX_INVAL) {
rc = umem_tx_add_ptr(umm, &svt->ir_dtx,
Expand All @@ -621,6 +628,12 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont,
struct evt_desc *evt;

evt = umem_off2ptr(umm, umem_off2offset(rec));

if (!evt_desc_is_valid(evt, DAE_LID(dae))) {
rc = -DER_NONEXIST;
break;
}

if (abort) {
if (DAE_INDEX(dae) != DTX_INDEX_INVAL) {
rc = umem_tx_add_ptr(umm, &evt->dc_dtx,
Expand Down Expand Up @@ -648,6 +661,15 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont,
break;
}

if (unlikely(rc == -DER_NONEXIST)) {
struct vos_tls *tls = vos_tls_get(false);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better to use vos_pool->vp_sysdb as parameter.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, it will be changed in subsequent patch.


D_WARN("DTX record no longer exists, may indicate some corruption: "
DF_DTI " type %u, discard\n",
DP_DTI(&DAE_XID(dae)), dtx_umoff_flag2type(rec));
d_tm_inc_gauge(tls->vtl_invalid_dtx, 1);
}

return rc;
}

Expand All @@ -657,6 +679,7 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab
struct umem_instance *umm = vos_cont2umm(cont);
struct vos_dtx_act_ent_df *dae_df;
struct vos_dtx_blob_df *dbd;
bool invalid = false;
int count;
int i;
int rc = 0;
Expand Down Expand Up @@ -685,42 +708,52 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab
abort ? "abort" : "commit", DP_DTI(&DAE_XID(dae)), dbd,
DP_UUID(cont->vc_pool->vp_id), DP_UUID(cont->vc_id));

if (dae->dae_records != NULL) {
/* Handle DTX records as FIFO order to find out potential invalid DTX earlier. */

if (DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT)
count = DTX_INLINE_REC_CNT;
else
count = DAE_REC_CNT(dae);

for (i = 0; i < count; i++) {
rc = do_dtx_rec_release(umm, cont, dae, DAE_REC_INLINE(dae)[i], abort);
if (unlikely(rc == -DER_NONEXIST)) {
invalid = true;
break;
}
if (rc != 0)
return rc;
}

if (!invalid && dae->dae_records != NULL) {
D_ASSERT(DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT);
D_ASSERT(!UMOFF_IS_NULL(dae_df->dae_rec_off));

for (i = DAE_REC_CNT(dae) - DTX_INLINE_REC_CNT - 1; i >= 0; i--) {
for (i = 0; i < DAE_REC_CNT(dae) - DTX_INLINE_REC_CNT; i++) {
rc = do_dtx_rec_release(umm, cont, dae, dae->dae_records[i], abort);
if (unlikely(rc == -DER_NONEXIST)) {
invalid = true;
break;
}
if (rc != 0)
return rc;
}
}

if (!UMOFF_IS_NULL(dae_df->dae_rec_off)) {
rc = umem_free(umm, dae_df->dae_rec_off);
if (rc != 0)
return rc;

if (keep_act) {
if (!invalid && keep_act) {
rc = umem_tx_add_ptr(umm, &dae_df->dae_rec_off, sizeof(dae_df->dae_rec_off));
if (rc != 0)
return rc;

dae_df->dae_rec_off = UMOFF_NULL;
}

count = DTX_INLINE_REC_CNT;
} else {
D_ASSERT(DAE_REC_CNT(dae) <= DTX_INLINE_REC_CNT);

count = DAE_REC_CNT(dae);
}

for (i = count - 1; i >= 0; i--) {
rc = do_dtx_rec_release(umm, cont, dae, DAE_REC_INLINE(dae)[i], abort);
if (rc != 0)
return rc;
}

if (keep_act) {
if (!invalid && keep_act) {
/* When re-commit partial committed DTX, the count can be zero. */
if (dae_df->dae_rec_cnt > 0) {
rc = umem_tx_add_ptr(umm, &dae_df->dae_rec_cnt,
Expand All @@ -747,6 +780,9 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab
return 0;
}

if (invalid)
rc = 0;

if (!UMOFF_IS_NULL(dae_df->dae_mbs_off)) {
/* dae_mbs_off will be invalid via flag DTE_INVALID. */
rc = umem_free(umm, dae_df->dae_mbs_off);
Expand Down
Loading
Loading