-
Notifications
You must be signed in to change notification settings - Fork 307
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DAOS-16876 vos: discard invalid DTX when commit or abort - b26 #15901
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
/** | ||
* (C) Copyright 2019-2024 Intel Corporation. | ||
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP | ||
* (C) Copyright 2025 Google LLC | ||
* | ||
* SPDX-License-Identifier: BSD-2-Clause-Patent | ||
*/ | ||
|
@@ -573,7 +574,7 @@ dtx_ilog_rec_release(struct umem_instance *umm, struct vos_container *cont, | |
|
||
ilog_close(loh); | ||
|
||
if (rc != 0) | ||
if (rc != 0 && rc != -DER_NONEXIST) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't it too late to check DER_NONEXIST here? I suppose error is already returned when above ilog_open() failed, right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Such log message is no matter, its caller There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's better to add an error message in ilog_open() or after the ilog_open() call above. |
||
D_ERROR("Failed to release ilog rec for "DF_DTI", abort %s: "DF_RC"\n", | ||
DP_DTI(&DAE_XID(dae)), abort ? "yes" : "no", DP_RC(rc)); | ||
|
||
|
@@ -598,6 +599,12 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont, | |
struct vos_irec_df *svt; | ||
|
||
svt = umem_off2ptr(umm, umem_off2offset(rec)); | ||
|
||
if (!vos_irec_is_valid(svt, DAE_LID(dae))) { | ||
rc = -DER_NONEXIST; | ||
break; | ||
} | ||
|
||
if (abort) { | ||
if (DAE_INDEX(dae) != DTX_INDEX_INVAL) { | ||
rc = umem_tx_add_ptr(umm, &svt->ir_dtx, | ||
|
@@ -621,6 +628,12 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont, | |
struct evt_desc *evt; | ||
|
||
evt = umem_off2ptr(umm, umem_off2offset(rec)); | ||
|
||
if (!evt_desc_is_valid(evt, DAE_LID(dae))) { | ||
rc = -DER_NONEXIST; | ||
break; | ||
} | ||
|
||
if (abort) { | ||
if (DAE_INDEX(dae) != DTX_INDEX_INVAL) { | ||
rc = umem_tx_add_ptr(umm, &evt->dc_dtx, | ||
|
@@ -648,6 +661,15 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont, | |
break; | ||
} | ||
|
||
if (unlikely(rc == -DER_NONEXIST)) { | ||
struct vos_tls *tls = vos_tls_get(false); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Better to use vos_pool->vp_sysdb as parameter. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, it will be changed in subsequent patch. |
||
|
||
D_WARN("DTX record no longer exists, may indicate some corruption: " | ||
DF_DTI " type %u, discard\n", | ||
DP_DTI(&DAE_XID(dae)), dtx_umoff_flag2type(rec)); | ||
d_tm_inc_gauge(tls->vtl_invalid_dtx, 1); | ||
} | ||
|
||
return rc; | ||
} | ||
|
||
|
@@ -657,6 +679,7 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab | |
struct umem_instance *umm = vos_cont2umm(cont); | ||
struct vos_dtx_act_ent_df *dae_df; | ||
struct vos_dtx_blob_df *dbd; | ||
bool invalid = false; | ||
int count; | ||
int i; | ||
int rc = 0; | ||
|
@@ -685,42 +708,52 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab | |
abort ? "abort" : "commit", DP_DTI(&DAE_XID(dae)), dbd, | ||
DP_UUID(cont->vc_pool->vp_id), DP_UUID(cont->vc_id)); | ||
|
||
if (dae->dae_records != NULL) { | ||
/* Handle DTX records as FIFO order to find out potential invalid DTX earlier. */ | ||
|
||
if (DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT) | ||
count = DTX_INLINE_REC_CNT; | ||
else | ||
count = DAE_REC_CNT(dae); | ||
|
||
for (i = 0; i < count; i++) { | ||
rc = do_dtx_rec_release(umm, cont, dae, DAE_REC_INLINE(dae)[i], abort); | ||
if (unlikely(rc == -DER_NONEXIST)) { | ||
invalid = true; | ||
break; | ||
} | ||
if (rc != 0) | ||
return rc; | ||
} | ||
|
||
if (!invalid && dae->dae_records != NULL) { | ||
D_ASSERT(DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT); | ||
D_ASSERT(!UMOFF_IS_NULL(dae_df->dae_rec_off)); | ||
|
||
for (i = DAE_REC_CNT(dae) - DTX_INLINE_REC_CNT - 1; i >= 0; i--) { | ||
for (i = 0; i < DAE_REC_CNT(dae) - DTX_INLINE_REC_CNT; i++) { | ||
rc = do_dtx_rec_release(umm, cont, dae, dae->dae_records[i], abort); | ||
if (unlikely(rc == -DER_NONEXIST)) { | ||
invalid = true; | ||
break; | ||
} | ||
if (rc != 0) | ||
return rc; | ||
} | ||
} | ||
|
||
if (!UMOFF_IS_NULL(dae_df->dae_rec_off)) { | ||
rc = umem_free(umm, dae_df->dae_rec_off); | ||
if (rc != 0) | ||
return rc; | ||
|
||
if (keep_act) { | ||
if (!invalid && keep_act) { | ||
rc = umem_tx_add_ptr(umm, &dae_df->dae_rec_off, sizeof(dae_df->dae_rec_off)); | ||
if (rc != 0) | ||
return rc; | ||
|
||
dae_df->dae_rec_off = UMOFF_NULL; | ||
} | ||
|
||
count = DTX_INLINE_REC_CNT; | ||
} else { | ||
D_ASSERT(DAE_REC_CNT(dae) <= DTX_INLINE_REC_CNT); | ||
|
||
count = DAE_REC_CNT(dae); | ||
} | ||
|
||
for (i = count - 1; i >= 0; i--) { | ||
rc = do_dtx_rec_release(umm, cont, dae, DAE_REC_INLINE(dae)[i], abort); | ||
if (rc != 0) | ||
return rc; | ||
} | ||
|
||
if (keep_act) { | ||
if (!invalid && keep_act) { | ||
/* When re-commit partial committed DTX, the count can be zero. */ | ||
if (dae_df->dae_rec_cnt > 0) { | ||
rc = umem_tx_add_ptr(umm, &dae_df->dae_rec_cnt, | ||
|
@@ -747,6 +780,9 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab | |
return 0; | ||
} | ||
|
||
if (invalid) | ||
rc = 0; | ||
|
||
if (!UMOFF_IS_NULL(dae_df->dae_mbs_off)) { | ||
/* dae_mbs_off will be invalid via flag DTE_INVALID. */ | ||
rc = umem_free(umm, dae_df->dae_mbs_off); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't quite follow this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As my understand, when we commit or abort the ilog via dtx_{commit,abort}, if commit/abort succeed, then the version will bump. Here, if the version does not bump, then must be not found. @jolivier23 , is it your expected?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That is correct. It means the entry is no longer in the ilog. This code is shared between abort, persist, and update (or insert) but only the former two need to remove the ilog entry from the dtx record
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, if we consider it as an error, I think we'd use D_ERROR instead of D_WARN.