Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kvs: add transaction stats #6556

Merged
merged 4 commits into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 46 additions & 7 deletions src/modules/kvs/kvs.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@
char *hash_name;
unsigned int seq; /* for commit transactions */
kvs_checkpoint_t *kcp;
tstat_t txn_commit_stats;
tstat_t txn_fence_stats;
zhashx_t *requests; /* track unfinished requests */
struct list_head work_queue;
};
Expand Down Expand Up @@ -1721,6 +1723,8 @@
goto error;
}

tstat_push (&ctx->txn_commit_stats, (double)json_array_size (ops));

/* N.B. no request tracking for relay. The relay does not get a
* response, only the original via finalize_transaction_bynames().
*/
Expand Down Expand Up @@ -1813,6 +1817,8 @@
goto error;
}

tstat_push (&ctx->txn_commit_stats, (double)json_array_size (ops));

work_queue_check_append (ctx, root);
}
else {
Expand Down Expand Up @@ -1920,6 +1926,9 @@
goto error;
}

tstat_push (&ctx->txn_fence_stats,
(double)json_array_size (treq_get_ops (tr)));

work_queue_check_append (ctx, root);
}

Expand Down Expand Up @@ -2040,6 +2049,9 @@
goto error;
}

tstat_push (&ctx->txn_fence_stats,
(double)json_array_size (treq_get_ops (tr)));

work_queue_check_append (ctx, root);
}
}
Expand Down Expand Up @@ -2349,6 +2361,21 @@
return 0;
}

static json_t *get_tstat_obj (tstat_t *ts, double scale)
{
json_t *o = json_pack ("{ s:i s:I s:f s:f s:I }",
"count", tstat_count (ts),
"min", (json_int_t)(tstat_min (ts)*scale),
"mean", tstat_mean (ts)*scale,
"stddev", tstat_stddev (ts)*scale,
"max", (json_int_t)(tstat_max (ts)*scale));
if (!o) {
errno = ENOMEM;
return NULL;

Check warning on line 2374 in src/modules/kvs/kvs.c

View check run for this annotation

Codecov / codecov/patch

src/modules/kvs/kvs.c#L2373-L2374

Added lines #L2373 - L2374 were not covered by tests
}
return o;
}

static void stats_get_cb (flux_t *h,
flux_msg_handler_t *mh,
const flux_msg_t *msg,
Expand All @@ -2357,6 +2384,8 @@
struct kvs_ctx *ctx = arg;
json_t *tstats = NULL;
json_t *cstats = NULL;
json_t *txncstats = NULL;
json_t *txnfstats = NULL;
json_t *nsstats = NULL;
tstat_t ts = { 0 };
int size = 0, incomplete = 0, dirty = 0;
Expand All @@ -2371,12 +2400,7 @@
goto error;
}

if (!(tstats = json_pack ("{ s:i s:f s:f s:f s:f }",
"count", tstat_count (&ts),
"min", tstat_min (&ts)*scale,
"mean", tstat_mean (&ts)*scale,
"stddev", tstat_stddev (&ts)*scale,
"max", tstat_max (&ts)*scale)))
if (!(tstats = get_tstat_obj (&ts, scale)))
goto nomem;

if (!(cstats = json_pack ("{ s:f s:O s:i s:i s:i }",
Expand All @@ -2387,6 +2411,12 @@
"#faults", ctx->faults)))
goto nomem;

if (!(txncstats = get_tstat_obj (&ctx->txn_commit_stats, 1.0)))
goto nomem;

Check warning on line 2415 in src/modules/kvs/kvs.c

View check run for this annotation

Codecov / codecov/patch

src/modules/kvs/kvs.c#L2415

Added line #L2415 was not covered by tests

if (!(txnfstats = get_tstat_obj (&ctx->txn_fence_stats, 1.0)))
goto nomem;

Check warning on line 2418 in src/modules/kvs/kvs.c

View check run for this annotation

Codecov / codecov/patch

src/modules/kvs/kvs.c#L2418

Added line #L2418 was not covered by tests

if (!(nsstats = json_object ()))
goto nomem;

Expand Down Expand Up @@ -2415,13 +2445,18 @@

if (flux_respond_pack (h,
msg,
"{ s:O s:O s:i }",
"{ s:O s:O s:{s:O s:O} s:i }",
"cache", cstats,
"namespace", nsstats,
"transaction-opcount",
"commit", txncstats,
"fence", txnfstats,
"pending_requests", zhashx_size (ctx->requests)) < 0)
flux_log_error (h, "%s: flux_respond_pack", __FUNCTION__);
json_decref (tstats);
json_decref (cstats);
json_decref (txncstats);
json_decref (txnfstats);
json_decref (nsstats);
return;
nomem:
Expand All @@ -2431,6 +2466,8 @@
flux_log_error (h, "%s: flux_respond_error", __FUNCTION__);
json_decref (tstats);
json_decref (cstats);
json_decref (txncstats);
json_decref (txnfstats);

Check warning on line 2470 in src/modules/kvs/kvs.c

View check run for this annotation

Codecov / codecov/patch

src/modules/kvs/kvs.c#L2469-L2470

Added lines #L2469 - L2470 were not covered by tests
json_decref (nsstats);
Comment on lines 2467 to 2471
Copy link
Member

@garlick garlick Jan 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the convenience function added a key to an existing object rather than return an object, it would save on the temp variables.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had thought about that, but that's only for the first case w/ the object stats here.

    if (!(cstats = json_pack ("{ s:f s:O s:i s:i s:i }",                                                                                    
                              "obj size total (MiB)", (double)size/1048576,                                                                 
                              "obj size (KiB)", tstats,                                                                                     
                              "#obj dirty", dirty,                                                                                          
                              "#obj incomplete", incomplete,                                                                                
                              "#faults", ctx->faults)))                                                                                     
        goto nomem;   

Most of the "object' construction is done with a rpc response at the end:

    if (flux_respond_pack (h,                                                                                                               
                           msg,                                                                                                             
                           "{ s:O s:O s:{s:O s:O} s:i }",                                                                                   
                           "cache", cstats,                                                                                                 
                           "namespace", nsstats,                                                                                            
                           "transaction-opcount",                                                                                           
                             "commit", txncstats,                                                                                           
                             "fence", txnfstats,                                                                                            
                           "pending_requests", zhashx_size (ctx->requests)) < 0)                                                            
        flux_log_error (h, "%s: flux_respond_pack", __FUNCTION__);   

we could create a "transaction-ops" object with json_object() and pass it around. It seemed like a "meh" win to me ... feel strongly about it?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope :-)

}

Expand All @@ -2443,6 +2480,8 @@
static void stats_clear (struct kvs_ctx *ctx)
{
ctx->faults = 0;
memset (&ctx->txn_commit_stats, '\0', sizeof (ctx->txn_commit_stats));
memset (&ctx->txn_fence_stats, '\0', sizeof (ctx->txn_fence_stats));

if (kvsroot_mgr_iter_roots (ctx->krm, stats_clear_root_cb, NULL) < 0)
flux_log_error (ctx->h, "%s: kvsroot_mgr_iter_roots", __FUNCTION__);
Expand Down
94 changes: 64 additions & 30 deletions t/t1001-kvs-internals.t
Original file line number Diff line number Diff line change
Expand Up @@ -461,36 +461,6 @@ test_expect_success 'kvs: read-your-writes consistency on alt namespace' '
flux kvs namespace remove rywtestns
'

#
# test clear of stats
#

# each store of largeval will increase the noop store count, b/c we
# know that the identical large value will be cached as raw data

test_expect_success 'kvs: clear stats locally' '
flux kvs unlink -Rf $DIR &&
flux module stats -c kvs &&
flux module stats --parse "namespace.primary.#no-op stores" kvs | grep -q 0 &&
flux kvs put $DIR.largeval1=$largeval &&
flux kvs put $DIR.largeval2=$largeval &&
! flux module stats --parse "namespace.primary.#no-op stores" kvs | grep -q 0 &&
flux module stats -c kvs &&
flux module stats --parse "namespace.primary.#no-op stores" kvs | grep -q 0
'

test_expect_success NO_ASAN 'kvs: clear stats globally' '
flux kvs unlink -Rf $DIR &&
flux module stats -C kvs &&
flux exec -n sh -c "flux module stats --parse \"namespace.primary.#no-op stores\" kvs | grep -q 0" &&
for i in `seq 0 $((${SIZE} - 1))`; do
flux exec -n -r $i sh -c "flux kvs put $DIR.$i.largeval1=$largeval $DIR.$i.largeval2=$largeval"
done &&
! flux exec -n sh -c "flux module stats --parse \"namespace.primary.#no-op stores\" kvs | grep -q 0" &&
flux module stats -C kvs &&
flux exec -n sh -c "flux module stats --parse \"namespace.primary.#no-op stores\" kvs | grep -q 0"
'

#
# test fence api
#
Expand Down Expand Up @@ -534,6 +504,70 @@ test_expect_success 'kvs: 1 pending requests at end of tests before module remov
test $pendingcount1 -eq 1
'

#
# transaction module stats
#

test_expect_success 'kvs: module stats returns reasonable transaction stats' '
commitdata=$(flux module stats -p transaction-opcount.commit kvs) &&
echo $commitdata | jq -e ".count > 0" &&
echo $commitdata | jq -e ".min > 0" &&
echo $commitdata | jq -e ".max > 0" &&
echo $commitdata | jq -e ".mean > 0.0" &&
echo $commitdata | jq -e ".stddev >= 0.0" &&
fencedata=$(flux module stats -p transaction-opcount.fence kvs) &&
echo $fencedata | jq -e ".count > 0" &&
echo $fencedata | jq -e ".min > 0" &&
echo $fencedata | jq -e ".max > 0" &&
echo $fencedata | jq -e ".mean > 0.0" &&
echo $fencedata | jq -e ".stddev >= 0.0"
'

#
# test clear of stats
#

# each store of largeval will increase the noop store count, b/c we
# know that the identical large value will be cached as raw data

test_expect_success 'kvs: clear stats locally' '
flux kvs unlink -Rf $DIR &&
flux module stats -c kvs &&
flux module stats --parse "namespace.primary.#no-op stores" kvs | grep -q 0 &&
flux kvs put $DIR.largeval1=$largeval &&
flux kvs put $DIR.largeval2=$largeval &&
! flux module stats --parse "namespace.primary.#no-op stores" kvs | grep -q 0 &&
flux module stats -c kvs &&
flux module stats --parse "namespace.primary.#no-op stores" kvs | grep -q 0
'

test_expect_success 'kvs: clear of transaction stats works' '
commitdata=$(flux module stats -p transaction-opcount.commit kvs) &&
echo $commitdata | jq -e ".count == 0" &&
echo $commitdata | jq -e ".min == 0" &&
echo $commitdata | jq -e ".max == 0" &&
echo $commitdata | jq -e ".mean == 0.0" &&
echo $commitdata | jq -e ".stddev == 0.0" &&
fencedata=$(flux module stats -p transaction-opcount.fence kvs) &&
echo $fencedata | jq -e ".count == 0" &&
echo $fencedata | jq -e ".min == 0" &&
echo $fencedata | jq -e ".max == 0" &&
echo $fencedata | jq -e ".mean == 0.0" &&
echo $fencedata | jq -e ".stddev == 0.0"
'

test_expect_success NO_ASAN 'kvs: clear stats globally' '
flux kvs unlink -Rf $DIR &&
flux module stats -C kvs &&
flux exec -n sh -c "flux module stats --parse \"namespace.primary.#no-op stores\" kvs | grep -q 0" &&
for i in `seq 0 $((${SIZE} - 1))`; do
flux exec -n -r $i sh -c "flux kvs put $DIR.$i.largeval1=$largeval $DIR.$i.largeval2=$largeval"
done &&
! flux exec -n sh -c "flux module stats --parse \"namespace.primary.#no-op stores\" kvs | grep -q 0" &&
flux module stats -C kvs &&
flux exec -n sh -c "flux module stats --parse \"namespace.primary.#no-op stores\" kvs | grep -q 0"
'

#
# test ENOSYS on unfinished requests when unloading the KVS module
#
Expand Down
Loading