Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kvs: add transaction stats #6556

Merged
merged 4 commits into from
Jan 21, 2025
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 46 additions & 7 deletions src/modules/kvs/kvs.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ struct kvs_ctx {
char *hash_name;
unsigned int seq; /* for commit transactions */
kvs_checkpoint_t *kcp;
tstat_t txn_commit_stats;
tstat_t txn_fence_stats;
zhashx_t *requests; /* track unfinished requests */
struct list_head work_queue;
};
Expand Down Expand Up @@ -1721,6 +1723,8 @@ static void relaycommit_request_cb (flux_t *h,
goto error;
}

tstat_push (&ctx->txn_commit_stats, (double)json_array_size (ops));

/* N.B. no request tracking for relay. The relay does not get a
* response, only the original via finalize_transaction_bynames().
*/
Expand Down Expand Up @@ -1813,6 +1817,8 @@ static void commit_request_cb (flux_t *h,
goto error;
}

tstat_push (&ctx->txn_commit_stats, (double)json_array_size (ops));

work_queue_check_append (ctx, root);
}
else {
Expand Down Expand Up @@ -1920,6 +1926,9 @@ static void relayfence_request_cb (flux_t *h,
goto error;
}

tstat_push (&ctx->txn_fence_stats,
(double)json_array_size (treq_get_ops (tr)));

work_queue_check_append (ctx, root);
}

Expand Down Expand Up @@ -2040,6 +2049,9 @@ static void fence_request_cb (flux_t *h,
goto error;
}

tstat_push (&ctx->txn_fence_stats,
(double)json_array_size (treq_get_ops (tr)));

work_queue_check_append (ctx, root);
}
}
Expand Down Expand Up @@ -2349,6 +2361,21 @@ static int stats_get_root_cb (struct kvsroot *root, void *arg)
return 0;
}

static json_t *get_tstat_obj (tstat_t *ts, double scale)
{
json_t *o = json_pack ("{ s:i s:I s:f s:f s:I }",
"count", tstat_count (ts),
"min", (json_int_t)(tstat_min (ts)*scale),
"mean", tstat_mean (ts)*scale,
"stddev", tstat_stddev (ts)*scale,
"max", (json_int_t)(tstat_max (ts)*scale));
if (!o) {
errno = ENOMEM;
return NULL;
}
return o;
}

static void stats_get_cb (flux_t *h,
flux_msg_handler_t *mh,
const flux_msg_t *msg,
Expand All @@ -2357,6 +2384,8 @@ static void stats_get_cb (flux_t *h,
struct kvs_ctx *ctx = arg;
json_t *tstats = NULL;
json_t *cstats = NULL;
json_t *txncstats = NULL;
json_t *txnfstats = NULL;
json_t *nsstats = NULL;
tstat_t ts = { 0 };
int size = 0, incomplete = 0, dirty = 0;
Expand All @@ -2371,12 +2400,7 @@ static void stats_get_cb (flux_t *h,
goto error;
}

if (!(tstats = json_pack ("{ s:i s:f s:f s:f s:f }",
"count", tstat_count (&ts),
"min", tstat_min (&ts)*scale,
"mean", tstat_mean (&ts)*scale,
"stddev", tstat_stddev (&ts)*scale,
"max", tstat_max (&ts)*scale)))
if (!(tstats = get_tstat_obj (&ts, scale)))
goto nomem;

if (!(cstats = json_pack ("{ s:f s:O s:i s:i s:i }",
Expand All @@ -2387,6 +2411,12 @@ static void stats_get_cb (flux_t *h,
"#faults", ctx->faults)))
goto nomem;

if (!(txncstats = get_tstat_obj (&ctx->txn_commit_stats, 1.0)))
goto nomem;

if (!(txnfstats = get_tstat_obj (&ctx->txn_fence_stats, 1.0)))
goto nomem;

if (!(nsstats = json_object ()))
goto nomem;

Expand Down Expand Up @@ -2415,13 +2445,18 @@ static void stats_get_cb (flux_t *h,

if (flux_respond_pack (h,
msg,
"{ s:O s:O s:i }",
"{ s:O s:O s:{s:O s:O} s:i }",
"cache", cstats,
"namespace", nsstats,
"transactions",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: make this key more descriptive like transaction-opcount?

"commit", txncstats,
"fence", txnfstats,
"pending_requests", zhashx_size (ctx->requests)) < 0)
flux_log_error (h, "%s: flux_respond_pack", __FUNCTION__);
json_decref (tstats);
json_decref (cstats);
json_decref (txncstats);
json_decref (txnfstats);
json_decref (nsstats);
return;
nomem:
Expand All @@ -2431,6 +2466,8 @@ static void stats_get_cb (flux_t *h,
flux_log_error (h, "%s: flux_respond_error", __FUNCTION__);
json_decref (tstats);
json_decref (cstats);
json_decref (txncstats);
json_decref (txnfstats);
json_decref (nsstats);
Comment on lines 2467 to 2471
Copy link
Member

@garlick garlick Jan 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the convenience function added a key to an existing object rather than return an object, it would save on the temp variables.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had thought about that, but that's only for the first case w/ the object stats here.

    if (!(cstats = json_pack ("{ s:f s:O s:i s:i s:i }",                                                                                    
                              "obj size total (MiB)", (double)size/1048576,                                                                 
                              "obj size (KiB)", tstats,                                                                                     
                              "#obj dirty", dirty,                                                                                          
                              "#obj incomplete", incomplete,                                                                                
                              "#faults", ctx->faults)))                                                                                     
        goto nomem;   

Most of the "object' construction is done with a rpc response at the end:

    if (flux_respond_pack (h,                                                                                                               
                           msg,                                                                                                             
                           "{ s:O s:O s:{s:O s:O} s:i }",                                                                                   
                           "cache", cstats,                                                                                                 
                           "namespace", nsstats,                                                                                            
                           "transaction-opcount",                                                                                           
                             "commit", txncstats,                                                                                           
                             "fence", txnfstats,                                                                                            
                           "pending_requests", zhashx_size (ctx->requests)) < 0)                                                            
        flux_log_error (h, "%s: flux_respond_pack", __FUNCTION__);   

we could create a "transaction-ops" object with json_object() and pass it around. It seemed like a "meh" win to me ... feel strongly about it?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope :-)

}

Expand All @@ -2443,6 +2480,8 @@ static int stats_clear_root_cb (struct kvsroot *root, void *arg)
static void stats_clear (struct kvs_ctx *ctx)
{
ctx->faults = 0;
memset (&ctx->txn_commit_stats, '\0', sizeof (ctx->txn_commit_stats));
memset (&ctx->txn_fence_stats, '\0', sizeof (ctx->txn_fence_stats));

if (kvsroot_mgr_iter_roots (ctx->krm, stats_clear_root_cb, NULL) < 0)
flux_log_error (ctx->h, "%s: kvsroot_mgr_iter_roots", __FUNCTION__);
Expand Down