From 6d52a0961f41b2112b6e09b45bffe926b72ec451 Mon Sep 17 00:00:00 2001 From: luomingmeng Date: Wed, 15 Jan 2025 14:45:22 +0800 Subject: [PATCH] chore(qrm): qrm cpu/memory plugin alloc_failed and get_topology_hints_failed metrics add inplace resizing tag --- pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go | 9 +++++++-- pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go | 9 +++++++-- pkg/agent/qrm-plugins/util/consts.go | 4 ++++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go index ea8a4d1d7..c3e2216f6 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go @@ -19,6 +19,7 @@ package dynamicpolicy import ( "context" "fmt" + "strconv" "sync" "time" @@ -695,13 +696,16 @@ func (p *DynamicPolicy) GetTopologyHints(ctx context.Context, defer func() { p.RUnlock() if err != nil { + inplaceUpdateResizing := util.PodInplaceUpdateResizing(req) _ = p.emitter.StoreInt64(util.MetricNameGetTopologyHintsFailed, 1, metrics.MetricTypeNameRaw, - metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(err)}) + metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(err)}, + metrics.MetricTag{Key: util.MetricTagNameInplaceUpdateResizing, Val: strconv.FormatBool(inplaceUpdateResizing)}) general.ErrorS(err, "GetTopologyHints failed", "podNamespace", req.PodNamespace, "podName", req.PodName, "containerName", req.ContainerName, + "inplaceUpdateResizing", inplaceUpdateResizing, ) } general.InfoS("finished", @@ -844,7 +848,8 @@ func (p *DynamicPolicy) Allocate(ctx context.Context, } else if respErr != nil { _ = p.removeContainer(req.PodUid, req.ContainerName) _ = p.emitter.StoreInt64(util.MetricNameAllocateFailed, 1, metrics.MetricTypeNameRaw, - metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(respErr)}) + metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(respErr)}, + metrics.MetricTag{Key: util.MetricTagNameInplaceUpdateResizing, Val: strconv.FormatBool(util.PodInplaceUpdateResizing(req))}) } p.Unlock() diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go index 058ad3402..e94423e0e 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go @@ -20,6 +20,7 @@ import ( "context" "errors" "fmt" + "strconv" "sync" "time" @@ -585,12 +586,15 @@ func (p *DynamicPolicy) GetTopologyHints(ctx context.Context, defer func() { p.RUnlock() if err != nil { + inplaceUpdateResizing := util.PodInplaceUpdateResizing(req) _ = p.emitter.StoreInt64(util.MetricNameGetTopologyHintsFailed, 1, metrics.MetricTypeNameRaw, - metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(err)}) + metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(err)}, + metrics.MetricTag{Key: util.MetricTagNameInplaceUpdateResizing, Val: strconv.FormatBool(inplaceUpdateResizing)}) general.ErrorS(err, "GetTopologyHints failed", "podNamespace", req.PodNamespace, "podName", req.PodName, "containerName", req.ContainerName, + "inplaceUpdateResizing", inplaceUpdateResizing, ) } general.InfoS("finished", @@ -949,7 +953,8 @@ func (p *DynamicPolicy) Allocate(ctx context.Context, } else if respErr != nil { _ = p.removeContainer(req.PodUid, req.ContainerName) _ = p.emitter.StoreInt64(util.MetricNameAllocateFailed, 1, metrics.MetricTypeNameRaw, - metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(respErr)}) + metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(respErr)}, + metrics.MetricTag{Key: util.MetricTagNameInplaceUpdateResizing, Val: strconv.FormatBool(util.PodInplaceUpdateResizing(req))}) } p.Unlock() diff --git a/pkg/agent/qrm-plugins/util/consts.go b/pkg/agent/qrm-plugins/util/consts.go index 8dd5c9602..58bc451ea 100644 --- a/pkg/agent/qrm-plugins/util/consts.go +++ b/pkg/agent/qrm-plugins/util/consts.go @@ -58,6 +58,10 @@ const ( MetricNameShareCoresNoEnoughResourceFailed = "share_cores_no_enough_resource" ) +const ( + MetricTagNameInplaceUpdateResizing = "inplaceUpdateResizing" +) + // those are OCI property names to be used by QRM plugins const ( OCIPropertyNameCPUSetCPUs = "CpusetCpus"