Skip to content

Commit

Permalink
merge changes from mainline
Browse files Browse the repository at this point in the history
  • Loading branch information
aditya-purang committed Aug 8, 2024
1 parent eddc488 commit 2a8de0e
Show file tree
Hide file tree
Showing 19 changed files with 139 additions and 137 deletions.
4 changes: 3 additions & 1 deletion RELEASE_NOTES
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
========================================================================
Amazon CloudWatch Agent 1.300043.0 (2024-08-06)
Amazon CloudWatch Agent 1.300043.0 (2024-08-08)
========================================================================
Bug Fixes:
* [Metrics/Plugin] Drop original metrics support added for statsd, collectd and ethtool metrics
* [Logs/Windows Event] Add Windows Event Log service restart detection and resubscription
* [Metrics/JMX, Metrics/Net, Metrics/DiskIO] Change cumulative to delta conversion to drop initial value
* [Metrics/JMX] Suppress sessions unit warning

Enhancements:
* [Metrics/JMX] Add cumulative to delta conversion for JMX metrics
Expand Down
1 change: 1 addition & 0 deletions internal/cloudwatch/unit.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ var scaledBaseUnits = map[types.StandardUnit]map[unit.MetricPrefix]types.Standar

var knownNonConvertibleUnits = collections.NewSet(
// JMX/Tomcat units
"sessions",
"errors",
"threads",
"requests",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ const (
Kubernetes = "kubernetes"
Region = "region"
SubnetId = "subnet_id"
RuntimeTagOverride = "DEFAULT"
NeuronExecutionErrorsAggregatedMetric = containerinsightscommon.NeuronExecutionErrors + "_total"
NeuronDeviceHardwareEccEventsAggregatedMetric = containerinsightscommon.NeuronDeviceHardwareEccEvents + "_total"
)
Expand Down Expand Up @@ -121,7 +122,7 @@ func (md *AwsNeuronMetricModifier) ModifyMetric(originalMetric pmetric.Metric, m
}
// Neuron metrics sent by the neuron monitor don't have any units so we add them in the agent.
addUnit(originalMetric)
prefixCoreAndDeviceLabels(originalMetric)
updateCoreDeviceRuntimeLabels(originalMetric)
resetStaleDatapoints(originalMetric)

originalMetricName := originalMetric.Name()
Expand Down Expand Up @@ -248,7 +249,7 @@ func (md *AwsNeuronMetricModifier) extractDatapointsAsMetricsAndAggregate(origin

// updateCoreDeviceRuntimeLabels prefixes NeuronCore and NeuronDevice values with `core` and `device` respectively
// to make the attribute values more verbose, and also sets the RuntimeTag attribute to RuntimeTagOverride.
func prefixCoreAndDeviceLabels(originalMetric pmetric.Metric) {
func updateCoreDeviceRuntimeLabels(originalMetric pmetric.Metric) {
dps := originalMetric.Sum().DataPoints()
for i := 0; i < dps.Len(); i++ {
dp := dps.At(i)
Expand All @@ -257,6 +258,7 @@ func prefixCoreAndDeviceLabels(originalMetric pmetric.Metric) {
dp.Attributes().PutStr(attributeKey, attributeValuePrefix+value.Str())
}
}
dp.Attributes().PutStr(RuntimeTag, RuntimeTagOverride)
}
}

Expand Down Expand Up @@ -313,7 +315,7 @@ func resetStaleDatapoints(originalMetric pmetric.Metric) {
dp := dps.At(i)
if dp.ValueType() == pmetric.NumberDataPointValueTypeEmpty || dp.Flags().NoRecordedValue() {
dp.SetDoubleValue(dp.DoubleValue())
dp.Attributes().PutStr(RuntimeTag, "default")
dp.Attributes().PutStr(RuntimeTag, RuntimeTagOverride)
dp.SetFlags(dp.Flags().WithNoRecordedValue(false))
}
}
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ var PodNeuronLabelFilter = map[string]map[string]interface{}{
containerinsightscommon.K8sLabelsKey: nil,
},
internal.Region: nil,
internal.RuntimeTag: nil,
internal.SubnetId: nil,
internal.NeuronCore: nil,
containerinsightscommon.MetricType: nil,
Expand All @@ -132,7 +131,6 @@ var ContainerNeuronLabelFilter = map[string]map[string]interface{}{
containerinsightscommon.K8sLabelsKey: nil,
},
internal.Region: nil,
internal.RuntimeTag: nil,
internal.SubnetId: nil,
internal.NeuronCore: nil,
containerinsightscommon.MetricType: nil,
Expand All @@ -152,7 +150,6 @@ var NodeNeuronLabelFilter = map[string]map[string]interface{}{
containerinsightscommon.K8sLabelsKey: nil,
},
internal.Region: nil,
internal.RuntimeTag: nil,
internal.SubnetId: nil,
internal.NeuronCore: nil,
containerinsightscommon.MetricType: nil,
Expand Down
1 change: 0 additions & 1 deletion plugins/processors/gpuattributes/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,4 +230,3 @@ func dropResourceMetricAttributes(resourceMetric pmetric.ResourceMetrics) {
resourceMetric.Resource().Attributes().Clear()
}
}

40 changes: 23 additions & 17 deletions plugins/processors/gpuattributes/processor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,12 +228,15 @@ func TestProcessMetricsForNeuronMetrics(t *testing.T) {
}),
wantMetricCnt: 2,
want: []map[string]string{
// neuron_execution_latency
{
"ClusterName": "cluster",
"Drop": "val",
"percentile": "p50",
"runtime_tag": "DEFAULT",
"kubernetes": "{\"host\":\"test\",\"drop\":\"2\",\"labels\":\"label\"}",
},
// node_neuron_execution_latency
{
"ClusterName": "cluster",
"Type": "NodeAWSNeuron",
Expand All @@ -256,65 +259,66 @@ func TestProcessMetricsForNeuronMetrics(t *testing.T) {
}),
wantMetricCnt: 7,
want: []map[string]string{
// neuroncore_memory_usage_constants
{
"ClusterName": "cluster",
"Drop": "val",
"runtime_tag": "10",
"runtime_tag": "DEFAULT",
"NeuronCore": "core0",
"NeuronDevice": "device0",
"PodName": "testPod",
"ContainerName": "testContainer",
"kubernetes": "{\"host\":\"test\",\"drop\":\"2\",\"labels\":\"label\"}",
},
// container_neuroncore_memory_usage_constants
{
"ClusterName": "cluster",
"runtime_tag": "10",
"NeuronCore": "core0",
"NeuronDevice": "device0",
"Type": "ContainerAWSNeuronCore",
"PodName": "testPod",
"ContainerName": "testContainer",
"kubernetes": "{\"host\":\"test\",\"labels\":\"label\"}",
},
// pod_neuroncore_memory_usage_constants
{
"ClusterName": "cluster",
"runtime_tag": "10",
"NeuronCore": "core0",
"NeuronDevice": "device0",
"Type": "PodAWSNeuronCore",
"PodName": "testPod",
"kubernetes": "{\"host\":\"test\",\"labels\":\"label\"}",
},
// node_neuroncore_memory_usage_constants
{
"ClusterName": "cluster",
"runtime_tag": "10",
"NeuronCore": "core0",
"NeuronDevice": "device0",
"Type": "NodeAWSNeuronCore",
"kubernetes": "{\"host\":\"test\",\"labels\":\"label\"}",
},
// container_neuroncore_memory_usage_total
{
"ClusterName": "cluster",
"runtime_tag": "10",
"NeuronCore": "core0",
"NeuronDevice": "device0",
"Type": "ContainerAWSNeuronCore",
"PodName": "testPod",
"ContainerName": "testContainer",
"kubernetes": "{\"host\":\"test\",\"labels\":\"label\"}",
},
// pod_neuroncore_memory_usage_total
{
"ClusterName": "cluster",
"runtime_tag": "10",
"NeuronCore": "core0",
"NeuronDevice": "device0",
"Type": "PodAWSNeuronCore",
"PodName": "testPod",
"kubernetes": "{\"host\":\"test\",\"labels\":\"label\"}",
},
// node_neuroncore_memory_usage_total
{
"ClusterName": "cluster",
"runtime_tag": "10",
"NeuronCore": "core0",
"NeuronDevice": "device0",
"Type": "NodeAWSNeuronCore",
Expand All @@ -335,25 +339,26 @@ func TestProcessMetricsForNeuronMetrics(t *testing.T) {
}),
wantMetricCnt: 3,
want: []map[string]string{
// neuroncore_memory_usage_constants
{
"ClusterName": "cluster",
"Drop": "val",
"runtime_tag": "10",
"runtime_tag": "DEFAULT",
"NeuronCore": "core0",
"NeuronDevice": "device0",
"kubernetes": "{\"host\":\"test\",\"drop\":\"2\",\"labels\":\"label\"}",
},
// node_neuroncore_memory_usage_constants
{
"ClusterName": "cluster",
"runtime_tag": "10",
"NeuronCore": "core0",
"NeuronDevice": "device0",
"Type": "NodeAWSNeuronCore",
"kubernetes": "{\"host\":\"test\",\"labels\":\"label\"}",
},
// node_neuroncore_memory_usage_total
{
"ClusterName": "cluster",
"runtime_tag": "10",
"NeuronCore": "core0",
"NeuronDevice": "device0",
"Type": "NodeAWSNeuronCore",
Expand All @@ -377,66 +382,67 @@ func TestProcessMetricsForNeuronMetrics(t *testing.T) {
}),
wantMetricCnt: 7,
want: []map[string]string{
// neurondevice_hw_ecc_events
{
"ClusterName": "cluster",
"Drop": "val",
"runtime_tag": "10",
"runtime_tag": "DEFAULT",
"NeuronCore": "core0",
"NeuronDevice": "device0",
"event_type": "mem_ecc_corrected",
"kubernetes": "{\"host\":\"test\",\"drop\":\"2\",\"labels\":\"label\"}",
"PodName": "testPod",
"ContainerName": "testContainer",
},
// container_neurondevice_hw_ecc_events_mem_ecc_corrected
{
"ClusterName": "cluster",
"runtime_tag": "10",
"NeuronCore": "core0",
"NeuronDevice": "device0",
"Type": "ContainerAWSNeuronDevice",
"kubernetes": "{\"host\":\"test\"}",
"PodName": "testPod",
"ContainerName": "testContainer",
},
// pod_neurondevice_hw_ecc_events_mem_ecc_corrected
{
"ClusterName": "cluster",
"runtime_tag": "10",
"NeuronCore": "core0",
"NeuronDevice": "device0",
"Type": "PodAWSNeuronDevice",
"kubernetes": "{\"host\":\"test\"}",
"PodName": "testPod",
},
// node_neurondevice_hw_ecc_events_mem_ecc_corrected
{
"ClusterName": "cluster",
"runtime_tag": "10",
"NeuronCore": "core0",
"NeuronDevice": "device0",
"Type": "NodeAWSNeuronDevice",
"kubernetes": "{\"host\":\"test\"}",
},
// container_neurondevice_hw_ecc_events_total
{
"ClusterName": "cluster",
"runtime_tag": "10",
"NeuronCore": "core0",
"NeuronDevice": "device0",
"Type": "ContainerAWSNeuronDevice",
"kubernetes": "{\"host\":\"test\"}",
"PodName": "testPod",
"ContainerName": "testContainer",
},
// pod_neurondevice_hw_ecc_events_total
{
"ClusterName": "cluster",
"runtime_tag": "10",
"NeuronCore": "core0",
"NeuronDevice": "device0",
"Type": "PodAWSNeuronDevice",
"kubernetes": "{\"host\":\"test\"}",
"PodName": "testPod",
},
// node_neurondevice_hw_ecc_events_total
{
"ClusterName": "cluster",
"runtime_tag": "10",
"NeuronCore": "core0",
"NeuronDevice": "device0",
"Type": "NodeAWSNeuronDevice",
Expand Down
1 change: 1 addition & 0 deletions tool/clean/clean_eks/clean_eks.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ var (
ClustersToClean = []string{
"cwagent-eks-integ-",
"cwagent-operator-helm-integ-",
"cwagent-helm-chart-integ-",
"cwagent-operator-eks-integ-",
}
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ processors:
- diskio_iops_in_progress
include:
match_type: ""
initial_value: 0
initial_value: 2
max_staleness: 0s
ec2tagger:
ec2_instance_tag_keys:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ processors:
- diskio_iops_in_progress
include:
match_type: ""
initial_value: 0
initial_value: 2
max_staleness: 0s
ec2tagger:
ec2_instance_tag_keys:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ processors:
- diskio_iops_in_progress
include:
match_type: ""
initial_value: 0
initial_value: 2
max_staleness: 0s
ec2tagger:
ec2_instance_tag_keys:
Expand Down
4 changes: 2 additions & 2 deletions translator/tocwconfig/sampleConfig/complete_linux_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -115,14 +115,14 @@ processors:
- diskio_iops_in_progress
include:
match_type: ""
initial_value: 0
initial_value: 2
max_staleness: 0s
cumulativetodelta/jmx:
exclude:
match_type: ""
include:
match_type: ""
initial_value: 0
initial_value: 2
max_staleness: 0s
ec2tagger:
ec2_instance_tag_keys:
Expand Down
2 changes: 1 addition & 1 deletion translator/tocwconfig/sampleConfig/delta_config_linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ processors:
- diskio_iops_in_progress
include:
match_type: ""
initial_value: 0
initial_value: 2
max_staleness: 0s
ec2tagger:
ec2_instance_tag_keys:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ processors:
match_type: ""
include:
match_type: ""
initial_value: 0
initial_value: 2
max_staleness: 0s
ec2tagger:
ec2_instance_tag_keys:
Expand Down
2 changes: 1 addition & 1 deletion translator/tocwconfig/sampleConfig/jmx_config_linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ processors:
match_type: ""
include:
match_type: ""
initial_value: 0
initial_value: 2
max_staleness: 0s
filter/jmx:
error_mode: propagate
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ processors:
- diskio_iops_in_progress
include:
match_type: ""
initial_value: 0
initial_value: 2
max_staleness: 0s
ec2tagger:
ec2_instance_tag_keys:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ processors:
- diskio_iops_in_progress
include:
match_type: ""
initial_value: 0
initial_value: 2
max_staleness: 0s
ec2tagger:
ec2_instance_tag_keys:
Expand Down
Loading

0 comments on commit 2a8de0e

Please sign in to comment.