Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Perf/Stress test fixes #1066

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion e2e/stress/IoTClientPerf/IoTClientPerf.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>netcoreapp2.1</TargetFramework>
<TargetFramework>netcoreapp2.2</TargetFramework>
<RootNamespace>Microsoft.Azure.Devices.E2ETests</RootNamespace>
</PropertyGroup>

Expand Down
16 changes: 8 additions & 8 deletions e2e/stress/IoTClientPerf/PerfTestRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ public class PerfTestRunner
private readonly int _timeSeconds;
private readonly Func<PerfScenarioConfig, PerfScenario> _scenarioFactory;

private PerfScenario[] _tests;
private Stopwatch _sw = new Stopwatch();
private readonly PerfScenario[] _tests;
private readonly Stopwatch _sw = new Stopwatch();

public PerfTestRunner(
ResultWriter writer,
Expand Down Expand Up @@ -148,13 +148,13 @@ private async Task LoopAsync()
(double avgRps, double stdDevRps) = CalculateAvgAndStDev(statRps);
double avgBps = avgRps * _messageSizeBytes;
double stdDevBps = stdDevRps * _messageSizeBytes;
SystemMetrics.GetMetrics(out int cpuPercent, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn);
SystemMetrics.GetMetrics(out int cpuLoad, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn);

Console.WriteLine($"[{_sw.Elapsed}] Loop Statistics:");
Console.WriteLine($"RPS : {requestsPerSec,10:N2} R/s Avg: {avgRps,10:N2} R/s +/-StdDev: {stdDevRps,10:N2} R/s");
Console.WriteLine($"Throughput: {GetHumanReadableBytes(transferPerSec)}/s Avg: {GetHumanReadableBytes(avgBps)}/s +/-StdDev: {GetHumanReadableBytes(avgRps)}/s ");
Console.WriteLine($"Connected : {devConn,10:N0} ");
Console.WriteLine($"CPU : {cpuPercent,10:N2}% Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}");
Console.WriteLine($"CPU Load : {(float)cpuLoad/100,10:N2} Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}");
Console.WriteLine("----");
Console.WriteLine($"TOTALs: ");
Console.WriteLine($"Requests : Completed: {statTotalCompleted,10:N0} Faulted: {statTotalFaulted,10:N0} Cancelled: {statTotalCancelled,10:N0}");
Expand Down Expand Up @@ -211,12 +211,12 @@ private async Task SetupAllAsync()
double totalRequestsPerSec = statTotalCompleted / statTotalSeconds;

(double avgRps, double stdDevRps) = CalculateAvgAndStDev(statRps);
SystemMetrics.GetMetrics(out int cpuPercent, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn);
SystemMetrics.GetMetrics(out int cpuLoad, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn);

Console.WriteLine($"[{_sw.Elapsed}] Setup Statistics:");
Console.WriteLine($"RPS : {requestsPerSec,10:N2} R/s Avg: {avgRps,10:N2} R/s +/-StdDev: {stdDevRps,10:N2} R/s");
Console.WriteLine($"Connected : {devConn,10:N0} ");
Console.WriteLine($"CPU : {cpuPercent,10:N2}% Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}");
Console.WriteLine($"CPU Load : {(float)cpuLoad/100,10:N2} Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}");
Console.WriteLine("----");
Console.WriteLine($"TOTALs: ");
Console.WriteLine($"Requests : Completed: {statTotalCompleted,10:N0} Faulted: {statTotalFaulted,10:N0} Cancelled: {statTotalCancelled,10:N0}");
Expand Down Expand Up @@ -257,13 +257,13 @@ private async Task TeardownAllAsync()
double totalRequestsPerSec = statTotalCompleted / statTotalSeconds;

(double avgRps, double stdDevRps) = CalculateAvgAndStDev(statRps);
SystemMetrics.GetMetrics(out int cpuPercent, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn);
SystemMetrics.GetMetrics(out int cpuLoad, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn);


Console.WriteLine($"[{_sw.Elapsed}] Teardown Statistics:");
Console.WriteLine($"RPS : {requestsPerSec,10:N2} R/s Avg: {avgRps,10:N2} R/s +/-StdDev: {stdDevRps,10:N2} R/s");
Console.WriteLine($"Connected : {devConn,10:N0} ");
Console.WriteLine($"CPU : {cpuPercent,10:N2}% Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}");
Console.WriteLine($"CPU Load : {(float)cpuLoad/100,10:N2} Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}");
Console.WriteLine("----");
Console.WriteLine($"TOTALs: ");
Console.WriteLine($"Requests : Completed: {statTotalCompleted,10:N0} Faulted: {statTotalFaulted,10:N0} Cancelled: {statTotalCancelled,10:N0}");
Expand Down
17 changes: 0 additions & 17 deletions e2e/stress/IoTClientPerf/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Net.NetworkInformation;

namespace Microsoft.Azure.Devices.E2ETests
{
Expand Down Expand Up @@ -49,21 +47,6 @@ private static Dictionary<string, Tuple<string, Func<PerfScenarioConfig, PerfSce
"Devices receiving method calls from IoT Hub.",
(c) => {return new DeviceMethodTest(c);}) },

{ "device_d2c_noretry",
new Tuple<string, Func<PerfScenarioConfig, PerfScenario>>(
"Like device_d2c but will disable retries and create a new DeviceClient when the previous enters a faulted state.",
(c) => {return new DeviceD2CNoRetry(c);})},

{ "device_c2d_noretry",
new Tuple<string, Func<PerfScenarioConfig, PerfScenario>>(
"Like device_c2d but will disable retries and create a new DeviceClient when the previous enters a faulted state.",
(c) => {return new DeviceC2DNoRetry(c);})},

{ "device_methods_noretry",
new Tuple<string, Func<PerfScenarioConfig, PerfScenario>>(
"Like device_methods but will disable retries and create a new DeviceClient when the previous enters a faulted state.",
(c) => {return new DeviceMethodsNoRetry(c);})},

{"service_c2d",
new Tuple<string, Func<PerfScenarioConfig, PerfScenario>>(
"ServiceClient sending events to devices through IoT Hub.",
Expand Down
2 changes: 1 addition & 1 deletion e2e/stress/IoTClientPerf/Properties/launchSettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"profiles": {
"IoTClientPerf": {
"commandName": "Project",
"commandLineArgs": "-o s:\\tmp\\device.csv -t 60 -n 10 -f device_d2c"
"commandLineArgs": "-o device.csv -t 60 -n 10 -f device_d2c"
}
}
}
5 changes: 5 additions & 0 deletions e2e/stress/IoTClientPerf/Reporting/ResultWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@ namespace Microsoft.Azure.Devices.E2ETests
public abstract class ResultWriter
{
protected string _header;
private const string NullInstance = "(null)";

/// <summary>
/// Initializes the writer and remembers the optional header text.
/// </summary>
/// <param name="header">Header text to store; defaults to <c>null</c> when omitted.</param>
public ResultWriter(string header = null) => _header = header;

/// <summary>
/// Builds a short textual identifier for an object instance.
/// </summary>
/// <param name="value">The instance to describe; may be <c>null</c>.</param>
/// <returns>
/// The runtime type name, a '#' separator and the instance hash code,
/// or the <c>NullInstance</c> placeholder when <paramref name="value"/> is <c>null</c>.
/// </returns>
public static string IdOf(object value)
{
    if (value == null)
    {
        return NullInstance;
    }

    string typeName = value.GetType().Name;
    return typeName + "#" + GetHashCode(value);
}

public Task WriteAsync(TelemetryMetrics m)
{
return WriteLineAsync(m.ToString());
Expand All @@ -22,5 +25,7 @@ public Task WriteAsync(TelemetryMetrics m)
public abstract Task FlushAsync();

protected abstract Task WriteLineAsync(string s);

/// <summary>
/// Null-tolerant hash code lookup.
/// </summary>
/// <param name="value">The instance to hash; may be <c>null</c>.</param>
/// <returns>The instance's own hash code, or 0 when <paramref name="value"/> is <c>null</c>.</returns>
private static int GetHashCode(object value)
{
    if (value == null)
    {
        return 0;
    }

    return value.GetHashCode();
}
}
}
2 changes: 1 addition & 1 deletion e2e/stress/IoTClientPerf/Reporting/ResultWriterFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace Microsoft.Azure.Devices.E2ETests
{
public class ResultWriterFile : ResultWriter
{
private const long MaximumFileSize = (long)2 * 1024 * 1024 * 1024;
private const long MaximumFileSize = (long)1 * 1024 * 1024 * 1024;
private const int FileBufferBytes = 100 * 1024 * 1024;
private StreamWriter _writer;
private SemaphoreSlim _semaphore = new SemaphoreSlim(1);
Expand Down
47 changes: 30 additions & 17 deletions e2e/stress/IoTClientPerf/Reporting/SystemMetrics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Diagnostics;
using System.Net.NetworkInformation;
using System.Text;
Expand All @@ -12,23 +13,23 @@ namespace Microsoft.Azure.Devices.E2ETests
{
public static class SystemMetrics
{
private const int RefreshIntervalMs = 1000;
private static readonly Stopwatch _sw = new Stopwatch();
private static double s_lastTotalCpuUsageMs = 0.0;
private static int s_cpuPercent;
private const int RefreshIntervalMs = 500;
private static readonly Stopwatch s_sw = new Stopwatch();
private static TimeSpan s_lastProcCpuUsageMs = TimeSpan.Zero;
private static int s_cpuLoad;
private static long s_totalMemoryBytes;
private static long s_lastGcBytes;
private static long s_lastTcpConnections;
private static long s_tcpPortFilter;

private static long s_devicesConnected;

private static object s_lock = new object();
private static readonly object s_lock = new object();

public static void GetMetrics(out int cpuPercent, out long memoryBytes, out long gcBytes, out long tcpConn, out long devicesConn)
public static void GetMetrics(out int cpuLoad, out long memoryBytes, out long gcBytes, out long tcpConn, out long devicesConn)
{
EnsureUpToDate();
cpuPercent = s_cpuPercent;
cpuLoad = s_cpuLoad;
memoryBytes = s_totalMemoryBytes;
gcBytes = s_lastGcBytes;
tcpConn = s_lastTcpConnections;
Expand All @@ -52,15 +53,23 @@ public static void TcpFilterPort(int port)

private static void UpdateCpuUsage()
{
var proc = Process.GetCurrentProcess();
double currentTotalCpuUsageMs = proc.TotalProcessorTime.TotalMilliseconds / Environment.ProcessorCount;
double timeDeltaMs = _sw.Elapsed.TotalMilliseconds;
TimeSpan elapsed = s_sw.Elapsed;
Process proc = Process.GetCurrentProcess();

if ((elapsed.Ticks != 0) && (s_lastProcCpuUsageMs != TimeSpan.Zero))
{

double usedTimeDeltaMs = currentTotalCpuUsageMs - s_lastTotalCpuUsageMs;
if (timeDeltaMs > 0.1) s_cpuPercent = (int)(usedTimeDeltaMs * 100 / timeDeltaMs);
if (s_cpuPercent > 100) s_cpuPercent = 100;
TimeSpan currentTotalCpuUsageMs = proc.TotalProcessorTime;
TimeSpan usedTimeDelta = currentTotalCpuUsageMs - s_lastProcCpuUsageMs;

s_lastTotalCpuUsageMs = currentTotalCpuUsageMs;
s_cpuLoad = (int)(((double)usedTimeDelta.Ticks / elapsed.Ticks) * 100);
}
else
{
s_cpuLoad = -1;
}

s_lastProcCpuUsageMs = proc.TotalProcessorTime;
}

private static void UpdateTotalMemoryBytes()
Expand Down Expand Up @@ -91,16 +100,20 @@ private static void UpdateTCPConnections()

private static void EnsureUpToDate()
{
if (!_sw.IsRunning || _sw.ElapsedMilliseconds > RefreshIntervalMs)
if (!s_sw.IsRunning)
{
s_sw.Start();
}
else if (s_sw.ElapsedMilliseconds > RefreshIntervalMs)
{
lock (s_lock)
{
UpdateCpuUsage();
UpdateGCMemoryBytes();
UpdateTCPConnections();
UpdateTotalMemoryBytes();
UpdateCpuUsage();

_sw.Restart();
s_sw.Restart();
}
}
}
Expand Down
15 changes: 12 additions & 3 deletions e2e/stress/IoTClientPerf/Reporting/TelemetryMetrics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ public class TelemetryMetrics
public const string DeviceOperationCreate = "device_create";
public const string DeviceOperationOpen = "device_open";
public const string DeviceOperationClose = "device_close";
public const string DeviceOperationDispose = "device_dispose";
public const string DeviceOperationSend = "device_send";
public const string DeviceOperationReceive = "device_receive";
public const string DeviceOperationMethodEnable = "device_method_enable";
Expand All @@ -30,7 +31,7 @@ public class TelemetryMetrics

private static string s_configString; // Contains all Config* parameters.
public int? Id;
public string OperationType; // e.g. OpenAsync / SendAsync, etc
private string OperationType; // e.g. OpenAsync / SendAsync, etc
public double? ScheduleTime;
public double? ExecuteTime;
public string ErrorMessage;
Expand Down Expand Up @@ -62,6 +63,14 @@ public static string GetHeader()
"ErrorMessage";
}

/// <summary>
/// Resets the per-operation fields so this instance can be reused for another operation.
/// The schedule time, execute time and error message are set to <c>null</c>;
/// no other members are assigned here.
/// </summary>
/// <param name="operationType">The operation type to record on this instance.</param>
public void Clear(string operationType)
{
    // Wipe the results of the previous operation first.
    ErrorMessage = null;
    ScheduleTime = ExecuteTime = null;

    OperationType = operationType;
}

public static void SetStaticConfigParameters(
string runId,
int timeSeconds,
Expand All @@ -87,9 +96,9 @@ public override string ToString()
Add(sb, ScheduleTime);
Add(sb, ExecuteTime);

SystemMetrics.GetMetrics(out int cpuPercent, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn);
SystemMetrics.GetMetrics(out int cpuLoad, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn);

Add(sb, cpuPercent);
Add(sb, cpuLoad);
Add(sb, memoryBytes);
Add(sb, gcBytes);
Add(sb, tcpConn);
Expand Down
40 changes: 34 additions & 6 deletions e2e/stress/IoTClientPerf/Scenarios/DeviceAllNoRetry.cs
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using Microsoft.Azure.Devices.Client.Exceptions;
using System;
using System.Diagnostics;
using System.Threading;
using System.Threading.Tasks;

namespace Microsoft.Azure.Devices.E2ETests
{
public class DeviceAllNoRetry : DeviceClientScenario
{
private const int DelaySecondsAfterFailure = 1;
private readonly SemaphoreSlim _lock = new SemaphoreSlim(1);
private Task _sendTask;
private Task _receiveTask;
private Task _waitForMethodTask;
Expand All @@ -28,14 +33,37 @@ public override async Task SetupAsync(CancellationToken ct)

public override async Task RunTestAsync(CancellationToken ct)
{
SetupTasks(ct);
Task completedTask = await Task.WhenAny(_waitForDisconnectTask, _sendTask, _receiveTask, _waitForMethodTask).ConfigureAwait(false);

if (completedTask == _waitForDisconnectTask)
try
{
DisposeDevice();
await SetupAsync(ct).ConfigureAwait(false);
await _lock.WaitAsync().ConfigureAwait(false);
SetupTasks(ct);

Task completedTask = await Task.WhenAny(_waitForDisconnectTask, _sendTask, _receiveTask, _waitForMethodTask).ConfigureAwait(false);

if (completedTask == _waitForDisconnectTask)
{
await DisposeDevice().ConfigureAwait(false);

try
{
// Drain current operations. Method will not be notified in any way of the disconnect.
await Task.WhenAll(_sendTask, _receiveTask).ConfigureAwait(false);
}
catch (IotHubException) { }
catch (OperationCanceledException) { }

_waitForDisconnectTask = null;
_sendTask = null;
_receiveTask = null;
_waitForMethodTask = null;

await Task.Delay(DelaySecondsAfterFailure * 1000).ConfigureAwait(false);
await SetupAsync(ct).ConfigureAwait(false);
}
}
finally
{
_lock.Release();
}
}

Expand Down
50 changes: 0 additions & 50 deletions e2e/stress/IoTClientPerf/Scenarios/DeviceC2DNoRetry.cs

This file was deleted.

Loading