diff --git a/NetFabric.ForEachEx.Benchmarks/ForEachBenchmarks.cs b/NetFabric.ForEachEx.Benchmarks/ForEachBenchmarks.cs index 3dc62a0..df0630c 100644 --- a/NetFabric.ForEachEx.Benchmarks/ForEachBenchmarks.cs +++ b/NetFabric.ForEachEx.Benchmarks/ForEachBenchmarks.cs @@ -11,13 +11,13 @@ public class ForEachBenchmarks List? list; int[]? array; - [Params(10, 1_000)] + [Params(10, 10_000)] public int Count { get; set; } [GlobalSetup] public void GlobalSetup() { - enumerable = Utils.GetEnumerable(Count); + enumerable = Utils.GetEnumerable(Count, 100); list = enumerable.ToList(); array = enumerable.ToArray(); } diff --git a/NetFabric.ForEachEx.Benchmarks/ForEachExEnumerableBenchmarks.cs b/NetFabric.ForEachEx.Benchmarks/ForEachExEnumerableBenchmarks.cs index 7e18a1f..81526cc 100644 --- a/NetFabric.ForEachEx.Benchmarks/ForEachExEnumerableBenchmarks.cs +++ b/NetFabric.ForEachEx.Benchmarks/ForEachExEnumerableBenchmarks.cs @@ -7,13 +7,13 @@ public class ForEachExEnumerableBenchmarks { int[]? array; - [Params(1_000)] + [Params(10_000)] public int Count { get; set; } [GlobalSetup] public void GlobalSetup() { - var enumerable = Utils.GetEnumerable(Count); + var enumerable = Utils.GetEnumerable(Count, 100); array = enumerable.ToArray(); } diff --git a/NetFabric.ForEachEx.Benchmarks/ForEachVectorExBenchmarks.cs b/NetFabric.ForEachEx.Benchmarks/ForEachVectorExBenchmarks.cs index 3c2115c..5a3df7b 100644 --- a/NetFabric.ForEachEx.Benchmarks/ForEachVectorExBenchmarks.cs +++ b/NetFabric.ForEachEx.Benchmarks/ForEachVectorExBenchmarks.cs @@ -5,13 +5,13 @@ public class ForEachVectorExBenchmarks { int[]? 
array; - [Params(10, 1_000)] + [Params(10, 10_000)] public int Count { get; set; } [GlobalSetup] public void GlobalSetup() { - var enumerable = Utils.GetEnumerable(Count); + var enumerable = Utils.GetEnumerable(Count, 100); array = enumerable.ToArray(); } diff --git a/NetFabric.ForEachEx.Benchmarks/NetFabric.ForEachEx.Benchmarks.csproj b/NetFabric.ForEachEx.Benchmarks/NetFabric.ForEachEx.Benchmarks.csproj index 32ec1d0..e2b6544 100644 --- a/NetFabric.ForEachEx.Benchmarks/NetFabric.ForEachEx.Benchmarks.csproj +++ b/NetFabric.ForEachEx.Benchmarks/NetFabric.ForEachEx.Benchmarks.csproj @@ -2,7 +2,7 @@ Exe - net8.0 + net7.0 enable enable true diff --git a/NetFabric.ForEachEx.Benchmarks/Program.cs b/NetFabric.ForEachEx.Benchmarks/Program.cs index 7898805..e92d559 100644 --- a/NetFabric.ForEachEx.Benchmarks/Program.cs +++ b/NetFabric.ForEachEx.Benchmarks/Program.cs @@ -1,13 +1,49 @@ -using BenchmarkDotNet.Columns; +using System.Runtime.Intrinsics; +using BenchmarkDotNet.Columns; using BenchmarkDotNet.Configs; using BenchmarkDotNet.Diagnosers; +using BenchmarkDotNet.Environments; using BenchmarkDotNet.Exporters; +using BenchmarkDotNet.Jobs; using BenchmarkDotNet.Reports; using BenchmarkDotNet.Running; +using Perfolizer.Horology; + +var net70 = Job.Default + .WithRuntime(CoreRuntime.Core70) + .WithWarmupCount(1) + .WithIterationTime(TimeInterval.FromSeconds(0.25)) + .WithMaxIterationCount(20); + +var net80 = Job.Default + .WithRuntime(CoreRuntime.Core80) + .WithWarmupCount(1) + .WithIterationTime(TimeInterval.FromSeconds(0.25)) + .WithMaxIterationCount(20); var config = DefaultConfig.Instance .WithSummaryStyle(SummaryStyle.Default.WithRatioStyle(RatioStyle.Trend)) + .HideColumns(Column.Runtime, Column.EnvironmentVariables, Column.RatioSD, Column.Error) .AddDiagnoser(MemoryDiagnoser.Default) - .AddExporter(MarkdownExporter.GitHub); + // .AddDiagnoser(new DisassemblyDiagnoser(new DisassemblyDiagnoserConfig + // (exportGithubMarkdown: true, printInstructionAddresses: false))) 
+ .AddJob(net70.WithEnvironmentVariable("DOTNET_EnableHWIntrinsic", "0").WithId(".NET 7 Scalar").AsBaseline()) + .AddJob(net80.WithEnvironmentVariable("DOTNET_EnableHWIntrinsic", "0").WithId(".NET 8 Scalar")); + +if (Vector256.IsHardwareAccelerated) +{ + config = config + .AddJob(net70.WithId(".NET 7 Vector256")) + .AddJob(net80.WithId(".NET 8 Vector256")) + .AddJob(net70.WithEnvironmentVariable("DOTNET_EnableAVX2", "0").WithId(".NET 7 Vector128")) + .AddJob(net80.WithEnvironmentVariable("DOTNET_EnableAVX2", "0").WithId(".NET 8 Vector128")); + +} +else if (Vector128.IsHardwareAccelerated) +{ + config = config + .AddJob(net70.WithId(".NET 7 Vector128")) + .AddJob(net80.WithId(".NET 8 Vector128")); +} BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args, config); \ No newline at end of file diff --git a/NetFabric.ForEachEx.Benchmarks/UnrollBenchmarks.cs b/NetFabric.ForEachEx.Benchmarks/UnrollBenchmarks.cs new file mode 100644 index 0000000..1c3c914 --- /dev/null +++ b/NetFabric.ForEachEx.Benchmarks/UnrollBenchmarks.cs @@ -0,0 +1,63 @@ +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using BenchmarkDotNet.Attributes; + +public class UnrollBenchmarks +{ + int[]? array; + + [Params(10, 1_000_000)] + public int Count { get; set; } + + [GlobalSetup] + public void GlobalSetup() + { + var enumerable = Utils.GetEnumerable(Count, 100); + array = enumerable.ToArray(); + } + + [Benchmark(Baseline = true)] + public int Baseline() + { + var sum = 0; + foreach(var item in array!) 
+ sum += item; + return sum; + } + + [Benchmark] + public int Unrolled() + { + var source = array.AsSpan(); + ref var sourceRef = ref MemoryMarshal.GetReference(source); + + var sum = 0; +#if NET7_0_OR_GREATER + var index = nint.Zero; +#else + var index = (nint)0; +#endif + var end = source.Length - (source.Length % 4); + while (index < end) + { + sum += Unsafe.Add(ref sourceRef, index); + sum += Unsafe.Add(ref sourceRef, index + 1); + sum += Unsafe.Add(ref sourceRef, index + 2); + sum += Unsafe.Add(ref sourceRef, index + 3); + + index += 4; + } + + // handle remaining elements + while (index < source.Length) + { + sum += Unsafe.Add(ref sourceRef, index); + + index++; + } + + return sum; + } +} + + diff --git a/NetFabric.ForEachEx.Benchmarks/Utils.cs b/NetFabric.ForEachEx.Benchmarks/Utils.cs index 209f579..451452d 100644 --- a/NetFabric.ForEachEx.Benchmarks/Utils.cs +++ b/NetFabric.ForEachEx.Benchmarks/Utils.cs @@ -1,9 +1,9 @@ static class Utils { - public static IEnumerable GetEnumerable(int count) + public static IEnumerable GetEnumerable(int count, int maxValue) { var random = new Random(42); for (var item = 0; item < count; item++) - yield return random.Next(count); + yield return random.Next(maxValue); } } diff --git a/NetFabric.ForEachEx/ForEachEx.ValueAction.cs b/NetFabric.ForEachEx/ForEachEx.ValueAction.cs index 3917b25..c7a1f1f 100644 --- a/NetFabric.ForEachEx/ForEachEx.ValueAction.cs +++ b/NetFabric.ForEachEx/ForEachEx.ValueAction.cs @@ -17,6 +17,7 @@ public static partial class Extensions /// This method enables custom actions to be applied to each element in a collection efficiently /// by using a value-based action implementation, minimizing overhead. 
/// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void ForEachEx(this IEnumerable source, ref TAction action) where TAction : struct, IAction { @@ -98,7 +99,31 @@ public static void ForEachEx(this Span source, ref TAction action public static void ForEachEx(this ReadOnlySpan source, ref TAction action) where TAction : struct, IAction { - foreach (ref readonly var item in source) - action.Invoke(in item); +#if NET7_0_OR_GREATER + var index = nint.Zero; +#else + var index = (nint)0; +#endif + + // use a reference to elide bound checks + ref var sourceRef = ref MemoryMarshal.GetReference(source); + + // unroll iteration for improved performance + var end = source.Length - (source.Length % 4); + while (index < end) + { + action.Invoke(in Unsafe.Add(ref sourceRef, index)); + action.Invoke(in Unsafe.Add(ref sourceRef, index + 1)); + action.Invoke(in Unsafe.Add(ref sourceRef, index + 2)); + action.Invoke(in Unsafe.Add(ref sourceRef, index + 3)); + index += 4; + } + + // handle remaining elements + while (index < source.Length) + { + action.Invoke(in Unsafe.Add(ref sourceRef, index)); + index++; + } } } diff --git a/NetFabric.ForEachEx/ForEachEx.ValueVectorAction.cs b/NetFabric.ForEachEx/ForEachEx.ValueVectorAction.cs index 120a76d..623be4e 100644 --- a/NetFabric.ForEachEx/ForEachEx.ValueVectorAction.cs +++ b/NetFabric.ForEachEx/ForEachEx.ValueVectorAction.cs @@ -18,6 +18,7 @@ public static partial class Extensions /// This method streamlines the process of iterating through a collection and applying a custom action to each element /// efficiently by leveraging vectorization (SIMD) for enhanced performance on supported types and compatible hardware. 
/// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void ForEachVectorEx(this IEnumerable source, ref TAction action) where T : struct where TAction : struct, IVectorAction @@ -104,6 +105,12 @@ public static void ForEachVectorEx(this ReadOnlySpan source, ref where T : struct where TAction : struct, IVectorAction { +#if NET7_0_OR_GREATER + var index = nint.Zero; +#else + var index = (nint)0; +#endif + // Check if hardware acceleration is available and supported data types for SIMD operations. if (Vector.IsHardwareAccelerated && #if NET7_0_OR_GREATER @@ -119,18 +126,31 @@ public static void ForEachVectorEx(this ReadOnlySpan source, ref action.Invoke(in vector); // Calculate the remaining elements after processing vectors. - var remaining = source.Length % Vector.Count; - - // Reduce the source span to the remaining elements for further processing. - source = source[^remaining..]; + index = source.Length - (source.Length % Vector.Count); } // Iterate through the remaining elements (or all elements if not using SIMD operations) // and invoke the action on each individual element. - foreach (ref readonly var item in source) + + // use a reference to elide bound checks + ref var sourceRef = ref MemoryMarshal.GetReference(source); + + // unroll iteration for improved performance + var end = source.Length - (source.Length % 4); + while (index < end) { - action.Invoke(in item); + action.Invoke(in Unsafe.Add(ref sourceRef, index)); + action.Invoke(in Unsafe.Add(ref sourceRef, index + 1)); + action.Invoke(in Unsafe.Add(ref sourceRef, index + 2)); + action.Invoke(in Unsafe.Add(ref sourceRef, index + 3)); + index += 4; } - } + // handle remaining elements + while (index < source.Length) + { + action.Invoke(in Unsafe.Add(ref sourceRef, index)); + index++; + } + } }