From fad26513918cb47c8f1dca0134d912042cc56a4f Mon Sep 17 00:00:00 2001 From: Jeffrey Stedfast Date: Wed, 29 Jan 2025 20:28:15 -0500 Subject: [PATCH] Replace MimeReader calls to IndexOf('\n') with new EndOfLine() method This was an attempt at overcoming the performance degradation of using Span in place of raw pointers in MimeReader, specifically for the .NET 4.x frameworks. Unfortunately, this new custom implementation (based on the previous pointer-based optimizations I had written) is actually slightly slower than Span.indexOf('\n'). Need to figure out a better solution... --- MimeKit/MimeReader.cs | 34 ++++++------ MimeKit/Utils/SpanExtensions.cs | 98 +++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 16 deletions(-) create mode 100644 MimeKit/Utils/SpanExtensions.cs diff --git a/MimeKit/MimeReader.cs b/MimeKit/MimeReader.cs index 470eab0480..1b7611b3b2 100644 --- a/MimeKit/MimeReader.cs +++ b/MimeKit/MimeReader.cs @@ -1209,14 +1209,14 @@ static bool IsMboxMarker (byte[] buffer, int startIndex = 0, bool allowMunged = bool StepMboxMarkerStart (ref bool midline) { - var span = input.AsSpan (); + var span = input.AsSpan (0, inputEnd + 1); int index = inputIndex; input[inputEnd] = (byte) '\n'; if (midline) { // we're in the middle of a line, so we need to scan for the end of the line - index = span.Slice (index).IndexOf ((byte) '\n') + index; + index = span.Slice (index).EndOfLine () + index; if (index == inputEnd) { // we don't have enough input data @@ -1238,7 +1238,7 @@ bool StepMboxMarkerStart (ref bool midline) } // scan for the end of the line - index = span.Slice (index).IndexOf ((byte) '\n') + index; + index = span.Slice (index).EndOfLine () + index; if (index == inputEnd) { // we don't have enough data to check for a From line @@ -1261,7 +1261,7 @@ bool StepMboxMarker (out int count) input[inputEnd] = (byte) '\n'; // scan for the end of the line - count = input.AsSpan (inputIndex).IndexOf ((byte) '\n'); + count = input.AsSpan (inputIndex, 
(inputEnd + 1) - inputIndex).EndOfLine (); int index = inputIndex + count; @@ -1435,13 +1435,14 @@ void StepHeaderField (int headerFieldLength) bool StepHeaderValue (ref bool midline) { + var span = input.AsSpan (0, inputEnd + 1); int index = inputIndex; int nread; input[inputEnd] = (byte) '\n'; while (index < inputEnd && (midline || IsBlank (input[index]))) { - int count = input.AsSpan (index).IndexOf ((byte) '\n'); + int count = span.Slice (index).EndOfLine (); index += count; @@ -1499,8 +1500,8 @@ bool TryCheckBoundaryWithinHeaderBlock () { input[inputEnd] = (byte) '\n'; - var span = input.AsSpan (inputIndex); - int length = span.IndexOf ((byte) '\n'); + var span = input.AsSpan (inputIndex, (inputEnd + 1) - inputIndex); + int length = span.EndOfLine (); if (inputIndex + length == inputEnd) return false; @@ -1710,7 +1711,7 @@ bool InnerSkipLine (bool consumeNewLine) { input[inputEnd] = (byte) '\n'; - int index = input.AsSpan (inputIndex).IndexOf ((byte) '\n') + inputIndex; + int index = input.AsSpan (inputIndex, (inputEnd + 1) - inputIndex).EndOfLine () + inputIndex; if (index < inputEnd) { inputIndex = index; @@ -1858,8 +1859,8 @@ BoundaryType CheckBoundary () { input[inputEnd] = (byte) '\n'; - var span = input.AsSpan (inputIndex); - int length = span.IndexOf ((byte) '\n'); + var span = input.AsSpan (inputIndex, (inputEnd + 1) - inputIndex); + int length = span.EndOfLine (); var line = span.Slice (0, length); return CheckBoundary (inputIndex, line); @@ -1871,8 +1872,8 @@ bool FoundImmediateBoundary (bool final) input[inputEnd] = (byte) '\n'; - var span = input.AsSpan (inputIndex); - int length = span.IndexOf ((byte) '\n'); + var span = input.AsSpan (inputIndex, (inputEnd + 1) - inputIndex); + int length = span.EndOfLine (); var line = span.Slice (0, length); return IsBoundary (line, bounds[0].Marker, boundaryLength); @@ -1907,6 +1908,7 @@ static bool IsMessagePart (ContentType contentType, ContentEncoding? 
encoding) void ScanContent (ref int nleft, ref bool midline, ref bool[] formats) { + var span = input.AsSpan (0, inputEnd + 1); int length = inputEnd - inputIndex; int startIndex = inputIndex; int index = inputIndex; @@ -1917,13 +1919,13 @@ void ScanContent (ref int nleft, ref bool midline, ref bool[] formats) input[inputEnd] = (byte) '\n'; while (index < inputEnd) { - var span = input.AsSpan (index); + var slice = span.Slice (index); - length = span.IndexOf ((byte) '\n'); + length = slice.EndOfLine (); index += length; if (index < inputEnd) { - var line = span.Slice (0, length); + var line = slice.Slice (0, length); if ((boundary = CheckBoundary (startIndex, line)) != BoundaryType.None) break; @@ -1946,7 +1948,7 @@ void ScanContent (ref int nleft, ref bool midline, ref bool[] formats) break; } - var line = span.Slice (0, length); + var line = slice.Slice (0, length); if ((boundary = CheckBoundary (startIndex, line)) != BoundaryType.None) break; diff --git a/MimeKit/Utils/SpanExtensions.cs b/MimeKit/Utils/SpanExtensions.cs new file mode 100644 index 0000000000..c3b6f85c59 --- /dev/null +++ b/MimeKit/Utils/SpanExtensions.cs @@ -0,0 +1,98 @@ +// +// SpanExtensions.cs +// +// Author: Jeffrey Stedfast +// +// Copyright (c) 2013-2025 .NET Foundation and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace MimeKit.Utils {
+	static class SpanExtensions
+	{
+#if NET8_0_OR_GREATER
+		/// <summary>
+		/// Finds the index of the first line-feed character in the span.
+		/// </summary>
+		/// <param name="span">The bytes to scan.</param>
+		/// <returns>The index of the first <c>'\n'</c>, or <c>-1</c> if there is none.</returns>
+		[MethodImpl (MethodImplOptions.AggressiveInlining)]
+		public static int EndOfLine (this Span<byte> span)
+		{
+			// Note: Span<byte>.IndexOf(byte) is insanely fast in .NET >= 8.0, so use it.
+			return span.IndexOf ((byte) '\n');
+		}
+#else
+		/// <summary>
+		/// Finds the index of the first line-feed character in the span.
+		/// </summary>
+		/// <remarks>
+		/// Scans one aligned 32-bit word at a time, but never dereferences memory outside of
+		/// <paramref name="span"/>, so a terminating sentinel is not required.
+		/// </remarks>
+		/// <param name="span">The bytes to scan.</param>
+		/// <returns>The index of the first <c>'\n'</c>, or <c>-1</c> if there is none.</returns>
+		[MethodImpl (MethodImplOptions.AggressiveInlining)]
+		public static unsafe int EndOfLine (this Span<byte> span)
+		{
+			if (span.IsEmpty)
+				return -1;
+
+			fixed (byte* inbuf = &MemoryMarshal.GetReference (span)) {
+				byte* inend = inbuf + span.Length;
+				byte* inptr = inbuf;
+
+				// scan byte-by-byte until we are 4-byte aligned (or run out of input).
+				while (inptr < inend && (((long) inptr) & 0x03) != 0) {
+					if (*inptr == (byte) '\n')
+						return (int) (inptr - inbuf);
+
+					inptr++;
+				}
+
+				// scan a word at a time: XOR turns every '\n' byte into 0x00 and the
+				// subtract-and-mask expression is non-zero iff the word has a 0x00 byte.
+				while (inend - inptr >= sizeof (uint)) {
+					uint mask = *((uint*) inptr) ^ 0x0A0A0A0Au;
+
+					if (unchecked ((mask - 0x01010101u) & ~mask & 0x80808080u) != 0)
+						break;
+
+					inptr += sizeof (uint);
+				}
+
+				// locate the exact byte within the matching word and/or scan the unaligned tail.
+				while (inptr < inend) {
+					if (*inptr == (byte) '\n')
+						return (int) (inptr - inbuf);
+
+					inptr++;
+				}
+
+				return -1;
+			}
+		}
+#endif
+	}
+}