Skip to content

Commit

Permalink
Fixed MimeReader/ExperimentalMimeParser to handle really long mbox markers
Browse files Browse the repository at this point in the history

Working on making MimeReader/ExperimentalMimeParser more robust.
  • Loading branch information
jstedfast committed Dec 20, 2024
1 parent b83f08e commit ac8b968
Show file tree
Hide file tree
Showing 5 changed files with 170 additions and 37 deletions.
21 changes: 12 additions & 9 deletions MimeKit/AsyncMimeReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,8 @@ async Task<bool> StepByteOrderMarkAsync (CancellationToken cancellationToken)

async Task StepMboxMarkerAsync (CancellationToken cancellationToken)
{
int mboxMarkerIndex, mboxMarkerLength;
bool midline = false;
bool complete;
int left = 0;

// consume data until we find a line that begins with "From "
do {
Expand All @@ -98,26 +96,31 @@ async Task StepMboxMarkerAsync (CancellationToken cancellationToken)
} while (!complete);

var mboxMarkerOffset = GetOffset (inputIndex);
var mboxMarkerLineNumber = lineNumber;

// FIXME: if the mbox marker is > the size of the input buffer, parsing will fail
do {
var available = await ReadAheadAsync (Math.Max (ReadAheadSize, left + 1), 0, cancellationToken).ConfigureAwait (false);
OnMboxMarkerBegin (mboxMarkerOffset, lineNumber, cancellationToken);

if (available <= left) {
do {
if (await ReadAheadAsync (ReadAheadSize, 0, cancellationToken).ConfigureAwait (false) < 1) {
// failed to find the end of the mbox marker; EOF reached
state = MimeParserState.Error;
inputIndex = inputEnd;
return;
}

int startIndex = inputIndex;
int count;

unsafe {
fixed (byte* inbuf = input) {
complete = StepMboxMarker (inbuf, ref left, out mboxMarkerIndex, out mboxMarkerLength);
complete = StepMboxMarker (inbuf, out count);
}
}

// TODO: Remove beginOffset and lineNumber arguments from OnMboxMarkerReadAsync() in v5.0
await OnMboxMarkerReadAsync (input, startIndex, count, mboxMarkerOffset, mboxMarkerLineNumber, cancellationToken).ConfigureAwait (false);
} while (!complete);

await OnMboxMarkerReadAsync (input, mboxMarkerIndex, mboxMarkerLength, mboxMarkerOffset, lineNumber - 1, cancellationToken).ConfigureAwait (false);
OnMboxMarkerEnd (mboxMarkerOffset, mboxMarkerLineNumber, GetOffset (inputIndex), cancellationToken);

state = MimeParserState.MessageHeaders;
}
Expand Down
27 changes: 22 additions & 5 deletions MimeKit/ExperimentalMimeParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,22 @@ void PopEntity ()

#region Mbox Events

/// <summary>
/// Called when the beginning of an Mbox marker is encountered in the stream.
/// </summary>
/// <remarks>
/// <para>Records where the new marker starts and discards any previously accumulated marker bytes by resetting the accumulated length to zero.</para>
/// <para>This is only invoked when the stream is being parsed in <see cref="MimeFormat.Mbox"/> format.</para>
/// </remarks>
/// <param name="beginOffset">The offset into the stream where the mbox marker begins.</param>
/// <param name="lineNumber">The line number where the mbox marker exists within the stream.</param>
/// <param name="cancellationToken">The cancellation token.</param>
protected override void OnMboxMarkerBegin (long beginOffset, int lineNumber, CancellationToken cancellationToken)
{
	// Start from an empty marker; the backing buffer itself is kept for reuse.
	mboxMarkerLength = 0;
	mboxMarkerOffset = beginOffset;
}

/// <summary>
/// Called when an Mbox marker is encountered in the stream.
/// </summary>
Expand All @@ -281,12 +297,13 @@ void PopEntity ()
/// <param name="cancellationToken">The cancellation token.</param>
protected override void OnMboxMarkerRead (byte[] buffer, int startIndex, int count, long beginOffset, int lineNumber, CancellationToken cancellationToken)
{
	// The reader reports a marker in one or more chunks (one call per read of the
	// input buffer), so every chunk must be appended after the bytes that have
	// already been accumulated rather than overwriting them from index 0.
	// The marker offset and the accumulated length are reset by OnMboxMarkerBegin.
	int needed = mboxMarkerLength + count;

	// Grow the backing buffer only when the accumulated marker no longer fits.
	if (mboxMarkerBuffer.Length < needed)
		Array.Resize (ref mboxMarkerBuffer, needed);

	Buffer.BlockCopy (buffer, startIndex, mboxMarkerBuffer, mboxMarkerLength, count);
	mboxMarkerLength += count;
}

#endregion Mbox Events
Expand Down
73 changes: 50 additions & 23 deletions MimeKit/MimeReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,20 @@ public virtual void SetStream (Stream stream, MimeFormat format = MimeFormat.Def

#region Mbox Events

/// <summary>
/// Called when the beginning of an Mbox marker is encountered in the stream.
/// </summary>
/// <remarks>
/// <para>When the stream is specified to be in <see cref="MimeFormat.Mbox"/> format, this method will be called whenever the parser encounters the beginning of an Mbox marker, before any of the marker's content is reported via <c>OnMboxMarkerRead</c>.</para>
/// <para>The default implementation does nothing.</para>
/// <para>It is not necessary to override this method unless it is desirable to track the offsets of mbox markers within a stream or to extract the mbox marker itself.</para>
/// </remarks>
/// <param name="beginOffset">The offset into the stream where the mbox marker begins.</param>
/// <param name="lineNumber">The line number where the mbox marker exists within the stream.</param>
/// <param name="cancellationToken">The cancellation token.</param>
protected virtual void OnMboxMarkerBegin (long beginOffset, int lineNumber, CancellationToken cancellationToken)
{
}

/// <summary>
/// Called when an Mbox marker is encountered in the stream.
/// </summary>
Expand Down Expand Up @@ -257,6 +271,21 @@ protected virtual Task OnMboxMarkerReadAsync (byte[] buffer, int startIndex, int
return Task.CompletedTask;
}

/// <summary>
/// Called when the end of an Mbox marker is encountered in the stream.
/// </summary>
/// <remarks>
/// <para>When the stream is specified to be in <see cref="MimeFormat.Mbox"/> format, this method will be called whenever the parser encounters the end of an Mbox marker, after the marker's content has been reported via <c>OnMboxMarkerRead</c>.</para>
/// <para>The default implementation does nothing.</para>
/// <para>It is not necessary to override this method unless it is desirable to track the offsets of mbox markers within a stream or to extract the mbox marker itself.</para>
/// </remarks>
/// <param name="beginOffset">The offset into the stream where the mbox marker begins.</param>
/// <param name="lineNumber">The line number where the mbox marker exists within the stream.</param>
/// <param name="endOffset">The offset into the stream where the mbox marker ends.</param>
/// <param name="cancellationToken">The cancellation token.</param>
protected virtual void OnMboxMarkerEnd (long beginOffset, int lineNumber, long endOffset, CancellationToken cancellationToken)
{
}

#endregion Mbox Events

#region Header Events
Expand Down Expand Up @@ -1255,11 +1284,10 @@ unsafe bool StepMboxMarkerStart (byte* inbuf, ref bool midline)
return false;
}

unsafe bool StepMboxMarker (byte* inbuf, ref int left, out int mboxMarkerIndex, out int mboxMarkerLength)
unsafe bool StepMboxMarker (byte* inbuf, out int count)
{
byte* inptr = inbuf + inputIndex;
byte* inend = inbuf + inputEnd;
int startIndex = inputIndex;
byte* start = inptr;

*inend = (byte) '\n';
Expand All @@ -1268,19 +1296,18 @@ unsafe bool StepMboxMarker (byte* inbuf, ref int left, out int mboxMarkerIndex,
while (*inptr != (byte) '\n')
inptr++;

count = (int) (inptr - start);

// make sure not to consume the '\r' if it exists
if (inptr > start && *(inptr - 1) == (byte) '\r')
count--;

if (inptr == inend) {
// we don't have enough input data
left = (int) (inptr - start);
mboxMarkerLength = 0;
mboxMarkerIndex = 0;
// we've only consumed a partial mbox marker
inputIndex += count;
return false;
}

var markerLength = (int) (inptr - start);

if (inptr > start && *(inptr - 1) == (byte) '\r')
markerLength--;

// consume the '\n'
inptr++;

Expand All @@ -1289,18 +1316,13 @@ unsafe bool StepMboxMarker (byte* inbuf, ref int left, out int mboxMarkerIndex,
inputIndex += lineLength;
IncrementLineNumber (inputIndex);

mboxMarkerLength = markerLength;
mboxMarkerIndex = startIndex;

return true;
}

unsafe void StepMboxMarker (byte* inbuf, CancellationToken cancellationToken)
{
int mboxMarkerIndex, mboxMarkerLength;
bool midline = false;
bool complete;
int left = 0;

// consume data until we find a line that begins with "From "
do {
Expand All @@ -1317,22 +1339,27 @@ unsafe void StepMboxMarker (byte* inbuf, CancellationToken cancellationToken)
} while (!complete);

var mboxMarkerOffset = GetOffset (inputIndex);
var mboxMarkerLineNumber = lineNumber;

// FIXME: if the mbox marker is > the size of the input buffer, parsing will fail
do {
var available = ReadAhead (Math.Max (ReadAheadSize, left + 1), 0, cancellationToken);
OnMboxMarkerBegin (mboxMarkerOffset, mboxMarkerLineNumber, cancellationToken);

if (available <= left) {
do {
if (ReadAhead (ReadAheadSize, 0, cancellationToken) < 1) {
// failed to find the end of the mbox marker; EOF reached
state = MimeParserState.Error;
inputIndex = inputEnd;
return;
}

complete = StepMboxMarker (inbuf, ref left, out mboxMarkerIndex, out mboxMarkerLength);
int startIndex = inputIndex;
int count;

complete = StepMboxMarker (inbuf, out count);

// TODO: Remove beginOffset and lineNumber arguments from OnMboxMarkerRead() in v5.0
OnMboxMarkerRead (input, startIndex, count, mboxMarkerOffset, mboxMarkerLineNumber, cancellationToken);
} while (!complete);

OnMboxMarkerRead (input, mboxMarkerIndex, mboxMarkerLength, mboxMarkerOffset, lineNumber - 1, cancellationToken);
OnMboxMarkerEnd (mboxMarkerOffset, mboxMarkerLineNumber, GetOffset (inputIndex), cancellationToken);

state = MimeParserState.MessageHeaders;
}
Expand Down
40 changes: 40 additions & 0 deletions UnitTests/ExperimentalMimeParserTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,46 @@ public async Task TestDoubleMboxMarkerAsync ()
}
}

[Test]
public void TestReallyLongMboxMarker ()
{
	// An mbox "From " line of 4097 characters, followed by a small message with three headers.
	var expectedMarker = "From " + new string ('X', 4092);
	var markerBytes = Encoding.ASCII.GetBytes (expectedMarker);
	var messageBytes = Encoding.ASCII.GetBytes ("\r\nFrom: [email protected]\r\nTo: [email protected]\r\nSubject: test message\r\n\r\nBody text\r\n");

	using (var mbox = new MemoryStream ()) {
		mbox.Write (markerBytes, 0, markerBytes.Length);
		mbox.Write (messageBytes, 0, messageBytes.Length);
		mbox.Position = 0;

		var parser = new ExperimentalMimeParser (mbox, MimeFormat.Mbox);
		var parsed = parser.ParseMessage ();

		// The whole marker must be reported and the message after it must parse normally.
		Assert.That (parsed.Headers.Count, Is.EqualTo (3));
		Assert.That (parser.MboxMarker, Is.EqualTo (expectedMarker));
	}
}

[Test]
public async Task TestReallyLongMboxMarkerAsync ()
{
	// An mbox "From " line of 4097 characters, followed by a small message with three headers.
	var expectedMarker = "From " + new string ('X', 4092);
	var markerBytes = Encoding.ASCII.GetBytes (expectedMarker);
	var messageBytes = Encoding.ASCII.GetBytes ("\r\nFrom: [email protected]\r\nTo: [email protected]\r\nSubject: test message\r\n\r\nBody text\r\n");

	using (var mbox = new MemoryStream ()) {
		mbox.Write (markerBytes, 0, markerBytes.Length);
		mbox.Write (messageBytes, 0, messageBytes.Length);
		mbox.Position = 0;

		var parser = new ExperimentalMimeParser (mbox, MimeFormat.Mbox);
		var parsed = await parser.ParseMessageAsync ();

		// The whole marker must be reported and the message after it must parse normally.
		Assert.That (parsed.Headers.Count, Is.EqualTo (3));
		Assert.That (parser.MboxMarker, Is.EqualTo (expectedMarker));
	}
}

[Test]
public void TestEmptyMessage ()
{
Expand Down
46 changes: 46 additions & 0 deletions UnitTests/MimeParserTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,52 @@ public async Task TestDoubleMboxMarkerAsync ()
}
}

[Test]
public void TestReallyLongMboxMarker ()
{
	// An mbox "From " line of 4097 characters, followed by a small message with three headers.
	var expectedMarker = "From " + new string ('X', 4092);
	var markerBytes = Encoding.ASCII.GetBytes (expectedMarker);
	var messageBytes = Encoding.ASCII.GetBytes ("\r\nFrom: [email protected]\r\nTo: [email protected]\r\nSubject: test message\r\n\r\nBody text\r\n");

	using (var mbox = new MemoryStream ()) {
		mbox.Write (markerBytes, 0, markerBytes.Length);
		mbox.Write (messageBytes, 0, messageBytes.Length);
		mbox.Position = 0;

		// FIXME: Fix MimeParser to handle this as well as ExperimentalMimeParser?
		var parser = new MimeParser (mbox, MimeFormat.Mbox);
		//var message = parser.ParseMessage ();

		//Assert.That (message.Headers.Count, Is.EqualTo (3));
		//Assert.That (parser.MboxMarker, Is.EqualTo (marker));

		// Until then, this test expects a FormatException for such a marker.
		Assert.Throws<FormatException> (() => parser.ParseMessage ());
	}
}

[Test]
public void TestReallyLongMboxMarkerAsync ()
{
	// An mbox "From " line of 4097 characters, followed by a small message with three headers.
	var expectedMarker = "From " + new string ('X', 4092);
	var markerBytes = Encoding.ASCII.GetBytes (expectedMarker);
	var messageBytes = Encoding.ASCII.GetBytes ("\r\nFrom: [email protected]\r\nTo: [email protected]\r\nSubject: test message\r\n\r\nBody text\r\n");

	using (var mbox = new MemoryStream ()) {
		mbox.Write (markerBytes, 0, markerBytes.Length);
		mbox.Write (messageBytes, 0, messageBytes.Length);
		mbox.Position = 0;

		// FIXME: Fix MimeParser to handle this as well as ExperimentalMimeParser?
		var parser = new MimeParser (mbox, MimeFormat.Mbox);
		//var message = await parser.ParseMessageAsync ();

		//Assert.That (message.Headers.Count, Is.EqualTo (3));
		//Assert.That (parser.MboxMarker, Is.EqualTo (marker));

		// Until then, this test expects a FormatException for such a marker.
		Assert.ThrowsAsync<FormatException> (async () => await parser.ParseMessageAsync ());
	}
}

[Test]
public void TestEmptyMessage ()
{
Expand Down

0 comments on commit ac8b968

Please sign in to comment.