diff --git a/mzLib/MassSpectrometry/MsDataScan.cs b/mzLib/MassSpectrometry/MsDataScan.cs index 6e54e05e8..71cc16502 100644 --- a/mzLib/MassSpectrometry/MsDataScan.cs +++ b/mzLib/MassSpectrometry/MsDataScan.cs @@ -27,9 +27,29 @@ namespace MassSpectrometry { public class MsDataScan { - public MsDataScan(MzSpectrum massSpectrum, int oneBasedScanNumber, int msnOrder, bool isCentroid, Polarity polarity, double retentionTime, MzRange scanWindowRange, string scanFilter, MZAnalyzerType mzAnalyzer, - double totalIonCurrent, double? injectionTime, double[,] noiseData, string nativeId, double? selectedIonMz = null, int? selectedIonChargeStateGuess = null, double? selectedIonIntensity = null, double? isolationMZ = null, - double? isolationWidth = null, DissociationType? dissociationType = null, int? oneBasedPrecursorScanNumber = null, double? selectedIonMonoisotopicGuessMz = null, string hcdEnergy = null, string scanDescription = null) + public MsDataScan(MzSpectrum massSpectrum, + int oneBasedScanNumber, + int msnOrder, + bool isCentroid, + Polarity polarity, + double retentionTime, + MzRange scanWindowRange, + string scanFilter, + MZAnalyzerType mzAnalyzer, + double totalIonCurrent, + double? injectionTime, + double[,] noiseData, + string nativeId, + double? selectedIonMz = null, + int? selectedIonChargeStateGuess = null, + double? selectedIonIntensity = null, + double? isolationMZ = null, + double? isolationWidth = null, + DissociationType? dissociationType = null, + int? oneBasedPrecursorScanNumber = null, + double? 
selectedIonMonoisotopicGuessMz = null, + string hcdEnergy = null, + string scanDescription = null) { OneBasedScanNumber = oneBasedScanNumber; MsnOrder = msnOrder; @@ -61,7 +81,7 @@ public MsDataScan(MzSpectrum massSpectrum, int oneBasedScanNumber, int msnOrder, /// public MzSpectrum MassSpectrum { get; protected set; } - public int OneBasedScanNumber { get; private set; } + public int OneBasedScanNumber { get; protected set; } public int MsnOrder { get; } public double RetentionTime { get; } public Polarity Polarity { get; } @@ -70,7 +90,7 @@ public MsDataScan(MzSpectrum massSpectrum, int oneBasedScanNumber, int msnOrder, public string ScanFilter { get; } public string NativeId { get; private set; } public bool IsCentroid { get; } - public double TotalIonCurrent { get; } + public double TotalIonCurrent { get; protected set; } public double? InjectionTime { get; } public double[,] NoiseData { get; } @@ -82,7 +102,7 @@ public MsDataScan(MzSpectrum massSpectrum, int oneBasedScanNumber, int msnOrder, public double? SelectedIonMZ { get; private set; } // May be adjusted by calibration public DissociationType? DissociationType { get; } public double? IsolationWidth { get; } - public int? OneBasedPrecursorScanNumber { get; private set; } + public int? OneBasedPrecursorScanNumber { get; protected set; } public double? SelectedIonMonoisotopicGuessIntensity { get; private set; } // May be refined public double? 
/// <summary>
/// Attempts to convert an integer into the member of <typeparamref name="T"/> that has
/// that numeric value.
/// </summary>
/// <typeparam name="T">Target enum type.</typeparam>
/// <param name="modeInt">Numeric value to look up.</param>
/// <param name="result">
/// The matching enum member, or <c>default(T)</c> when no member is defined for the value.
/// </param>
/// <returns>
/// True if <paramref name="modeInt"/> is a defined value of <typeparamref name="T"/>; otherwise false.
/// </returns>
public static bool ToEnum<T>(this int modeInt, out T result) where T : Enum
{
    Type enumType = typeof(T);
    result = default(T);

    // Enum.IsDefined throws ArgumentException when it is handed a boxed Int32 for an enum
    // whose underlying type is something else (byte, short, long, ...). Convert the value to
    // the enum's underlying type first so the lookup works for every enum.
    object underlyingValue;
    try
    {
        underlyingValue = Convert.ChangeType(modeInt, Enum.GetUnderlyingType(enumType));
    }
    catch (OverflowException)
    {
        // The value does not fit in the underlying type, so no member can have it.
        return false;
    }

    if (!Enum.IsDefined(enumType, underlyingValue))
    {
        return false;
    }

    result = (T)Enum.ToObject(enumType, underlyingValue);
    return true;
}
// The objects should be exact 1:1 column corresponding so as @@ -516,7 +516,7 @@ private static void ThrowLastBaf2SqlError() } /* ----------------------------------------------------------------------------------------------- */ - private static byte[] ConvertStringToUTF8ByteArray(String input) + public static byte[] ConvertStringToUTF8ByteArray(String input) { byte[] utf8 = Encoding.UTF8.GetBytes(input); var result = new byte[utf8.Length + 1]; diff --git a/mzLib/Readers/MsDataFileReader.cs b/mzLib/Readers/MsDataFileReader.cs index ff577f85b..91c801235 100644 --- a/mzLib/Readers/MsDataFileReader.cs +++ b/mzLib/Readers/MsDataFileReader.cs @@ -20,6 +20,7 @@ public static MsDataFile GetDataFile(string filePath) SupportedFileType.MzML => new Mzml(filePath), SupportedFileType.Mgf => new Mgf(filePath), SupportedFileType.BrukerD => new BrukerFileReader(filePath), + SupportedFileType.BrukerTimsTof => new TimsTofFileReader(filePath), _ => throw new MzLibException("File type not supported"), }; } diff --git a/mzLib/Readers/Readers.csproj b/mzLib/Readers/Readers.csproj index 076ed9e40..ab22f2a05 100644 --- a/mzLib/Readers/Readers.csproj +++ b/mzLib/Readers/Readers.csproj @@ -1,10 +1,11 @@ - + net8.0 x64 enable enable + true @@ -12,6 +13,7 @@ + @@ -21,7 +23,6 @@ - Thermo\ThermoFisher.CommonCore.BackgroundSubtraction.dll @@ -70,6 +71,18 @@ Always + + PreserveNewest + + + PreserveNewest + + + Always + + + Always + diff --git a/mzLib/Readers/Thermo/ThermoRawFileReader.cs b/mzLib/Readers/Thermo/ThermoRawFileReader.cs index 12d8a12b5..48289f254 100644 --- a/mzLib/Readers/Thermo/ThermoRawFileReader.cs +++ b/mzLib/Readers/Thermo/ThermoRawFileReader.cs @@ -526,8 +526,6 @@ private static DissociationType GetDissociationType(ActivationType activationTyp } } - - /// /// Gets all the MS orders of all scans in a dynamic connection. This is useful if you want to open all MS1 scans /// without loading all of the other MSn scans. 
diff --git a/mzLib/Readers/Util/SupportedFileTypes.cs b/mzLib/Readers/Util/SupportedFileTypes.cs index 371caf455..a960cb794 100644 --- a/mzLib/Readers/Util/SupportedFileTypes.cs +++ b/mzLib/Readers/Util/SupportedFileTypes.cs @@ -12,7 +12,6 @@ public enum SupportedFileType ThermoRaw, MzML, Mgf, - BrukerD, psmtsv, //osmtsv ToppicPrsm, @@ -27,7 +26,9 @@ public enum SupportedFileType MsPathFinderTDecoys, MsPathFinderTAllResults, CruxResult, - ExperimentAnnotation + ExperimentAnnotation, + BrukerD, + BrukerTimsTof } public static class SupportedFileTypeExtensions @@ -51,6 +52,7 @@ public static string GetFileExtension(this SupportedFileType type) SupportedFileType.MzML => ".mzML", SupportedFileType.Mgf => ".mgf", SupportedFileType.BrukerD => ".d", + SupportedFileType.BrukerTimsTof => ".d", SupportedFileType.psmtsv => ".psmtsv", //SupportedFileType.osmtsv => ".osmtsv", SupportedFileType.ToppicPrsm => "_prsm.tsv", @@ -76,7 +78,14 @@ public static SupportedFileType ParseFileType(this string filePath) case ".raw": return SupportedFileType.ThermoRaw; case ".mzml": return SupportedFileType.MzML; case ".mgf": return SupportedFileType.Mgf; - case ".d": return SupportedFileType.BrukerD; + case ".d": + if(!Directory.Exists(filePath)) throw new FileNotFoundException(); + var fileList = Directory.GetFiles(filePath).Select(p => Path.GetFileName(p)); + if (fileList.Any(file => file == "analysis.baf")) + return SupportedFileType.BrukerD; + if (fileList.Any(file => file == "analysis.tdf")) + return SupportedFileType.BrukerTimsTof; + throw new MzLibException("Bruker file type not recognized"); case ".psmtsv": return SupportedFileType.psmtsv; //case ".osmtsv": return SupportedFileType.osmtsv; case ".feature": diff --git a/mzLib/Readers/timsTOF/FrameProxy.cs b/mzLib/Readers/timsTOF/FrameProxy.cs new file mode 100644 index 000000000..00568e3c7 --- /dev/null +++ b/mzLib/Readers/timsTOF/FrameProxy.cs @@ -0,0 +1,334 @@ +using MassSpectrometry; +using System.Runtime.InteropServices; + 
/// <summary>
/// Factory class for creating FrameProxy instances and managing frame-related data.
/// It keeps the frame metadata table, the native file handle together with the lock that
/// guards it, and two lookup arrays that are filled in by the constructor.
/// </summary>
internal class FrameProxyFactory
{
    internal FrameTable FramesTable { get; }
    internal UInt64 FileHandle { get; }
    internal Object FileLock { get; }
    internal TimsConversion Converter { get; }
    public int MaxIndex { get; init; }

    /// <summary>
    /// Used to convert the tofIndices stored in the .d file to m/z values
    /// </summary>
    public double[] MzLookupArray { get; set; }

    /// <summary>
    /// Used to convert scan number to 1/K0 values
    /// </summary>
    public double[] OneOverK0LookupArray { get; set; }

    /// <summary>
    /// Stores the supplied table, handle and lock, creates the converter and builds both lookup arrays.
    /// </summary>
    /// <param name="table">Frame metadata table.</param>
    /// <param name="fileHandle">Handle passed to the converter and to every FrameProxy.</param>
    /// <param name="fileLock">Lock object shared with the converter and every FrameProxy.</param>
    /// <param name="maxIndex">Number of entries to generate for the index-to-m/z lookup array.</param>
    internal FrameProxyFactory(FrameTable table, UInt64 fileHandle, Object fileLock, int maxIndex)
    {
        FramesTable = table;
        FileHandle = fileHandle;
        FileLock = fileLock;
        Converter = new TimsConversion(fileHandle, fileLock);
        MaxIndex = maxIndex;
        InitializeLookupTables(fileHandle);
    }

    /// <summary>
    /// Creates a FrameProxy for the given one-based frame id, using the scan count stored for that frame.
    /// </summary>
    internal FrameProxy GetFrameProxy(long frameId)
    {
        return new FrameProxy(FileHandle, frameId, FramesTable.NumScans[frameId - 1], FileLock, Converter);
    }

    /// <summary>
    /// Maps each index to its m/z value via <see cref="MzLookupArray"/>.
    /// </summary>
    /// <param name="indices">Indices into the m/z lookup array.</param>
    /// <returns>An array of the same length holding the looked-up m/z values.</returns>
    /// <exception cref="ArgumentException">An index is not smaller than the lookup array length.</exception>
    internal double[] ConvertIndicesToMz(IList<uint> indices)
    {
        // Read the Count property once instead of calling the LINQ Count() extension on every iteration.
        int count = indices.Count;
        double[] mzArray = new double[count];
        for (int idx = 0; idx < count; idx++)
        {
            uint tofIndex = indices[idx];
            if (tofIndex >= MzLookupArray.Length)
                throw new ArgumentException("Index out of range");
            mzArray[idx] = MzLookupArray[tofIndex];
        }
        return mzArray;
    }

    /// <summary>
    /// Accesses the file, then stores the index to m/z lookup in the MzLookupArray
    /// and the index to 1/k0 lookup in the OneOverK0LookupArray
    /// </summary>
    /// <param name="handle">File handle forwarded to the converter.</param>
    internal void InitializeLookupTables(ulong handle)
    {
        // Each frame technically has slightly different index --> m/z mapping
        // but in conversations with Sander Willem, I was told that the differences are negligible
        // so we can use the median frame to generate the lookup table
        long medianFrameId = FramesTable.OneBasedFrameIndex[FramesTable.OneBasedFrameIndex.Length / 2];

        // Populate the mzLookupArray from the inputs 0, 1, ..., MaxIndex - 1
        MzLookupArray = Converter.DoTransformation(
            handle, medianFrameId, BuildIndexRamp(MaxIndex), ConversionFunctions.IndexToMz);

        // Populate the 1/K0 lookup array from the inputs 0, 1, ..., (largest NumScans) - 1
        int scanMax = FramesTable.NumScans.Max();
        OneOverK0LookupArray = Converter.DoTransformation(
            handle, medianFrameId, BuildIndexRamp(scanMax), ConversionFunctions.ScanToOneOverK0);
    }

    /// <summary>
    /// Returns the array { 0.0, 1.0, ..., length - 1 } used as converter input.
    /// </summary>
    private static double[] BuildIndexRamp(int length)
    {
        double[] ramp = new double[length];
        for (int i = 0; i < ramp.Length; i++)
        {
            ramp[i] = i;
        }
        return ramp;
    }

    /// <summary>
    /// Returns Positive when the stored polarity character of the frame is '+', otherwise Negative.
    /// </summary>
    internal Polarity GetPolarity(long frameId)
    {
        return FramesTable.Polarity[frameId - 1] == '+' ? Polarity.Positive : Polarity.Negative;
    }

    /// <summary>
    /// Looks up the 1/K0 value for a (possibly fractional) median scan number. A fractional
    /// value yields the mean of the entries for its floor and ceiling.
    /// </summary>
    internal double GetOneOverK0(double medianScanNumber)
    {
        // The lookup array is 0-indexed, so we need to subtract 1 from the scan number
        // NOTE(review): InitializeLookupTables fills entry i from input value i, so this
        // presumes callers pass a one-based scan number - confirm against the callers.
        if (medianScanNumber % 1 == 0)
            return OneOverK0LookupArray[(int)medianScanNumber - 1];

        int floor = (int)Math.Floor(medianScanNumber);
        int ceil = (int)Math.Ceiling(medianScanNumber);
        return (OneOverK0LookupArray[floor - 1] + OneOverK0LookupArray[ceil - 1]) / 2;
    }

    /// <summary>
    /// Returns the stored retention time of the frame with the given one-based id.
    /// </summary>
    internal double GetRetentionTime(long frameId)
    {
        return (double)FramesTable.RetentionTime[frameId - 1];
    }

    /// <summary>
    /// Returns the stored fill time of the frame with the given one-based id.
    /// </summary>
    internal double GetInjectionTime(long frameId)
    {
        return FramesTable.FillTime[frameId - 1];
    }

    /// <summary>
    /// Sums the stored fill times of all frames from firstFrameId to lastFrameId (both inclusive).
    /// </summary>
    internal double GetInjectionTimeSum(long firstFrameId, long lastFrameId)
    {
        double injectionTimeSum = 0;
        for (long i = firstFrameId; i <= lastFrameId; i++)
        {
            injectionTimeSum += FramesTable.FillTime[i - 1];
        }
        return injectionTimeSum;
    }
}
/// <summary>
/// Returns the intensity values of one scan, copied out of the frame's raw data array
/// and converted from uint to int.
/// </summary>
/// <param name="zeroIndexedScanNumber">Zero-indexed scan number.</param>
/// <returns>Array of intensities.</returns>
internal int[] GetScanIntensities(int zeroIndexedScanNumber)
{
    // Resolve the intensity segment of _rawData for this scan
    Range intensityRange = GetYRange(zeroIndexedScanNumber);
    (int offset, int length) = intensityRange.GetOffsetAndLength(_rawData.Length);

    int[] intensities = new int[length];
    for (int i = 0; i < length; i++)
    {
        intensities[i] = (int)_rawData[offset + i];
    }
    return intensities;
}
/// <summary>
/// Read a range of scans from a single frame.
///
/// Output layout: (N = scan_end - scan_begin = number of requested scans)
///   N x uint32_t: number of peaks in each of the N requested scans
///   N x (two uint32_t arrays: first indices, then intensities)
///
/// Note: different threads must not read scans from the same storage handle
/// concurrently. The native call is therefore made while holding <paramref name="fileLock"/>.
/// </summary>
/// <param name="fileHandle">Handle passed to tims_read_scans_v2.</param>
/// <param name="frameId">Frame to read.</param>
/// <param name="numScans">Exclusive upper scan bound; reading always starts at scan 0.</param>
/// <param name="fileLock">Lock held around the native read.</param>
/// <returns>
/// The frame data. The array is as long as the buffer used for the successful read; entries
/// past the reported output length are zero.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The native read reported an error, or the frame needs more than 16 MB.
/// </exception>
internal static uint[] GetScanRawData(UInt64 fileHandle, long frameId, UInt32 numScans, Object fileLock)
{
    const uint maxFrameBytes = 16777216; // Arbitrary 16 mb frame limit
    int bufferSize = _defaultBufferSize;

    // buffer expansion loop
    while (true)
    {
        uint bufferBytes = (uint)bufferSize * sizeof(uint);
        IntPtr pData = Marshal.AllocHGlobal((int)bufferBytes);
        try
        {
            uint outputLength;

            lock (fileLock)
            {
                outputLength = tims_read_scans_v2(
                    fileHandle,
                    frameId,
                    scan_begin: 0,
                    scan_end: numScans,
                    buffer: pData,
                    length: bufferBytes);
            }

            // tims_read_scans_v2 returns 0 on error. With at least one requested scan a valid
            // result always contains the per-scan peak counts, so 0 can only mean failure.
            // Returning here would hand back uninitialised unmanaged memory as frame data.
            if (outputLength == 0 && numScans > 0)
            {
                throw new InvalidOperationException("Reading scans failed for frame " + frameId);
            }

            // The result is complete when it fits in the buffer (an exact fit included).
            if (outputLength <= bufferBytes)
            {
                var dataArray = new uint[bufferSize];
                // Copy only what the native call actually wrote; the rest stays zero.
                CopyToManaged(pData, dataArray, 0, (int)(outputLength / sizeof(uint)));

                return dataArray;
            }

            if (outputLength > maxFrameBytes)
            {
                throw new InvalidOperationException("Maximum frame size exceeded");
            }

            // Increase buffer size if necessary
            bufferSize = ((int)outputLength / 4) + 1;
        }
        finally { Marshal.FreeHGlobal(pData); }
    }
}
/// <summary>
/// Calculates the running total of an array, beginning with
/// the start index (inclusive) and ending with the end index (exclusive).
/// Used for determining scan offsets.
/// </summary>
/// <param name="array">Array to be summed</param>
/// <param name="start">Where to begin summing (inclusive)</param>
/// <param name="end">Where summing ends (exclusive)</param>
/// <returns>
/// An array of length (end - start + 1). Element 0 is always 0 and element k is the sum of
/// array[start] through array[start + k - 1].
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="start"/> is negative, or <paramref name="end"/> is smaller than
/// <paramref name="start"/> or larger than the array length.
/// </exception>
public static int[] PartialSum(uint[] array, int start, int end)
{
    ArgumentNullException.ThrowIfNull(array);
    if (start < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(start));
    }
    if (end < start || end > array.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(end));
    }

    int[] sums = new int[end - start + 1]; // sums[0] stays 0: nothing precedes the first element
    int runningTotal = 0;

    // Walk the requested window only, writing relative to 'start' so that a non-zero start
    // neither sums elements before the window nor writes past the end of 'sums'.
    for (int i = start; i < end; i++)
    {
        runningTotal += (int)array[i];
        sums[i - start + 1] = runningTotal;
    }
    return sums;
}
/// <summary>
/// Immutable value that groups a precursor id with a scan start, a scan end
/// and a median scan number.
/// </summary>
internal readonly struct Ms1Record
{
    internal int PrecursorId { get; }
    internal int ScanStart { get; }
    internal int ScanEnd { get; }
    internal double ScanMedian { get; }

    /// <summary>
    /// Creates a record from the four supplied values; nothing is validated or derived.
    /// </summary>
    public Ms1Record(int precursorId, int scanStart, int scanEnd, double scanMedian)
        => (PrecursorId, ScanStart, ScanEnd, ScanMedian) = (precursorId, scanStart, scanEnd, scanMedian);
}
double ScanMedian { get; } + internal float IsolationMz { get; } + internal float IsolationWidth { get; } + internal float CollisionEnergy { get; } + internal float MostAbundantPrecursorMz { get; } + internal float PrecursorMonoisotopicMz { get; } + internal int Charge { get; } + internal float PrecursorIntensity { get; } + + public PasefRecord( + IEnumerable frameList, + int precursorId, + int scanStart, + int scanEnd, + double scanMedian, + float isolationMz, + float isolationWidth, + float collisionEnergy, + float mostAbundantPrecursorMz, + float precursorMonoisotopicMz, + int charge, + float precursorIntensity) + { + FrameList = frameList ?? throw new ArgumentNullException(nameof(frameList)); + PrecursorId = precursorId; + ScanStart = scanStart; + ScanEnd = scanEnd; + ScanMedian = scanMedian; + IsolationMz = isolationMz; + IsolationWidth = isolationWidth; + CollisionEnergy = collisionEnergy; + MostAbundantPrecursorMz = mostAbundantPrecursorMz; + PrecursorMonoisotopicMz = precursorMonoisotopicMz; + Charge = charge; + PrecursorIntensity = precursorIntensity; + } + } +} diff --git a/mzLib/Readers/timsTOF/Tables.cs b/mzLib/Readers/timsTOF/Tables.cs new file mode 100644 index 000000000..6e475be39 --- /dev/null +++ b/mzLib/Readers/timsTOF/Tables.cs @@ -0,0 +1,91 @@ +using MzLibUtil; +using System.Data.SQLite; + +namespace Readers +{ + internal enum TimsTofMsMsType + { + MS = 0, + MSMSFragment = 2, + PASEF = 8, + DIA = 9, + PRM = 10 + } + + internal enum TimsTofAcquisitionMode + { + MS = 0, + AutoMSMS = 1, + MRM = 2, + inSourceCID = 3, + broadbandCID = 4, + PASEF = 8, + DIA = 9, + PRM = 10, + Maldi = 20 + } + + /// + /// This class stores information take from the .tdf SQLite database file + /// Every frame in the file has 9 pieces of metadata that can be accessed by + /// selecting the appropriate array. All arrays are zero-based!!! 
/// <summary>
/// Returns the MS/MS type of a frame by converting the value stored in MsMsType
/// for that frame into a <see cref="TimsTofMsMsType"/>.
/// </summary>
/// <param name="frameId">One-based frame id; the backing array is read at frameId - 1.</param>
/// <returns>The enum member whose numeric value matches the stored MsMsType.</returns>
/// <exception cref="IndexOutOfRangeException">
/// <paramref name="frameId"/> is not in the range 1..MsMsType.Length.
/// </exception>
/// <exception cref="MzLibException">The stored value is not a defined TimsTofMsMsType.</exception>
internal TimsTofMsMsType GetAnalysisType(int frameId)
{
    // Reject zero AND negative ids here, so every invalid id produces the same descriptive
    // error instead of an unexplained array-index failure further down.
    if (frameId <= 0 || frameId > MsMsType.Length)
    {
        throw new IndexOutOfRangeException("Invalid frame ID!");
    }

    if (MsMsType[frameId - 1].ToEnum<TimsTofMsMsType>(out var analysisType))
    {
        return analysisType;
    }

    throw new MzLibException("Unrecognized MS/MS method.");
}
/// <summary>
/// Takes an array of raw values and converts them according to the specified conversion function,
/// returning an equal length array containing the transformed values
/// </summary>
/// <param name="fileHandle">Unique identifier associated with the open timsTof .d data file</param>
/// <param name="frameId">Frame identifier passed to the native conversion</param>
/// <param name="input">Values to convert; null or empty yields an empty result</param>
/// <param name="function">Which native conversion to apply</param>
/// <returns>Double array containing the transformed input values</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="function"/> is not one of the defined <see cref="ConversionFunctions"/> values.
/// </exception>
internal unsafe double[] DoTransformation(UInt64 fileHandle, long frameId, double[] input, ConversionFunctions function)
{
    if (input is null || input.Length == 0)
    {
        return Array.Empty<double>();
    }

    double[] transformedValues = new double[input.Length];
    uint count = (uint)input.Length;

    // Pin both managed arrays and let the native code write straight into the result.
    // This removes the unmanaged scratch buffer and the copy back.
    fixed (double* inputPtr = input)
    fixed (double* outPtr = transformedValues)
    {
        // The native library is called under the shared file lock.
        lock (_fileLock)
        {
            switch (function)
            {
                case ConversionFunctions.IndexToMz:
                    tims_index_to_mz(fileHandle, frameId, inputPtr, outPtr, count);
                    break;
                case ConversionFunctions.MzToIndex:
                    tims_mz_to_index(fileHandle, frameId, inputPtr, outPtr, count);
                    break;
                case ConversionFunctions.ScanToOneOverK0:
                    tims_scannum_to_oneoverk0(fileHandle, frameId, inputPtr, outPtr, count);
                    break;
                case ConversionFunctions.OneOverK0ToScan:
                    tims_oneoverk0_to_scannum(fileHandle, frameId, inputPtr, outPtr, count);
                    break;
                case ConversionFunctions.ScanToVoltage:
                    tims_scannum_to_voltage(fileHandle, frameId, inputPtr, outPtr, count);
                    break;
                case ConversionFunctions.VoltageToScan:
                    tims_voltage_to_scannum(fileHandle, frameId, inputPtr, outPtr, count);
                    break;
                default:
                    // Previously this fell through and returned whatever happened to be in
                    // the uninitialised output buffer. Fail loudly instead.
                    throw new ArgumentOutOfRangeException(nameof(function), function, "Unknown conversion function.");
            }
        }
    }
    return transformedValues;
}
tims_voltage_to_scannum + (UInt64 fileHandle, Int64 frame_id, double* inputPtr, double* outPtr, UInt32 count); + } +} diff --git a/mzLib/Readers/timsTOF/TimsDataScan.cs b/mzLib/Readers/timsTOF/TimsDataScan.cs new file mode 100644 index 000000000..2a4b8c7fa --- /dev/null +++ b/mzLib/Readers/timsTOF/TimsDataScan.cs @@ -0,0 +1,93 @@ +using MzLibUtil; +using Readers; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace MassSpectrometry +{ + public class TimsDataScan : MsDataScan + { + public int ScanNumberStart { get; } + public int ScanNumberEnd { get; } + public double OneOverK0 { get; } + public int? PrecursorId { get; } + public long FrameId { get; } + /// + /// For PASEF Aggregate scans, contains the list of Frames where the same precursor was samples + /// This is a list of succesive PASEF scans capturing data on the same ion-mobility scan range and quadrupole isolation window + /// + public List FrameIds { get; } + internal int ComponentSpectraTotalPeaks { get; private set; } + + // Need to incorporate scan range somehow + public TimsDataScan(MzSpectrum massSpectrum, + int oneBasedScanNumber, + int msnOrder, + bool isCentroid, + Polarity polarity, + double retentionTime, + MzRange scanWindowRange, + string scanFilter, + MZAnalyzerType mzAnalyzer, + double totalIonCurrent, + double? injectionTime, + double[,] noiseData, + string nativeId, + long frameId, + int scanNumberStart, + int scanNumberEnd, + double medianOneOverK0, + int? precursorId = null, + double? selectedIonMz = null, + int? selectedIonChargeStateGuess = null, + double? selectedIonIntensity = null, + double? isolationMZ = null, + double? isolationWidth = null, + DissociationType? dissociationType = null, + int? oneBasedPrecursorScanNumber = null, + double? 
/// <summary>
/// Replaces MassSpectrum with the spectrum that TofSpectraMerger builds from the component
/// arrays collected so far, sets TotalIonCurrent to that spectrum's SumOfAllY, and then
/// empties the component lists.
/// </summary>
/// <param name="proxyFactory">Not read by this method.</param>
/// <param name="filteringParams">Forwarded unchanged to the merger; may be null.</param>
/// <exception cref="InvalidOperationException">
/// No component arrays were ever added, so the component lists do not exist yet.
/// </exception>
internal void AverageComponentSpectra(FrameProxyFactory proxyFactory, FilteringParams filteringParams = null)
{
    // The lists are created lazily by AddComponentArrays. Without this check the method
    // fails with a bare NullReferenceException; check before touching any state.
    if (mzArrays is null || intensityArrays is null)
    {
        throw new InvalidOperationException("No component spectra have been added to this scan.");
    }

    MassSpectrum = TofSpectraMerger.MergeArraysToMs2Spectrum(mzArrays, intensityArrays, filteringParams);
    TotalIonCurrent = MassSpectrum.SumOfAllY;

    // Drop the component arrays now that they have been merged.
    mzArrays.Clear();
    intensityArrays.Clear();
}
/// <summary>
/// Opens the connections needed for on-demand reading. It verifies that analysis.tdf and
/// analysis.tdf_bin exist under FilePath, opens the SQLite connection, closes any binary
/// handle that is already open and opens a new one, creates a fresh file lock, then counts
/// the frames and builds the frame proxy factory.
/// </summary>
/// <exception cref="FileNotFoundException">Either of the two files is missing.</exception>
public override void InitiateDynamicConnection()
{
    // Build the paths with Path.Combine (as OpenSqlConnection does) rather than
    // concatenating a hard-coded '\' separator.
    string tdfPath = Path.Combine(FilePath, "analysis.tdf");
    string tdfBinPath = Path.Combine(FilePath, "analysis.tdf_bin");
    if (!File.Exists(tdfPath) || !File.Exists(tdfBinPath))
    {
        throw new FileNotFoundException("Data file is missing .tdf and/or .tdf_bin file");
    }

    OpenSqlConnection();

    if (_fileHandle != null)
    {
        tims_close((UInt64)_fileHandle);
        // Forget the closed handle immediately, so a failure while reopening cannot
        // lead to it being closed a second time later.
        _fileHandle = null;
    }
    OpenBinaryFileConnection();
    _fileLock = new();

    CountFrames();
    BuildProxyFactory();
}
BrukerFileReader.ConvertStringToUTF8ByteArray(FilePath); + _fileHandle = tims_open(binaryFileBytePath, 0); + if (_fileHandle == null || _fileHandle == 0) + throw new MzLibException("Could not open the analysis.tdf_bin file"); + } + + public override void CloseDynamicConnection() + { + if (_sqlConnection?.State == ConnectionState.Open) _sqlConnection.Close(); + _sqlConnection?.Dispose(); + if (_fileHandle != null) + { + tims_close((UInt64)_fileHandle); + _fileHandle = null; + } + } + + public void Dispose() + { + CloseDynamicConnection(); + } + + /// + /// WARNING! This method reads in the entire data file before + /// returning the requested scan! It is recommended to call the + /// GetScanFromPrecursorAndFrameIdFromDynamicConnection() + /// + public override MsDataScan GetOneBasedScanFromDynamicConnection(int oneBasedScanNumber, IFilteringParams filterParams = null) + { + if(oneBasedScanNumber <= 0) + throw new IndexOutOfRangeException("Invalid one-based index given when accessing data scans. Index: " + oneBasedScanNumber); + if (Scans != null && Scans.Length >= oneBasedScanNumber && Scans[oneBasedScanNumber - 1] != null) + return Scans[oneBasedScanNumber - 1]; + + LoadAllStaticData(filteringParams: (FilteringParams)filterParams); + if (oneBasedScanNumber > Scans.Length) + throw new IndexOutOfRangeException("Invalid one-based index given when accessing data scans. Index: " + oneBasedScanNumber); + return Scans[oneBasedScanNumber - 1]; + } + + /// + /// Returns a TimsDataScan with the specified frame and precursor id + /// WARNING! 
The returned data scan will have a OneBasedScanNumber of -1 + /// + /// + /// + /// + /// + /// + /// + /// + public TimsDataScan GetScanFromPrecursorAndFrameIdFromDynamicConnection(int precursorId, int frameId, IFilteringParams filteringParams = null) + { + if(_fileHandle == null || _fileHandle == 0 || _sqlConnection.IsCanceled() || FrameProxyFactory == null) + { + throw new MzLibException("The dynamic connection has not been created yet!"); + } + + var frameType = FrameProxyFactory.FramesTable.GetAnalysisType(frameId); + switch(frameType) + { + case TimsTofMsMsType.MS: + var records = GetMs1Records(frameId); + var recordForPrecursor = records.FirstOrDefault(x => x.PrecursorId == precursorId); + return GetMs1Scan(recordForPrecursor, FrameProxyFactory.GetFrameProxy(frameId), (FilteringParams)filteringParams); + case TimsTofMsMsType.PASEF: + return BuildPasefScanFromPrecursor([precursorId], (FilteringParams)filteringParams).FirstOrDefault(); + default: + throw new NotImplementedException(); + } + } + + internal void CountFrames() + { + if (_sqlConnection == null) return; + using var command = new SQLiteCommand(_sqlConnection); + command.CommandText = @"SELECT COUNT(*) FROM Frames;"; + using var sqliteReader = command.ExecuteReader(); + int count = 0; + while (sqliteReader.Read()) + { + count = sqliteReader.GetInt32(0); + break; + } + NumberOfFrames = count; + } + + internal void CountMS1Frames() + { + if (_sqlConnection == null) return; + using var command = new SQLiteCommand(_sqlConnection); + command.CommandText = @"SELECT f.Id FROM Frames f WHERE f.MsMsType = 0;"; + using var sqliteReader = command.ExecuteReader(); + Ms1FrameIds = new(); + + while (sqliteReader.Read()) + { + Ms1FrameIds.Add(sqliteReader.GetInt64(0)); + } + + } + + /// + /// Builds a new FrameProxyFactory to pull frames from the timsTOF data file + /// and sets the FrameProxyFactory property + /// + /// + internal void BuildProxyFactory() + { + if (_sqlConnection == null || _fileHandle == null) 
return; + var framesTable = new FrameTable(_sqlConnection, NumberOfFrames); + if (framesTable == null) + throw new MzLibException("Something went wrong while loading the Frames table from the analysis.tdf database."); + + int numberOfIndexedMzs = GetNumberOfDigitizerSamples(); + FrameProxyFactory = new FrameProxyFactory(framesTable, (ulong)_fileHandle, _fileLock, numberOfIndexedMzs); + } + + internal void CountPrecursors() + { + if (_sqlConnection == null) return; + using var command = new SQLiteCommand(_sqlConnection); + command.CommandText = @"SELECT MAX(Id) FROM Precursors;"; + using var sqliteReader = command.ExecuteReader(); + var columns = Enumerable.Range(0, sqliteReader.FieldCount) + .Select(sqliteReader.GetName).ToList(); + long maxPrecursorId = 0; + while (sqliteReader.Read()) + { + maxPrecursorId = sqliteReader.GetInt64(0); + } + Ms1ScanArray = new TimsDataScan[maxPrecursorId]; + PasefScanArray = new TimsDataScan[maxPrecursorId]; + } + + public ConcurrentBag Ms1ScansNoPrecursorsBag { internal get; set; } + public TimsDataScan[] Ms1ScanArray { internal get; set; } + public TimsDataScan[] PasefScanArray { internal get; set; } + + internal int GetNumberOfDigitizerSamples() + { + using var command = new SQLiteCommand(_sqlConnection); + command.CommandText = @"SELECT value FROM GlobalMetadata" + + " WHERE GlobalMetadata.Key = 'DigitizerNumSamples'"; + using var reader = command.ExecuteReader(); + reader.Read(); + return Int32.Parse(reader.GetString(0)); + } + + public override MsDataFile LoadAllStaticData(FilteringParams filteringParams = null, int maxThreads = 1) + { + InitiateDynamicConnection(); + + CountMS1Frames(); + CountPrecursors(); + + _maxThreads = maxThreads; + Ms1ScansNoPrecursorsBag = new(); + Parallel.ForEach( + Partitioner.Create(0, Ms1FrameIds.Count), + new ParallelOptions() { MaxDegreeOfParallelism = _maxThreads }, + (range) => + { + for (int i = range.Item1; i < range.Item2; i++) + { + BuildAllScans(Ms1FrameIds[i], filteringParams); + } + 
}); + + CloseDynamicConnection(); + AssignOneBasedPrecursorsToPasefScans(); + SourceFile = GetSourceFile(); + return this; + } + + internal void AssignOneBasedPrecursorsToPasefScans() + { + var localMs1Scans = this.Ms1ScanArray.Where(scan => scan != null).OrderBy(scan => scan.FrameId).ThenBy(scan => scan.PrecursorId).ToList(); + var localPasefScans = this.PasefScanArray.Where(scan => scan != null).OrderBy(scan => scan.PrecursorId).ToList(); + var localMs1ScansNoPrecursor = Ms1ScansNoPrecursorsBag.OrderBy(scan => scan.FrameId).ToList(); + TimsDataScan[] scanArray = new TimsDataScan[localMs1Scans.Count*2 + localMs1ScansNoPrecursor.Count]; + + int oneBasedScanIndex = 1; + int pasefScanIndex = 0; + int ms1NoPrecursorIndex = 0; + TimsDataScan? ms1ScanNoPrecursor = localMs1ScansNoPrecursor.IsNotNullOrEmpty() ? localMs1ScansNoPrecursor[ms1NoPrecursorIndex] : null; + //Write the scans to the scanArray and assign scan indices + for (int i = 0; i < localMs1Scans.Count; i++) + { + var ms1Scan = localMs1Scans[i]; + while (ms1ScanNoPrecursor != null && ms1ScanNoPrecursor.FrameId < ms1Scan.FrameId) + { + ms1ScanNoPrecursor.SetOneBasedScanNumber(oneBasedScanIndex); + scanArray[oneBasedScanIndex - 1] = ms1ScanNoPrecursor; + ms1NoPrecursorIndex++; + oneBasedScanIndex++; + ms1ScanNoPrecursor = ms1NoPrecursorIndex < localMs1ScansNoPrecursor.Count ? 
localMs1ScansNoPrecursor[ms1NoPrecursorIndex] : null; + } + ms1Scan.SetOneBasedScanNumber(oneBasedScanIndex); + scanArray[oneBasedScanIndex - 1] = ms1Scan; + oneBasedScanIndex++; + //if (ms1Scan.PrecursorId == -1) continue; // Continue if the scan didn't have any precursors (as there will be no MS2 scans) + + // This assumes that there is a one to one correspondence between the MS1 scans and the PASEF scans + var pasefScan = localPasefScans[pasefScanIndex]; + while(pasefScan.PrecursorId < ms1Scan.PrecursorId) + { + pasefScanIndex++; + pasefScan = localPasefScans[pasefScanIndex]; + } + if(pasefScan.PrecursorId == ms1Scan.PrecursorId) + { + pasefScan.SetOneBasedPrecursorScanNumber(ms1Scan.OneBasedScanNumber); + pasefScan.SetOneBasedScanNumber(oneBasedScanIndex); + scanArray[oneBasedScanIndex - 1] = pasefScan; + pasefScanIndex++; + oneBasedScanIndex++; + } + } + + if(oneBasedScanIndex < scanArray.Length) + { + // Some MS1 scans contain no peaks where the precursor was identified, so they are not included in the scanArray + scanArray = scanArray.Where(scan => scan != null).ToArray(); + } + + Scans = scanArray; + } + + /// + /// This function will create multiple MS1 scans from each MS1 frame in the timsTOF data file + /// One Ms1 Scan per precursor + /// It will then create an Ms2 scan for each precursor by averaging MS2 spectra for the precursor + /// collected over multiple frames + /// Created scans are then added to their respective scan arrays + /// + /// + /// + internal void BuildAllScans(long frameId, FilteringParams filteringParams) + { + FrameProxy frame = FrameProxyFactory.GetFrameProxy(frameId); + var records = GetMs1Records(frameId); + foreach(Ms1Record record in records) + { + TimsDataScan? 
dataScan = GetMs1Scan(record, frame, filteringParams); + if (dataScan != null) + { + if (dataScan.PrecursorId > 0) + Ms1ScanArray[(int)dataScan.PrecursorId - 1] = dataScan; + else + Ms1ScansNoPrecursorsBag.Add(dataScan); + } + } + + // Then, build ONE MS2 scan by averaging every PASEF frame that sampled that precursor + var pasefScans = BuildPasefScanFromPrecursor(precursorIds: records.Select(r => r.PrecursorId), filteringParams); + foreach (var scan in pasefScans) + { + if (scan?.PrecursorId != null) + PasefScanArray[(int)scan.PrecursorId - 1] = scan; + } + } + + internal List GetMs1Records(long frameId) + { + List records = new List(); + // Only do this if we have valid precursors (which we don't for like SRM/inclusion list type stuff) + using (var command = new SQLiteCommand(_sqlConnection)) + { + // This command finds all the precursors identified and fragmented in each MS/MS Pasef scan + // It is used to take an MS1 frame and create multiple "MsDataScans" by averaging the + // spectra from each scan within a given Ion Mobility (i.e. 
ScanNum) range + command.CommandText = + @"SELECT MIN(m.ScanNumBegin), MAX(m.ScanNumEnd), p.ScanNumber, p.Id" + + " FROM Precursors p" + + " INNER JOIN PasefFrameMsMsInfo m on m.Precursor = p.Id" + + " WHERE p.Parent = " + frameId.ToString() + + " GROUP BY p.Id;"; + using var sqliteReader = command.ExecuteReader(); + + while (sqliteReader.Read()) + { + var scanStart = sqliteReader.GetInt32(0); + var scanEnd = sqliteReader.GetInt32(1); + var scanMedian = sqliteReader.GetFloat(2); + int precursorId = sqliteReader.GetInt32(3); + records.Add(new Ms1Record(precursorId, scanStart, scanEnd, (double)scanMedian)); + } + } + // If no records were returned, then no precursors were observed in the frame + // In that case, create a record that contains every scan and a precursorID of -1 + if (records.Count == 0) + records.Add(new Ms1Record(-1, 1, FrameProxyFactory.FramesTable.NumScans[frameId - 1], FrameProxyFactory.FramesTable.NumScans[frameId - 1])); + return records; + } + + internal TimsDataScan? 
GetMs1Scan(Ms1Record record, FrameProxy frame, FilteringParams filteringParams) + { + List indexArrays = new(); + List intensityArrays = new(); + for (int scan = record.ScanStart; scan < record.ScanEnd; scan++) + { + indexArrays.Add(frame.GetScanIndices(scan-1)); + intensityArrays.Add(frame.GetScanIntensities(scan-1)); + } + // Step 2: Average those suckers + MzSpectrum averagedSpectrum = TofSpectraMerger.MergeArraysToMs1Spectrum(indexArrays, intensityArrays, FrameProxyFactory, filteringParams: filteringParams); + if (averagedSpectrum.Size < 1) + { + return null; + } + // Step 3: Make an MsDataScan bby + var dataScan = new TimsDataScan( + massSpectrum: averagedSpectrum, + oneBasedScanNumber: -1, // This gets adjusted once all data has been read + msnOrder: 1, + isCentroid: true, + polarity: FrameProxyFactory.GetPolarity(frame.FrameId), + retentionTime: FrameProxyFactory.GetRetentionTime(frame.FrameId), + scanWindowRange: ScanWindow, + scanFilter: ScanFilter, + mzAnalyzer: MZAnalyzerType.TOF, + totalIonCurrent: intensityArrays.Sum(array => array.Sum()), + injectionTime: FrameProxyFactory.GetInjectionTime(frame.FrameId), + noiseData: null, + nativeId: "frame=" + frame.FrameId.ToString() + + ";scans=" + record.ScanStart.ToString() + "-" + record.ScanEnd.ToString() + + ";precursor=" + record.PrecursorId.ToString(), + frameId: frame.FrameId, + scanNumberStart: record.ScanStart, + scanNumberEnd: record.ScanEnd, + medianOneOverK0: FrameProxyFactory.GetOneOverK0(record.ScanMedian), + precursorId: record.PrecursorId); + + return dataScan; + } + + internal List BuildPasefScanFromPrecursor(IEnumerable precursorIds, FilteringParams filteringParams) + { + HashSet allFrames = new(); + List pasefScans = new(); + + // Create TimsDataScans with all relevant metadata, but without an mzSpectrum + foreach (PasefRecord record in GetPasefRecords(precursorIds)) + { + allFrames.UnionWith(record.FrameList); + var dataScan = new TimsDataScan( + massSpectrum: null, + oneBasedScanNumber: -1, 
// This will be adjusted once all scans have been read + msnOrder: 2, + isCentroid: true, + polarity: FrameProxyFactory.GetPolarity(record.FrameList.First()), + retentionTime: FrameProxyFactory.GetRetentionTime(record.FrameList.First()), + scanWindowRange: ScanWindow, + scanFilter: ScanFilter, + mzAnalyzer: MZAnalyzerType.TOF, + totalIonCurrent: -1, // Will be set later + injectionTime: FrameProxyFactory.GetInjectionTimeSum(record.FrameList.First(), record.FrameList.Last()), + noiseData: null, + nativeId: "frames=" + record.FrameList.First().ToString() + "-" + record.FrameList.Last().ToString() + + ";scans=" + record.ScanStart.ToString() + "-" + record.ScanEnd.ToString(), + frameId: record.FrameList.First(), + scanNumberStart: record.ScanStart, + scanNumberEnd: record.ScanEnd, + medianOneOverK0: FrameProxyFactory.GetOneOverK0(record.ScanMedian), // Needs to be set later + precursorId: record.PrecursorId, + selectedIonMz: record.MostAbundantPrecursorMz, + selectedIonChargeStateGuess: record.Charge, + selectedIonIntensity: record.PrecursorIntensity, + isolationMZ: record.IsolationMz, + isolationWidth: record.IsolationWidth, + dissociationType: DissociationType.CID, + oneBasedPrecursorScanNumber: -1, // This will be set later + selectedIonMonoisotopicGuessMz: record.PrecursorMonoisotopicMz, + hcdEnergy: record.CollisionEnergy.ToString(), + frames: record.FrameList.ToList()); + pasefScans.Add(dataScan); + } + + // Grab all fragmentation spectra for each precursor + // Each TimsDataScan in pasefScans corresponds to one precursor. 
+ // A precursor can be isolated and fragmented in multiple pasef frames + // Here, we iterate through each frame, averaging the scans that correspond to each precursor + foreach (long frameId in allFrames) + { + FrameProxy frame = FrameProxyFactory.GetFrameProxy(frameId); + //Iterate through all the datascans created above with this frame + foreach (var scan in pasefScans) + { + if (scan.FrameIds.Contains(frameId)) + { + List indexArrays = new(); + List intensityArrays = new(); + for (int mobilityScanIdx = scan.ScanNumberStart; mobilityScanIdx < scan.ScanNumberEnd; mobilityScanIdx++) + { + indexArrays.Add(frame.GetScanIndices(mobilityScanIdx-1)); + intensityArrays.Add(frame.GetScanIntensities(mobilityScanIdx-1)); + } + // Perform frame level averaging, where all scans from one frame associated with a given precursor are merged and centroided + // Need to convert indexArrays to one uint[] and intensityArrays to one int[] + (double[] Mzs, int[] Intensities) summedArrays = TofSpectraMerger.MergeArraysToMzArray(indexArrays, intensityArrays, FrameProxyFactory); + scan.AddComponentArrays(summedArrays.Mzs, summedArrays.Intensities); + } + } + } + + // Now, we average the fragmentation spectra (each spectra originating in a different frame) + // to yield one spectrum per precursor + foreach (TimsDataScan scan in pasefScans) + { + scan.AverageComponentSpectra(FrameProxyFactory, filteringParams); + } + + return pasefScans; + } + + internal IEnumerable GetPasefRecords(IEnumerable precursorIds) + { + using (var command = new SQLiteCommand(_sqlConnection)) + { + string multiplePrecursorString = "(" + + String.Join(',', precursorIds.Select(id => "\'" + id.ToString() + "\'")) + + ")"; + // SQL Command for getting some info from both PasefFrameMsMsInfo table and + // Precursors table + command.CommandText = + @"SELECT GROUP_CONCAT(m.Frame), m.ScanNumBegin, m.ScanNumEnd, m.IsolationMz, m.IsolationWidth," + + " m.CollisionEnergy, p.LargestPeakMz, p.MonoisotopicMz, p.Charge, 
p.Intensity, p.ScanNumber, p.Id" + + " FROM PasefFrameMsMsInfo m" + + " INNER JOIN Precursors p on m.Precursor = p.Id" + + " WHERE m.Precursor IN " + multiplePrecursorString + + " GROUP BY m.Precursor;"; + + using var sqliteReader = command.ExecuteReader(); + + // Each call to read returns the information associated with a given precursor + while (sqliteReader.Read()) + { + var frameList = sqliteReader.GetString(0).Split(',').Select(id => Int64.Parse(id)); + var scanStart = sqliteReader.GetInt32(1); + var scanEnd = sqliteReader.GetInt32(2); + var isolationMz = sqliteReader.GetFloat(3); + var isolationWidth = sqliteReader.GetFloat(4); + var collisionEnergy = sqliteReader.GetFloat(5); + var mostAbundantPrecursorPeak = sqliteReader.GetFloat(6); + float precursorMonoisotopicMz = sqliteReader.IsDBNull(7) ? isolationMz : sqliteReader.GetFloat(7); + int charge = sqliteReader.IsDBNull(8) ? 1 : sqliteReader.GetInt32(8); + var precursorIntensity = sqliteReader.GetFloat(9); + var scanMedian = sqliteReader.GetFloat(10); + var precursorId = sqliteReader.GetInt32(11); + + yield return new PasefRecord(frameList, precursorId, scanStart, scanEnd, scanMedian, isolationMz, isolationWidth, collisionEnergy, mostAbundantPrecursorPeak, precursorMonoisotopicMz, charge, precursorIntensity); + } + } + } + + private const string nativeIdFormat = "Frame ID + scan number range format"; + private const string massSpecFileFormat = ".D format"; + public override SourceFile GetSourceFile() + { + // append the analysis.baf because the constructor for SourceFile will look for the + // parent directory. 
+ string fileName = FilePath + @"\analysis.tdf"; + return new SourceFile(nativeIdFormat, massSpecFileFormat, + null, null, id: null, filePath: fileName); + } + + #region Bruker Dll Functions + + /// + /// Returns a unique handle that references an open timsTOF data file + /// + /// + /// + /// + [DllImport("timsdata.dll", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)] + public static extern UInt64 tims_open + (byte[] analysis_directory_name_utf8, UInt32 use_recalibrated_state); + + /// + /// Closes a file connection to a .tdf binary file + /// + [DllImport("timsdata.dll", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)] + public static extern void tims_close + (UInt64 fileHandle); + + #endregion Bruker Dll Functions + + } +} diff --git a/mzLib/Readers/timsTOF/TofSpectraMerger.cs b/mzLib/Readers/timsTOF/TofSpectraMerger.cs new file mode 100644 index 000000000..afab2082c --- /dev/null +++ b/mzLib/Readers/timsTOF/TofSpectraMerger.cs @@ -0,0 +1,384 @@ +using Easy.Common.Extensions; +using MassSpectrometry; +using MzLibUtil; + +namespace Readers +{ + public static class TofSpectraMerger + { + public static readonly double DefaultPpmTolerance = 10; + + #region IndexLevelOperations + // The following methods are used to merge and collapse index arrays and intensity arrays + // The timsTOF data format doesn't store m/z values directly, but rather indices in a lookup table where the mz values are stored + // Keeping these indices as ints allows for more efficient storage and processing of the data + + /// + /// Merges two index and intensity arrays using a two-pointer technique. + /// The merged arrays are sorted by index, ascending + /// + /// First index array. + /// Second index array. + /// First intensity array. + /// Second intensity array. + /// A tuple containing the merged indices and intensities. 
public static (uint[] Indices, int[] Intensities) TwoPointerMerge(uint[] indexArray1, uint[] indexArray2, int[] intensityArray1, int[] intensityArray2)
{
    if (indexArray1 == null) throw new ArgumentNullException(nameof(indexArray1));
    if (indexArray2 == null) throw new ArgumentNullException(nameof(indexArray2));
    if (intensityArray1 == null) throw new ArgumentNullException(nameof(intensityArray1));
    if (intensityArray2 == null) throw new ArgumentNullException(nameof(intensityArray2));

    // Each intensity belongs to the index at the same position. A length mismatch would
    // otherwise surface as an IndexOutOfRangeException or as result arrays of unequal length.
    if (indexArray1.Length != intensityArray1.Length || indexArray2.Length != intensityArray2.Length)
        throw new ArgumentException("Each index array must have the same length as its intensity array.");

    uint[] mergedIndices = new uint[indexArray1.Length + indexArray2.Length];
    int[] mergedIntensities = new int[mergedIndices.Length];

    int p1 = 0;
    int p2 = 0;
    int write = 0;

    // Both inputs are expected to be sorted ascending. On equal indices the element of the
    // second array is written first.
    while (p1 < indexArray1.Length && p2 < indexArray2.Length)
    {
        if (indexArray1[p1] < indexArray2[p2])
        {
            mergedIndices[write] = indexArray1[p1];
            mergedIntensities[write] = intensityArray1[p1];
            p1++;
        }
        else
        {
            mergedIndices[write] = indexArray2[p2];
            mergedIntensities[write] = intensityArray2[p2];
            p2++;
        }
        write++;
    }

    // At most one input still has elements left; copy that tail in one go.
    int remaining1 = indexArray1.Length - p1;
    Array.Copy(indexArray1, p1, mergedIndices, write, remaining1);
    Array.Copy(intensityArray1, p1, mergedIntensities, write, remaining1);
    write += remaining1;

    int remaining2 = indexArray2.Length - p2;
    Array.Copy(indexArray2, p2, mergedIndices, write, remaining2);
    Array.Copy(intensityArray2, p2, mergedIntensities, write, remaining2);

    return (mergedIndices, mergedIntensities);
}

/// <summary>
/// Collapses the given index and intensity arrays.
/// Adjacent index values (and their corresponding intensity values) are merged.
/// The idea here is to centroid a spectrum
/// </summary>
/// <param name="indexArray">The index array to collapse.</param>
/// <param name="intensityArray">The intensity array to collapse.</param>
/// <returns>A tuple containing the collapsed indices and intensities.</returns>
public static (uint[] Indices, int[] Intensities) CollapseArrays(uint[] indexArray, int[] intensityArray)
{
    // Define lists to store the collapsed indices and intensities
    List<uint> collapsedIndices = new(indexArray.Length);
    List<int> collapsedIntensities = new(intensityArray.Length);

    int clusterStart = 0;
    while (clusterStart < indexArray.Length)
    {
        // A cluster holds every following index that is at most 2 above the cluster's FIRST index.
        uint clusterUpperBound = 2 + indexArray[clusterStart];
        int clusterEnd = clusterStart;
        while (clusterEnd + 1 < indexArray.Length && clusterUpperBound >= indexArray[clusterEnd + 1])
        {
            clusterEnd++;
        }

        // Use the median index in each cluster as the collapsed index
        collapsedIndices.Add(indexArray[(clusterStart + clusterEnd) / 2]);

        // Sum the intensities in each cluster to get the collapsed intensity
        int summedIntensity = 0;
        for (int i = clusterStart; i <= clusterEnd; i++)
        {
            summedIntensity += intensityArray[i];
        }
        collapsedIntensities.Add(summedIntensity);

        clusterStart = clusterEnd + 1;
    }

    return (collapsedIndices.ToArray(), collapsedIntensities.ToArray());
}

#endregion
#region MzLevelOperations

/// <summary>
/// Converts parallel m/z and intensity lists into an <see cref="MzSpectrum"/>, applying
/// window-based peak filtering when <paramref name="filteringParams"/> requests it for the
/// given MSn level.
/// </summary>
/// <param name="mzs">m/z values; a double[] is used directly without copying.</param>
/// <param name="intensities">Intensities matching <paramref name="mzs"/> element by element.</param>
/// <param name="filteringParams">Filtering parameters (optional).</param>
/// <param name="msnLevel">1 for MS1 spectra, greater than 1 for fragment spectra.</param>
/// <exception cref="ArgumentException">The two lists differ in length.</exception>
internal static MzSpectrum CreateFilteredSpectrum(IList<double> mzs, IList<int> intensities,
    FilteringParams filteringParams = null, int msnLevel = 1)
{
    double[] mzsArray = mzs as double[] ?? mzs.ToArray();

    // Convert the intensities to an array
    double[] intensitiesArray = intensities.Select(intensity => (double)intensity).ToArray();

    if (mzsArray.Length != intensitiesArray.Length)
        throw new ArgumentException("Collapsed m/z and intensity arrays are not the same length.");

    bool trimmingRequested = filteringParams != null
        && ((filteringParams.ApplyTrimmingToMs1 && msnLevel == 1)
            || (filteringParams.ApplyTrimmingToMsMs && msnLevel > 1));

    // The filter needs the first and last m/z, so it is skipped for empty spectra.
    if (trimmingRequested && mzsArray.Length > 0)
    {
        WindowModeHelper.Run(ref intensitiesArray,
            ref mzsArray, filteringParams,
            mzsArray[0], mzsArray[^1]);
    }
    // TODO: This would be more performant if we kept the intensities as ints
    return new MzSpectrum(mzsArray, intensitiesArray, shouldCopy: false);
}

/// <summary>
/// Merges multiple index and intensity arrays into an MS1 spectrum.
/// This operation is somewhere between averaging and centroiding
/// In the TimsTofFileReader, MS1 scans are kept as index arrays and intensity arrays.
/// </summary>
/// <param name="indexArrays">List of index arrays.</param>
/// <param name="intensityArrays">List of intensity arrays.</param>
/// <param name="proxyFactory">Frame proxy factory used to convert indices to m/z values.</param>
/// <param name="filteringParams">Filtering parameters (optional).</param>
/// <returns>
/// A merged MS1 spectrum, or null when <paramref name="indexArrays"/> is null or empty or the
/// two lists differ in length. Callers must check for null.
/// </returns>
internal static MzSpectrum MergeArraysToMs1Spectrum(
    List<uint[]> indexArrays,
    List<int[]> intensityArrays,
    FrameProxyFactory proxyFactory,
    FilteringParams filteringParams = null)
{
    if (!indexArrays.IsNotNullOrEmpty() || intensityArrays == null || intensityArrays.Count != indexArrays.Count)
        return null;

    // Merge all index arrays and intensity arrays into a single array
    uint[] combinedIndices = indexArrays[0];
    int[] combinedIntensities = intensityArrays[0];
    for (int i = 1; i < indexArrays.Count; i++)
    {
        var mergeResults = TwoPointerMerge(combinedIndices, indexArrays[i], combinedIntensities, intensityArrays[i]);
        combinedIndices = mergeResults.Indices;
        combinedIntensities = mergeResults.Intensities;
    }

    // Collapse the combined arrays into a single array (centroiding, more or less).
    // The indices are converted to m/z first, so this uses the m/z (ppm tolerance) overload.
    var centroidedResults = CollapseArrays(proxyFactory.ConvertIndicesToMz(combinedIndices), combinedIntensities);

    return CreateFilteredSpectrum(
        centroidedResults.Mzs,
        centroidedResults.Intensities,
        filteringParams,
        msnLevel: 1);
}

/// <summary>
/// Merges multiple m/z and intensity arrays into an MS2 spectrum.
/// This operation is somewhere between averaging and centroiding.
/// In the TimsTofFileReader, MS2 component spectrum are stored as
/// double[] m/z arrays and int[] intensity arrays.
/// </summary>
/// <param name="mzArrays">List of m/z arrays.</param>
/// <param name="intensityArrays">List of intensity arrays.</param>
/// <param name="filteringParams">Filtering parameters (optional).</param>
/// <param name="ppmTolerance">PPM tolerance value (default is -1).</param>
/// <returns>
/// A merged MS2 spectrum, or null when <paramref name="mzArrays"/> is null or empty or the
/// two lists differ in length. Callers must check for null.
/// </returns>
internal static MzSpectrum MergeArraysToMs2Spectrum(
    List<double[]> mzArrays,
    List<int[]> intensityArrays,
    FilteringParams filteringParams = null,
    double ppmTolerance = -1)
{
    if (!mzArrays.IsNotNullOrEmpty() || intensityArrays == null || intensityArrays.Count != mzArrays.Count)
        return null;

    // Merge all m/z arrays and intensity arrays into a single array
    double[] combinedMzs = mzArrays[0];
    int[] combinedIntensities = intensityArrays[0];
    for (int i = 1; i < mzArrays.Count; i++)
    {
        var mergeResults = TwoPointerMerge(combinedMzs, mzArrays[i], combinedIntensities, intensityArrays[i]);
        combinedMzs = mergeResults.Mzs;
        combinedIntensities = mergeResults.Intensities;
    }

    // Collapse the combined arrays into a single array (centroiding, more or less)
    var centroidedResults = CollapseArrays(combinedMzs, combinedIntensities, ppmTolerance);

    return CreateFilteredSpectrum(
        centroidedResults.Mzs,
        centroidedResults.Intensities,
        filteringParams,
        msnLevel: 2);
}

/// <summary>
/// Merges two m/z and intensity arrays using a two-pointer technique.
/// Used when merging component spectra into one MS2 spectrum
/// </summary>
/// <param name="mzArray1">First m/z array.</param>
/// <param name="mzArray2">Second m/z array.</param>
/// <param name="intensityArray1">First intensity array.</param>
/// <param name="intensityArray2">Second intensity array.</param>
/// <returns>A tuple containing the merged m/z values and intensities.</returns>
public static (double[] Mzs, int[] Intensities) TwoPointerMerge(double[] mzArray1, double[] mzArray2, int[] intensityArray1, int[] intensityArray2)
{
    if (mzArray1 == null) throw new ArgumentNullException(nameof(mzArray1));
    if (mzArray2 == null) throw new ArgumentNullException(nameof(mzArray2));
    if (intensityArray1 == null) throw new ArgumentNullException(nameof(intensityArray1));
    if (intensityArray2 == null) throw new ArgumentNullException(nameof(intensityArray2));

    // Each intensity belongs to the m/z at the same position. A length mismatch would
    // otherwise surface as an IndexOutOfRangeException or as result arrays of unequal length.
    if (mzArray1.Length != intensityArray1.Length || mzArray2.Length != intensityArray2.Length)
        throw new ArgumentException("Each m/z array must have the same length as its intensity array.");

    double[] mergedMzs = new double[mzArray1.Length + mzArray2.Length];
    int[] mergedIntensities = new int[mergedMzs.Length];

    int p1 = 0;
    int p2 = 0;
    int write = 0;

    // Both inputs are expected to be sorted ascending. On equal m/z values the element of the
    // second array is written first.
    while (p1 < mzArray1.Length && p2 < mzArray2.Length)
    {
        if (mzArray1[p1] < mzArray2[p2])
        {
            mergedMzs[write] = mzArray1[p1];
            mergedIntensities[write] = intensityArray1[p1];
            p1++;
        }
        else
        {
            mergedMzs[write] = mzArray2[p2];
            mergedIntensities[write] = intensityArray2[p2];
            p2++;
        }
        write++;
    }

    // At most one input still has elements left; copy that tail in one go.
    int remaining1 = mzArray1.Length - p1;
    Array.Copy(mzArray1, p1, mergedMzs, write, remaining1);
    Array.Copy(intensityArray1, p1, mergedIntensities, write, remaining1);
    write += remaining1;

    int remaining2 = mzArray2.Length - p2;
    Array.Copy(mzArray2, p2, mergedMzs, write, remaining2);
    Array.Copy(intensityArray2, p2, mergedIntensities, write, remaining2);

    return (mergedMzs, mergedIntensities);
}

/// <summary>
/// Collapses the given mz and intensity arrays.
/// mz values within ppmTolerance (and their corresponding intensity values) are merged.
/// The idea here is to centroid a spectrum
/// </summary>
/// <param name="mzArray">The mz array to collapse.</param>
/// <param name="intensityArray">The intensity array to collapse.</param>
/// <param name="ppmTolerance">
/// PPM tolerance value (default is 10). Values below 1 are replaced by <see cref="DefaultPpmTolerance"/>.
/// </param>
/// <returns>A tuple containing the collapsed mz and intensities.</returns>
internal static (double[] Mzs, int[] Intensities) CollapseArrays(double[] mzArray, int[] intensityArray, double ppmTolerance = 10)
{
    // Define lists to store the collapsed m/z values and intensities
    List<double> collapsedMzs = new();
    List<int> collapsedIntensities = new();

    PpmTolerance tol = new(ppmTolerance < 1 ? DefaultPpmTolerance : ppmTolerance);

    int clusterStart = 0;
    while (clusterStart < mzArray.Length)
    {
        // Grow the cluster while the next m/z lies within tolerance of the LAST member added,
        // so a cluster can chain across more than one tolerance width.
        double upperBoundMz = tol.GetMaximumValue(mzArray[clusterStart]);
        int clusterEnd = clusterStart;
        while (clusterEnd + 1 < mzArray.Length && upperBoundMz >= mzArray[clusterEnd + 1])
        {
            clusterEnd++;
            upperBoundMz = tol.GetMaximumValue(mzArray[clusterEnd]);
        }

        if (clusterStart == clusterEnd)
        {
            collapsedIntensities.Add(intensityArray[clusterStart]);
            collapsedMzs.Add(mzArray[clusterStart]);
        }
        else
        {
            // Calculate the summed intensity in the cluster
            int summedIntensity = 0;
            for (int i = clusterStart; i <= clusterEnd; i++)
            {
                summedIntensity += intensityArray[i];
            }
            collapsedIntensities.Add(summedIntensity);

            double collapsedMz = 0;
            if (summedIntensity != 0)
            {
                // weighted averaging to determine the collapsed m/z of the cluster
                for (int i = clusterStart; i <= clusterEnd; i++)
                {
                    double weight = (double)intensityArray[i] / (double)summedIntensity;
                    collapsedMz += weight * mzArray[i];
                }
            }
            else
            {
                // A zero sum would turn every weight into 0/0 = NaN; fall back to the plain mean.
                for (int i = clusterStart; i <= clusterEnd; i++)
                {
                    collapsedMz += mzArray[i];
                }
                collapsedMz /= clusterEnd - clusterStart + 1;
            }
            collapsedMzs.Add(collapsedMz);
        }

        clusterStart = clusterEnd + 1;
    }

    return (collapsedMzs.ToArray(), collapsedIntensities.ToArray());
}

/// <summary>
/// Merges multiple index and intensity arrays into an m/z array.
/// Used when building the component spectra for an MS2 scan
/// </summary>
/// <param name="indexArrays">List of index arrays.</param>
/// <param name="intensityArrays">List of intensity arrays.</param>
/// <param name="proxyFactory">Frame proxy factory.</param>
/// <returns>A tuple containing the merged m/z values and intensities.</returns>
internal static (double[] Mzs, int[] Intensities) MergeArraysToMzArray(List<uint[]> indexArrays, List<int[]> intensityArrays, FrameProxyFactory proxyFactory)
{
    // Nothing to merge (or mismatched lists): return an empty spectrum rather than null.
    if (!indexArrays.IsNotNullOrEmpty() || intensityArrays == null || intensityArrays.Count != indexArrays.Count)
        return (Array.Empty<double>(), Array.Empty<int>());

    // Merge all index arrays and intensity arrays into a single array
    uint[] combinedIndices = indexArrays[0];
    int[] combinedIntensities = intensityArrays[0];
    for (int i = 1; i < indexArrays.Count; i++)
    {
        var mergeResults = TwoPointerMerge(combinedIndices, indexArrays[i], combinedIntensities, intensityArrays[i]);
        combinedIndices = mergeResults.Indices;
        combinedIntensities = mergeResults.Intensities;
    }

    // Convert the lookup-table indices to m/z values before centroiding.
    double[] mzsArray = proxyFactory.ConvertIndicesToMz(combinedIndices);

    // Collapse the combined arrays into a single array (centroiding, more or less)
    return CollapseArrays(mzsArray, combinedIntensities);
}

#endregion

}
}
\ No newline at end of file diff --git a/mzLib/Readers/timsTOF/baf2sql_c.dll b/mzLib/Readers/timsTOF/baf2sql_c.dll new file mode 100644 index 000000000..3fb55e098 Binary files /dev/null and b/mzLib/Readers/timsTOF/baf2sql_c.dll differ diff --git a/mzLib/Readers/timsTOF/baf2sql_c.lib b/mzLib/Readers/timsTOF/baf2sql_c.lib new file mode 100644 index 000000000..a843d6add Binary files /dev/null and b/mzLib/Readers/timsTOF/baf2sql_c.lib differ diff --git a/mzLib/Readers/timsdata.dll b/mzLib/Readers/timsdata.dll new file mode 100644 index 000000000..200944d6f Binary files /dev/null and b/mzLib/Readers/timsdata.dll differ diff --git a/mzLib/Readers/timsdata.lib b/mzLib/Readers/timsdata.lib new file mode 100644 index 000000000..a97952a98 Binary files /dev/null and b/mzLib/Readers/timsdata.lib differ diff --git a/mzLib/Test/DataFiles/timsTOF_snippet.d/analysis.tdf b/mzLib/Test/DataFiles/timsTOF_snippet.d/analysis.tdf new file mode 100644 index 000000000..082f62298 Binary files /dev/null and
b/mzLib/Test/DataFiles/timsTOF_snippet.d/analysis.tdf differ diff --git a/mzLib/Test/DataFiles/timsTOF_snippet.d/analysis.tdf_bin b/mzLib/Test/DataFiles/timsTOF_snippet.d/analysis.tdf_bin new file mode 100644 index 000000000..152a26cd1 Binary files /dev/null and b/mzLib/Test/DataFiles/timsTOF_snippet.d/analysis.tdf_bin differ diff --git a/mzLib/Test/FileReadingTests/TestBruker.cs b/mzLib/Test/FileReadingTests/TestBruker.cs index a7c450d8a..2d7d20710 100644 --- a/mzLib/Test/FileReadingTests/TestBruker.cs +++ b/mzLib/Test/FileReadingTests/TestBruker.cs @@ -1,4 +1,5 @@ -using System.IO; +using System.Diagnostics; +using System.IO; using MassSpectrometry; using NUnit; using NUnit.Framework; @@ -28,9 +29,8 @@ public void TestConstructors() public void TestFileDoesntExist() { string fakePath = "fakePath.d"; - var reader = MsDataFileReader.GetDataFile(fakePath); Assert.Throws(() => - reader.InitiateDynamicConnection()); + MsDataFileReader.GetDataFile(fakePath)); } [Test] @@ -140,15 +140,5 @@ public void TestPeakFiltering() var scan = MsDataFileReader.GetDataFile(_centroidPath).LoadAllStaticData(filteringParams).Scans[0]; Assert.That(scan.MassSpectrum.XArray.Length == 1); } - - [Test] - public void TestFileNotFoundExceptionThrown() - { - MsDataFile brukerReader = MsDataFileReader.GetDataFile("notrealfile.d"); - Assert.Throws(delegate - { - brukerReader.LoadAllStaticData(); - }); - } } } diff --git a/mzLib/Test/FileReadingTests/TestSupportedFileExtensions.cs b/mzLib/Test/FileReadingTests/TestSupportedFileExtensions.cs index 3c22e327b..868a6dc3a 100644 --- a/mzLib/Test/FileReadingTests/TestSupportedFileExtensions.cs +++ b/mzLib/Test/FileReadingTests/TestSupportedFileExtensions.cs @@ -16,7 +16,8 @@ internal class TestSupportedFileExtensions [TestCase("DataFiles/sliced_ethcd.raw", SupportedFileType.ThermoRaw)] [TestCase("DataFiles/SmallCalibratibleYeast.mzml", SupportedFileType.MzML)] [TestCase("DataFiles/tester.mgf", SupportedFileType.Mgf)] - [TestCase("DataFiles/tester.d", 
SupportedFileType.BrukerD)]
+        [TestCase("DataFiles/centroid_1x_MS1_4x_autoMS2.d", SupportedFileType.BrukerD)]
+        [TestCase("DataFiles/timsTOF_snippet.d", SupportedFileType.BrukerTimsTof)]
         [TestCase(@"FileReadingTests\ExternalFileTypes\Ms2Feature_FlashDeconvjurkat_td_rep1_fract2_ms2.feature", SupportedFileType.Ms2Feature)]
         [TestCase(@"FileReadingTests\ExternalFileTypes\TopFDMs1Feature_jurkat_td_rep1_fract2_ms1.feature", SupportedFileType.Ms1Feature)]
         [TestCase(@"FileReadingTests\ExternalFileTypes\TopFDmzrt_jurkat_td_rep1_fract2_frac.mzrt.csv", SupportedFileType.Mzrt_TopFd)]
diff --git a/mzLib/Test/FileReadingTests/TestTimsTofFileReader.cs b/mzLib/Test/FileReadingTests/TestTimsTofFileReader.cs
new file mode 100644
index 000000000..d1bd523ca
--- /dev/null
+++ b/mzLib/Test/FileReadingTests/TestTimsTofFileReader.cs
@@ -0,0 +1,315 @@
+using MassSpectrometry;
+using MathNet.Numerics;
+using NUnit.Framework;
+using NUnit.Framework.Legacy;
+using Readers;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using Assert = NUnit.Framework.Legacy.ClassicAssert;
+
+namespace Test.FileReadingTests
+{
+    // Tests for TimsTofFileReader against the timsTOF_snippet.d data set, plus tests for the
+    // array helpers on TofSpectraMerger (TwoPointerMerge, CollapseArrays, MergeArraysToMs2Spectrum).
+    [TestFixture]
+    [System.Diagnostics.CodeAnalysis.ExcludeFromCodeCoverage]
+    public class TestTimsTofFileReader
+    {
+
+        // Path of the test data set, resolved relative to the NUnit test directory.
+        public string _testDataPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataFiles", "timsTOF_snippet.d");
+        // Reader and reference scans are populated once in SetUp and shared by the tests below.
+        public TimsTofFileReader _testReader;
+        public TimsDataScan _testMs2Scan;
+        public TimsDataScan _testMs1Scan;
+        // The same filtering parameters are used for the static load and for every dynamic read,
+        // so that spectra obtained both ways can be compared.
+        public FilteringParams _filteringParams = new FilteringParams(numberOfPeaksToKeepPerWindow:200, minimumAllowedIntensityRatioToBasePeak: 0.01);
+
+        // Loads the whole file once, then picks two reference scans: the first scan with
+        // MsnOrder > 1 after skipping 1000 scans, and the first MsnOrder == 1 scan after skipping 500.
+        [OneTimeSetUp]
+        public void SetUp()
+        {
+            _testReader = new TimsTofFileReader(_testDataPath);
+            _testReader.LoadAllStaticData(filteringParams: _filteringParams, maxThreads: 10);
+            _testMs2Scan = (TimsDataScan)_testReader.Scans.Skip(1000).First(scan => scan.MsnOrder > 1);
+            _testMs1Scan = (TimsDataScan)_testReader.Scans.Skip(500).First(scan => scan.MsnOrder == 1);
+        }
+
+        // Re-reads the reference MS2 scan through a dynamic connection (looked up by precursor id and
+        // frame id) and checks that it matches the statically loaded scan property by property.
+        // NOTE(review): several failure messages name a different quantity than the property compared
+        // (e.g. "ScanMedian" for OneOverK0, "CollisionEnergy" for HcdEnergy), and PrecursorId is
+        // asserted twice. No call that closes the dynamic connection is visible in this test — confirm
+        // whether the reader needs one.
+        [Test]
+        public void TestGetPasefScanFromDynamicConnectionUsingFrameId()
+        {
+            var dynamicReader = new TimsTofFileReader(_testDataPath);
+            dynamicReader.InitiateDynamicConnection();
+            var dynamicScan = dynamicReader
+                .GetScanFromPrecursorAndFrameIdFromDynamicConnection((int)_testMs2Scan.PrecursorId, (int)_testMs2Scan.FrameId, _filteringParams);
+            Assert.IsNotNull(dynamicScan);
+
+            Assert.That(dynamicScan.PrecursorId, Is.EqualTo(_testMs2Scan.PrecursorId), "PrecursorId values are not equal.");
+            Assert.That(dynamicScan.ScanNumberStart, Is.EqualTo(_testMs2Scan.ScanNumberStart), "ScanStart values are not equal.");
+            Assert.That(dynamicScan.ScanNumberEnd, Is.EqualTo(_testMs2Scan.ScanNumberEnd), "ScanEnd values are not equal.");
+            Assert.That(dynamicScan.OneOverK0, Is.EqualTo(_testMs2Scan.OneOverK0), "ScanMedian values are not equal.");
+            Assert.That(dynamicScan.IsolationMz, Is.EqualTo(_testMs2Scan.IsolationMz), "IsolationMz values are not equal.");
+            Assert.That(dynamicScan.IsolationWidth, Is.EqualTo(_testMs2Scan.IsolationWidth), "IsolationWidth values are not equal.");
+            Assert.That(dynamicScan.HcdEnergy, Is.EqualTo(_testMs2Scan.HcdEnergy), "CollisionEnergy values are not equal.");
+            Assert.That(dynamicScan.SelectedIonMZ, Is.EqualTo(_testMs2Scan.SelectedIonMZ), "MostAbundantPrecursorMz values are not equal.");
+            Assert.That(dynamicScan.SelectedIonMonoisotopicGuessMz, Is.EqualTo(_testMs2Scan.SelectedIonMonoisotopicGuessMz), "PrecursorMonoisotopicMz values are not equal.");
+            Assert.That(dynamicScan.PrecursorId, Is.EqualTo(_testMs2Scan.PrecursorId), "PrecursorID values are not equal.");
+            Assert.That(dynamicScan.MassSpectrum, Is.EqualTo(_testMs2Scan.MassSpectrum), "Mass spectra are not equal");
+        }
+
+        // Same comparison as the test above, but for the reference MS1 scan.
+        [Test]
+        public void TestGetMs1ScanFromDynamicConnectionUsingFrameId()
+        {
+            var dynamicReader = new TimsTofFileReader(_testDataPath);
+            dynamicReader.InitiateDynamicConnection();
+            var dynamicScan = dynamicReader
+                .GetScanFromPrecursorAndFrameIdFromDynamicConnection((int)_testMs1Scan.PrecursorId, (int)_testMs1Scan.FrameId, _filteringParams);
+            Assert.IsNotNull(dynamicScan);
+
+            Assert.That(dynamicScan.PrecursorId, Is.EqualTo(_testMs1Scan.PrecursorId), "PrecursorId values are not equal.");
+            Assert.That(dynamicScan.ScanNumberStart, Is.EqualTo(_testMs1Scan.ScanNumberStart), "ScanStart values are not equal.");
+            Assert.That(dynamicScan.ScanNumberEnd, Is.EqualTo(_testMs1Scan.ScanNumberEnd), "ScanEnd values are not equal.");
+            Assert.That(dynamicScan.OneOverK0, Is.EqualTo(_testMs1Scan.OneOverK0), "ScanMedian values are not equal.");
+            Assert.That(dynamicScan.IsolationMz, Is.EqualTo(_testMs1Scan.IsolationMz), "IsolationMz values are not equal.");
+            Assert.That(dynamicScan.IsolationWidth, Is.EqualTo(_testMs1Scan.IsolationWidth), "IsolationWidth values are not equal.");
+            Assert.That(dynamicScan.HcdEnergy, Is.EqualTo(_testMs1Scan.HcdEnergy), "CollisionEnergy values are not equal.");
+            Assert.That(dynamicScan.SelectedIonMZ, Is.EqualTo(_testMs1Scan.SelectedIonMZ), "MostAbundantPrecursorMz values are not equal.");
+            Assert.That(dynamicScan.SelectedIonMonoisotopicGuessMz, Is.EqualTo(_testMs1Scan.SelectedIonMonoisotopicGuessMz), "PrecursorMonoisotopicMz values are not equal.");
+            Assert.That(dynamicScan.PrecursorId, Is.EqualTo(_testMs1Scan.PrecursorId), "PrecursorID values are not equal.");
+            Assert.That(dynamicScan.MassSpectrum, Is.EqualTo(_testMs1Scan.MassSpectrum), "Mass spectra are not equal");
+        }
+
+        // Looks both reference scans up by their one-based scan number through a dynamic connection.
+        // The returned scan is cast with `as`, so the IsNotNull check also covers a wrong runtime type.
+        [Test]
+        public void TestGetScanFromDynamicConnectionUsingOneBasedScanNumber()
+        {
+            var dynamicReader = new TimsTofFileReader(_testDataPath);
+            dynamicReader.InitiateDynamicConnection();
+            var scanBeforeCast = dynamicReader.GetOneBasedScanFromDynamicConnection(_testMs1Scan.OneBasedScanNumber, _filteringParams);
+            var dynamicScan = scanBeforeCast as TimsDataScan;
+            Assert.IsNotNull(dynamicScan);
+
+            Assert.That(dynamicScan.PrecursorId, Is.EqualTo(_testMs1Scan.PrecursorId), "PrecursorId values are not equal.");
+            Assert.That(dynamicScan.ScanNumberStart, Is.EqualTo(_testMs1Scan.ScanNumberStart), "ScanStart values are not equal.");
+            Assert.That(dynamicScan.ScanNumberEnd, Is.EqualTo(_testMs1Scan.ScanNumberEnd), "ScanEnd values are not equal.");
+            Assert.That(dynamicScan.OneOverK0, Is.EqualTo(_testMs1Scan.OneOverK0), "ScanMedian values are not equal.");
+            Assert.That(dynamicScan.IsolationMz, Is.EqualTo(_testMs1Scan.IsolationMz), "IsolationMz values are not equal.");
+            Assert.That(dynamicScan.IsolationWidth, Is.EqualTo(_testMs1Scan.IsolationWidth), "IsolationWidth values are not equal.");
+            Assert.That(dynamicScan.HcdEnergy, Is.EqualTo(_testMs1Scan.HcdEnergy), "CollisionEnergy values are not equal.");
+            Assert.That(dynamicScan.SelectedIonMZ, Is.EqualTo(_testMs1Scan.SelectedIonMZ), "MostAbundantPrecursorMz values are not equal.");
+            Assert.That(dynamicScan.SelectedIonMonoisotopicGuessMz, Is.EqualTo(_testMs1Scan.SelectedIonMonoisotopicGuessMz), "PrecursorMonoisotopicMz values are not equal.");
+            Assert.That(dynamicScan.PrecursorId, Is.EqualTo(_testMs1Scan.PrecursorId), "PrecursorID values are not equal.");
+            Assert.That(dynamicScan.MassSpectrum, Is.EqualTo(_testMs1Scan.MassSpectrum), "Mass spectra are not equal");
+            Assert.That(dynamicScan.OneBasedScanNumber, Is.EqualTo(_testMs1Scan.OneBasedScanNumber));
+
+
+            // Second half: the same lookup and comparison for the reference MS2 scan.
+            scanBeforeCast = dynamicReader.GetOneBasedScanFromDynamicConnection(_testMs2Scan.OneBasedScanNumber, _filteringParams);
+            dynamicScan = scanBeforeCast as TimsDataScan;
+            Assert.IsNotNull(dynamicScan);
+
+            Assert.That(dynamicScan.PrecursorId, Is.EqualTo(_testMs2Scan.PrecursorId), "PrecursorId values are not equal.");
+            Assert.That(dynamicScan.ScanNumberStart, Is.EqualTo(_testMs2Scan.ScanNumberStart), "ScanStart values are not equal.");
+            Assert.That(dynamicScan.ScanNumberEnd, Is.EqualTo(_testMs2Scan.ScanNumberEnd), "ScanEnd values are not equal.");
+            Assert.That(dynamicScan.OneOverK0, Is.EqualTo(_testMs2Scan.OneOverK0), "ScanMedian values are not equal.");
+            Assert.That(dynamicScan.IsolationMz, Is.EqualTo(_testMs2Scan.IsolationMz), "IsolationMz values are not equal.");
+            Assert.That(dynamicScan.IsolationWidth, Is.EqualTo(_testMs2Scan.IsolationWidth), "IsolationWidth values are not equal.");
+            Assert.That(dynamicScan.HcdEnergy, Is.EqualTo(_testMs2Scan.HcdEnergy), "CollisionEnergy values are not equal.");
+            Assert.That(dynamicScan.SelectedIonMZ, Is.EqualTo(_testMs2Scan.SelectedIonMZ), "MostAbundantPrecursorMz values are not equal.");
+            Assert.That(dynamicScan.SelectedIonMonoisotopicGuessMz, Is.EqualTo(_testMs2Scan.SelectedIonMonoisotopicGuessMz), "PrecursorMonoisotopicMz values are not equal.");
+            Assert.That(dynamicScan.PrecursorId, Is.EqualTo(_testMs2Scan.PrecursorId), "PrecursorID values are not equal.");
+            Assert.That(dynamicScan.MassSpectrum, Is.EqualTo(_testMs2Scan.MassSpectrum), "Mass spectra are not equal");
+            Assert.That(dynamicScan.OneBasedScanNumber, Is.EqualTo(_testMs2Scan.OneBasedScanNumber));
+        }
+
+        // Merges two sorted, interleaving index arrays (each intensity equals its index) and expects
+        // one sorted sequence for both indices and intensities; the second case uses arrays of
+        // different lengths, where the longer one continues past the end of the shorter one.
+        [Test]
+        public void TestTwoPointerMerge()
+        {
+            uint[] indices1 = new uint[] { 1, 3, 5, 7, 9, 11 };
+            uint[] indices2 = new uint[] { 0, 2, 4, 6, 8, 10 };
+
+            int[] intensities1 = new int[] { 1, 3, 5, 7, 9, 11 };
+            int[] intensities2 = new int[] { 0, 2, 4, 6, 8, 10 };
+
+            int[] intendedOutput = new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+
+            var mergerOutput = TofSpectraMerger.TwoPointerMerge(indices1, indices2, intensities1, intensities2);
+
+            Assert.That(mergerOutput.Intensities, Is.EqualTo(intendedOutput));
+            Assert.That(mergerOutput.Indices.Select(i => (int)i).ToArray(), Is.EqualTo(intendedOutput));
+
+            indices2 = new uint[] { 0, 2, 4, 6, 8, 10, 12, 13, 14, 15, 16 };
+
+            intensities2 = new int[] { 0, 2, 4, 6, 8, 10, 12, 13, 14, 15, 16 };
+
+            intendedOutput = new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+
+            mergerOutput = TofSpectraMerger.TwoPointerMerge(indices1, indices2, intensities1, intensities2);
+
+            Assert.That(mergerOutput.Intensities, Is.EqualTo(intendedOutput));
+            Assert.That(mergerOutput.Indices.Select(i => (int)i).ToArray(), Is.EqualTo(intendedOutput));
+        }
+
+        // Checks CollapseArrays on index/intensity input. Judging by the expected values below, nearby
+        // indices are grouped (runs of up to three consecutive indices here), each group reports one
+        // index and the summed intensity of its members.
+        // NOTE(review): the List declarations below carry no element type as shown here — presumably
+        // lost in extraction; confirm against the original file.
+        [Test]
+        public void TestCollapse()
+        {
+            uint[] indices = new uint[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+            int[] intensities = new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+
+            List intendedIdx = new List { 1, 4, 7, 10 };
+            List intendedIntensities = new List { 3, 12, 21, 30 };
+
+            var collapsedOutput = TofSpectraMerger.CollapseArrays(indices, intensities);
+
+            Assert.That(collapsedOutput.Indices, Is.EqualTo(intendedIdx));
+            Assert.That(collapsedOutput.Intensities, Is.EqualTo(intendedIntensities));
+
+
+            // Input with gaps (7, 9, 11 are not consecutive).
+            indices = new uint[] { 0, 1, 2, 3, 4, 5, 6, 7, 9, 11 };
+            intensities = new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 9, 11 };
+
+            intendedIdx = new List { 1, 4, 6, 9 };
+            intendedIntensities = new List { 3, 12, 13, 20 };
+
+            collapsedOutput = TofSpectraMerger.CollapseArrays(indices, intensities);
+
+            Assert.That(collapsedOutput.Indices, Is.EqualTo(intendedIdx));
+            Assert.That(collapsedOutput.Intensities, Is.EqualTo(intendedIntensities));
+
+            // Input with repeated indices (11 twice, 1000 three times) and isolated values.
+            indices = new uint[] { 0, 1, 2, 3, 4, 5, 6, 7, 9, 11, 11, 18, 523, 1000, 1000, 1000 };
+            intensities = new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 9, 11, 11, 18, 523, 1000, 1000, 1000 };
+
+            intendedIdx = new List { 1, 4, 6, 11, 18, 523, 1000 };
+            intendedIntensities = new List { 3, 12, 13, 31, 18, 523, 3000 };
+
+            collapsedOutput = TofSpectraMerger.CollapseArrays(indices, intensities);
+
+            Assert.That(collapsedOutput.Indices, Is.EqualTo(intendedIdx));
+            Assert.That(collapsedOutput.Intensities, Is.EqualTo(intendedIntensities));
+        }
+
+        // The generic factory must return a non-null reader for the timsTOF test path.
+        [Test]
+        public void TestConstructor()
+        {
+            var reader = MsDataFileReader.GetDataFile(_testDataPath);
+            Assert.That(reader, !Is.Null);
+        }
+
+        // A path that does not exist must make both the factory call and LoadAllStaticData throw.
+        // NOTE(review): as shown here Assert.Throws has no exception type argument — presumably lost
+        // in extraction; confirm the expected exception type against the original file.
+        [Test]
+        public void TestFileDoesntExist()
+        {
+            string fakePath = "fakePath.d";
+            Assert.Throws(() =>
+                MsDataFileReader.GetDataFile(fakePath));
+
+            TimsTofFileReader reader = new TimsTofFileReader(fakePath);
+
+            Assert.Throws(() =>
+                reader.LoadAllStaticData());
+        }
+
+
+        // Spot-checks the number of loaded spectra and the metadata of the reference MS2 scan.
+        // NOTE(review): the `Assert.That(a == b)` form only reports "expected true" on failure;
+        // TotalIonCurrent is also compared for exact equality.
+        [Test]
+        public void TestLoadAllStaticData()
+        {
+            Assert.That(_testReader.NumSpectra, Is.EqualTo(4096));
+
+            Assert.That(_testMs2Scan.Polarity == Polarity.Positive);
+            Assert.That(_testMs2Scan.DissociationType == DissociationType.CID);
+            Assert.That(_testMs2Scan.TotalIonCurrent == 25130);
+            Assert.That(_testMs2Scan.NativeId == "frames=64-64;scans=410-435");
+            Assert.That(_testMs2Scan.SelectedIonMZ, Is.EqualTo(739.3668).Within(0.001));
+            Assert.That(_testMs2Scan.MsnOrder == 2);
+            Assert.That(_testMs2Scan.IsCentroid);
+            Assert.That(_testMs2Scan.ScanNumberStart == 410);
+            Assert.That(_testMs2Scan.OneOverK0, Is.EqualTo(1.0424).Within(0.0001));
+        }
+
+        // Fetches the scan that the reference MS2 scan names as its precursor scan and checks that
+        // both carry the same precursor id and scan range.
+        [Test]
+        public void TestOneBasedPrecursor()
+        {
+            TimsDataScan ms1Scan = (TimsDataScan)_testReader.GetOneBasedScan((int)_testMs2Scan.OneBasedPrecursorScanNumber);
+
+            Assert.AreEqual(_testMs2Scan.PrecursorId, ms1Scan.PrecursorId);
+            // Check that the child and parent scan are both looking at the same timsScans (i.e., the same region in the ion-mobility dimension)
+            Assert.AreEqual(_testMs2Scan.ScanNumberStart, ms1Scan.ScanNumberStart);
+            Assert.AreEqual(_testMs2Scan.ScanNumberEnd, ms1Scan.ScanNumberEnd);
+            Assert.AreEqual(_testMs2Scan.OneOverK0, ms1Scan.OneOverK0);
+
+        }
+
+        // Two interleaving m/z arrays with no close peaks: the merged spectrum keeps all ten peaks in order.
+        // NOTE(review): ClassicAssert.AreEqual / CollectionAssert.AreEqual take (expected, actual); in
+        // the four TestSpectraMerger tests the arguments appear in the opposite order, which only
+        // affects the failure message — confirm.
+        [Test]
+        public void TestSpectraMerger()
+        {
+            double[] mz1 = new double[] { 1, 3, 5, 7, 9 };
+            double[] mz2 = new double[] { 2, 4, 6, 8, 10 };
+
+            int[] intensity1 = new int[] { 1, 3, 5, 7, 9 };
+            int[] intensity2 = new int[] { 2, 4, 6, 8, 10 };
+
+            MzSpectrum outSpectrum = TofSpectraMerger.MergeArraysToMs2Spectrum(
+                new List { mz1, mz2 },
+                new List { intensity1, intensity2 });
+
+            Assert.AreEqual(outSpectrum.Size, 10);
+            CollectionAssert.AreEqual(outSpectrum.XArray, new double[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 });
+        }
+
+        // Both inputs contain m/z 10 (intensity 10 each): the merged spectrum has a single peak at 10
+        // with the summed intensity 20.
+        [Test]
+        public void TestSpectraMerger2()
+        {
+            double[] mz1 = new double[] { 1, 3, 5, 7, 9, 10 };
+            double[] mz2 = new double[] { 2, 4, 6, 8, 10 };
+
+            int[] intensity1 = new int[] { 1, 3, 5, 7, 9, 10 };
+            int[] intensity2 = new int[] { 2, 4, 6, 8, 10 };
+
+            MzSpectrum outSpectrum = TofSpectraMerger.MergeArraysToMs2Spectrum(
+                new List { mz1, mz2 },
+                new List { intensity1, intensity2 });
+
+            Assert.AreEqual(outSpectrum.Size, 10);
+            CollectionAssert.AreEqual(outSpectrum.XArray, new double[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 });
+            CollectionAssert.AreEqual(outSpectrum.YArray, new double[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 20 });
+        }
+
+        // Three interleaving inputs of different lengths merge into one ordered ten-peak spectrum.
+        [Test]
+        public void TestSpectraMerger3()
+        {
+            double[] mz1 = new double[] { 1, 4, 7, 10 };
+            double[] mz2 = new double[] { 2, 5, 8 };
+            double[] mz3 = new double[] { 3, 6, 9 };
+
+            int[] intensity1 = new int[] { 1, 4, 7, 10 };
+            int[] intensity2 = new int[] { 2, 5, 8 };
+            int[] intensity3 = new int[] { 3, 6, 9 };
+
+            MzSpectrum outSpectrum = TofSpectraMerger.MergeArraysToMs2Spectrum(
+                new List { mz1, mz2, mz3 },
+                new List { intensity1, intensity2, intensity3 });
+
+            Assert.AreEqual(outSpectrum.Size, 10);
+            CollectionAssert.AreEqual(outSpectrum.XArray, new double[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 });
+            CollectionAssert.AreEqual(outSpectrum.YArray, new double[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 });
+        }
+
+        // Test that weighted averaging works when two peaks are close together
+        [Test]
+        public void TestSpectraMerger4()
+        {
+            double[] mz1 = new double[] { 1, 3, 5, 7, 9 };
+            double[] mz2 = new double[] { 2, 4, 6, 8, 10 };
+            double[] mz3 = new double[] { 1 + 1e-6, 2 + 1e-6, 11 + 1e-6 };
+
+            int[] intensity1 = new int[] { 1, 3, 5, 7, 9 };
+            int[] intensity2 = new int[] { 2, 4, 6, 8, 10 };
+            int[] intensity3 = new int[] { 10, 10, 11 };
+
+            MzSpectrum outSpectrum = TofSpectraMerger.MergeArraysToMs2Spectrum(
+                new List { mz1, mz2, mz3 },
+                new List { intensity1, intensity2, intensity3 });
+
+            Assert.AreEqual(outSpectrum.Size, 11);
+            // Peaks (mz = 1, intensity = 1) and (mz = 1+1e-6, intensity = 10) are close together, so they should be averaged
+            // Same thing for (mz = 2, intensity = 2) and (mz = 2+1e-6, intensity = 10)
+            // Intensity-weighted means: (1*1 + 10*(1+1e-6))/11 and (2*2 + 10*(2+1e-6))/12, rounded to 7 decimals.
+            CollectionAssert.AreEqual(outSpectrum.XArray.Select(mz => mz.Round(7)).ToArray(),
+                new double[] { 1 + 9e-7, 2 + 8e-7, 3, 4, 5, 6, 7, 8, 9, 10, 11 + 1e-6 });
+            CollectionAssert.AreEqual(outSpectrum.YArray, new double[] { 11, 12, 3, 4, 5, 6, 7, 8, 9, 10, 11 });
+        }
+    }
+}
diff --git a/mzLib/Test/Test.csproj b/mzLib/Test/Test.csproj
index 223523ae4..5b4d5ca18 100644
--- a/mzLib/Test/Test.csproj
+++ b/mzLib/Test/Test.csproj
@@ -312,6 +312,12 @@
 PreserveNewest
+
+ PreserveNewest
+
+
+ PreserveNewest
+
 Always
diff --git a/mzLib/Test/TestMzLibUtil.cs b/mzLib/Test/TestMzLibUtil.cs
index e2864c1e6..73fbdda41 100644
--- a/mzLib/Test/TestMzLibUtil.cs
+++ b/mzLib/Test/TestMzLibUtil.cs
@@ -1,6 +1,7 @@
 using NUnit.Framework;
 using Assert = NUnit.Framework.Legacy.ClassicAssert;
 using MzLibUtil;
+using Readers;
 
 namespace Test
 {
@@ -32,5 +33,32 @@ public static void TestPeriodTolerantFilenameWithoutExtension(string filenameAnd
             Assert.AreEqual(expectedResult, result);
             Assert.AreEqual(expectedResult, extensionResult);
         }
+
+        // Checks that the integers 0, 2, 8, 9 and 10 convert to the named TimsTofMsMsType members,
+        // that 0 converts to TimsTofAcquisitionMode.MS, and that 1, 7 and 11 are rejected.
+        [Test]
+        public static void TestToEnum()
+        {
+            Assert.IsTrue(0.ToEnum(out var result));
+            Assert.AreEqual(TimsTofMsMsType.MS, result);
+
+            Assert.IsTrue(2.ToEnum(out result));
+            Assert.AreEqual(TimsTofMsMsType.MSMSFragment, result);
+
+            Assert.IsTrue(8.ToEnum(out result));
+            Assert.AreEqual(TimsTofMsMsType.PASEF, result);
+
+            Assert.IsTrue(9.ToEnum(out result));
+            Assert.AreEqual(TimsTofMsMsType.DIA, result);
+
+            Assert.IsTrue(10.ToEnum(out result));
+            Assert.AreEqual(TimsTofMsMsType.PRM, result);
+
+            Assert.IsTrue(0.ToEnum(out var result2));
+            Assert.AreEqual(TimsTofAcquisitionMode.MS, result2);
+
+            Assert.IsFalse(1.ToEnum(out result));
+            Assert.IsFalse(11.ToEnum(out result));
+            Assert.IsFalse(7.ToEnum(out result));
+
+        }
     }
 }
diff --git a/mzLib/mzLib.nuspec b/mzLib/mzLib.nuspec
index b0b4c3045..6829c9050 100644
--- a/mzLib/mzLib.nuspec
+++ b/mzLib/mzLib.nuspec
@@ -23,6 +23,7 @@
+
@@ -37,6 +38,7 @@
+
@@ -65,6 +67,10 @@ + + + + @@ -91,6 +97,10 @@ + + + +