Skip to content

Commit e79b58d

Browse files
authored
Merge pull request #29 from compomics/mzidentmlrefactoring
Mzmlrefactoring
2 parents cbe9d8d + 913a8a9 commit e79b58d

28 files changed

Lines changed: 3696 additions & 5211 deletions

MainClass.cs

Lines changed: 11 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,10 @@ public static void Main(string[] args)
1616
string rawFilePath = null;
1717
string outputDirectory = null;
1818
string outputFormatString = null;
19-
var outputFormat = OutputFormat.NON;
19+
var outputFormat = OutputFormat.NONE;
2020
var gzip = false;
2121
string outputMetadataString = null;
22-
var outputMetadataFormat = MetadataFormat.NON;
23-
var includeProfileData = false;
24-
string collection = null;
25-
string msRun = null;
26-
string subFolder = null;
22+
var outputMetadataFormat = MetadataFormat.NONE;
2723
string s3url = null;
2824
string s3AccessKeyId = null;
2925
string s3SecretAccessKey = null;
@@ -48,7 +44,8 @@ public static void Main(string[] args)
4844
v => outputDirectory = v
4945
},
5046
{
51-
"f=|format=", "The output format for the spectra (0 for MGF, 1 for MzMl, 2 for Parquet)",
47+
"f=|format=",
48+
"The output format for the spectra (0 for MGF, 1 for mzML, 2 for indexed mzML, 3 for Parquet, 4 for MGF with profile data excluded)",
5249
v => outputFormatString = v
5350
},
5451
{
@@ -59,25 +56,6 @@ public static void Main(string[] args)
5956
"g|gzip", "GZip the output file if this flag is specified (without value).",
6057
v => gzip = v != null
6158
},
62-
{
63-
"p|profiledata",
64-
"Exclude MS2 profile data if this flag is specified (without value). Only for MGF format!",
65-
v => includeProfileData = v != null
66-
},
67-
{
68-
"c:|collection", "The optional collection identifier (PXD identifier for example).",
69-
v => collection = v
70-
},
71-
{
72-
"r:|run:",
73-
"The optional mass spectrometry run name used in the spectrum title. The RAW file name will be used if not specified.",
74-
v => msRun = v
75-
},
76-
{
77-
"s:|subfolder:",
78-
"Optional, to disambiguate instances where the same collection has 2 or more MS runs with the same name.",
79-
v => subFolder = v
80-
},
8159
{
8260
"u:|s3_url:",
8361
"Optional property to write directly the data into S3 Storage.",
@@ -140,18 +118,20 @@ public static void Main(string[] args)
140118
}
141119
catch (FormatException e)
142120
{
143-
throw new OptionException("unknown output format value (0 for MGF, 1 for MzMl, 2 for Parquet)",
121+
throw new OptionException(
122+
"unknown output format value (0 for MGF, 1 for mzML, 2 for indexed mzML, 3 for Parquet, 4 for MGF with profile data excluded)",
144123
"-f, --format");
145124
}
146125

147126
if (Enum.IsDefined(typeof(OutputFormat), outPutFormatInt) &&
148-
((OutputFormat) outPutFormatInt) != OutputFormat.NON)
127+
((OutputFormat) outPutFormatInt) != OutputFormat.NONE)
149128
{
150129
outputFormat = (OutputFormat) outPutFormatInt;
151130
}
152131
else
153132
{
154-
throw new OptionException("unknown output format value (0 for MGF, 1 for MzMl, 2 for Parquet)",
133+
throw new OptionException(
134+
"unknown output format value (0 for MGF, 1 for mzML, 2 for indexed mzML, 3 for Parquet, 4 for MGF with profile data excluded)",
155135
"-f, --format");
156136
}
157137
}
@@ -170,7 +150,7 @@ public static void Main(string[] args)
170150
}
171151

172152
if (Enum.IsDefined(typeof(MetadataFormat), metadataInt) &&
173-
((MetadataFormat) metadataInt) != MetadataFormat.NON)
153+
((MetadataFormat) metadataInt) != MetadataFormat.NONE)
174154
{
175155
outputMetadataFormat = (MetadataFormat) metadataInt;
176156
}
@@ -211,8 +191,7 @@ public static void Main(string[] args)
211191
}
212192

213193
var parseInput = new ParseInput(rawFilePath, outputDirectory, outputFormat, gzip, outputMetadataFormat,
214-
includeProfileData, collection, msRun, subFolder, Log, s3url, s3AccessKeyId, s3SecretAccessKey,
215-
bucketName, ignoreInstrumentErrors);
194+
s3url, s3AccessKeyId, s3SecretAccessKey, bucketName, ignoreInstrumentErrors);
216195
RawFileParser.Parse(parseInput);
217196
}
218197
catch (Exception ex)

OutputFormat.cs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,19 @@
22
{
33
public enum OutputFormat
44
{
5-
Mgf, Mzml , Parquet, NON
5+
Mgf,
6+
Mzml,
7+
IndexMzML,
8+
Parquet,
9+
MGFNoProfileData,
10+
NONE
611
}
712

813
public enum MetadataFormat
914
{
10-
JSON, TXT , PARQUET, NON
15+
JSON,
16+
TXT,
17+
PARQUET,
18+
NONE
1119
}
1220
}

ParseInput.cs

Lines changed: 16 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@ namespace ThermoRawFileParser
77
{
88
public class ParseInput
99
{
10-
private string bucketName;
11-
private bool ignoreInstrumentErrors;
12-
1310
/// <summary>
1411
/// The RAW file path.
1512
/// </summary>
@@ -35,27 +32,6 @@ public class ParseInput
3532
/// </summary>
3633
public MetadataFormat OutputMetadata { get; }
3734

38-
/// <summary>
39-
/// Exclude the MS2 spectra in profile mode.
40-
/// </summary>
41-
public bool ExcludeProfileData { get; }
42-
43-
/// <summary>
44-
/// The data collection identifier.
45-
/// </summary>
46-
public string Collection { get; }
47-
48-
/// <summary>
49-
/// Mass spectrometry run name.
50-
/// </summary>
51-
public string MsRun { get; }
52-
53-
/// <summary>
54-
/// This property is used disambiguate instances where the same collection
55-
/// has two or more msRuns with the same name.
56-
/// </summary>
57-
public string SubFolder { get; }
58-
5935
/// <summary>
6036
/// The raw file name.
6137
/// </summary>
@@ -66,16 +42,17 @@ public class ParseInput
6642
/// </summary>
6743
public string RawFileNameWithoutExtension { get; }
6844

69-
public log4net.ILog Log { get; }
70-
7145
public S3Loader S3loader { get; set; }
72-
46+
7347
public string S3AccessKeyId { get; set; }
7448

7549
public string S3SecretAccessKey { get; set; }
76-
50+
7751
public string S3url { get; set; }
78-
52+
53+
private string bucketName;
54+
private bool ignoreInstrumentErrors;
55+
7956
public bool IgnoreInstrumentErrors
8057
{
8158
get => ignoreInstrumentErrors;
@@ -84,9 +61,10 @@ public bool IgnoreInstrumentErrors
8461

8562

8663
public ParseInput(string rawFilePath, string outputDirectory, OutputFormat outputFormat, bool gzip,
87-
MetadataFormat outputMetadata, bool excludeProfileData, string collection, string msRun, string subFolder,
88-
log4net.ILog log, string s3url, string s3AccessKeyId, string s3SecretAccessKey, string bucketName, bool ignoreInstrumentErrors
89-
)
64+
MetadataFormat outputMetadata, string s3url, string s3AccessKeyId,
65+
string s3SecretAccessKey, string bucketName,
66+
bool ignoreInstrumentErrors
67+
)
9068
{
9169
RawFilePath = rawFilePath;
9270
var splittedPath = RawFilePath.Split('/');
@@ -95,23 +73,18 @@ public ParseInput(string rawFilePath, string outputDirectory, OutputFormat outpu
9573
OutputDirectory = outputDirectory;
9674
OutputFormat = outputFormat;
9775
Gzip = gzip;
98-
OutputMetadata = outputMetadata;
99-
ExcludeProfileData = excludeProfileData;
100-
Collection = collection;
101-
MsRun = msRun;
102-
SubFolder = subFolder;
103-
Log = log;
104-
S3url = s3url;
76+
OutputMetadata = outputMetadata;
77+
S3url = s3url;
10578
S3AccessKeyId = s3AccessKeyId;
10679
S3SecretAccessKey = s3SecretAccessKey;
10780
this.bucketName = bucketName;
108-
this.ignoreInstrumentErrors = ignoreInstrumentErrors;
81+
this.ignoreInstrumentErrors = ignoreInstrumentErrors;
10982

11083
if (S3url != null && S3AccessKeyId != null && S3SecretAccessKey != null)
111-
initializeS3bucket(s3url, s3AccessKeyId, s3SecretAccessKey, bucketName);
112-
84+
InitializeS3Bucket(s3url, s3AccessKeyId, s3SecretAccessKey, bucketName);
11385
}
114-
private void initializeS3bucket(string s3url, string s3AccessKeyId, string s3SecretAccessKey, string bucketName)
86+
87+
private void InitializeS3Bucket(string s3url, string s3AccessKeyId, string s3SecretAccessKey, string bucketName)
11588
{
11689
S3loader = new S3Loader(s3url, s3AccessKeyId, s3SecretAccessKey, bucketName);
11790
}

Properties/AssemblyInfo.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
// You can specify all the values or you can default the Build and Revision Numbers
3333
// by using the '*' as shown below:
3434
// [assembly: AssemblyVersion("1.0.*")]
35-
[assembly: AssemblyVersion("1.0.7.0")]
36-
[assembly: AssemblyFileVersion("1.0.7.0")]
35+
[assembly: AssemblyVersion("1.1.0.0")]
36+
[assembly: AssemblyFileVersion("1.1.0.0")]
3737

3838
[assembly: log4net.Config.XmlConfigurator(ConfigFile = "log4net.config")]

RawFileParser.cs

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@
66

77
namespace ThermoRawFileParser
88
{
9-
public class RawFileParser
9+
public static class RawFileParser
1010
{
11-
1211
private static readonly log4net.ILog Log =
1312
log4net.LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType);
1413

@@ -21,9 +20,8 @@ public static void Parse(ParseInput parseInput)
2120
// Check to see if the RAW file name was supplied as an argument to the program
2221
if (string.IsNullOrEmpty(parseInput.RawFilePath))
2322
{
24-
parseInput.Log.Debug("No raw file specified or found in path");
23+
Log.Debug("No raw file specified or found in path");
2524
throw new Exception("No RAW file specified!");
26-
2725
}
2826

2927
// Check to see if the specified RAW file exists
@@ -64,27 +62,34 @@ public static void Parse(ParseInput parseInput)
6462
var firstScanNumber = rawFile.RunHeaderEx.FirstSpectrum;
6563
var lastScanNumber = rawFile.RunHeaderEx.LastSpectrum;
6664

67-
if (parseInput.OutputMetadata != MetadataFormat.NON)
65+
if (parseInput.OutputMetadata != MetadataFormat.NONE)
6866
{
69-
var metadataWriter = new MetadataWriter(parseInput.OutputDirectory, parseInput.RawFileNameWithoutExtension);
70-
if(parseInput.OutputMetadata == MetadataFormat.JSON)
71-
metadataWriter.WriteJsonMetada(rawFile, firstScanNumber, lastScanNumber);
72-
if(parseInput.OutputMetadata == MetadataFormat.TXT)
73-
metadataWriter.WriteMetada(rawFile, firstScanNumber, lastScanNumber);
74-
67+
var metadataWriter = new MetadataWriter(parseInput.OutputDirectory,
68+
parseInput.RawFileNameWithoutExtension);
69+
switch (parseInput.OutputMetadata)
70+
{
71+
case MetadataFormat.JSON:
72+
metadataWriter.WriteJsonMetada(rawFile, firstScanNumber, lastScanNumber);
73+
break;
74+
case MetadataFormat.TXT:
75+
metadataWriter.WriteMetada(rawFile, firstScanNumber, lastScanNumber);
76+
break;
77+
}
7578
}
7679

77-
if (parseInput.OutputFormat != OutputFormat.NON)
80+
if (parseInput.OutputFormat != OutputFormat.NONE)
7881
{
7982
SpectrumWriter spectrumWriter;
8083
switch (parseInput.OutputFormat)
8184
{
8285
case OutputFormat.Mgf:
86+
case OutputFormat.MGFNoProfileData:
8387
spectrumWriter = new MgfSpectrumWriter(parseInput);
8488
spectrumWriter.Write(rawFile, firstScanNumber, lastScanNumber);
8589
break;
8690
case OutputFormat.Mzml:
87-
spectrumWriter = new MzMlSpectrumWriter(parseInput, Log);
91+
case OutputFormat.IndexMzML:
92+
spectrumWriter = new MzMlSpectrumWriter(parseInput);
8893
spectrumWriter.Write(rawFile, firstScanNumber, lastScanNumber);
8994
break;
9095
case OutputFormat.Parquet:
@@ -93,7 +98,6 @@ public static void Parse(ParseInput parseInput)
9398
break;
9499
}
95100
}
96-
97101
Log.Info("Finished parsing " + parseInput.RawFilePath);
98102
}
99103
}

ThermoRawFileParser.csproj

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -105,17 +105,13 @@
105105
<Reference Include="System.Data.DataSetExtensions" />
106106
<Reference Include="System.Data" />
107107
<Reference Include="System.Xml" />
108-
<Reference Include="ThermoFisher.CommonCore.BackgroundSubtraction, Version=1.0.0.0, Culture=neutral, PublicKeyToken=1aef06afb5abd953">
109-
<HintPath>packages\ThermoFisher.CommonCore.RawFileReader.4.0.26\lib\ThermoFisher.CommonCore.BackgroundSubtraction.dll</HintPath>
110-
</Reference>
111-
<Reference Include="ThermoFisher.CommonCore.Data, Version=1.0.0.0, Culture=neutral, PublicKeyToken=1aef06afb5abd953">
112-
<HintPath>packages\ThermoFisher.CommonCore.RawFileReader.4.0.26\lib\ThermoFisher.CommonCore.Data.dll</HintPath>
113-
</Reference>
114-
<Reference Include="ThermoFisher.CommonCore.MassPrecisionEstimator, Version=1.0.0.0, Culture=neutral, PublicKeyToken=null">
115-
<HintPath>packages\ThermoFisher.CommonCore.RawFileReader.4.0.26\lib\ThermoFisher.CommonCore.MassPrecisionEstimator.dll</HintPath>
108+
<Reference Include="ThermoFisher.CommonCore.Data, Version=4.0.89.0, Culture=neutral, PublicKeyToken=1aef06afb5abd953">
109+
<HintPath>packages\ThermoFisher.CommonCore.Data.4.0.89\lib\net451\ThermoFisher.CommonCore.Data.dll</HintPath>
110+
<Private>True</Private>
116111
</Reference>
117-
<Reference Include="ThermoFisher.CommonCore.RawFileReader, Version=1.0.0.0, Culture=neutral, PublicKeyToken=1aef06afb5abd953">
118-
<HintPath>packages\ThermoFisher.CommonCore.RawFileReader.4.0.26\lib\ThermoFisher.CommonCore.RawFileReader.dll</HintPath>
112+
<Reference Include="ThermoFisher.CommonCore.RawFileReader, Version=4.0.89.0, Culture=neutral, PublicKeyToken=1aef06afb5abd953">
113+
<HintPath>packages\ThermoFisher.CommonCore.RawFileReader.4.0.89\lib\net451\ThermoFisher.CommonCore.RawFileReader.dll</HintPath>
114+
<Private>True</Private>
119115
</Reference>
120116
<Reference Include="zlib.net, Version=1.0.3.0, Culture=neutral, PublicKeyToken=47d7877cb3620160">
121117
<HintPath>packages\zlib.net.1.0.4.0\lib\zlib.net.dll</HintPath>

0 commit comments

Comments
 (0)