Skip to content

Commit 5065b30

Browse files
Merge branch 'ypriverol-master'
2 parents c00b11e + a53445e commit 5065b30

3 files changed

Lines changed: 58 additions & 53 deletions

File tree

Dockerfile

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ FROM mono:latest
55
LABEL base_image="mono:latest"
66
LABEL version="1"
77
LABEL software="ThermoRawFileParser"
8-
LABEL software.version="1.0.0"
8+
LABEL software.version="1.1.0"
99
LABEL about.summary="A software to convert Thermo RAW files to mgf and mzML"
1010
LABEL about.home="https://github.com/compomics/ThermoRawFileParser"
1111
LABEL about.documentation="https://github.com/compomics/ThermoRawFileParser"
@@ -15,7 +15,7 @@ LABEL about.tags="Proteomics"
1515

1616
################## MAINTAINER ######################
1717
MAINTAINER Niels Hulstaert <niels.hulstaert@ugent.be>
18-
MAINTAINER Yasset PErez-Riverol <ypriverol@gmail.com>
18+
MAINTAINER Yasset Perez-Riverol <ypriverol@gmail.com>
1919

2020
################## INSTALLATION ######################
2121

@@ -44,14 +44,16 @@ RUN git clone -b master --single-branch https://github.com/compomics/ThermoRawF
4444
RUN msbuild
4545
RUN ls -l -R
4646

47-
4847
COPY ThermoRawFileParser /home/biodocker/bin/bin/x64/Debug/
4948

50-
USER root
51-
RUN chmod +x /home/biodocker/bin/bin/x64/Debug/ThermoRawFileParser
52-
RUN chmod +x /home/biodocker/bin/bin/x64/Debug/ThermoRawFileParser.exe
49+
USER root
5350
RUN chown biodocker:biodocker /home/biodocker/bin/bin/x64/Debug/ThermoRawFileParser
51+
RUN rm -rfv /usr/share/man/
5452

5553
USER biodocker
54+
55+
RUN chmod +x /home/biodocker/bin/bin/x64/Debug/ThermoRawFileParser
56+
RUN chmod +x /home/biodocker/bin/bin/x64/Debug/ThermoRawFileParser.exe
5657
ENV PATH=/home/biodocker/bin/bin/x64/Debug/:$PATH
58+
RUN ls -la -R /home/biodocker/bin/bin/x64/Debug/
5759

README.md

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
Wrapper around the .NET (C#) ThermoFisher ThermoRawFileReader library for running on Linux with mono (works on Windows too). It takes a Thermo RAW file as input and outputs a metadata file and the spectra in 3 possible formats:
44
* MGF: only MS2 spectra
5-
* mzML: both MS1 and MS2 spectra
5+
* mzML and indexed mzML: both MS1 and MS2 spectra
66
* Apache Parquet: under development
77

88
RawFileReader reading tool. Copyright © 2016 by Thermo Fisher Scientific, Inc. All rights reserved
@@ -12,44 +12,34 @@ RawFileReader reading tool. Copyright © 2016 by Thermo Fisher Scientific, Inc.
1212

1313
## Usage
1414
```
15-
mono ThermoRawFileParser.exe -i=/home/user/data_input/raw_file.raw -o=/home/user/data_input/output/ -f=0 -g -m=0 -c=PXD00001
15+
mono ThermoRawFileParser.exe -i=/home/user/data_input/raw_file.raw -o=/home/user/data_input/output/ -f=0 -g -m=0
1616
```
17-
For running on Windows, omit `mono`. The optional parameters only work in the -option=value format. The tool can output some RAW file metadata `-m=0|1` (0 for JSON format, 1 for TXT format) and the spectra file `-f` or both. For the MGF format, `-p` flag is used to exclude MS2 profile mode data (the MGF files can get big when the MS2 spectra were acquired in profile mode).
17+
For running on Windows, omit `mono`. The optional parameters only work in the -option=value format. The tool can output some RAW file metadata `-m=0|1` (0 for JSON, 1 for TXT), the spectra file `-f=0|1|2|3|4` (0 for MGF, 1 for mzML, 2 for indexed mzML, 3 for Parquet, 4 for MGF with profile data excluded), or both. The 'MGF with profile data excluded' format is used to exclude MS2 profile mode data (the MGF files can get big when the MS2 spectra were acquired in profile mode).
1818

1919
```
20-
ThermoRawFileParser.exe usage is (use -option=value for the optional arguments):
20+
ThermoRawFileParser.exe usage is (use -option=value for the optional arguments):
2121
-h, --help Prints out the options.
2222
-i, --input=VALUE The raw file input.
2323
-o, --output=VALUE The output directory.
2424
-f, --format=VALUE The output format for the spectra (0 for MGF, 1
25-
for MzMl, 2 for Parquet)
25+
for mzML, 2 for indexed mzML, 3 for Parquet, 4
26+
for MGF with profile data excluded)
2627
-m, --metadata=VALUE The metadata output format (0 for JSON, 1 for TXT).
2728
-g, --gzip GZip the output file if this flag is specified (
2829
without value).
29-
-p, --profiledata Exclude MS2 profile data if this flag is specified
30-
(without value). Only for MGF format!
31-
-c, --collection[=VALUE] The optional collection identifier (PXD identifier
32-
for example).
33-
-r, --run[=VALUE] The optional mass spectrometry run name used in
34-
the spectrum title. The RAW file name will be
35-
used if not specified.
36-
-s, --subfolder[=VALUE] Optional, to disambiguate instances where the same
37-
collection has 2 or more MS runs with the same
38-
name.
3930
-u, --s3_url[=VALUE] Optional property to write directly the data into
40-
S3 Storage
31+
S3 Storage.
4132
-k, --s3_accesskeyid[=VALUE]
4233
Optional key for the S3 bucket to write the file
43-
output
34+
output.
4435
-t, --s3_secretaccesskey[=VALUE]
4536
Optional key for the S3 bucket to write the file
46-
output
37+
output.
4738
-n, --s3_bucketName[=VALUE]
4839
S3 bucket name
49-
-v, --verbose Verbose the programm and the individual steps
40+
-v, --verbose Enable verbose logging.
5041
-e, --ignoreInstrumentErrors
5142
Ignore missing properties by the instrument.
52-
5343
```
5444

5545
## Download
@@ -74,14 +64,14 @@ docker build --no-cache -t thermorawparser -f Dockerfile_basic .
7464
```
7565
Run example:
7666
```
77-
docker run -v /home/user/raw:/data_input -i -t thermorawparser mono /src/bin/Debug/ThermoRawFileParser.exe -i=/data_input/raw_file.raw -o=/data_input/output/ -f=0 -g -m=0 -c=PXD00001
67+
docker run -v /home/user/raw:/data_input -i -t thermorawparser mono /src/bin/x64/Debug/ThermoRawFileParser.exe -i=/data_input/raw_file.raw -o=/data_input/output/ -f=0 -g -m=0
7868
```
7969
Create example for reusing the container:
8070
```
8171
docker create -v /home/user/raw:/data_input --name=rawparser -it thermorawparser
8272
docker start rawparser
83-
docker exec rawparser mono /src/bin/x64/Debug/ThermoRawFileParser.exe -i=/data_input/raw_file.raw -o=/data_input/output/ -f=0 -g -m=0 -c=PXD00001
84-
docker exec rawparser mono /src/bin/x64/Debug/ThermoRawFileParser.exe -i=/data_input/another_raw_file.raw -o=/data_input/output/ -f=0 -g -m=0 -c=PXD00001
73+
docker exec rawparser mono /src/bin/x64/Debug/ThermoRawFileParser.exe -i=/data_input/raw_file.raw -o=/data_input/output/ -f=0 -g -m=0
74+
docker exec rawparser mono /src/bin/x64/Debug/ThermoRawFileParser.exe -i=/data_input/another_raw_file.raw -o=/data_input/output/ -f=0 -g -m=0
8575
docker stop rawparser
8676
```
8777

@@ -93,9 +83,9 @@ docker build --no-cache -t thermorawparser .
9383
```
9484
Run example:
9585
```
96-
docker run -v /home/user/raw:/data_input -i -t --user biodocker thermorawparser mono /home/biodocker/bin/bin/x64/Debug/ThermoRawFileParser.exe -i=/data_input/raw_file.raw -o=/data_input/output/ -f=0 -g -m=0 -c=PXD00001
86+
docker run -v /home/user/raw:/data_input -i -t --user biodocker thermorawparser mono /home/biodocker/bin/bin/x64/Debug/ThermoRawFileParser.exe -i=/data_input/raw_file.raw -o=/data_input/output/ -f=0 -g -m=0
9787
```
9888
or with the bash script (`ThermoRawFileParser.sh`):
9989
```
100-
docker run -v /home/user/raw:/data_input -i -t --user biodocker thermorawparser /bin/bash /home/biodocker/bin/ThermoRawFileParser.sh -i=/data_input/raw_file.raw -o=/data_input/output/ -f=0 -g -m=0 -c=PXD00001
90+
docker run -v /home/user/raw:/data_input -i -t --user biodocker thermorawparser /bin/bash /home/biodocker/bin/ThermoRawFileParser.sh -i=/data_input/raw_file.raw -o=/data_input/output/ -f=0 -g -m=0
10191
```

Writer/MzMlSpectrumWriter.cs

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastSc
223223
_writer.WriteEndElement(); // software
224224
_writer.WriteEndElement(); // softwareList
225225

226-
PopulateInstrumentConfigurationList(firstScanNumber, instrumentModel);
226+
PopulateInstrumentConfigurationList(firstScanNumber, lastScanNumber, instrumentModel);
227227

228228
// dataProcessingList
229229
_writer.WriteStartElement("dataProcessingList");
@@ -447,8 +447,9 @@ public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastSc
447447
/// Populate the instrument configuration list
448448
/// </summary>
449449
/// <param name="firstScanNumber"></param>
450+
/// <param name="lastScanNumber"></param>
450451
/// <param name="instrumentModel"></param>
451-
private void PopulateInstrumentConfigurationList(int firstScanNumber, CVParamType instrumentModel)
452+
private void PopulateInstrumentConfigurationList(int firstScanNumber, int lastScanNumber, CVParamType instrumentModel)
452453
{
453454
// go over the first scans until an MS2 scan is encountered
454455
// to collect all mass analyzer and ionization types
@@ -457,40 +458,52 @@ private void PopulateInstrumentConfigurationList(int firstScanNumber, CVParamTyp
457458
do
458459
{
459460
// Get the scan filter for this scan number
460-
var scanFilter = _rawFile.GetFilterForScanNumber(scanNumber);
461461

462-
// Add the ionization type if necessary
463462
try
464463
{
465-
if (!_ionizationTypes.ContainsKey(scanFilter.IonizationMode))
464+
var scanFilter = _rawFile.GetFilterForScanNumber(scanNumber);
465+
466+
// Add the ionization type if necessary
467+
try
468+
{
469+
if (!_ionizationTypes.ContainsKey(scanFilter.IonizationMode))
470+
{
471+
_ionizationTypes.Add(scanFilter.IonizationMode,
472+
OntologyMapping.IonizationTypes[scanFilter.IonizationMode]);
473+
}
474+
}
475+
catch (Exception e)
476+
{
477+
Log.Warn("The IonizationMode does not contains the following property --" + e.Message);
478+
if (!ParseInput.IgnoreInstrumentErrors)
479+
{
480+
throw e;
481+
}
482+
}
483+
484+
// Add the mass analyzer if necessary
485+
if (!_massAnalyzers.ContainsKey(scanFilter.MassAnalyzer) &&
486+
OntologyMapping.MassAnalyzerTypes.ContainsKey(scanFilter.MassAnalyzer))
466487
{
467-
_ionizationTypes.Add(scanFilter.IonizationMode,
468-
OntologyMapping.IonizationTypes[scanFilter.IonizationMode]);
488+
_massAnalyzers.Add(scanFilter.MassAnalyzer, "IC" + (_massAnalyzers.Count + 1));
469489
}
490+
491+
if (scanFilter.MSOrder == MSOrderType.Ms2)
492+
{
493+
encounteredMs2 = true;
494+
}
495+
470496
}
471497
catch (Exception e)
472498
{
473-
Log.Warn("The IonizationMode does not contains the following property --" + e.Message);
499+
Log.Warn("No Scan Filter found for the following scan --" + scanNumber);
474500
if (!ParseInput.IgnoreInstrumentErrors)
475501
{
476502
throw e;
477503
}
478504
}
479-
480-
// Add the mass analyzer if necessary
481-
if (!_massAnalyzers.ContainsKey(scanFilter.MassAnalyzer) &&
482-
OntologyMapping.MassAnalyzerTypes.ContainsKey(scanFilter.MassAnalyzer))
483-
{
484-
_massAnalyzers.Add(scanFilter.MassAnalyzer, "IC" + (_massAnalyzers.Count + 1));
485-
}
486-
487-
if (scanFilter.MSOrder == MSOrderType.Ms2)
488-
{
489-
encounteredMs2 = true;
490-
}
491-
492505
scanNumber++;
493-
} while (!encounteredMs2);
506+
} while (!encounteredMs2 && scanNumber <= lastScanNumber);
494507

495508
// Add a default analyzer if none were found
496509
if (_massAnalyzers.Count == 0)

0 commit comments

Comments
 (0)