Skip to content

Commit f46aac5

Browse files
Merge branch 'ypriverol-master'
2 parents 9bbf800 + 92dc895 commit f46aac5

144 files changed

Lines changed: 430113 additions & 28 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Dockerfile

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,16 @@ RUN mkdir -p /home/biodocker/bin/
4242
WORKDIR /home/biodocker/bin/
4343
RUN git clone -b master --single-branch https://github.com/compomics/ThermoRawFileParser /home/biodocker/bin
4444
RUN msbuild
45+
RUN ls -l -R
4546

4647

47-
COPY ThermoRawFileParser /home/biodocker/bin/bin/Debug/
48+
COPY ThermoRawFileParser /home/biodocker/bin/bin/x64/Debug/
4849

4950
USER root
50-
RUN chmod +x /home/biodocker/bin/bin/Debug/ThermoRawFileParser
51-
RUN chmod +x /home/biodocker/bin/bin/Debug/ThermoRawFileParser.exe
52-
RUN chown biodocker:biodocker /home/biodocker/bin/bin/Debug/ThermoRawFileParser
51+
RUN chmod +x /home/biodocker/bin/bin/x64/Debug/ThermoRawFileParser
52+
RUN chmod +x /home/biodocker/bin/bin/x64/Debug/ThermoRawFileParser.exe
53+
RUN chown biodocker:biodocker /home/biodocker/bin/bin/x64/Debug/ThermoRawFileParser
5354

5455
USER biodocker
55-
ENV PATH=/home/biodocker/bin/bin/Debug/:$PATH
56+
ENV PATH=/home/biodocker/bin/bin/x64/Debug/:$PATH
5657

MainClass.cs

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
using System;
2+
using log4net;
3+
using log4net.Core;
24
using Mono.Options;
35
using ThermoFisher.CommonCore.Data;
46

57
namespace ThermoRawFileParser
68
{
79
public static class MainClass
810
{
9-
private static readonly log4net.ILog Log =
10-
log4net.LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType);
11+
private static readonly log4net.ILog Log = log4net.LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType);
1112

1213
public static void Main(string[] args)
1314
{
@@ -22,6 +23,13 @@ public static void Main(string[] args)
2223
string collection = null;
2324
string msRun = null;
2425
string subFolder = null;
26+
string s3url = null;
27+
string s3AccessKeyId = null;
28+
string s3SecretAccessKey = null;
29+
var verbose = false;
30+
string bucketName = null;
31+
var ignoreInstrumentErrors = false;
32+
2533
var help = false;
2634

2735
var optionSet = new OptionSet
@@ -68,7 +76,38 @@ public static void Main(string[] args)
6876
"s:|subfolder:",
6977
"Optional, to disambiguate instances where the same collection has 2 or more MS runs with the same name.",
7078
v => subFolder = v
79+
},
80+
{
81+
"u:|s3_url:",
82+
"Optional property to write directly the data into S3 Storage",
83+
v => s3url = v
84+
},
85+
{
86+
"k:|s3_accesskeyid:",
87+
"Optional key for the S3 bucket to write the file output",
88+
v => s3AccessKeyId = v
89+
},
90+
{
91+
"t:|s3_secretaccesskey:",
92+
"Optional key for the S3 bucket to write the file output",
93+
v => s3SecretAccessKey = v
94+
},
95+
{
96+
"n:|s3_bucketName:",
97+
"S3 bucket name",
98+
v => bucketName = v
99+
},
100+
{
101+
"v|verbose", "Verbose the programm and the individual steps",
102+
v => verbose = v != null
103+
},
104+
{
105+
"ignoreInstrumentErrors", "Ignore missing properties by the instrument",
106+
v => ignoreInstrumentErrors = v != null
71107
}
108+
109+
110+
72111
};
73112

74113
try
@@ -165,9 +204,14 @@ public static void Main(string[] args)
165204

166205
try
167206
{
168-
var parseInput = new ParseInput(rawFilePath, outputDirectory, outputFormat, gzip,
169-
outputMetadataFormat,
170-
includeProfileData, collection, msRun, subFolder);
207+
if (verbose)
208+
{
209+
((log4net.Repository.Hierarchy.Hierarchy)LogManager.GetLoggerRepository()).Root.Level = Level.Debug;
210+
((log4net.Repository.Hierarchy.Hierarchy)LogManager.GetLoggerRepository()).RaiseConfigurationChanged(EventArgs.Empty);
211+
212+
}
213+
var parseInput = new ParseInput(rawFilePath, outputDirectory, outputFormat, gzip, outputMetadataFormat,
214+
includeProfileData, collection, msRun, subFolder, Log, s3url, s3AccessKeyId, s3SecretAccessKey, bucketName, ignoreInstrumentErrors);
171215
RawFileParser.Parse(parseInput);
172216
}
173217
catch (Exception ex)

ParseInput.cs

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
1-
using System.IO;
1+
using System.ComponentModel;
2+
using System.IO;
3+
using NUnit.Framework.Constraints;
4+
using ThermoRawFileParser.Writer;
25

36
namespace ThermoRawFileParser
47
{
58
public class ParseInput
69
{
10+
private string bucketName;
11+
private bool ignoreInstrumentErrors;
12+
713
/// <summary>
814
/// The RAW file path.
915
/// </summary>
@@ -60,8 +66,27 @@ public class ParseInput
6066
/// </summary>
6167
public string RawFileNameWithoutExtension { get; }
6268

69+
public log4net.ILog Log { get; }
70+
71+
public S3Loader S3loader { get; set; }
72+
73+
public string S3AccessKeyId { get; set; }
74+
75+
public string S3SecretAccessKey { get; set; }
76+
77+
public string S3url { get; set; }
78+
79+
public bool IgnoreInstrumentErrors
80+
{
81+
get => ignoreInstrumentErrors;
82+
set => ignoreInstrumentErrors = value;
83+
}
84+
85+
6386
public ParseInput(string rawFilePath, string outputDirectory, OutputFormat outputFormat, bool gzip,
64-
MetadataFormat outputMetadata, bool excludeProfileData, string collection, string msRun, string subFolder)
87+
MetadataFormat outputMetadata, bool excludeProfileData, string collection, string msRun, string subFolder,
88+
log4net.ILog log, string s3url, string s3AccessKeyId, string s3SecretAccessKey, string bucketName, bool ignoreInstrumentErrors
89+
)
6590
{
6691
RawFilePath = rawFilePath;
6792
var splittedPath = RawFilePath.Split('/');
@@ -75,6 +100,20 @@ public ParseInput(string rawFilePath, string outputDirectory, OutputFormat outpu
75100
Collection = collection;
76101
MsRun = msRun;
77102
SubFolder = subFolder;
103+
Log = log;
104+
S3url = s3url;
105+
S3AccessKeyId = s3AccessKeyId;
106+
S3SecretAccessKey = s3SecretAccessKey;
107+
this.bucketName = bucketName;
108+
this.ignoreInstrumentErrors = ignoreInstrumentErrors;
109+
110+
if (S3url != null && S3AccessKeyId != null && S3SecretAccessKey != null)
111+
initializeS3bucket(s3url, s3AccessKeyId, s3SecretAccessKey, bucketName);
112+
113+
}
114+
private void initializeS3bucket(string s3url, string s3AccessKeyId, string s3SecretAccessKey, string bucketName)
115+
{
116+
S3loader = new S3Loader(s3url, s3AccessKeyId, s3SecretAccessKey, bucketName);
78117
}
79118
}
80119
}

README.md

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
11
# ThermoRawFileParser
22

3-
Wrapper around the .net (C#) ThermoFisher ThermoRawFileReader library for running on Linux with mono. It takes a thermo RAW file as input and outputs a metadata file and the spectra in 3 possible formats
3+
Wrapper around the .NET (C#) ThermoFisher ThermoRawFileReader library for running on Linux with mono (works on Windows too). It takes a Thermo RAW file as input and outputs a metadata file and the spectra in 3 possible formats:
44
* MGF: only MS2 spectra
55
* mzML: both MS1 and MS2 spectra
66
* Apache Parquet: under development
77

88
RawFileReader reading tool. Copyright © 2016 by Thermo Fisher Scientific, Inc. All rights reserved
99

10-
## Requirements
10+
## (Linux) Requirements
1111
[Mono](https://www.mono-project.com/download/stable/#download-lin) (install mono-complete if you encounter "assembly not found" errors).
1212

1313
## Usage
1414
```
1515
mono ThermoRawFileParser.exe -i=/home/user/data_input/raw_file.raw -o=/home/user/data_input/output/ -f=0 -g -m=0 -c=PXD00001
1616
```
17-
The optional parameters only work in the -option=value format. The tool can output some RAW file metadata `-m=0|1` (0 for JSON format, 1 for TXT format) and the spectra file `-f` or both. For the MGF format, `-p` flag is used to exclude MS2 profile mode data (the MGF files can get big when the MS2 spectra were acquired in profile mode).
17+
For running on Windows, omit `mono`. The optional parameters only work in the -option=value format. The tool can output some RAW file metadata with `-m=0|1` (0 for JSON format, 1 for TXT format), the spectra file with `-f`, or both. For the MGF format, the `-p` flag is used to exclude MS2 profile mode data (the MGF files can get big when the MS2 spectra were acquired in profile mode).
1818

1919
```
2020
ThermoRawFileParser.exe usage is (use -option=value for the optional arguments):
@@ -36,6 +36,20 @@ ThermoRawFileParser.exe usage is (use -option=value for the optional arguments):
3636
-s, --subfolder[=VALUE] Optional, to disambiguate instances where the same
3737
collection has 2 or more MS runs with the same
3838
name.
39+
-u, --s3_url[=VALUE] Optional property to write directly the data into
40+
S3 Storage
41+
-k, --s3_accesskeyid[=VALUE]
42+
Optional key for the S3 bucket to write the file
43+
output
44+
-t, --s3_secretaccesskey[=VALUE]
45+
Optional key for the S3 bucket to write the file
46+
output
47+
-n, --s3_bucketName[=VALUE]
48+
S3 bucket name
49+
-v, --verbose Verbose the programm and the individual steps
50+
--ignoreInstrumentErrors
51+
Ignore missing properties by the instrument
52+
3953
```
4054

4155
## Download
@@ -62,6 +76,14 @@ Run example:
6276
```
6377
docker run -v /home/user/raw:/data_input -i -t thermorawparser mono /src/bin/Debug/ThermoRawFileParser.exe -i=/data_input/raw_file.raw -o=/data_input/output/ -f=0 -g -m=0 -c=PXD00001
6478
```
79+
Example of creating a container once and reusing it for several runs:
80+
```
81+
docker create -v /home/user/raw:/data_input --name=rawparser -it thermorawparser
82+
docker start rawparser
83+
docker exec rawparser mono /src/bin/x64/Debug/ThermoRawFileParser.exe -i=/data_input/raw_file.raw -o=/data_input/output/ -f=0 -g -m=0 -c=PXD00001
84+
docker exec rawparser mono /src/bin/x64/Debug/ThermoRawFileParser.exe -i=/data_input/another_raw_file.raw -o=/data_input/output/ -f=0 -g -m=0 -c=PXD00001
85+
docker stop rawparser
86+
```
6587

6688
### Biocontainers docker
6789

@@ -71,7 +93,7 @@ docker build --no-cache -t thermorawparser .
7193
```
7294
Run example:
7395
```
74-
docker run -v /home/user/raw:/data_input -i -t --user biodocker thermorawparser mono /home/biodocker/bin/bin/Debug/ThermoRawFileParser.exe -i=/data_input/raw_file.raw -o=/data_input/output/ -f=0 -g -m=0 -c=PXD00001
96+
docker run -v /home/user/raw:/data_input -i -t --user biodocker thermorawparser mono /home/biodocker/bin/bin/x64/Debug/ThermoRawFileParser.exe -i=/data_input/raw_file.raw -o=/data_input/output/ -f=0 -g -m=0 -c=PXD00001
7597
```
7698
or with the bash script (`ThermoRawFileParser.sh`):
7799
```

RawFileParser.cs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ public static void Parse(ParseInput parseInput)
2121
// Check to see if the RAW file name was supplied as an argument to the program
2222
if (string.IsNullOrEmpty(parseInput.RawFilePath))
2323
{
24+
parseInput.Log.Debug("No raw file specified or found in path");
2425
throw new Exception("No RAW file specified!");
26+
2527
}
2628

2729
// Check to see if the specified RAW file exists
@@ -82,7 +84,7 @@ public static void Parse(ParseInput parseInput)
8284
spectrumWriter.Write(rawFile, firstScanNumber, lastScanNumber);
8385
break;
8486
case OutputFormat.Mzml:
85-
spectrumWriter = new MzMlSpectrumWriter(parseInput);
87+
spectrumWriter = new MzMlSpectrumWriter(parseInput, Log);
8688
spectrumWriter.Write(rawFile, firstScanNumber, lastScanNumber);
8789
break;
8890
case OutputFormat.Parquet:

ThermoRawFileParser

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
#!/bin/bash
22
echo "Analysing Thermo RW file input"
3-
mono /home/biodocker/bin/bin/Debug/ThermoRawFileParser.exe "$@"
3+
mono /home/biodocker/bin/bin/x64/Debug/ThermoRawFileParser.exe "$@"

ThermoRawFileParser.csproj

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,22 @@
5656
<Prefer32Bit>false</Prefer32Bit>
5757
</PropertyGroup>
5858
<ItemGroup>
59+
<Reference Include="AWS.Logger.Core, Version=1.2.0.0, Culture=neutral, PublicKeyToken=71c852f8be1c371d">
60+
<HintPath>packages\AWS.Logger.Core.1.2.0\lib\net45\AWS.Logger.Core.dll</HintPath>
61+
<Private>True</Private>
62+
</Reference>
63+
<Reference Include="AWSSDK.CloudWatchLogs, Version=3.3.0.0, Culture=neutral, PublicKeyToken=885c28607f98e604">
64+
<HintPath>packages\AWSSDK.CloudWatchLogs.3.3.4\lib\net45\AWSSDK.CloudWatchLogs.dll</HintPath>
65+
<Private>True</Private>
66+
</Reference>
67+
<Reference Include="AWSSDK.Core, Version=3.3.0.0, Culture=neutral, PublicKeyToken=885c28607f98e604">
68+
<HintPath>packages\AWSSDK.Core.3.3.32.2\lib\net45\AWSSDK.Core.dll</HintPath>
69+
<Private>True</Private>
70+
</Reference>
71+
<Reference Include="AWSSDK.S3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=885c28607f98e604">
72+
<HintPath>packages\AWSSDK.S3.3.3.31.24\lib\net45\AWSSDK.S3.dll</HintPath>
73+
<Private>True</Private>
74+
</Reference>
5975
<Reference Include="log4net, Version=2.0.8.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a">
6076
<HintPath>packages\log4net.2.0.8\lib\net45-full\log4net.dll</HintPath>
6177
</Reference>
@@ -120,6 +136,7 @@
120136
<Compile Include="Writer\MzML\mzML1_1_1_idx.cs" />
121137
<Compile Include="Writer\OntologyMapping.cs" />
122138
<Compile Include="Writer\ParquetSpectrumWriter.cs" />
139+
<Compile Include="Writer\S3Loader.cs" />
123140
<Compile Include="Writer\SpectrumWriter.cs" />
124141
</ItemGroup>
125142
<ItemGroup>
@@ -131,6 +148,10 @@
131148
</Content>
132149
<Content Include="Writer\MzML\mzML1.1.1_idx.xsd" />
133150
</ItemGroup>
151+
<ItemGroup>
152+
<Analyzer Include="packages\AWSSDK.CloudWatchLogs.3.3.4\analyzers\dotnet\cs\AWSSDK.CloudWatchLogs.CodeAnalysis.dll" />
153+
<Analyzer Include="packages\AWSSDK.S3.3.3.31.24\analyzers\dotnet\cs\AWSSDK.S3.CodeAnalysis.dll" />
154+
</ItemGroup>
134155
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
135156
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
136157
<PropertyGroup>

ThermoRawFileParserTest/Tests.cs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,12 @@
88

99
namespace ThermoRawFileParserTest
1010
{
11+
1112
[TestFixture]
1213
public class Tests
1314
{
15+
private static readonly log4net.ILog Log = log4net.LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType);
16+
1417
[Test]
1518
public void TestMgf()
1619
{
@@ -20,7 +23,7 @@ public void TestMgf()
2023
var testRawFile = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"small.RAW");
2124
var parseInput = new ParseInput(testRawFile, tempFilePath, OutputFormat.Mgf, false, MetadataFormat.NON, false,
2225
"coll",
23-
"run", "sub");
26+
"run", "sub", Log, null, null, null, null ,false);
2427

2528
RawFileParser.Parse(parseInput);
2629

@@ -41,7 +44,7 @@ public void TestMzml()
4144

4245
var testRawFile = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"small.RAW");
4346
var parseInput = new ParseInput(testRawFile, tempFilePath, OutputFormat.Mzml, false, MetadataFormat.NON, false,
44-
"coll", "run", "sub");
47+
"coll", "run", "sub", Log, null, null, null, null, false);
4548

4649
RawFileParser.Parse(parseInput);
4750

ThermoRawFileParserTest/ThermoRawFileParserTest.csproj

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,18 @@
3434
<WarningLevel>4</WarningLevel>
3535
</PropertyGroup>
3636
<ItemGroup>
37+
<Reference Include="AWS.Logger.Core, Version=1.2.0.0, Culture=neutral, PublicKeyToken=71c852f8be1c371d">
38+
<HintPath>..\packages\AWS.Logger.Core.1.2.0\lib\net45\AWS.Logger.Core.dll</HintPath>
39+
<Private>True</Private>
40+
</Reference>
41+
<Reference Include="AWSSDK.CloudWatchLogs, Version=3.3.0.0, Culture=neutral, PublicKeyToken=885c28607f98e604">
42+
<HintPath>..\packages\AWSSDK.CloudWatchLogs.3.3.4\lib\net45\AWSSDK.CloudWatchLogs.dll</HintPath>
43+
<Private>True</Private>
44+
</Reference>
45+
<Reference Include="AWSSDK.Core, Version=3.3.0.0, Culture=neutral, PublicKeyToken=885c28607f98e604">
46+
<HintPath>..\packages\AWSSDK.Core.3.3.17\lib\net45\AWSSDK.Core.dll</HintPath>
47+
<Private>True</Private>
48+
</Reference>
3749
<Reference Include="Chemistry, Version=1.0.0.0, Culture=neutral, PublicKeyToken=null">
3850
<HintPath>..\packages\mzLib.1.0.329\lib\net471\Chemistry.dll</HintPath>
3951
<Private>True</Private>
@@ -50,6 +62,10 @@
5062
<HintPath>..\packages\mzLib.1.0.329\lib\net471\FlashLFQ.dll</HintPath>
5163
<Private>True</Private>
5264
</Reference>
65+
<Reference Include="log4net, Version=2.0.8.0, Culture=neutral, PublicKeyToken=669e0ddf0bb1aa2a">
66+
<HintPath>..\packages\log4net.2.0.8\lib\net45-full\log4net.dll</HintPath>
67+
<Private>True</Private>
68+
</Reference>
5369
<Reference Include="ManagedThermoHelperLayer, Version=1.0.329.0, Culture=neutral, PublicKeyToken=null">
5470
<HintPath>..\packages\mzLib.1.0.329\lib\net471\ManagedThermoHelperLayer.dll</HintPath>
5571
<Private>True</Private>
@@ -151,6 +167,14 @@
151167
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
152168
</None>
153169
</ItemGroup>
170+
<ItemGroup>
171+
<Analyzer Include="..\packages\AWSSDK.CloudWatchLogs.3.3.4\analyzers\dotnet\cs\AWSSDK.CloudWatchLogs.CodeAnalysis.dll" />
172+
</ItemGroup>
173+
<ItemGroup>
174+
<Content Include="log4net.config">
175+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
176+
</Content>
177+
</ItemGroup>
154178
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
155179
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
156180
<PropertyGroup>

0 commit comments

Comments
 (0)