Skip to content

Commit ca35579

Browse files
committed
Standardize file extensions
1 parent 82ff97c commit ca35579

4 files changed

Lines changed: 146 additions & 8 deletions

File tree

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
Format Gzip UserInput FileOutput
2+
MGF FALSE rawfile rawfile.mgf
3+
MGF FALSE rawfile. rawfile.mgf
4+
MGF FALSE rawfile.mgf rawfile.mgf
5+
MGF FALSE rawfile.MGF rawfile.mgf
6+
MGF FALSE rawfile.. rawfile.mgf
7+
MGF TRUE rawfile.mgf.gz rawfile.mgf.gz
8+
MGF TRUE rawfile rawfile.mgf.gz
9+
MGF TRUE rawfile.. rawfile.mgf.gz
10+
MGF TRUE rawfile.mgf.GZ rawfile.mgf.gz
11+
MGF TRUE rawfile.mgf rawfile.mgf.gz
12+
MGF TRUE rawfile.MGF. rawfile.mgf.gz
13+
mzML FALSE rawfile rawfile.mzML
14+
mzML FALSE rawfile. rawfile.mzML
15+
mzML FALSE rawfile.mgf rawfile.mgf.mzML
16+
mzML FALSE rawfile.MzML rawfile.mzML
17+
mzML FALSE rawfile.. rawfile.mzML
18+
mzML TRUE rawfile.mzML.gz rawfile.mzML.gz
19+
mzML TRUE rawfile rawfile.mzML.gz
20+
mzML TRUE rawfile.. rawfile.mzML.gz
21+
mzML TRUE rawfile.mzml.GZ rawfile.mzML.gz
22+
mzML TRUE rawfile.mzML rawfile.mzML.gz
23+
mzML TRUE rawfile.MZml. rawfile.mzML.gz

ThermoRawFileParserTest/ThermoRawFileParserTest.csproj

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,9 @@
187187
</ItemGroup>
188188
<ItemGroup>
189189
<None Include="app.config" />
190+
<None Include="Data\ExtensionTest.tsv">
191+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
192+
</None>
190193
<None Include="Data\TestFolderMgfs\small1.RAW">
191194
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
192195
</None>

ThermoRawFileParserTest/WriterTests.cs

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
using MzLibUtil;
99
using NUnit.Framework;
1010
using ThermoRawFileParser;
11+
using ThermoRawFileParser.Writer;
1112
using ThermoRawFileParser.Writer.MzML;
1213
using UsefulProteomicsDatabases;
1314

@@ -16,6 +17,97 @@ namespace ThermoRawFileParserTest
1617
[TestFixture]
1718
public class WriterTests
1819
{
20+
/// <summary>
/// Checks the default output file names when <c>OutputFile</c> is left unset:
/// the parser is expected to write <c>small.&lt;extension&gt;</c> (plus <c>.gz</c> when
/// gzip is enabled) into <c>OutputDirectory</c> for each output format.
/// </summary>
[Test]
public void TestExtensionsNull()
{
    // Get temp path for writing the test files
    var tempFilePath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());
    Directory.CreateDirectory(tempFilePath);

    try
    {
        var testRawFile = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"Data/small.RAW");
        var parseInput = new ParseInput();
        parseInput.RawFilePath = testRawFile;
        parseInput.OutputDirectory = tempFilePath;

        // No output file name given, uncompressed output
        parseInput.OutputFormat = OutputFormat.MGF;
        RawFileParser.Parse(parseInput);
        Assert.IsTrue(File.Exists(Path.Combine(tempFilePath, "small.mgf")),
            "MGF output should be named small.mgf");

        parseInput.OutputFormat = OutputFormat.MzML;
        RawFileParser.Parse(parseInput);
        Assert.IsTrue(File.Exists(Path.Combine(tempFilePath, "small.mzML")),
            "mzML output should be named small.mzML");
        // Remove the file so the indexed mzML run below cannot pass on a stale file
        File.Delete(Path.Combine(tempFilePath, "small.mzML"));

        parseInput.OutputFormat = OutputFormat.IndexMzML;
        RawFileParser.Parse(parseInput);
        Assert.IsTrue(File.Exists(Path.Combine(tempFilePath, "small.mzML")),
            "Indexed mzML output should be named small.mzML");

        // No output file name given, gzipped output (format is still IndexMzML here)
        parseInput.Gzip = true;
        RawFileParser.Parse(parseInput);
        Assert.IsTrue(File.Exists(Path.Combine(tempFilePath, "small.mzML.gz")),
            "Gzipped indexed mzML output should be named small.mzML.gz");
        // Remove the file so the plain mzML run below cannot pass on a stale file
        File.Delete(Path.Combine(tempFilePath, "small.mzML.gz"));

        parseInput.OutputFormat = OutputFormat.MGF;
        RawFileParser.Parse(parseInput);
        Assert.IsTrue(File.Exists(Path.Combine(tempFilePath, "small.mgf.gz")),
            "Gzipped MGF output should be named small.mgf.gz");

        parseInput.OutputFormat = OutputFormat.MzML;
        RawFileParser.Parse(parseInput);
        Assert.IsTrue(File.Exists(Path.Combine(tempFilePath, "small.mzML.gz")),
            "Gzipped mzML output should be named small.mzML.gz");
    }
    finally
    {
        // Clean up even when an assertion fails or the parser throws
        if (Directory.Exists(tempFilePath))
        {
            Directory.Delete(tempFilePath, true);
        }
    }
}
61+
62+
/// <summary>
/// Table-driven check of output file name normalization. Each data row of
/// <c>Data/ExtensionTest.tsv</c> (tab-separated: Format, Gzip, UserInput, FileOutput)
/// is parsed with <c>OutputFile</c> set to the user input, and the expected
/// output file must exist afterwards.
/// </summary>
[Test]
public void TestExtensionsFull()
{
    // Get temp path for writing the test files
    var tempFilePath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());
    Directory.CreateDirectory(tempFilePath);

    try
    {
        var testRawFile = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"Data/small.RAW");
        var parseInput = new ParseInput();
        parseInput.RawFilePath = testRawFile;

        var tablePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"Data/ExtensionTest.tsv");

        // Skip(1) drops the header row
        foreach (string line in File.ReadLines(tablePath).Skip(1))
        {
            // Tolerate blank lines (e.g. a trailing newline at the end of the table)
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            string[] words = line.Split('\t');
            Assert.IsTrue(words.Length >= 4, "Malformed row in ExtensionTest.tsv: " + line);

            // Declared per row so an unknown format can never reuse the previous row's formats
            List<OutputFormat> formats;
            switch (words[0].ToLowerInvariant())
            {
                case "mgf":
                    formats = new List<OutputFormat> { OutputFormat.MGF };
                    break;
                case "mzml":
                    formats = new List<OutputFormat> { OutputFormat.MzML, OutputFormat.IndexMzML };
                    break;
                default:
                    Assert.Fail("Unknown format in ExtensionTest.tsv: " + words[0]);
                    return;
            }

            // Anything other than "true" (case-insensitive) means no compression
            parseInput.Gzip = words[1].Equals("true", StringComparison.OrdinalIgnoreCase);

            string userInput = words[2];
            string expectedOutput = words[3];
            string expectedPath = Path.Combine(tempFilePath, expectedOutput);

            parseInput.OutputFile = Path.Combine(tempFilePath, userInput);

            foreach (var format in formats)
            {
                parseInput.OutputFormat = format;
                RawFileParser.Parse(parseInput);
                Assert.IsTrue(File.Exists(expectedPath),
                    "Expected output '" + expectedOutput + "' for input '" + userInput +
                    "' (format " + format + ", gzip " + parseInput.Gzip + ")");
                // Remove the file so the next format/row cannot pass on a stale file
                File.Delete(expectedPath);
            }
        }
    }
    finally
    {
        // Clean up even when an assertion fails or the parser throws
        if (Directory.Exists(tempFilePath))
        {
            Directory.Delete(tempFilePath, true);
        }
    }
}
110+
19111
[Test]
20112
public void TestMgf()
21113
{

Writer/SpectrumWriter.cs

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ protected void ConfigureWriter(string extension)
7070

7171
if (ParseInput.OutputFile == null)
7272
{
73-
var fullExtension = ParseInput.Gzip ? extension + ".gzip" : extension;
73+
var fullExtension = ParseInput.Gzip ? extension + ".gz" : extension;
7474
if (!ParseInput.Gzip || ParseInput.OutputFormat == OutputFormat.IndexMzML)
7575
{
7676
Writer = File.CreateText(ParseInput.OutputDirectory + "//" +
@@ -87,25 +87,45 @@ protected void ConfigureWriter(string extension)
8787
}
8888
else
8989
{
90+
var fileName = NormalizeFileName(ParseInput.OutputFile, extension, ParseInput.Gzip);
9091
if (!ParseInput.Gzip || ParseInput.OutputFormat == OutputFormat.IndexMzML)
9192
{
92-
Writer = File.CreateText(ParseInput.OutputFile);
93+
Writer = File.CreateText(fileName);
9394
}
9495
else
9596
{
96-
var fileName = ParseInput.OutputFile;
97-
if (ParseInput.Gzip && !Path.GetExtension(fileName).Equals(".gzip"))
98-
{
99-
fileName = ParseInput.OutputFile + ".gzip";
100-
}
101-
10297
var fileStream = File.Create(fileName);
10398
var compress = new GZipStream(fileStream, CompressionMode.Compress);
10499
Writer = new StreamWriter(compress);
105100
}
106101
}
107102
}
108103

104+
/// <summary>
/// Rewrite a user-supplied output file name so that it ends in the canonical
/// extension for the format, followed by ".gz" when compression is requested.
/// </summary>
/// <param name="outputFile">The output file name or path as given by the user.</param>
/// <param name="extension">
/// The format extension; it is appended verbatim, so it must carry its own leading dot.
/// </param>
/// <param name="gzip">Whether the output is compressed and should end in ".gz".</param>
/// <returns>
/// The name with trailing dots removed, any existing (case-insensitive) occurrence of the
/// expected suffixes stripped, and the canonical suffixes appended.
/// </returns>
private static string NormalizeFileName(string outputFile, string extension, bool gzip)
{
    // Suffixes are listed outermost first, i.e. in the order they are peeled off the end
    var suffixes = gzip ? new[] { ".gz", extension } : new[] { extension };

    var stem = outputFile.TrimEnd('.');
    var tail = "";

    foreach (var suffix in suffixes)
    {
        // Ordinal comparison guarantees the match is exactly suffix.Length characters long,
        // which the Substring below relies on; culture-sensitive matching does not
        if (stem.EndsWith(suffix, System.StringComparison.OrdinalIgnoreCase))
        {
            stem = stem.Substring(0, stem.Length - suffix.Length);
        }

        // Always append the canonical spelling, whether or not the user supplied it
        tail = suffix + tail;
        stem = stem.TrimEnd('.');
    }

    return stem + tail;
}
128+
109129
/// <summary>
110130
/// Construct the spectrum title.
111131
/// </summary>

0 commit comments

Comments
 (0)