Skip to content

Commit 70714c4

Browse files
Copilot and gfs committed
Add ARJ and ARC decompression support with SharpCompress 0.44.3
Co-authored-by: gfs <98900+gfs@users.noreply.github.com>
1 parent aede26f commit 70714c4

10 files changed

Lines changed: 381 additions & 3 deletions

File tree

RecursiveExtractor.Tests/ExtractorTests/ExpectedNumFilesTests.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ public static IEnumerable<object[]> ArchiveData
4343
new object[] { "TestDataArchivesNested.Zip", 54 },
4444
new object[] { "UdfTest.iso", 3 },
4545
new object[] { "UdfTestWithMultiSystem.iso", 3 },
46+
new object[] { "TestData.arj", 1 },
47+
new object[] { "TestData.arc", 1 },
4648
// new object[] { "HfsSampleUDCO.dmg", 2 }
4749
};
4850
}
@@ -75,6 +77,8 @@ public static IEnumerable<object[]> NoRecursionData
7577
new object[] { "EmptyFile.txt", 1 },
7678
new object[] { "TestDataArchivesNested.Zip", 14 },
7779
new object[] { "UdfTestWithMultiSystem.iso", 3 },
80+
new object[] { "TestData.arj", 1 },
81+
new object[] { "TestData.arc", 1 },
7882
// new object[] { "HfsSampleUDCO.dmg", 2 }
7983
};
8084
}

RecursiveExtractor.Tests/ExtractorTests/MiniMagicTests.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ public class MiniMagicTests : BaseExtractorTestClass
2424
[DataRow("TestData.wim", ArchiveFileType.WIM)]
2525
[DataRow("Empty.vmdk", ArchiveFileType.VMDK)]
2626
[DataRow("HfsSampleUDCO.dmg", ArchiveFileType.DMG)]
27+
[DataRow("TestData.arj", ArchiveFileType.ARJ)]
28+
[DataRow("TestData.arc", ArchiveFileType.ARC)]
2729
[DataRow("EmptyFile.txt", ArchiveFileType.UNKNOWN)]
2830
public void TestMiniMagic(string fileName, ArchiveFileType expectedArchiveFileType)
2931
{
88 Bytes
Binary file not shown.
182 Bytes
Binary file not shown.

RecursiveExtractor/Extractor.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ public Extractor(IEnumerable<ICustomAsyncExtractor> customExtractors) : this()
7070
/// </summary>
7171
public void SetDefaultExtractors()
7272
{
73+
SetExtractor(ArchiveFileType.ARC, new ArcExtractor(this));
74+
SetExtractor(ArchiveFileType.ARJ, new ArjExtractor(this));
7375
SetExtractor(ArchiveFileType.BZIP2, new BZip2Extractor(this));
7476
SetExtractor(ArchiveFileType.DEB, new DebExtractor(this));
7577
SetExtractor(ArchiveFileType.AR, new GnuArExtractor(this));
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
using SharpCompress.Readers.Arc;
2+
using System;
3+
using System.Collections.Generic;
4+
using System.IO;
5+
6+
namespace Microsoft.CST.RecursiveExtractor.Extractors
7+
{
8+
/// <summary>
/// The ARC Archive extractor implementation
/// </summary>
public class ArcExtractor : AsyncExtractorInterface
{
    /// <summary>
    /// The constructor takes the Extractor context for recursion.
    /// </summary>
    /// <param name="context">The Extractor context.</param>
    public ArcExtractor(Extractor context)
    {
        Context = context;
    }

    private readonly NLog.Logger Logger = NLog.LogManager.GetCurrentClassLogger();

    internal Extractor Context { get; }

    /// <summary>
    /// Safely gets the size of an entry, returning 0 if not available.
    /// </summary>
    /// <param name="entry">The archive entry whose size is requested.</param>
    /// <returns>The reported entry size, or 0 when reading it throws <see cref="NotImplementedException"/>.</returns>
    private long GetEntrySize(SharpCompress.Common.IEntry entry)
    {
        try
        {
            return entry.Size;
        }
        catch (NotImplementedException)
        {
            return 0;
        }
    }

    /// <summary>
    /// Attempts to open an ARC reader over the content of <paramref name="fileEntry"/>.
    /// </summary>
    /// <param name="fileEntry">The entry whose content should be parsed as an ARC archive.</param>
    /// <returns>The opened reader, or null when opening failed (the failure is logged at Debug level).</returns>
    private ArcReader? TryOpenReader(FileEntry fileEntry)
    {
        try
        {
            // LeaveStreamOpen: the content stream belongs to the FileEntry, not to the reader.
            return ArcReader.Open(fileEntry.Content, new SharpCompress.Readers.ReaderOptions()
            {
                LeaveStreamOpen = true
            });
        }
        catch (Exception e)
        {
            Logger.Debug(Extractor.FAILED_PARSING_ERROR_MESSAGE_STRING, ArchiveFileType.ARC, fileEntry.FullPath, string.Empty, e.GetType());
            return null;
        }
    }

    /// <summary>
    /// Extracts an ARC archive
    /// </summary>
    ///<inheritdoc />
    public async IAsyncEnumerable<FileEntry> ExtractAsync(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true)
    {
        using var arcReader = TryOpenReader(fileEntry);
        if (arcReader is null)
        {
            // The archive could not be parsed; optionally hand the raw file back to the caller.
            if (options.ExtractSelfOnFail)
            {
                fileEntry.EntryStatus = FileEntryStatus.FailedArchive;
                yield return fileEntry;
            }
            yield break;
        }

        while (arcReader.MoveToNextEntry())
        {
            var entry = arcReader.Entry;
            if (entry.IsDirectory)
            {
                continue;
            }

            governor.CheckResourceGovernor(GetEntrySize(entry));

            var name = entry.Key?.Replace('/', Path.DirectorySeparatorChar);
            if (string.IsNullOrEmpty(name))
            {
                Logger.Debug(Extractor.ENTRY_MISSING_NAME_ERROR_MESSAGE_STRING, ArchiveFileType.ARC, fileEntry.FullPath);
                continue;
            }

            // A single unreadable entry is logged and skipped (same policy as the synchronous
            // Extract) instead of aborting the whole enumeration.
            FileEntry? newFileEntry = null;
            try
            {
                // NOTE(review): disposing here assumes FromStreamAsync copies the content
                // rather than retaining the entry stream - confirm against FileEntry.
                using var entryStream = arcReader.OpenEntryStream();
                newFileEntry = await FileEntry.FromStreamAsync(name, entryStream, fileEntry, entry.CreatedTime, entry.LastModifiedTime, entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff).ConfigureAwait(false);
            }
            catch (Exception e)
            {
                Logger.Debug(Extractor.FAILED_PARSING_ERROR_MESSAGE_STRING, ArchiveFileType.ARC, fileEntry.FullPath, entry.Key, e.GetType());
            }

            if (newFileEntry is null)
            {
                continue;
            }

            if (options.Recurse || topLevel)
            {
                await foreach (var innerEntry in Context.ExtractAsync(newFileEntry, options, governor, false))
                {
                    yield return innerEntry;
                }
            }
            else
            {
                yield return newFileEntry;
            }
        }
    }

    /// <summary>
    /// Extracts an ARC archive
    /// </summary>
    ///<inheritdoc />
    public IEnumerable<FileEntry> Extract(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true)
    {
        using var arcReader = TryOpenReader(fileEntry);
        if (arcReader is null)
        {
            // The archive could not be parsed; optionally hand the raw file back to the caller.
            if (options.ExtractSelfOnFail)
            {
                fileEntry.EntryStatus = FileEntryStatus.FailedArchive;
                yield return fileEntry;
            }
            yield break;
        }

        while (arcReader.MoveToNextEntry())
        {
            var entry = arcReader.Entry;
            if (entry.IsDirectory)
            {
                continue;
            }

            governor.CheckResourceGovernor(GetEntrySize(entry));

            // Validate the name BEFORE opening the entry stream, so a nameless entry is
            // skipped without leaving an opened-but-unread stream on the reader.
            var name = entry.Key?.Replace('/', Path.DirectorySeparatorChar);
            if (string.IsNullOrEmpty(name))
            {
                Logger.Debug(Extractor.ENTRY_MISSING_NAME_ERROR_MESSAGE_STRING, ArchiveFileType.ARC, fileEntry.FullPath);
                continue;
            }

            FileEntry? newFileEntry = null;
            try
            {
                // NOTE(review): disposing here assumes the FileEntry constructor copies the
                // content (4th argument is false) rather than retaining the stream - confirm.
                using var entryStream = arcReader.OpenEntryStream();
                newFileEntry = new FileEntry(name, entryStream, fileEntry, false, entry.CreatedTime, entry.LastModifiedTime, entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff);
            }
            catch (Exception e)
            {
                Logger.Debug(Extractor.FAILED_PARSING_ERROR_MESSAGE_STRING, ArchiveFileType.ARC, fileEntry.FullPath, entry.Key, e.GetType());
            }

            if (newFileEntry is null)
            {
                continue;
            }

            if (options.Recurse || topLevel)
            {
                foreach (var innerEntry in Context.Extract(newFileEntry, options, governor, false))
                {
                    yield return innerEntry;
                }
            }
            else
            {
                yield return newFileEntry;
            }
        }
    }
}
184+
}
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
using SharpCompress.Readers.Arj;
2+
using System;
3+
using System.Collections.Generic;
4+
using System.IO;
5+
6+
namespace Microsoft.CST.RecursiveExtractor.Extractors
7+
{
8+
/// <summary>
/// The ARJ Archive extractor implementation
/// </summary>
public class ArjExtractor : AsyncExtractorInterface
{
    /// <summary>
    /// The constructor takes the Extractor context for recursion.
    /// </summary>
    /// <param name="context">The Extractor context.</param>
    public ArjExtractor(Extractor context)
    {
        Context = context;
    }

    private readonly NLog.Logger Logger = NLog.LogManager.GetCurrentClassLogger();

    internal Extractor Context { get; }

    /// <summary>
    /// Attempts to open an ARJ reader over the content of <paramref name="fileEntry"/>.
    /// </summary>
    /// <param name="fileEntry">The entry whose content should be parsed as an ARJ archive.</param>
    /// <returns>The opened reader, or null when opening failed (the failure is logged at Debug level).</returns>
    private ArjReader? TryOpenReader(FileEntry fileEntry)
    {
        try
        {
            // LeaveStreamOpen: the content stream belongs to the FileEntry, not to the reader.
            return ArjReader.Open(fileEntry.Content, new SharpCompress.Readers.ReaderOptions()
            {
                LeaveStreamOpen = true
            });
        }
        catch (Exception e)
        {
            Logger.Debug(Extractor.FAILED_PARSING_ERROR_MESSAGE_STRING, ArchiveFileType.ARJ, fileEntry.FullPath, string.Empty, e.GetType());
            return null;
        }
    }

    /// <summary>
    /// Extracts an ARJ archive
    /// </summary>
    ///<inheritdoc />
    public async IAsyncEnumerable<FileEntry> ExtractAsync(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true)
    {
        using var arjReader = TryOpenReader(fileEntry);
        if (arjReader is null)
        {
            // The archive could not be parsed; optionally hand the raw file back to the caller.
            if (options.ExtractSelfOnFail)
            {
                fileEntry.EntryStatus = FileEntryStatus.FailedArchive;
                yield return fileEntry;
            }
            yield break;
        }

        while (arjReader.MoveToNextEntry())
        {
            var entry = arjReader.Entry;
            if (entry.IsDirectory)
            {
                continue;
            }

            governor.CheckResourceGovernor(entry.Size);

            var name = entry.Key?.Replace('/', Path.DirectorySeparatorChar);
            if (string.IsNullOrEmpty(name))
            {
                Logger.Debug(Extractor.ENTRY_MISSING_NAME_ERROR_MESSAGE_STRING, ArchiveFileType.ARJ, fileEntry.FullPath);
                continue;
            }

            // A single unreadable entry is logged and skipped (same policy as the synchronous
            // Extract) instead of aborting the whole enumeration.
            FileEntry? newFileEntry = null;
            try
            {
                // NOTE(review): disposing here assumes FromStreamAsync copies the content
                // rather than retaining the entry stream - confirm against FileEntry.
                using var entryStream = arjReader.OpenEntryStream();
                newFileEntry = await FileEntry.FromStreamAsync(name, entryStream, fileEntry, entry.CreatedTime, entry.LastModifiedTime, entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff).ConfigureAwait(false);
            }
            catch (Exception e)
            {
                Logger.Debug(Extractor.FAILED_PARSING_ERROR_MESSAGE_STRING, ArchiveFileType.ARJ, fileEntry.FullPath, entry.Key, e.GetType());
            }

            if (newFileEntry is null)
            {
                continue;
            }

            if (options.Recurse || topLevel)
            {
                await foreach (var innerEntry in Context.ExtractAsync(newFileEntry, options, governor, false))
                {
                    yield return innerEntry;
                }
            }
            else
            {
                yield return newFileEntry;
            }
        }
    }

    /// <summary>
    /// Extracts an ARJ archive
    /// </summary>
    ///<inheritdoc />
    public IEnumerable<FileEntry> Extract(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true)
    {
        using var arjReader = TryOpenReader(fileEntry);
        if (arjReader is null)
        {
            // The archive could not be parsed; optionally hand the raw file back to the caller.
            if (options.ExtractSelfOnFail)
            {
                fileEntry.EntryStatus = FileEntryStatus.FailedArchive;
                yield return fileEntry;
            }
            yield break;
        }

        while (arjReader.MoveToNextEntry())
        {
            var entry = arjReader.Entry;
            if (entry.IsDirectory)
            {
                continue;
            }

            governor.CheckResourceGovernor(entry.Size);

            // Validate the name BEFORE opening the entry stream, so a nameless entry is
            // skipped without leaving an opened-but-unread stream on the reader.
            var name = entry.Key?.Replace('/', Path.DirectorySeparatorChar);
            if (string.IsNullOrEmpty(name))
            {
                Logger.Debug(Extractor.ENTRY_MISSING_NAME_ERROR_MESSAGE_STRING, ArchiveFileType.ARJ, fileEntry.FullPath);
                continue;
            }

            FileEntry? newFileEntry = null;
            try
            {
                // NOTE(review): disposing here assumes the FileEntry constructor copies the
                // content (4th argument is false) rather than retaining the stream - confirm.
                using var entryStream = arjReader.OpenEntryStream();
                newFileEntry = new FileEntry(name, entryStream, fileEntry, false, entry.CreatedTime, entry.LastModifiedTime, entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff);
            }
            catch (Exception e)
            {
                Logger.Debug(Extractor.FAILED_PARSING_ERROR_MESSAGE_STRING, ArchiveFileType.ARJ, fileEntry.FullPath, entry.Key, e.GetType());
            }

            if (newFileEntry is null)
            {
                continue;
            }

            if (options.Recurse || topLevel)
            {
                foreach (var innerEntry in Context.Extract(newFileEntry, options, governor, false))
                {
                    yield return innerEntry;
                }
            }
            else
            {
                yield return newFileEntry;
            }
        }
    }
}
167+
}

0 commit comments

Comments
 (0)