Skip to content

Commit d791347

Browse files
Copilot and gfs authored
Add FileEntryMetadata to FileEntry for permissions, ownership, and special bits (#196)
Add FileEntryMetadata to FileEntry for permissions, ownership, and special bits for formats that use SharpCompress as the underlying extraction library and recursive extractor implemented formats. --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: gfs <98900+gfs@users.noreply.github.com>
1 parent 4bc33bc commit d791347

8 files changed

Lines changed: 345 additions & 12 deletions

File tree

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
// Copyright (c) Microsoft Corporation. Licensed under the MIT License.
2+
3+
using Microsoft.CST.RecursiveExtractor;
4+
using System.IO;
5+
using System.Linq;
6+
using System.Threading.Tasks;
7+
using Xunit;
8+
9+
namespace RecursiveExtractor.Tests.ExtractorTests;
10+
11+
/// <summary>
/// Tests for <see cref="FileEntryMetadata"/>: that tar and ar extraction populate it on both the
/// synchronous and asynchronous code paths, and that the flags derived from the mode behave as expected.
/// </summary>
public class FileMetadataTests
{
    // 0644 (octal) = 420 (decimal); the mode asserted for every entry of the tar and ar fixtures.
    private const int RegularFileMode = 420;

    /// <summary>
    /// Builds the path of a fixture archive under TestData/TestDataArchives in the current directory.
    /// </summary>
    /// <param name="archiveName">File name of the archive, e.g. "TestData.tar".</param>
    /// <returns>The combined path to the archive.</returns>
    private static string GetArchivePath(string archiveName) =>
        Path.Combine(Directory.GetCurrentDirectory(), "TestData", "TestDataArchives", archiveName);

    /// <summary>
    /// Assertions shared by the sync and async tar tests so both extraction paths are held to the same bar.
    /// </summary>
    /// <param name="entry">An entry extracted from TestData.tar.</param>
    private static void AssertTarEntryMetadata(FileEntry entry)
    {
        Assert.NotNull(entry.Metadata);
        Assert.NotNull(entry.Metadata!.Mode);
        Assert.Equal(RegularFileMode, entry.Metadata.Mode);
        Assert.False(entry.Metadata.IsExecutable);
        Assert.False(entry.Metadata.IsSetUid);
        Assert.False(entry.Metadata.IsSetGid);
        Assert.NotNull(entry.Metadata.Uid);
        Assert.NotNull(entry.Metadata.Gid);
    }

    /// <summary>
    /// Assertions shared by the sync and async ar tests so both extraction paths are held to the same bar.
    /// </summary>
    /// <param name="entry">An entry extracted from TestData.a.</param>
    private static void AssertArEntryMetadata(FileEntry entry)
    {
        Assert.NotNull(entry.Metadata);
        Assert.NotNull(entry.Metadata!.Mode);
        Assert.Equal(RegularFileMode, entry.Metadata.Mode);
        Assert.False(entry.Metadata.IsExecutable);
        Assert.NotNull(entry.Metadata.Uid);
        Assert.Equal(0L, entry.Metadata.Uid);
        Assert.NotNull(entry.Metadata.Gid);
        Assert.Equal(0L, entry.Metadata.Gid);
    }

    [Fact]
    public async Task TarEntries_HaveMetadata()
    {
        var extractor = new Extractor();
        var path = GetArchivePath("TestData.tar");

        var results = await extractor.ExtractAsync(path, new ExtractorOptions() { Recurse = false }).ToListAsync();

        Assert.NotEmpty(results);
        foreach (var entry in results)
        {
            AssertTarEntryMetadata(entry);
        }
    }

    [Fact]
    public void TarEntries_HaveMetadata_Sync()
    {
        var extractor = new Extractor();
        var path = GetArchivePath("TestData.tar");

        var results = extractor.Extract(path, new ExtractorOptions() { Recurse = false }).ToList();

        Assert.NotEmpty(results);
        foreach (var entry in results)
        {
            AssertTarEntryMetadata(entry);
        }
    }

    [Fact]
    public async Task ArEntries_HaveMetadata()
    {
        var extractor = new Extractor();
        var path = GetArchivePath("TestData.a");

        var results = await extractor.ExtractAsync(path, new ExtractorOptions() { Recurse = false }).ToListAsync();

        Assert.NotEmpty(results);
        foreach (var entry in results)
        {
            AssertArEntryMetadata(entry);
        }
    }

    [Fact]
    public void ArEntries_HaveMetadata_Sync()
    {
        var extractor = new Extractor();
        var path = GetArchivePath("TestData.a");

        var results = extractor.Extract(path, new ExtractorOptions() { Recurse = false }).ToList();

        Assert.NotEmpty(results);
        foreach (var entry in results)
        {
            AssertArEntryMetadata(entry);
        }
    }

    [Fact]
    public void MetadataDefaults_AreNull()
    {
        var metadata = new FileEntryMetadata();

        Assert.Null(metadata.Mode);
        Assert.Null(metadata.Uid);
        Assert.Null(metadata.Gid);
        Assert.Null(metadata.IsExecutable);
        Assert.Null(metadata.IsSetUid);
        Assert.Null(metadata.IsSetGid);
    }

    [Fact]
    public void IsExecutable_DerivedFromMode()
    {
        // 0755 (octal) = 493 (decimal)
        var metadata = new FileEntryMetadata { Mode = 493 };
        Assert.True(metadata.IsExecutable);
        Assert.False(metadata.IsSetUid);
        Assert.False(metadata.IsSetGid);

        // 0644 (octal) = 420 (decimal)
        metadata = new FileEntryMetadata { Mode = RegularFileMode };
        Assert.False(metadata.IsExecutable);
    }

    [Fact]
    public void SetUidSetGid_DerivedFromMode()
    {
        // 04755 (octal) = 2541 (decimal) — setuid + rwxr-xr-x
        var metadata = new FileEntryMetadata { Mode = 2541 };
        Assert.True(metadata.IsSetUid);
        Assert.False(metadata.IsSetGid);
        Assert.True(metadata.IsExecutable);

        // 02755 (octal) = 1517 (decimal) — setgid + rwxr-xr-x
        metadata = new FileEntryMetadata { Mode = 1517 };
        Assert.False(metadata.IsSetUid);
        Assert.True(metadata.IsSetGid);
        Assert.True(metadata.IsExecutable);
    }

    [Fact]
    public void FileEntry_MetadataDefaultsToNull()
    {
        using var stream = new MemoryStream(new byte[] { 0 });

        var entry = new FileEntry("test.txt", stream);

        Assert.Null(entry.Metadata);
    }
}

RecursiveExtractor/ArFile.cs

Lines changed: 80 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,10 @@ public static IEnumerable<FileEntry> GetFileEntries(FileEntry fileEntry, Extract
8383
// The name length is included in the total size reported in the header
8484
CopyStreamBytes(fileEntry.Content, entryStream, size - nameLength);
8585

86-
yield return new FileEntry(Encoding.ASCII.GetString(nameSpan).TrimEnd('/'), entryStream, fileEntry, true, memoryStreamCutoff: options.MemoryStreamCutoff);
86+
yield return new FileEntry(Encoding.ASCII.GetString(nameSpan).TrimEnd('/'), entryStream, fileEntry, true, memoryStreamCutoff: options.MemoryStreamCutoff)
87+
{
88+
Metadata = ParseArMetadata(headerBuffer)
89+
};
8790
}
8891
}
8992
else if (filename.Equals('/'))
@@ -149,7 +152,10 @@ public static IEnumerable<FileEntry> GetFileEntries(FileEntry fileEntry, Extract
149152

150153
var entryStream = StreamFactory.GenerateAppropriateBackingStream(options, innerSize);
151154
CopyStreamBytes(fileEntry.Content, entryStream, innerSize);
152-
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true);
155+
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true)
156+
{
157+
Metadata = ParseArMetadata(headerBuffer)
158+
};
153159
}
154160
}
155161
fileEntry.Content.Position = fileEntry.Content.Length - 1;
@@ -220,7 +226,10 @@ public static IEnumerable<FileEntry> GetFileEntries(FileEntry fileEntry, Extract
220226

221227
var entryStream = StreamFactory.GenerateAppropriateBackingStream(options, innerSize);
222228
CopyStreamBytes(fileEntry.Content, entryStream, innerSize);
223-
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true);
229+
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true)
230+
{
231+
Metadata = ParseArMetadata(headerBuffer)
232+
};
224233
}
225234
}
226235
fileEntry.Content.Position = fileEntry.Content.Length - 1;
@@ -241,14 +250,20 @@ public static IEnumerable<FileEntry> GetFileEntries(FileEntry fileEntry, Extract
241250
var entryStream = StreamFactory.GenerateAppropriateBackingStream(options, size);
242251
CopyStreamBytes(fileEntry.Content, entryStream, size);
243252

244-
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true); ;
253+
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true)
254+
{
255+
Metadata = ParseArMetadata(headerBuffer)
256+
};
245257
}
246258
else
247259
{
248260
var entryStream = StreamFactory.GenerateAppropriateBackingStream(options, size);
249261
CopyStreamBytes(fileEntry.Content, entryStream, size);
250262

251-
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true);
263+
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true)
264+
{
265+
Metadata = ParseArMetadata(headerBuffer)
266+
};
252267
}
253268
}
254269
else
@@ -329,7 +344,10 @@ public static async IAsyncEnumerable<FileEntry> GetFileEntriesAsync(FileEntry fi
329344
// The name length is included in the total size reported in the header
330345
await CopyStreamBytesAsync(fileEntry.Content, entryStream, size - nameLength).ConfigureAwait(false);
331346

332-
yield return new FileEntry(Encoding.ASCII.GetString(nameSpan).TrimEnd('/'), entryStream, fileEntry, true);
347+
yield return new FileEntry(Encoding.ASCII.GetString(nameSpan).TrimEnd('/'), entryStream, fileEntry, true)
348+
{
349+
Metadata = ParseArMetadata(headerBuffer)
350+
};
333351
}
334352
}
335353
else if (filename.Equals('/'))
@@ -394,7 +412,10 @@ public static async IAsyncEnumerable<FileEntry> GetFileEntriesAsync(FileEntry fi
394412
}
395413
var entryStream = StreamFactory.GenerateAppropriateBackingStream(options, innerSize);
396414
await CopyStreamBytesAsync(fileEntry.Content, entryStream, innerSize).ConfigureAwait(false);
397-
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true);
415+
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true)
416+
{
417+
Metadata = ParseArMetadata(headerBuffer)
418+
};
398419
}
399420
}
400421
fileEntry.Content.Position = fileEntry.Content.Length - 1;
@@ -465,7 +486,10 @@ public static async IAsyncEnumerable<FileEntry> GetFileEntriesAsync(FileEntry fi
465486

466487
var entryStream = StreamFactory.GenerateAppropriateBackingStream(options, innerSize);
467488
await CopyStreamBytesAsync(fileEntry.Content, entryStream, innerSize).ConfigureAwait(false);
468-
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true);
489+
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true)
490+
{
491+
Metadata = ParseArMetadata(headerBuffer)
492+
};
469493
}
470494
}
471495
fileEntry.Content.Position = fileEntry.Content.Length - 1;
@@ -485,13 +509,19 @@ public static async IAsyncEnumerable<FileEntry> GetFileEntriesAsync(FileEntry fi
485509
}
486510
var entryStream = StreamFactory.GenerateAppropriateBackingStream(options, size);
487511
CopyStreamBytes(fileEntry.Content, entryStream, size);
488-
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true);
512+
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true)
513+
{
514+
Metadata = ParseArMetadata(headerBuffer)
515+
};
489516
}
490517
else
491518
{
492519
var entryStream = StreamFactory.GenerateAppropriateBackingStream(options, size);
493520
await CopyStreamBytesAsync(fileEntry.Content, entryStream, size).ConfigureAwait(false);
494-
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true);
521+
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true)
522+
{
523+
Metadata = ParseArMetadata(headerBuffer)
524+
};
495525
}
496526
}
497527
else
@@ -570,6 +600,46 @@ internal static async Task<long> CopyStreamBytesAsync(Stream input, Stream outpu
570600

571601
private const int bufferSize = 4096;
572602

603+
/// <summary>
/// Parse file metadata (UID, GID, mode) from an ar file header buffer.
/// </summary>
/// <remarks>
/// The three fields are parsed independently, so one unreadable field does not discard the others.
/// Malformed field contents never throw; the corresponding property is left unset instead.
/// </remarks>
/// <param name="headerBuffer">The 60-byte ar header. Only bytes 28-47 (uid, gid, mode) are read.</param>
/// <returns>
/// A <see cref="FileEntryMetadata"/> carrying every field that parsed successfully, or null when the
/// buffer is null, shorter than 48 bytes, or none of the three fields could be parsed.
/// </returns>
internal static FileEntryMetadata? ParseArMetadata(byte[] headerBuffer)
{
    const int uidOffset = 28;   // ar_uid: bytes 28-33, decimal
    const int uidLength = 6;
    const int gidOffset = 34;   // ar_gid: bytes 34-39, decimal
    const int gidLength = 6;
    const int modeOffset = 40;  // ar_mode: bytes 40-47, octal
    const int modeLength = 8;

    // A truncated header cannot be sliced safely; report "no metadata" rather than throwing.
    if (headerBuffer is null || headerBuffer.Length < modeOffset + modeLength)
    {
        return null;
    }

    // Decode straight from the buffer instead of copying each field into a temporary array first.
    string ReadField(int offset, int length) => Encoding.ASCII.GetString(headerBuffer, offset, length).Trim();

    var metadata = new FileEntryMetadata();
    var hasData = false;

    // Header fields are machine-written ASCII, so parse them culture-independently.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    if (int.TryParse(ReadField(uidOffset, uidLength), System.Globalization.NumberStyles.Integer, invariant, out var uid))
    {
        metadata.Uid = uid;
        hasData = true;
    }

    if (int.TryParse(ReadField(gidOffset, gidLength), System.Globalization.NumberStyles.Integer, invariant, out var gid))
    {
        metadata.Gid = gid;
        hasData = true;
    }

    var modeString = ReadField(modeOffset, modeLength);
    if (!string.IsNullOrEmpty(modeString))
    {
        try
        {
            metadata.Mode = Convert.ToInt64(modeString, 8);
            hasData = true;
        }
        // Best effort by design: an unparseable mode leaves Mode unset.
        catch (FormatException) { }   // contains a character that is not an octal digit
        catch (OverflowException) { } // value does not fit in an Int64
        catch (ArgumentException) { } // a leading '-' is rejected for non-decimal bases
    }

    return hasData ? metadata : null;
}
642+
573643
private readonly static NLog.Logger Logger = NLog.LogManager.GetCurrentClassLogger();
574644
}
575645
}

RecursiveExtractor/Extractors/RarExtractor.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,14 @@ public async IAsyncEnumerable<FileEntry> ExtractAsync(FileEntry fileEntry, Extra
108108
var newFileEntry = await FileEntry.FromStreamAsync(name, entry.OpenEntryStream(), fileEntry, entry.CreatedTime, entry.LastModifiedTime, entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff).ConfigureAwait(false);
109109
if (newFileEntry != null)
110110
{
111+
try
112+
{
113+
if (entry.Attrib.HasValue)
114+
{
115+
newFileEntry.Metadata = new FileEntryMetadata { Mode = entry.Attrib.Value };
116+
}
117+
}
118+
catch (Exception e) { Logger.Trace("Failed to read file attributes for {0} in {1} archive {2}: {3}", entry.Key, ArchiveFileType.RAR, fileEntry.FullPath, e.Message); }
111119
if (options.Recurse || topLevel)
112120
{
113121
await foreach (var innerEntry in Context.ExtractAsync(newFileEntry, options, governor, false))
@@ -158,6 +166,14 @@ public IEnumerable<FileEntry> Extract(FileEntry fileEntry, ExtractorOptions opti
158166
}
159167
if (newFileEntry != null)
160168
{
169+
try
170+
{
171+
if (entry.Attrib.HasValue)
172+
{
173+
newFileEntry.Metadata = new FileEntryMetadata { Mode = entry.Attrib.Value };
174+
}
175+
}
176+
catch (Exception e) { Logger.Trace("Failed to read file attributes for {0} in {1} archive {2}: {3}", entry.Key, ArchiveFileType.RAR, fileEntry.FullPath, e.Message); }
161177
if (options.Recurse || topLevel)
162178
{
163179
foreach (var innerEntry in Context.Extract(newFileEntry, options, governor, false))

RecursiveExtractor/Extractors/SevenZipExtractor.cs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,14 @@ public async IAsyncEnumerable<FileEntry> ExtractAsync(FileEntry fileEntry, Extra
4646

4747
if (newFileEntry != null)
4848
{
49+
try
50+
{
51+
if (entry.Attrib.HasValue)
52+
{
53+
newFileEntry.Metadata = new FileEntryMetadata { Mode = entry.Attrib.Value };
54+
}
55+
}
56+
catch (Exception e) { Logger.Trace("Failed to read file attributes for {0} in {1} archive {2}: {3}", entry.Key, ArchiveFileType.P7ZIP, fileEntry.FullPath, e.Message); }
4957
if (options.Recurse || topLevel)
5058
{
5159
await foreach (var innerEntry in Context.ExtractAsync(newFileEntry, options, governor, false))
@@ -157,6 +165,15 @@ public IEnumerable<FileEntry> Extract(FileEntry fileEntry, ExtractorOptions opti
157165
var name = (entry.Key ?? string.Empty).Replace('/', Path.DirectorySeparatorChar);
158166
var newFileEntry = new FileEntry(name, entry.OpenEntryStream(), fileEntry, createTime: entry.CreatedTime, modifyTime: entry.LastModifiedTime, accessTime: entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff);
159167

168+
try
169+
{
170+
if (entry.Attrib.HasValue)
171+
{
172+
newFileEntry.Metadata = new FileEntryMetadata { Mode = entry.Attrib.Value };
173+
}
174+
}
175+
catch (Exception e) { Logger.Trace("Failed to read file attributes for {0} in {1} archive {2}: {3}", entry.Key, ArchiveFileType.P7ZIP, fileEntry.FullPath, e.Message); }
176+
160177
if (options.Recurse || topLevel)
161178
{
162179
foreach (var innerEntry in Context.Extract(newFileEntry, options, governor, false))

RecursiveExtractor/Extractors/TarExtractor.cs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,10 @@ public async IAsyncEnumerable<FileEntry> ExtractAsync(FileEntry fileEntry, Extra
7575
name = name[2..];
7676
}
7777

78-
var newFileEntry = new FileEntry(name, fs, fileEntry, true, memoryStreamCutoff: options.MemoryStreamCutoff);
78+
var newFileEntry = new FileEntry(name, fs, fileEntry, true, memoryStreamCutoff: options.MemoryStreamCutoff)
79+
{
80+
Metadata = new FileEntryMetadata { Mode = tarEntry.Mode, Uid = tarEntry.UserID, Gid = tarEntry.GroupId }
81+
};
7982

8083
if (options.Recurse || topLevel)
8184
{
@@ -144,7 +147,10 @@ public IEnumerable<FileEntry> Extract(FileEntry fileEntry, ExtractorOptions opti
144147
{
145148
name = name[2..];
146149
}
147-
var newFileEntry = new FileEntry(name, fs, fileEntry, true, memoryStreamCutoff: options.MemoryStreamCutoff);
150+
var newFileEntry = new FileEntry(name, fs, fileEntry, true, memoryStreamCutoff: options.MemoryStreamCutoff)
151+
{
152+
Metadata = new FileEntryMetadata { Mode = tarEntry.Mode, Uid = tarEntry.UserID, Gid = tarEntry.GroupId }
153+
};
148154

149155
if (options.Recurse || topLevel)
150156
{

0 commit comments

Comments
 (0)