Skip to content

Commit c836c0d

Browse files
committed
ext4: Implement extended attribute reading
Add GetXattr(path) public method that reads both inline (ibody) and external block xattrs. Support all standard name index prefixes (user, trusted, security, system, posix_acl). Signed-off-by: Paweł Gronowski <pawel.gronowski@docker.com>
1 parent 5880e7f commit c836c0d

3 files changed

Lines changed: 628 additions & 21 deletions

File tree

filesystem/ext4/ext4.go

Lines changed: 157 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1633,29 +1633,11 @@ func (fs *FileSystem) readInode(inodeNumber uint32) (*inode, error) {
16331633
if inodeNumber == 0 {
16341634
return nil, fmt.Errorf("cannot read inode 0")
16351635
}
1636-
sb := fs.superblock
1637-
inodeSize := sb.inodeSize
1638-
inodesPerGroup := sb.inodesPerGroup
1639-
// figure out which block group the inode is on
1640-
bg := (inodeNumber - 1) / inodesPerGroup
1641-
// read the group descriptor to find out the location of the inode table
1642-
gd := fs.groupDescriptors.descriptors[bg]
1643-
inodeTableBlock := gd.inodeTableLocation
1644-
inodeBytes := make([]byte, inodeSize)
1645-
// bytesStart is beginning byte for the inodeTableBlock
1646-
byteStart := inodeTableBlock * uint64(sb.blockSize)
1647-
// offsetInode is how many inodes in our inode is
1648-
offsetInode := (inodeNumber - 1) % inodesPerGroup
1649-
// offset is how many bytes in our inode is
1650-
offset := offsetInode * uint32(inodeSize)
1651-
read, err := fs.backend.ReadAt(inodeBytes, int64(byteStart)+int64(offset))
1636+
inodeBytes, err := fs.readInodeRaw(inodeNumber)
16521637
if err != nil {
1653-
return nil, fmt.Errorf("failed to read inode %d from offset %d of block %d from block group %d: %v", inodeNumber, offset, inodeTableBlock, bg, err)
1654-
}
1655-
if read != int(inodeSize) {
1656-
return nil, fmt.Errorf("read %d bytes for inode %d instead of inode size of %d", read, inodeNumber, inodeSize)
1638+
return nil, fmt.Errorf("could not read inode %d: %w", inodeNumber, err)
16571639
}
1658-
inode, err := inodeFromBytes(inodeBytes, sb, inodeNumber)
1640+
inode, err := inodeFromBytes(inodeBytes, fs.superblock, inodeNumber)
16591641
if err != nil {
16601642
return nil, fmt.Errorf("could not interpret inode data: %v", err)
16611643
}
@@ -3396,3 +3378,157 @@ func validatePath(name string) error {
33963378
}
33973379
return nil
33983380
}
3381+
3382+
// GetXattr reads extended attributes for the file at path p.
3383+
//
3384+
// Extended attributes are stored either inline in the inode (ibody) or in a
3385+
// separate block referenced by inode.i_file_acl. This method reads both locations
3386+
// and merges the results, with ibody xattrs taking precedence.
3387+
//
3388+
// References:
3389+
// - Kernel source: https://github.com/torvalds/linux/blob/master/fs/ext4/xattr.c (ext4_xattr_get)
3390+
// - Disk layout: https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#extended-attributes
3391+
func (fs *FileSystem) GetXattr(p string) (map[string][]byte, error) {
3392+
_, entry, err := fs.getEntryAndParent(p)
3393+
if err != nil {
3394+
return nil, err
3395+
}
3396+
if entry == nil {
3397+
return nil, fmt.Errorf("file does not exist: %s", p)
3398+
}
3399+
inodeBytes, err := fs.readInodeRaw(entry.inode)
3400+
if err != nil {
3401+
return nil, fmt.Errorf("could not read inode %d: %w", entry.inode, err)
3402+
}
3403+
in, err := inodeFromBytes(inodeBytes, fs.superblock, entry.inode)
3404+
if err != nil {
3405+
return nil, fmt.Errorf("could not interpret inode data: %w", err)
3406+
}
3407+
return fs.readXattrs(in, inodeBytes)
3408+
}
3409+
3410+
// readXattrs reads all extended attributes from an inode.
3411+
//
3412+
// Extended attributes can be stored in two locations:
3413+
// 1. Inline in the inode body (ibody) - stored after i_extra_isize
3414+
// 2. In a dedicated block referenced by inode.i_file_acl
3415+
//
3416+
// Both locations are read and merged. The same key should not exist in both
3417+
// locations, but if it does, the ibody value is kept.
3418+
func (fs *FileSystem) readXattrs(in *inode, inodeBytes []byte) (map[string][]byte, error) {
3419+
result := make(map[string][]byte)
3420+
3421+
ibodyXattrs, err := fs.readIbodyXattrs(inodeBytes)
3422+
if err != nil {
3423+
return nil, fmt.Errorf("error reading ibody xattrs: %w", err)
3424+
}
3425+
for k, v := range ibodyXattrs {
3426+
result[k] = v
3427+
}
3428+
3429+
if in.extendedAttributeBlock != 0 {
3430+
blockXattrs, err := fs.readBlockXattrs(in.extendedAttributeBlock)
3431+
if err != nil {
3432+
return nil, fmt.Errorf("error reading xattr block: %w", err)
3433+
}
3434+
for k, v := range blockXattrs {
3435+
if _, exists := result[k]; !exists {
3436+
result[k] = v
3437+
}
3438+
}
3439+
}
3440+
3441+
return result, nil
3442+
}
3443+
3444+
// readIbodyXattrs reads extended attributes stored inline in the inode.
3445+
//
3446+
// For inodes larger than 128 bytes, the extra space after i_extra_isize can be
3447+
// used to store extended attributes. The layout is:
3448+
//
3449+
// [128-byte base inode][i_extra_isize bytes][xattr magic][xattr entries]
3450+
//
3451+
// The inline storage is indicated by the ext4_xattr_ibody_header magic number
3452+
// (0xEA020000). If not present, the inode has no inline xattrs.
3453+
//
3454+
// Reference: ext4_xattr_ibody_find in fs/ext4/xattr.c
3455+
func (fs *FileSystem) readIbodyXattrs(inodeBytes []byte) (map[string][]byte, error) {
3456+
sb := fs.superblock
3457+
if sb.inodeSize <= ext2InodeSize {
3458+
return nil, nil
3459+
}
3460+
3461+
if len(inodeBytes) < int(ext2InodeSize)+4 {
3462+
return nil, nil
3463+
}
3464+
extraIsize := binary.LittleEndian.Uint16(inodeBytes[ext2InodeSize : ext2InodeSize+2])
3465+
3466+
xattrStart := int(ext2InodeSize) + int(extraIsize)
3467+
xattrEnd := int(sb.inodeSize)
3468+
if xattrStart >= xattrEnd || xattrEnd-xattrStart < 4 {
3469+
return nil, nil
3470+
}
3471+
3472+
magic := binary.LittleEndian.Uint32(inodeBytes[xattrStart : xattrStart+4])
3473+
if magic != xattrMagic {
3474+
return nil, nil
3475+
}
3476+
3477+
data := inodeBytes[xattrStart+4 : xattrEnd]
3478+
return parseXattrEntries(data, data)
3479+
}
3480+
3481+
// readBlockXattrs reads extended attributes from a dedicated block.
3482+
//
3483+
// The block format is:
3484+
//
3485+
// [ext4_xattr_header (32 bytes)][xattr entries][xattr values]
3486+
//
3487+
// The ext4_xattr_header contains a magic number (0xEA020000), reference count,
3488+
// and checksums. Entries are stored in sorted order (by name_index, then name).
3489+
//
3490+
// Reference: ext4_xattr_block_find in fs/ext4/xattr.c
3491+
func (fs *FileSystem) readBlockXattrs(block uint64) (map[string][]byte, error) {
3492+
blockSize := int(fs.superblock.blockSize)
3493+
data := make([]byte, blockSize)
3494+
offset := int64(block) * int64(blockSize)
3495+
_, err := fs.backend.ReadAt(data, offset)
3496+
if err != nil {
3497+
return nil, fmt.Errorf("could not read xattr block at %d: %w", block, err)
3498+
}
3499+
3500+
magic := binary.LittleEndian.Uint32(data[0:4])
3501+
if magic != xattrMagic {
3502+
return nil, fmt.Errorf("invalid xattr block magic: %x", magic)
3503+
}
3504+
3505+
entryData := data[xattrHeaderSize:]
3506+
return parseXattrEntries(entryData, data)
3507+
}
3508+
3509+
// readInodeRaw reads the raw bytes of an inode from disk.
3510+
//
3511+
// This is a helper function extracted from readInode to allow reading the full
3512+
// inode structure including the extended area (i_extra_isize) which may contain
3513+
// inline extended attributes.
3514+
func (fs *FileSystem) readInodeRaw(inodeNumber uint32) ([]byte, error) {
3515+
sb := fs.superblock
3516+
inodeSize := sb.inodeSize
3517+
inodesPerGroup := sb.inodesPerGroup
3518+
bg := (inodeNumber - 1) / inodesPerGroup
3519+
gd := fs.groupDescriptors.descriptors[bg]
3520+
inodeTableBlock := gd.inodeTableLocation
3521+
byteStart := inodeTableBlock * uint64(sb.blockSize)
3522+
offsetInode := (inodeNumber - 1) % inodesPerGroup
3523+
offset := offsetInode * uint32(inodeSize)
3524+
3525+
inodeBytes := make([]byte, inodeSize)
3526+
read, err := fs.backend.ReadAt(inodeBytes, int64(byteStart)+int64(offset))
3527+
if err != nil {
3528+
return nil, fmt.Errorf("failed to read inode %d from offset %d of block %d from block group %d: %w", inodeNumber, offset, inodeTableBlock, bg, err)
3529+
}
3530+
if read != int(inodeSize) {
3531+
return nil, fmt.Errorf("read %d bytes for inode %d instead of inode size of %d", read, inodeNumber, inodeSize)
3532+
}
3533+
return inodeBytes, nil
3534+
}

filesystem/ext4/xattr.go

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
package ext4
2+
3+
import (
4+
"encoding/binary"
5+
"fmt"
6+
)
7+
8+
// Extended Attributes (xattrs) implementation.
9+
//
10+
// References:
11+
// - Kernel documentation: https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#extended-attributes
12+
// - Legacy wiki: https://ext4.wiki.kernel.org/index.php/Ext4_Disk_Layout#Extended_Attributes
13+
// - Kernel source (structures): https://github.com/torvalds/linux/blob/master/fs/ext4/xattr.h
14+
// - Kernel source (implementation): https://github.com/torvalds/linux/blob/master/fs/ext4/xattr.c
15+
// - e2fsprogs implementation: https://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git/tree/lib/ext2fs/ext_attr.c
16+
17+
// On-disk layout constants for ext4 extended attributes.
// See struct ext4_xattr_header and struct ext4_xattr_entry in fs/ext4/xattr.h.
const (
	// xattrMagic is the magic number that identifies extended attribute
	// storage, both in dedicated blocks and inline in the inode.
	xattrMagic = 0xEA020000

	// xattrHeaderSize is the size in bytes of ext4_xattr_header, the header
	// at the start of a dedicated xattr block.
	xattrHeaderSize = 32

	// xattrEntrySize is the size in bytes of the fixed part of
	// ext4_xattr_entry, i.e. everything before the variable-length name.
	xattrEntrySize = 16
)

// Attribute name index values (e_name_index) from fs/ext4/xattr.h.
// The values are not contiguous: 5 is not defined here.
const (
	xattrIndexUser            = 1 // EXT4_XATTR_INDEX_USER
	xattrIndexPosixACLAccess  = 2 // EXT4_XATTR_INDEX_POSIX_ACL_ACCESS
	xattrIndexPosixACLDefault = 3 // EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT
	xattrIndexTrusted         = 4 // EXT4_XATTR_INDEX_TRUSTED
	xattrIndexSecurity        = 6 // EXT4_XATTR_INDEX_SECURITY
	xattrIndexSystem          = 7 // EXT4_XATTR_INDEX_SYSTEM
)
37+
38+
// xattrPrefixes maps name index values to their corresponding key prefixes.
39+
// This reduces on-disk space consumption by storing only the index instead of
40+
// the full prefix string.
41+
//
42+
// POSIX ACL entries have no trailing dot because their e_name_len is always 0;
43+
// the full attribute name is exactly the prefix (e.g. "system.posix_acl_access").
44+
//
45+
// See ext4_xattr_prefix_type in fs/ext4/xattr.h
46+
var xattrPrefixes = map[uint8]string{
47+
0: "",
48+
xattrIndexUser: "user.",
49+
xattrIndexPosixACLAccess: "system.posix_acl_access",
50+
xattrIndexPosixACLDefault: "system.posix_acl_default",
51+
xattrIndexTrusted: "trusted.",
52+
xattrIndexSecurity: "security.",
53+
xattrIndexSystem: "system.",
54+
}
55+
56+
// parseXattrEntries parses extended attribute entries from a byte slice.
57+
//
58+
// entries contains the ext4_xattr_entry structures; values is the region from which
59+
// e_value_offs is relative. For inline (ibody) xattrs, both point to the same region.
60+
// For block xattrs, entries points to the region after the header, and values points
61+
// to the entire block (including header).
62+
//
63+
// The on-disk format is defined in struct ext4_xattr_entry in fs/ext4/xattr.h:
64+
//
65+
// struct ext4_xattr_entry {
66+
// __u8 e_name_len; /* length of name */
67+
// __u8 e_name_index; /* attribute name index */
68+
// __le16 e_value_offs; /* offset in disk block of value */
69+
// __le32 e_value_inum; /* inode in which the value is stored */
70+
// __le32 e_value_size; /* size of attribute value */
71+
// __le32 e_hash; /* hash value of name and value */
72+
// char e_name[]; /* attribute name */
73+
// };
74+
func parseXattrEntries(entries, values []byte) (map[string][]byte, error) {
75+
result := make(map[string][]byte)
76+
pos := 0
77+
for pos+xattrEntrySize <= len(entries) {
78+
nameLen := entries[pos]
79+
nameIndex := entries[pos+1]
80+
// The entry list is terminated by a zero-filled entry (e_name_len == 0
81+
// and e_name_index == 0). See EXT4_IS_LAST_ENTRY in fs/ext4/xattr.h.
82+
// POSIX ACL entries have e_name_len == 0 but e_name_index != 0.
83+
if nameLen == 0 && nameIndex == 0 {
84+
break
85+
}
86+
valueOffs := binary.LittleEndian.Uint16(entries[pos+2 : pos+4])
87+
valueInum := binary.LittleEndian.Uint32(entries[pos+4 : pos+8])
88+
valueSize := binary.LittleEndian.Uint32(entries[pos+8 : pos+12])
89+
90+
nameStart := pos + xattrEntrySize
91+
nameEnd := nameStart + int(nameLen)
92+
if nameEnd > len(entries) {
93+
return nil, fmt.Errorf("xattr entry name extends past buffer")
94+
}
95+
96+
prefix, ok := xattrPrefixes[nameIndex]
97+
if !ok {
98+
prefix = fmt.Sprintf("unknown_%d.", nameIndex)
99+
}
100+
fullName := prefix + string(entries[nameStart:nameEnd])
101+
102+
if valueInum != 0 {
103+
return nil, fmt.Errorf("xattr %q: ea_inode values not supported", fullName)
104+
}
105+
if valueSize > 0 {
106+
vStart := int(valueOffs)
107+
vEnd := vStart + int(valueSize)
108+
if vEnd > len(values) {
109+
return nil, fmt.Errorf("xattr value for %q extends past buffer", fullName)
110+
}
111+
val := make([]byte, valueSize)
112+
copy(val, values[vStart:vEnd])
113+
result[fullName] = val
114+
}
115+
116+
// Advance to next entry, aligned to 4 bytes.
117+
pos = (nameEnd + 3) &^ 3
118+
}
119+
return result, nil
120+
}

0 commit comments

Comments
 (0)