Skip to content

Commit 934a575

Browse files
committed
ext4: implement extended attribute reading
Add GetXattr(path) public method that reads both inline (ibody) and external block xattrs. Support all standard name index prefixes (user, trusted, security, system, posix_acl). Signed-off-by: Paweł Gronowski <pawel.gronowski@docker.com>
1 parent 80ac6f8 commit 934a575

3 files changed

Lines changed: 628 additions & 21 deletions

File tree

filesystem/ext4/ext4.go

Lines changed: 157 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1630,29 +1630,11 @@ func (fs *FileSystem) readInode(inodeNumber uint32) (*inode, error) {
16301630
if inodeNumber == 0 {
16311631
return nil, fmt.Errorf("cannot read inode 0")
16321632
}
1633-
sb := fs.superblock
1634-
inodeSize := sb.inodeSize
1635-
inodesPerGroup := sb.inodesPerGroup
1636-
// figure out which block group the inode is on
1637-
bg := (inodeNumber - 1) / inodesPerGroup
1638-
// read the group descriptor to find out the location of the inode table
1639-
gd := fs.groupDescriptors.descriptors[bg]
1640-
inodeTableBlock := gd.inodeTableLocation
1641-
inodeBytes := make([]byte, inodeSize)
1642-
// bytesStart is beginning byte for the inodeTableBlock
1643-
byteStart := inodeTableBlock * uint64(sb.blockSize)
1644-
// offsetInode is how many inodes in our inode is
1645-
offsetInode := (inodeNumber - 1) % inodesPerGroup
1646-
// offset is how many bytes in our inode is
1647-
offset := offsetInode * uint32(inodeSize)
1648-
read, err := fs.backend.ReadAt(inodeBytes, int64(byteStart)+int64(offset))
1633+
inodeBytes, err := fs.readInodeRaw(inodeNumber)
16491634
if err != nil {
1650-
return nil, fmt.Errorf("failed to read inode %d from offset %d of block %d from block group %d: %v", inodeNumber, offset, inodeTableBlock, bg, err)
1651-
}
1652-
if read != int(inodeSize) {
1653-
return nil, fmt.Errorf("read %d bytes for inode %d instead of inode size of %d", read, inodeNumber, inodeSize)
1635+
return nil, fmt.Errorf("could not read inode %d: %w", inodeNumber, err)
16541636
}
1655-
inode, err := inodeFromBytes(inodeBytes, sb, inodeNumber)
1637+
inode, err := inodeFromBytes(inodeBytes, fs.superblock, inodeNumber)
16561638
if err != nil {
16571639
return nil, fmt.Errorf("could not interpret inode data: %v", err)
16581640
}
@@ -3393,3 +3375,157 @@ func validatePath(name string) error {
33933375
}
33943376
return nil
33953377
}
3378+
3379+
// GetXattr reads extended attributes for the file at path p.
3380+
//
3381+
// Extended attributes are stored either inline in the inode (ibody) or in a
3382+
// separate block referenced by inode.i_file_acl. This method reads both locations
3383+
// and merges the results, with ibody xattrs taking precedence.
3384+
//
3385+
// References:
3386+
// - Kernel source: https://github.com/torvalds/linux/blob/master/fs/ext4/xattr.c (ext4_xattr_get)
3387+
// - Disk layout: https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#extended-attributes
3388+
func (fs *FileSystem) GetXattr(p string) (map[string][]byte, error) {
3389+
_, entry, err := fs.getEntryAndParent(p)
3390+
if err != nil {
3391+
return nil, err
3392+
}
3393+
if entry == nil {
3394+
return nil, fmt.Errorf("file does not exist: %s", p)
3395+
}
3396+
inodeBytes, err := fs.readInodeRaw(entry.inode)
3397+
if err != nil {
3398+
return nil, fmt.Errorf("could not read inode %d: %w", entry.inode, err)
3399+
}
3400+
in, err := inodeFromBytes(inodeBytes, fs.superblock, entry.inode)
3401+
if err != nil {
3402+
return nil, fmt.Errorf("could not interpret inode data: %w", err)
3403+
}
3404+
return fs.readXattrs(in, inodeBytes)
3405+
}
3406+
3407+
// readXattrs reads all extended attributes from an inode.
3408+
//
3409+
// Extended attributes can be stored in two locations:
3410+
// 1. Inline in the inode body (ibody) - stored after i_extra_isize
3411+
// 2. In a dedicated block referenced by inode.i_file_acl
3412+
//
3413+
// Both locations are read and merged. The same key should not exist in both
3414+
// locations, but if it does, the ibody value is kept.
3415+
func (fs *FileSystem) readXattrs(in *inode, inodeBytes []byte) (map[string][]byte, error) {
3416+
result := make(map[string][]byte)
3417+
3418+
ibodyXattrs, err := fs.readIbodyXattrs(inodeBytes)
3419+
if err != nil {
3420+
return nil, fmt.Errorf("error reading ibody xattrs: %w", err)
3421+
}
3422+
for k, v := range ibodyXattrs {
3423+
result[k] = v
3424+
}
3425+
3426+
if in.extendedAttributeBlock != 0 {
3427+
blockXattrs, err := fs.readBlockXattrs(in.extendedAttributeBlock)
3428+
if err != nil {
3429+
return nil, fmt.Errorf("error reading xattr block: %w", err)
3430+
}
3431+
for k, v := range blockXattrs {
3432+
if _, exists := result[k]; !exists {
3433+
result[k] = v
3434+
}
3435+
}
3436+
}
3437+
3438+
return result, nil
3439+
}
3440+
3441+
// readIbodyXattrs reads extended attributes stored inline in the inode.
3442+
//
3443+
// For inodes larger than 128 bytes, the extra space after i_extra_isize can be
3444+
// used to store extended attributes. The layout is:
3445+
//
3446+
// [128-byte base inode][i_extra_isize bytes][xattr magic][xattr entries]
3447+
//
3448+
// The inline storage is indicated by the ext4_xattr_ibody_header magic number
3449+
// (0xEA020000). If not present, the inode has no inline xattrs.
3450+
//
3451+
// Reference: ext4_xattr_ibody_find in fs/ext4/xattr.c
3452+
func (fs *FileSystem) readIbodyXattrs(inodeBytes []byte) (map[string][]byte, error) {
3453+
sb := fs.superblock
3454+
if sb.inodeSize <= ext2InodeSize {
3455+
return nil, nil
3456+
}
3457+
3458+
if len(inodeBytes) < int(ext2InodeSize)+4 {
3459+
return nil, nil
3460+
}
3461+
extraIsize := binary.LittleEndian.Uint16(inodeBytes[ext2InodeSize : ext2InodeSize+2])
3462+
3463+
xattrStart := int(ext2InodeSize) + int(extraIsize)
3464+
xattrEnd := int(sb.inodeSize)
3465+
if xattrStart >= xattrEnd || xattrEnd-xattrStart < 4 {
3466+
return nil, nil
3467+
}
3468+
3469+
magic := binary.LittleEndian.Uint32(inodeBytes[xattrStart : xattrStart+4])
3470+
if magic != xattrMagic {
3471+
return nil, nil
3472+
}
3473+
3474+
data := inodeBytes[xattrStart+4 : xattrEnd]
3475+
return parseXattrEntries(data, data)
3476+
}
3477+
3478+
// readBlockXattrs reads extended attributes from a dedicated block.
3479+
//
3480+
// The block format is:
3481+
//
3482+
// [ext4_xattr_header (32 bytes)][xattr entries][xattr values]
3483+
//
3484+
// The ext4_xattr_header contains a magic number (0xEA020000), reference count,
3485+
// and checksums. Entries are stored in sorted order (by name_index, then name).
3486+
//
3487+
// Reference: ext4_xattr_block_find in fs/ext4/xattr.c
3488+
func (fs *FileSystem) readBlockXattrs(block uint64) (map[string][]byte, error) {
3489+
blockSize := int(fs.superblock.blockSize)
3490+
data := make([]byte, blockSize)
3491+
offset := int64(block) * int64(blockSize)
3492+
_, err := fs.backend.ReadAt(data, offset)
3493+
if err != nil {
3494+
return nil, fmt.Errorf("could not read xattr block at %d: %w", block, err)
3495+
}
3496+
3497+
magic := binary.LittleEndian.Uint32(data[0:4])
3498+
if magic != xattrMagic {
3499+
return nil, fmt.Errorf("invalid xattr block magic: %x", magic)
3500+
}
3501+
3502+
entryData := data[xattrHeaderSize:]
3503+
return parseXattrEntries(entryData, data)
3504+
}
3505+
3506+
// readInodeRaw reads the raw bytes of an inode from disk.
3507+
//
3508+
// This is a helper function extracted from readInode to allow reading the full
3509+
// inode structure including the extended area (i_extra_isize) which may contain
3510+
// inline extended attributes.
3511+
func (fs *FileSystem) readInodeRaw(inodeNumber uint32) ([]byte, error) {
3512+
sb := fs.superblock
3513+
inodeSize := sb.inodeSize
3514+
inodesPerGroup := sb.inodesPerGroup
3515+
bg := (inodeNumber - 1) / inodesPerGroup
3516+
gd := fs.groupDescriptors.descriptors[bg]
3517+
inodeTableBlock := gd.inodeTableLocation
3518+
byteStart := inodeTableBlock * uint64(sb.blockSize)
3519+
offsetInode := (inodeNumber - 1) % inodesPerGroup
3520+
offset := offsetInode * uint32(inodeSize)
3521+
3522+
inodeBytes := make([]byte, inodeSize)
3523+
read, err := fs.backend.ReadAt(inodeBytes, int64(byteStart)+int64(offset))
3524+
if err != nil {
3525+
return nil, fmt.Errorf("failed to read inode %d from offset %d of block %d from block group %d: %w", inodeNumber, offset, inodeTableBlock, bg, err)
3526+
}
3527+
if read != int(inodeSize) {
3528+
return nil, fmt.Errorf("read %d bytes for inode %d instead of inode size of %d", read, inodeNumber, inodeSize)
3529+
}
3530+
return inodeBytes, nil
3531+
}

filesystem/ext4/xattr.go

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
package ext4
2+
3+
import (
4+
"encoding/binary"
5+
"fmt"
6+
)
7+
8+
// Extended Attributes (xattrs) implementation.
9+
//
10+
// References:
11+
// - Kernel documentation: https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#extended-attributes
12+
// - Legacy wiki: https://ext4.wiki.kernel.org/index.php/Ext4_Disk_Layout#Extended_Attributes
13+
// - Kernel source (structures): https://github.com/torvalds/linux/blob/master/fs/ext4/xattr.h
14+
// - Kernel source (implementation): https://github.com/torvalds/linux/blob/master/fs/ext4/xattr.c
15+
// - e2fsprogs implementation: https://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git/tree/lib/ext2fs/ext_attr.c
16+
17+
// Sizes and signature of the on-disk extended attribute structures.
// See ext4_xattr_header and struct ext4_xattr_entry in fs/ext4/xattr.h.
const (
	// xattrMagic is the magic number that marks extended attribute storage.
	xattrMagic = 0xEA020000

	// xattrHeaderSize is the size in bytes of ext4_xattr_header.
	xattrHeaderSize = 32

	// xattrEntrySize is the size in bytes of the fixed part of
	// ext4_xattr_entry, i.e. everything before the variable-length name.
	xattrEntrySize = 16
)

// Attribute name index values (e_name_index) from fs/ext4/xattr.h.
const (
	xattrIndexUser            = 1 // EXT4_XATTR_INDEX_USER
	xattrIndexPosixACLAccess  = 2 // EXT4_XATTR_INDEX_POSIX_ACL_ACCESS
	xattrIndexPosixACLDefault = 3 // EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT
	xattrIndexTrusted         = 4 // EXT4_XATTR_INDEX_TRUSTED
	xattrIndexSecurity        = 6 // EXT4_XATTR_INDEX_SECURITY
	xattrIndexSystem          = 7 // EXT4_XATTR_INDEX_SYSTEM
)
37+
38+
// xattrPrefixes maps name index values to their corresponding key prefixes.
39+
// This reduces on-disk space consumption by storing only the index instead of
40+
// the full prefix string.
41+
//
42+
// POSIX ACL entries have no trailing dot because their e_name_len is always 0;
43+
// the full attribute name is exactly the prefix (e.g. "system.posix_acl_access").
44+
//
45+
// See ext4_xattr_prefix_type in fs/ext4/xattr.h
46+
var xattrPrefixes = map[uint8]string{
47+
0: "",
48+
xattrIndexUser: "user.",
49+
xattrIndexPosixACLAccess: "system.posix_acl_access",
50+
xattrIndexPosixACLDefault: "system.posix_acl_default",
51+
xattrIndexTrusted: "trusted.",
52+
xattrIndexSecurity: "security.",
53+
xattrIndexSystem: "system.",
54+
}
55+
56+
// parseXattrEntries parses extended attribute entries from a byte slice.
57+
//
58+
// entries contains the ext4_xattr_entry structures; values is the region from which
59+
// e_value_offs is relative. For inline (ibody) xattrs, both point to the same region.
60+
// For block xattrs, entries points to the region after the header, and values points
61+
// to the entire block (including header).
62+
//
63+
// The on-disk format is defined in struct ext4_xattr_entry in fs/ext4/xattr.h:
64+
//
65+
// struct ext4_xattr_entry {
66+
// __u8 e_name_len; /* length of name */
67+
// __u8 e_name_index; /* attribute name index */
68+
// __le16 e_value_offs; /* offset in disk block of value */
69+
// __le32 e_value_inum; /* inode in which the value is stored */
70+
// __le32 e_value_size; /* size of attribute value */
71+
// __le32 e_hash; /* hash value of name and value */
72+
// char e_name[]; /* attribute name */
73+
// };
74+
func parseXattrEntries(entries, values []byte) (map[string][]byte, error) {
75+
result := make(map[string][]byte)
76+
pos := 0
77+
for pos+xattrEntrySize <= len(entries) {
78+
nameLen := entries[pos]
79+
nameIndex := entries[pos+1]
80+
// The entry list is terminated by a zero-filled entry (e_name_len == 0
81+
// and e_name_index == 0). See EXT4_IS_LAST_ENTRY in fs/ext4/xattr.h.
82+
// POSIX ACL entries have e_name_len == 0 but e_name_index != 0.
83+
if nameLen == 0 && nameIndex == 0 {
84+
break
85+
}
86+
valueOffs := binary.LittleEndian.Uint16(entries[pos+2 : pos+4])
87+
valueInum := binary.LittleEndian.Uint32(entries[pos+4 : pos+8])
88+
valueSize := binary.LittleEndian.Uint32(entries[pos+8 : pos+12])
89+
90+
nameStart := pos + xattrEntrySize
91+
nameEnd := nameStart + int(nameLen)
92+
if nameEnd > len(entries) {
93+
return nil, fmt.Errorf("xattr entry name extends past buffer")
94+
}
95+
96+
prefix, ok := xattrPrefixes[nameIndex]
97+
if !ok {
98+
prefix = fmt.Sprintf("unknown_%d.", nameIndex)
99+
}
100+
fullName := prefix + string(entries[nameStart:nameEnd])
101+
102+
if valueInum != 0 {
103+
return nil, fmt.Errorf("xattr %q: ea_inode values not supported", fullName)
104+
}
105+
if valueSize > 0 {
106+
vStart := int(valueOffs)
107+
vEnd := vStart + int(valueSize)
108+
if vEnd > len(values) {
109+
return nil, fmt.Errorf("xattr value for %q extends past buffer", fullName)
110+
}
111+
val := make([]byte, valueSize)
112+
copy(val, values[vStart:vEnd])
113+
result[fullName] = val
114+
}
115+
116+
// Advance to next entry, aligned to 4 bytes.
117+
pos = (nameEnd + 3) &^ 3
118+
}
119+
return result, nil
120+
}

0 commit comments

Comments
 (0)