Skip to content

Commit e3a057a

Browse files
committed
Use APIC IDs instead of logical CPU numbers
Validate CPU specifications against physical APIC IDs read from /sys/devices/system/cpu/*/topology/apic_id rather than logical CPU count. This allows specifying CPUs on multi-socket systems where APIC IDs are non-contiguous (e.g., socket 1 uses APIC IDs 128+). Signed-off-by: Cong Wang <cwang@multikernel.io>
1 parent c9aa47e commit e3a057a

3 files changed

Lines changed: 82 additions & 47 deletions

File tree

src/kerf/create/main.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def _allocate_spread(
137137
numa_cpu_lists[numa_node_id] = sorted(numa_cpus)
138138

139139
if not numa_cpu_lists:
140-
raise ResourceError(f"No available CPUs in specified NUMA nodes: {numa_nodes}")
140+
raise ResourceError(f"No available APIC IDs in specified NUMA nodes: {numa_nodes}")
141141

142142
allocated = []
143143
numa_indices = {node_id: 0 for node_id in numa_cpu_lists}
@@ -187,7 +187,7 @@ def _allocate_local(
187187
if len(numa_cpus) >= count:
188188
return sorted(numa_cpus[:count])
189189
raise ResourceError(
190-
f"Not enough CPUs in NUMA node {numa_node_id}: "
190+
f"Not enough APIC IDs in NUMA node {numa_node_id}: "
191191
f"requested {count}, but only {len(numa_cpus)} available"
192192
)
193193

@@ -200,7 +200,7 @@ def _allocate_local(
200200
if len(numa_cpus) >= count:
201201
return sorted(numa_cpus[:count])
202202

203-
raise ResourceError(f"No single NUMA node has {count} available CPUs for 'local' affinity")
203+
raise ResourceError(f"No single NUMA node has {count} available APIC IDs for 'local' affinity")
204204

205205

206206
def allocate_cpus_from_pool(
@@ -236,11 +236,11 @@ def allocate_cpus_from_pool(
236236
if len(available) < count:
237237
if numa_nodes:
238238
raise ResourceError(
239-
f"Not enough CPUs available in NUMA nodes {numa_nodes}: "
239+
f"Not enough APIC IDs available in NUMA nodes {numa_nodes}: "
240240
f"requested {count}, but only {len(available)} available"
241241
)
242242
raise ResourceError(
243-
f"Not enough CPUs available: requested {count}, "
243+
f"Not enough APIC IDs available: requested {count}, "
244244
f"but only {len(available)} available in pool"
245245
)
246246

@@ -399,8 +399,9 @@ def dump_overlay_for_debug(
399399
@click.option(
400400
"--cpus",
401401
"-c",
402-
help='Explicit CPU allocation: CPU IDs (e.g., "4" for CPU 4, "4-7" for range, '
403-
'"4,5,6,7" for list, "4-7,10-12" for mixed). Mutually exclusive with --cpu-count',
402+
help='Explicit APIC ID allocation (e.g., "128" for APIC ID 128, "128-134" for range, '
403+
'"128,130,132" for list). Use physical APIC IDs, not logical CPU numbers. '
404+
'Mutually exclusive with --cpu-count',
404405
)
405406
@click.option(
406407
"--cpu-count",
@@ -472,20 +473,20 @@ def create( # pylint: disable=too-many-arguments,too-many-positional-arguments
472473
473474
Examples:
474475
475-
# Create instance with CPUs 4-7 and 2GB memory (auto-assigned base)
476-
kerf create web-server --cpus=4-7 --memory=2GB
476+
# Create instance with APIC IDs 128-134 and 2GB memory (auto-assigned base)
477+
kerf create web-server --cpus=128-134 --memory=2GB
477478
478479
# Create instance with name after options
479-
kerf create --cpus=8-15 --memory=8GB web-server
480+
kerf create --cpus=128,130,132 --memory=8GB web-server
480481
481482
# Create instance with specific memory base address
482-
kerf create database --cpus=8-15 --memory=8GB --memory-base=0x100000000
483+
kerf create database --cpus=128-142 --memory=8GB --memory-base=0x100000000
483484
484485
# Create instance with devices
485-
kerf create compute --cpus=16-23 --memory=4GB --devices=enp9s0_dev
486+
kerf create compute --cpus=128-142 --memory=4GB --devices=enp9s0_dev
486487
487-
# Create instance with explicit single CPU
488-
kerf create web-server --cpus=4 --memory=2GB
488+
# Create instance with explicit single APIC ID
489+
kerf create web-server --cpus=128 --memory=2GB
489490
490491
# Create instance with auto-allocated CPU count
491492
kerf create web-server --cpu-count=4 --memory=2GB
@@ -539,7 +540,7 @@ def create( # pylint: disable=too-many-arguments,too-many-positional-arguments
539540
try:
540541
cpu_spec_value = parse_cpu_spec(cpus)
541542
except ValueError as e:
542-
click.echo(f"Error: Invalid CPU specification '{cpus}': {e}", err=True)
543+
click.echo(f"Error: Invalid APIC ID specification '{cpus}': {e}", err=True)
543544
sys.exit(2)
544545

545546
# Parse memory specification

src/kerf/init/main.py

Lines changed: 59 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,37 @@ def get_total_cpus_from_system() -> Optional[int]:
346346
return None
347347

348348

349+
def get_valid_apic_ids_from_system(
    sysfs_cpu_dir: str = '/sys/devices/system/cpu',
) -> Optional[set]:
    """
    Get the set of valid APIC IDs from the system via sysfs.

    Reads ``<sysfs_cpu_dir>/cpuN/topology/apic_id`` for every ``cpuN``
    entry (``N`` being all digits) found directly under ``sysfs_cpu_dir``.
    Entries whose ``apic_id`` file is missing, unreadable, or does not
    contain an integer are skipped rather than treated as errors.

    Args:
        sysfs_cpu_dir: Directory holding the per-CPU ``cpuN`` entries.
            Defaults to the real sysfs location; override it to point the
            lookup at an alternative tree (e.g. a fixture directory).

    Returns:
        A set of integer APIC IDs, or None if the directory cannot be
        listed or no APIC ID could be read from it.
    """
    cpu_dir = Path(sysfs_cpu_dir)

    try:
        # Only 'cpu<digits>' entries are per-CPU directories; siblings such
        # as 'cpufreq' or 'cpuidle' share the prefix and must be ignored.
        cpu_paths = [
            entry for entry in cpu_dir.iterdir()
            if entry.name.startswith('cpu') and entry.name[3:].isdigit()
        ]
    except OSError:
        # Missing or unreadable directory: topology info is not available.
        return None

    apic_ids = set()
    for cpu_path in cpu_paths:
        apic_id_file = cpu_path / 'topology' / 'apic_id'
        try:
            raw_value = apic_id_file.read_text(encoding='utf-8')
            apic_ids.add(int(raw_value.strip()))
        except (OSError, ValueError):
            # Best effort: a CPU without a readable, numeric apic_id is
            # simply left out of the result.
            continue

    return apic_ids or None
378+
379+
349380
def build_baseline_from_cmdline(
350381
cpus: str,
351382
devices: Optional[str] = None,
@@ -371,27 +402,30 @@ def build_baseline_from_cmdline(
371402
except ValueError as e:
372403
raise ValueError(f"Invalid CPU specification '{cpus}': {e}") from e
373404

374-
system_total_cpus = get_total_cpus_from_system()
375-
if system_total_cpus is None:
376-
max_cpu = max(cpu_list) if cpu_list else 0
377-
total_cpus = max_cpu + 1
378-
if verbose:
379-
click.echo(f"Warning: Could not determine total CPUs from system, using max from specification: {total_cpus}", err=True)
380-
else:
381-
total_cpus = system_total_cpus
382-
max_specified = max(cpu_list) if cpu_list else -1
383-
if max_specified >= total_cpus:
384-
raise ValueError(
385-
f"CPU {max_specified} specified but system only has {total_cpus} CPUs (0-{total_cpus-1})"
386-
)
405+
# Validate against valid APIC IDs on the system
406+
valid_apic_ids = get_valid_apic_ids_from_system()
407+
if valid_apic_ids is None:
408+
raise KernelInterfaceError(
409+
"Could not read APIC IDs from /sys/devices/system/cpu/*/topology/apic_id. "
410+
"Ensure the system exposes CPU topology information."
411+
)
412+
413+
invalid_cpus = set(cpu_list) - valid_apic_ids
414+
if invalid_cpus:
415+
raise ValueError(
416+
f"Invalid APIC ID(s) specified: {sorted(invalid_cpus)}. "
417+
f"Valid APIC IDs on this system: {sorted(valid_apic_ids)}"
418+
)
387419

420+
# Total CPUs is based on the max APIC ID + 1 for sizing purposes
421+
total_cpus = max(valid_apic_ids) + 1
422+
# Host reserved are all valid APIC IDs not in the available list
388423
available_cpus = set(cpu_list)
389-
all_cpus = set(range(total_cpus))
390-
host_reserved_cpus = sorted(list(all_cpus - available_cpus))
424+
host_reserved_cpus = sorted(list(valid_apic_ids - available_cpus))
391425

392426
if 0 in available_cpus and len(host_reserved_cpus) == 0:
393427
if verbose:
394-
click.echo("Warning: CPU 0 is in available list but no host-reserved CPUs. Moving CPU 0 to host-reserved.", err=True)
428+
click.echo("Warning: APIC ID 0 is in available list but no host-reserved CPUs. Moving APIC ID 0 to host-reserved.", err=True)
395429
available_cpus.discard(0)
396430
host_reserved_cpus = [0]
397431
cpu_list = sorted(list(available_cpus))
@@ -407,10 +441,10 @@ def build_baseline_from_cmdline(
407441
total_bytes = memory_pool_base + memory_pool_bytes
408442
host_reserved_bytes = memory_pool_base
409443
if verbose:
410-
click.echo(f"Parsed CPU specification: {cpus}")
411-
click.echo(f" Total CPUs: {total_cpus}")
412-
click.echo(f" Host-reserved CPUs: {host_reserved_cpus}")
413-
click.echo(f" Available CPUs: {cpu_list}")
444+
click.echo(f"Parsed APIC ID specification: {cpus}")
445+
click.echo(f" Valid APIC IDs on system: {sorted(valid_apic_ids)}")
446+
click.echo(f" Host-reserved APIC IDs: {host_reserved_cpus}")
447+
click.echo(f" Available APIC IDs: {cpu_list}")
414448
click.echo("Memory pool from /proc/iomem:")
415449
click.echo(f" Base: {hex(memory_pool_base)}")
416450
click.echo(f" Size: {memory_pool_bytes} bytes ({memory_pool_bytes / (1024**3):.2f} GB)")
@@ -473,7 +507,7 @@ def build_baseline_from_cmdline(
473507
@click.command()
474508
@click.pass_context
475509
@click.option('--input', '-i', help='Input DTS or DTB file containing all resources. Mutually exclusive with --cpus and --devices. When used, all resources must come from the file.')
476-
@click.option('--cpus', '-c', help='CPU specification for baseline (e.g., "4-7" or "4,5,6,7"). Mutually exclusive with --input. Memory will be parsed from /proc/iomem.')
510+
@click.option('--cpus', '-c', help='APIC ID specification for baseline (e.g., "128-134" or "128,130,132"). Use physical APIC IDs, not logical CPU numbers. Mutually exclusive with --input. Memory will be parsed from /proc/iomem.')
477511
@click.option('--devices', '-d', help='Device names (comma-separated, e.g., "enp9s0_dev,nvme0"). Mutually exclusive with --input. Creates minimal device entries in baseline.')
478512
@click.option('--dry-run', is_flag=True, help='Validate without applying')
479513
@click.option('--report', is_flag=True, help='Generate detailed validation report')
@@ -501,11 +535,11 @@ def init(ctx: click.Context, input: Optional[str], cpus: Optional[str], devices:
501535
# Initialize from DTS file (all resources from file)
502536
kerf init --input=hardware.dts
503537
504-
# Initialize from command line (CPUs 4-7, memory from /proc/iomem)
505-
kerf init --cpus=4-7
538+
# Initialize from command line (APIC IDs 128-134, memory from /proc/iomem)
539+
kerf init --cpus=128-134
506540
507-
# Initialize with CPUs and devices
508-
kerf init --cpus=4-7 --devices=enp9s0_dev,nvme0
541+
# Initialize with APIC IDs and devices
542+
kerf init --cpus=128,130,132 --devices=enp9s0_dev,nvme0
509543
510544
# Validate baseline without applying
511545
kerf init --input=hardware.dts --dry-run

src/kerf/resources.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -201,29 +201,29 @@ def validate_cpu_allocation(
201201

202202
requested_set = set(requested_cpus)
203203

204-
# Check all requested CPUs exist in hardware
204+
# Check all requested APIC IDs exist in hardware
205205
hardware_cpus = set(tree.hardware.cpus.available)
206206
invalid_cpus = requested_set - hardware_cpus
207207
if invalid_cpus:
208208
raise ResourceError(
209-
f"Invalid CPUs requested: {sorted(invalid_cpus)}. "
210-
f"Available CPUs: {sorted(hardware_cpus)}"
209+
f"Invalid APIC IDs requested: {sorted(invalid_cpus)}. "
210+
f"Available APIC IDs: {sorted(hardware_cpus)}"
211211
)
212212

213-
# Check CPUs are available
213+
# Check APIC IDs are available
214214
unavailable = requested_set - available_cpus
215215
if unavailable:
216-
# Find which instances are using these CPUs
216+
# Find which instances are using these APIC IDs
217217
conflicts = []
218218
for instance in tree.instances.values():
219219
if instance.name == exclude_instance:
220220
continue
221221
conflict_cpus = set(instance.resources.cpus) & unavailable
222222
if conflict_cpus:
223-
conflicts.append(f"{instance.name} uses CPUs {sorted(conflict_cpus)}")
223+
conflicts.append(f"{instance.name} uses APIC IDs {sorted(conflict_cpus)}")
224224

225225
conflict_msg = ", ".join(conflicts) if conflicts else "allocated to other instances"
226-
raise ResourceError(f"CPUs {sorted(unavailable)} are not available ({conflict_msg})")
226+
raise ResourceError(f"APIC IDs {sorted(unavailable)} are not available ({conflict_msg})")
227227

228228

229229
def validate_memory_allocation(

0 commit comments

Comments
 (0)