Skip to content

Commit b38ce9e

Browse files
committed
contest-hw: initial implementation
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent 04f4ebe commit b38ce9e

21 files changed

Lines changed: 4336 additions & 8 deletions

contest/__init__.py

Whitespace-only changes.

contest/hw/README.rst

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,22 @@ Config
216216

217217
- reservation timeout, seconds
218218

219+
CLI
220+
---
221+
222+
The ``nipa-mctrl`` CLI (``/usr/local/bin/nipa-mctrl`` on ctrl) provides
223+
command-line access to the machine_control API::
224+
225+
nipa-mctrl machines # list machines and health state
226+
nipa-mctrl nics # list NICs
227+
nipa-mctrl sol --machine-id 1 # view SOL logs
228+
nipa-mctrl reserve --machine-ids 1,2 # reserve machines
229+
nipa-mctrl close --reservation-id 5 # release a reservation
230+
nipa-mctrl power-cycle --machine-id 1 # power cycle via BMC
231+
232+
Add ``--json`` for machine-parseable output. Defaults to
233+
``http://localhost:5050``; override with ``--url`` or ``MC_URL`` env var.
234+
219235
In-memory state
220236
---------------
221237

@@ -256,14 +272,12 @@ The service discovers all machines using the ``machine_info`` table at startup.
256272
SOL collection
257273
~~~~~~~~~~~~~~
258274

259-
Service assumes BMC of the machines is already configured to send SOL
260-
logs to the correct place. The service uses ipmitool call to
261-
enable the SOL output at startup (and disable it at shutdown).
262-
263-
The service maintains a UDP socket to receive the logs.
264-
The BMC ipaddr from ``machine_info_sec`` is used to identify the sending
265-
machine. The service inserts the logs into the correct table
266-
and does line chunking if necessary.
275+
At startup the service spawns a persistent ``ipmitool sol activate``
276+
session for each machine (using BMC credentials from ``machine_info_sec``).
277+
Each session runs in its own thread, reading stdout and inserting lines
278+
into the ``sol`` table. If a session drops it is automatically
279+
reconnected after a short delay. Stale sessions are deactivated before
280+
each new connection attempt.
267281

268282
Managing reservations
269283
~~~~~~~~~~~~~~~~~~~~~

contest/hw/hw_worker.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
#!/usr/bin/env python3
2+
# SPDX-License-Identifier: GPL-2.0
3+
4+
"""NIPA HW worker — one-shot on-boot test runner."""
5+
6+
import json
7+
import os
8+
import subprocess
9+
10+
from lib.runner import find_newest_unseen, mark_all_seen, run_tests
11+
12+
13+
TESTS_DIR = '/srv/hw-worker/tests'
14+
RESULTS_DIR = '/srv/hw-worker/results'
15+
16+
# kselftest net.config keys (see drivers/net/README.rst)
17+
_NET_CONFIG_KEYS = ['NETIF', 'LOCAL_V4', 'LOCAL_V6', 'REMOTE_V4', 'REMOTE_V6',
18+
'LOCAL_PREFIX_V6', 'REMOTE_TYPE', 'REMOTE_ARGS']
19+
20+
21+
def _parse_env_file(path):
    """Read *path* as a flat KEY=VALUE file and return it as a dict.

    Blank lines and ``#`` comment lines are skipped; lines without an
    ``=`` separator are ignored.  Keys and values are whitespace-stripped.
    A missing file yields an empty dict rather than an error.
    """
    if not os.path.exists(path):
        return {}

    parsed = {}
    with open(path, encoding='utf-8') as src:
        for raw in src:
            entry = raw.strip()
            if not entry or entry[0] == '#':
                continue
            name, eq, value = entry.partition('=')
            if not eq:
                continue
            parsed[name.strip()] = value.strip()
    return parsed
35+
36+
37+
def _ensure_link_up(ifname):
    """Set the administrative state of *ifname* to up.

    ``ip link set ... up`` is idempotent, so calling this on an
    already-up link is harmless.  Raises CalledProcessError on failure.
    """
    cmd = ['ip', 'link', 'set', ifname, 'up']
    subprocess.run(cmd, check=True)
40+
41+
42+
def _ensure_addr(ifname, addr):
    """Ensure *addr* is configured on *ifname*, adding it when absent.

    *addr* may carry a prefix length; when it does not, /64 is assumed
    for IPv6 addresses and /24 for IPv4.  Presence is detected by a
    substring match of the bare address in ``ip addr show`` output.
    """
    bare = addr.partition('/')[0]
    show = subprocess.run(['ip', 'addr', 'show', 'dev', ifname],
                          capture_output=True, check=False)
    if bare in show.stdout.decode():
        return  # already configured

    full = addr
    if '/' not in full:
        full += '/64' if ':' in full else '/24'
    subprocess.run(['ip', 'addr', 'add', full, 'dev', ifname], check=True)
52+
53+
54+
def setup_test_interfaces(test_dir):
    """Configure test NICs and write net.config from nic-test.env.

    The hwksft orchestrator deploys nic-test.env with interface names,
    IP addresses, and remote connectivity info.  This function:
    1. Brings up the DUT and (optional same-machine) peer interfaces
    2. Adds IP addresses if not already configured
    3. Writes drivers/net[/hw]/net.config for the kselftest framework
    """
    env = _parse_env_file(os.path.join(test_dir, 'nic-test.env'))
    if not env:
        return

    # (interface key, v4 addr key, v6 addr key) — DUT first, then the
    # peer used for loopback / same-machine setups.
    plans = (('NETIF', 'LOCAL_V4', 'LOCAL_V6'),
             ('REMOTE_IFNAME', 'REMOTE_V4', 'REMOTE_V6'))
    for if_key, v4_key, v6_key in plans:
        ifname = env.get(if_key)
        if not ifname:
            continue
        _ensure_link_up(ifname)
        for addr_key in (v4_key, v6_key):
            if env.get(addr_key):
                _ensure_addr(ifname, env[addr_key])

    # Emit net.config for the kselftest framework.
    config_lines = [f'{key}={env[key]}' for key in _NET_CONFIG_KEYS
                    if env.get(key)]
    if not config_lines:
        return

    config_content = '\n'.join(config_lines) + '\n'
    for subdir in ('drivers/net', 'drivers/net/hw'):
        config_dir = os.path.join(test_dir, subdir)
        if not os.path.isdir(config_dir):
            continue
        path = os.path.join(config_dir, 'net.config')
        with open(path, 'w', encoding='utf-8') as fp:
            fp.write(config_content)
        print(f"Wrote {path}")
100+
101+
102+
def main():
    """Find the newest pending test, run it, and persist the results."""
    test_dir = find_newest_unseen(TESTS_DIR)
    if test_dir is None:
        print("No outstanding tests found")
        return

    # Verify we booted into the expected test kernel by comparing
    # the deployed kernel version against the running kernel.
    kver_path = os.path.join(test_dir, '.kernel-version')
    if not os.path.exists(kver_path):
        print("No kernel version file, skipping")
        return
    with open(kver_path, encoding='utf-8') as fp:
        expected = fp.read().strip()

    actual = os.uname().release
    # The kernel version includes the git hash and instance name
    # (via CONFIG_LOCALVERSION), so accidental prefix collisions
    # (e.g. "6.1" matching "6.12.0") cannot happen in practice.
    # The '-' separator check is an extra safety measure.
    if not (actual == expected or actual.startswith(expected + '-')):
        print(f"Kernel mismatch: running {actual}, expected {expected}")
        return

    # Marked seen before running — presumably so a run that crashes the
    # machine is not retried forever on every boot (TODO confirm).
    mark_all_seen(TESTS_DIR)

    # Configure test interfaces and write net.config
    setup_test_interfaces(test_dir)

    # Results live under a directory named after the reservation id
    # (the test dir's basename).
    results_dir = os.path.join(RESULTS_DIR, os.path.basename(test_dir))
    os.makedirs(results_dir, exist_ok=True)

    results = run_tests(test_dir, results_dir)

    # fsync the results file so they survive an abrupt reboot or
    # power-cycle right after the run completes.
    results_file = os.path.join(results_dir, 'results.json')
    fd = os.open(results_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
    with os.fdopen(fd, 'w') as fp:
        json.dump(results, fp)
        fp.flush()
        os.fsync(fp.fileno())

    print(f"Completed {len(results)} tests, results in {results_dir}")
149+
150+
151+
# Script entry point: one-shot run (see module docstring).
if __name__ == '__main__':
    main()

contest/hw/hwksft.py

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
#!/usr/bin/env python3
2+
# SPDX-License-Identifier: GPL-2.0
3+
4+
"""NIPA HW kselftest orchestrator service."""
5+
6+
import datetime
7+
import os
8+
import subprocess
9+
import sys
10+
import time
11+
12+
# Add the project root to path for cross-package imports
13+
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..'))
14+
15+
# pylint: disable=wrong-import-position,wrong-import-order
16+
# Imports below require sys.path manipulation for cross-package access.
17+
18+
from core import NipaLifetime # noqa: E402 # pylint: disable=import-error
19+
20+
from contest.remote.lib.cbarg import CbArg # noqa: E402
21+
from contest.remote.lib.fetcher import Fetcher # noqa: E402
22+
23+
from lib.mc_client import MCClient, resolve_machines, resolve_nic_id # noqa: E402
24+
from lib.deployer import (build_kernel, build_ksft, deploy_artifacts, # noqa: E402
25+
kexec_machine, wait_for_results, fetch_results)
26+
27+
# Config:
28+
#
29+
# [executor]
30+
# name=hwksft-nic0
31+
# group=selftests-hw
32+
# init=force / continue / next
33+
# [remote]
34+
# branches=https://url-to-branches-manifest
35+
# [local]
36+
# base_path=/common/path
37+
# json_path=base-relative/path/to/json
38+
# results_path=base-relative/path/to/raw/outputs
39+
# tree_path=/root-path/to/kernel/git
40+
# patches_path=/root-path/to/patches/dir
41+
# [www]
42+
# url=https://url-to-reach-base-path
43+
# [hw]
44+
# nic_vendor=Intel
45+
# nic_model=E810-C
46+
# machine_control_url=http://control-node:5050
47+
# reservation_retry_time=60
48+
# max_kexec_boot_timeout=300
49+
# max_test_time=3600
50+
# crash_wait_time=120
51+
# sol_poll_interval=15
52+
# [build]
53+
# extra_kconfig=/path/to/nic-driver.config
54+
# [ksft]
55+
# target=net
56+
57+
58+
def _build_nic_deploy_info(all_nics, nic, machine_ip_map, fallback_ip):
    """Build the NIC deployment descriptor for deploy_artifacts().

    Returns a dict with the DUT NIC's ifname/ip4addr/ip6addr and, when
    the NIC has a ``peer_id``, the matching 'peer' NIC info plus the
    'peer_machine_ip' (falling back to *fallback_ip* when the peer's
    machine is not in *machine_ip_map*).
    """
    info = {
        'ifname': nic.get('ifname', ''),
        'ip4addr': nic.get('ip4addr', ''),
        'ip6addr': nic.get('ip6addr', ''),
    }
    peer_id = nic.get('peer_id')
    if peer_id:
        # Single scan for the peer NIC; previously this was duplicated.
        for n in all_nics:
            if n['id'] == peer_id:
                info['peer'] = {
                    'ifname': n.get('ifname', ''),
                    'ip4addr': n.get('ip4addr', ''),
                    'ip6addr': n.get('ip6addr', ''),
                }
                # Record peer machine IP so deployer can set REMOTE_ARGS
                info['peer_machine_ip'] = machine_ip_map.get(
                    n['machine_id'], fallback_ip)
                break
    return info


def _reserve_with_retry(config, mc, machine_ids):
    """Reserve *machine_ids*, retrying with exponential backoff.

    Backoff starts at [hw]reservation_retry_time seconds and grows by
    1.5x per attempt, capped at 300s.  Raises RuntimeError after
    [hw]max_reservation_retries failed attempts.
    """
    max_retries = config.getint('hw', 'max_reservation_retries', fallback=30)
    retry_time = config.getint('hw', 'reservation_retry_time', fallback=60)
    for attempt in range(max_retries):
        result = mc.reserve(machine_ids)
        if 'reservation_id' in result:
            return result['reservation_id']
        wait = min(retry_time * (1.5 ** attempt), 300)
        print(f"Reserve failed ({result.get('error', '?')}), "
              f"retry {attempt+1}/{max_retries} in {wait:.0f}s")
        time.sleep(wait)
    raise RuntimeError(f"Failed to reserve machines after {max_retries} attempts")


def test(binfo, rinfo, cbarg):  # pylint: disable=unused-argument
    """Fetcher callback: build, deploy, run, and collect HW test results.

    Returns a list of result-case dicts for the Fetcher to publish.
    On a build failure a single synthetic 'build' fail case is returned
    without touching any hardware.  The machine reservation is always
    released (best-effort) in the finally block.
    """
    print("Run at", datetime.datetime.now())
    cbarg.refresh_config()
    config = cbarg.config

    results_path = os.path.join(config.get('local', 'base_path'),
                                config.get('local', 'results_path'),
                                rinfo['run-cookie'])
    os.makedirs(results_path, exist_ok=True)

    link = config.get('www', 'url') + '/' + \
        config.get('local', 'results_path') + '/' + \
        rinfo['run-cookie']
    rinfo['link'] = link
    grp_name = config.get('executor', 'group', fallback='selftests-hw')

    tree_path = config.get('local', 'tree_path')
    mc = MCClient(config.get('hw', 'machine_control_url'))

    # 1. Build kernel + ksft
    try:
        kernel_version = build_kernel(config, tree_path)
        build_ksft(config, tree_path)
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"Build failed: {e}")
        return [{
            'test': 'build',
            'group': grp_name,
            'result': 'fail',
            'link': link,
        }]

    # 2. Resolve machines for NIC
    all_nics = mc.get_nic_info()
    nic_id = resolve_nic_id(all_nics, config.get('hw', 'nic_vendor'),
                            config.get('hw', 'nic_model'))
    machine_ids, nic = resolve_machines(all_nics, nic_id)

    # 3. Get machine IPs for SSH/SCP
    all_machines = mc.get_machine_info()
    machine_ip_map = {m['id']: m['mgmt_ipaddr'] for m in all_machines}
    machine_ips = [machine_ip_map[mid] for mid in machine_ids]

    # Build the deployment descriptor (incl. peer info) in one pass
    nic_deploy_info = _build_nic_deploy_info(all_nics, nic, machine_ip_map,
                                             machine_ips[0])

    # 4. Reserve machines (retry loop with backoff)
    reservation_id = _reserve_with_retry(config, mc, machine_ids)

    try:
        # 5. Deploy artifacts via SCP
        deploy_artifacts(config, machine_ips, reservation_id, nic_deploy_info,
                         tree_path, kernel_version)

        # 6. kexec into new kernel
        kexec_machine(config, machine_ips, reservation_id)

        # 7. Wait for hw-worker with crash monitoring
        wait_for_results(config, mc, reservation_id, machine_ids, machine_ips)

        # 8. Copy back results
        cases = fetch_results(config, machine_ips, reservation_id, rinfo)
    finally:
        # 9. Release reservation — best-effort, never mask the real error
        try:
            mc.reservation_close(reservation_id)
        except Exception as e:  # pylint: disable=broad-except
            print(f"Warning: failed to close reservation {reservation_id}: {e}")

    print("Done at", datetime.datetime.now())
    return cases
166+
167+
168+
def main():
    """Entry point: configure and run the Fetcher poll loop."""
    # Default config files, plus any extra paths given on the command line.
    cfg_paths = ['hw.config', 'hwksft.config'] + sys.argv[1:]

    cbarg = CbArg(cfg_paths)
    config = cbarg.config

    base_dir = config.get('local', 'base_path')
    json_path = config.get('local', 'json_path')
    www_url = config.get('www', 'url')

    life = NipaLifetime(config)

    fetcher = Fetcher(test, cbarg,
                      name=config.get('executor', 'name'),
                      branches_url=config.get('remote', 'branches'),
                      results_path=os.path.join(base_dir, json_path),
                      url_path=www_url + '/' + json_path,
                      tree_path=config.get('local', 'tree_path'),
                      patches_path=config.get('local', 'patches_path', fallback=None),
                      life=life,
                      first_run=config.get('executor', 'init', fallback="continue"))
    fetcher.run()
    life.exit()
192+
193+
194+
# Script entry point: start the orchestrator's Fetcher poll loop.
if __name__ == '__main__':
    main()

contest/hw/lib/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# SPDX-License-Identifier: GPL-2.0

0 commit comments

Comments
 (0)