@@ -15,6 +15,7 @@ fn main() {
1515 println ! ( "cargo:rerun-if-changed=src/cuda/cooperative_kernels.cu" ) ;
1616 println ! ( "cargo:rerun-if-changed=src/cuda/cluster_kernels.cu" ) ;
1717 println ! ( "cargo:rerun-if-changed=src/cuda/actor_lifecycle_kernel.cu" ) ;
18+ println ! ( "cargo:rerun-if-changed=src/cuda/migration_kernels.cu" ) ;
1819 println ! ( "cargo:rerun-if-env-changed=CUDA_PATH" ) ;
1920 println ! ( "cargo:rerun-if-env-changed=CUDA_HOME" ) ;
2021 println ! ( "cargo:rerun-if-env-changed=RINGKERNEL_CUDA_ARCH" ) ;
@@ -25,8 +26,12 @@ fn main() {
2526 let cooperative_enabled = env:: var ( "CARGO_FEATURE_COOPERATIVE" ) . is_ok ( ) ;
2627
2728 if !cooperative_enabled {
28- // Generate stub when cooperative feature is not enabled
29+ // Generate stubs for every kernel group so `include!` sites compile
30+ // regardless of feature selection.
2931 generate_stub ( & out_dir, "Cooperative feature not enabled" ) ;
32+ generate_cluster_stub ( & out_dir, "Cooperative feature not enabled" ) ;
33+ generate_lifecycle_stub ( & out_dir, "Cooperative feature not enabled" ) ;
34+ generate_migration_stub ( & out_dir, "Cooperative feature not enabled" ) ;
3035 return ;
3136 }
3237
@@ -68,12 +73,25 @@ fn main() {
6873 generate_lifecycle_stub ( & out_dir, & format ! ( "Compilation failed: {}" , e) ) ;
6974 }
7075 }
76+
77+ // Compile migration kernels (portable sm_75+)
78+ match compile_migration_kernels ( & nvcc, & out_dir) {
79+ Ok ( ( ) ) => {
80+ println ! ( "cargo:rustc-cfg=has_migration_kernels" ) ;
81+ println ! ( "cargo:warning=Migration kernels compiled successfully" ) ;
82+ }
83+ Err ( e) => {
84+ println ! ( "cargo:warning=Migration kernels not available: {}" , e) ;
85+ generate_migration_stub ( & out_dir, & format ! ( "Compilation failed: {}" , e) ) ;
86+ }
87+ }
7188 }
7289 None => {
7390 println ! ( "cargo:warning=nvcc not found - cooperative groups will use fallback" ) ;
7491 generate_stub ( & out_dir, "nvcc not found at build time" ) ;
7592 generate_cluster_stub ( & out_dir, "nvcc not found at build time" ) ;
7693 generate_lifecycle_stub ( & out_dir, "nvcc not found at build time" ) ;
94+ generate_migration_stub ( & out_dir, "nvcc not found at build time" ) ;
7795 }
7896 }
7997}
@@ -391,12 +409,14 @@ fn write_cluster_rust_code(
391409 code. push_str ( ptx) ;
392410 code. push_str ( "\" ####;\n \n " ) ;
393411
412+ code. push_str ( "/// Whether Hopper cluster kernel support was compiled in.\n " ) ;
394413 code. push_str ( & format ! (
395414 "pub const HAS_CLUSTER_KERNEL_SUPPORT: bool = {};\n \n " ,
396415 has_support
397416 ) ) ;
398417
399418 let escaped_message = message. replace ( '\\' , "\\ \\ " ) . replace ( '"' , "\\ \" " ) ;
419+ code. push_str ( "/// Build-time message about Hopper cluster kernel support.\n " ) ;
400420 code. push_str ( & format ! (
401421 "pub const CLUSTER_KERNEL_BUILD_MESSAGE: &str = \" {}\" ;\n " ,
402422 escaped_message
@@ -443,9 +463,11 @@ fn compile_lifecycle_kernel(nvcc: &Path, out_dir: &Path) -> Result<(), String> {
443463 let rust_file = out_dir. join ( "actor_lifecycle_kernel.rs" ) ;
444464 let mut code = String :: new ( ) ;
445465 code. push_str ( "// Auto-generated actor lifecycle kernel PTX.\n \n " ) ;
466+ code. push_str ( "/// Pre-compiled PTX for the actor lifecycle kernel.\n " ) ;
446467 code. push_str ( "pub const LIFECYCLE_KERNEL_PTX: &str = r####\" " ) ;
447468 code. push_str ( & ptx_content) ;
448469 code. push_str ( "\" ####;\n \n " ) ;
470+ code. push_str ( "/// Whether the actor lifecycle kernel was compiled in.\n " ) ;
449471 code. push_str ( "pub const HAS_LIFECYCLE_KERNEL: bool = true;\n " ) ;
450472
451473 fs:: write ( & rust_file, code) . map_err ( |e| format ! ( "Write failed: {}" , e) )
@@ -456,9 +478,114 @@ fn generate_lifecycle_stub(out_dir: &Path, reason: &str) {
456478 let rust_file = out_dir. join ( "actor_lifecycle_kernel.rs" ) ;
457479 let code = format ! (
458480 "// Actor lifecycle kernel not available: {}\n \n \
481+ /// Pre-compiled PTX for the actor lifecycle kernel (empty stub).\n \
459482 pub const LIFECYCLE_KERNEL_PTX: &str = \" \" ;\n \
483+ /// Whether the actor lifecycle kernel was compiled in.\n \
460484 pub const HAS_LIFECYCLE_KERNEL: bool = false;\n ",
461485 reason
462486 ) ;
463487 fs:: write ( & rust_file, code) . expect ( "Failed to write lifecycle stub" ) ;
464488}
489+
490+ /// Compile the migration kernels (portable `sm_75+`) to PTX.
491+ ///
492+ /// Migration kernels handle the state capture / restore / in-flight queue
493+ /// drain side of the 3-phase multi-GPU migration protocol (v1.1). The PTX
494+ /// is embedded as a `const &str` for runtime loading via cudarc.
495+ fn compile_migration_kernels ( nvcc : & Path , out_dir : & Path ) -> Result < ( ) , String > {
496+ let manifest_dir = PathBuf :: from ( env:: var ( "CARGO_MANIFEST_DIR" ) . unwrap ( ) ) ;
497+ let cuda_src = manifest_dir. join ( "src/cuda/migration_kernels.cu" ) ;
498+
499+ if !cuda_src. exists ( ) {
500+ return Err ( format ! (
501+ "Migration CUDA source not found: {:?}" ,
502+ cuda_src
503+ ) ) ;
504+ }
505+
506+ let ptx_file = out_dir. join ( "migration_kernels.ptx" ) ;
507+
508+ // Migration kernels are portable — compile for the same multi-arch set
509+ // as the cooperative kernels so they run on any supported device.
510+ let arch_args = determine_cuda_arch ( nvcc) ;
511+
512+ let mut cmd = Command :: new ( nvcc) ;
513+ cmd. args ( [ "-ptx" , "-O3" , "--generate-line-info" ] ) ;
514+ for arg in & arch_args {
515+ cmd. arg ( arg) ;
516+ }
517+ cmd. args ( [ "-std=c++17" , "-w" , "-o" ] ) ;
518+ cmd. arg ( ptx_file. to_str ( ) . unwrap ( ) ) ;
519+ cmd. arg ( cuda_src. to_str ( ) . unwrap ( ) ) ;
520+
521+ let status = cmd
522+ . status ( )
523+ . map_err ( |e| format ! ( "Failed to execute nvcc for migration kernels: {}" , e) ) ?;
524+
525+ if !status. success ( ) {
526+ return Err ( format ! (
527+ "nvcc migration kernel compilation failed with exit code: {:?}" ,
528+ status. code( )
529+ ) ) ;
530+ }
531+
532+ let ptx_content = fs:: read_to_string ( & ptx_file)
533+ . map_err ( |e| format ! ( "Failed to read migration PTX: {}" , e) ) ?;
534+
535+ let rust_file = out_dir. join ( "migration_kernels.rs" ) ;
536+ write_migration_rust_code (
537+ & rust_file,
538+ & ptx_content,
539+ true ,
540+ "Migration kernels compiled successfully" ,
541+ )
542+ . map_err ( |e| format ! ( "Failed to write migration Rust bindings: {}" , e) ) ?;
543+
544+ Ok ( ( ) )
545+ }
546+
547+ /// Generate migration kernel stub when nvcc is unavailable.
548+ fn generate_migration_stub ( out_dir : & Path , reason : & str ) {
549+ let rust_file = out_dir. join ( "migration_kernels.rs" ) ;
550+ write_migration_rust_code ( & rust_file, "" , false , reason)
551+ . expect ( "Failed to write migration Rust stub" ) ;
552+ }
553+
/// Emit the Rust file that wraps the migration PTX blob.
///
/// The generated file defines three public constants:
///
/// - `MIGRATION_KERNEL_PTX`: `ptx` embedded verbatim in a `r####"…"####`
///   raw string (an empty string for stubs).
/// - `HAS_MIGRATION_KERNEL_SUPPORT`: the value of `has_support`.
/// - `MIGRATION_KERNEL_BUILD_MESSAGE`: `message`, escaped so that any input
///   (quotes, backslashes, control characters, non-ASCII) yields a valid
///   string literal that evaluates back to the original text.
///
/// NOTE(review): `ptx` is not escaped; this relies on the PTX never
/// containing the sequence `"####`.
///
/// # Errors
///
/// Returns the underlying I/O error if `path` cannot be written.
fn write_migration_rust_code(
    path: &Path,
    ptx: &str,
    has_support: bool,
    message: &str,
) -> std::io::Result<()> {
    // Fixed preamble: file banner plus the doc comment for the PTX constant.
    const HEADER: &str = concat!(
        "// Auto-generated migration kernel PTX.\n",
        "// Generated by build.rs at build time.\n",
        "// Portable: sm_75+ (Turing through Hopper).\n\n",
        "/// Pre-compiled PTX for v1.1 migration kernels.\n",
        "/// Contains:\n",
        "/// - capture_actor_state: snapshot live actor state with CRC32\n",
        "/// - restore_actor_state: reload captured state with CRC32 verify\n",
        "/// - drain_inflight_queue: drain K2K queue to external buffer\n",
    );

    // The PTX dominates the output size, so reserve for it up front.
    let mut code = String::with_capacity(HEADER.len() + ptx.len() + message.len() + 512);

    code.push_str(HEADER);
    code.push_str("pub const MIGRATION_KERNEL_PTX: &str = r####\"");
    code.push_str(ptx);
    code.push_str("\"####;\n\n");

    code.push_str("/// `true` if the migration kernels were compiled and embedded.\n");
    code.push_str(&format!(
        "pub const HAS_MIGRATION_KERNEL_SUPPORT: bool = {};\n\n",
        has_support
    ));

    code.push_str("/// Build-time message describing migration kernel availability.\n");
    // `escape_default` covers `\`, `"`, and control characters such as a bare
    // `\r`, which rustc rejects inside a string literal.
    code.push_str(&format!(
        "pub const MIGRATION_KERNEL_BUILD_MESSAGE: &str = \"{}\";\n",
        message.escape_default()
    ));

    fs::write(path, code)
}
0 commit comments