Skip to content

Commit 8d81824

Browse files
mivertowski and claude committed
Update documentation for v0.3.0 features
- 04-memory-management.md: Add StratifiedMemoryPool, SizeBucket, AnalyticsContext, PressureHandler, and CUDA/WebGPU buffer caches
- 03-core-abstractions.md: Add Domain system (20 business domains), PersistentMessage trait, KernelDispatcher, Queue tiering, and Global reduction primitives
- 07-proc-macros.md: Add #[derive(PersistentMessage)], domain attribute, #[derive(GpuType)] macro
- 13-cuda-codegen.md: Add compile_ptx(), multi-phase execution, dispatch code generation, reduction code generation
- 05-gpu-backends.md: Add compile_ptx API section, cudarc 0.18.2 API changes and migration notes
- 02-crate-structure.md: Add new crates (montecarlo, graph), new modules in ringkernel-core, update dependency versions
- 01-architecture-overview.md: Update test count (520+ → 825+), add v0.3.0 features to "Working today" list
- docs/README.md: Update test count metric (775+ → 825+)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 860b8b2 commit 8d81824

8 files changed

Lines changed: 1021 additions & 14 deletions

docs/01-architecture-overview.md

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,9 +21,13 @@ RingKernel is under active development. The core runtime, CPU backend, CUDA back
2121
- Telemetry and metrics collection
2222
- Rust-to-CUDA transpiler (ringkernel-cuda-codegen)
2323
- Rust-to-WGSL transpiler (ringkernel-wgpu-codegen)
24+
- Size-stratified memory pools with pressure handling (v0.3.0)
25+
- Global reduction primitives with multi-phase execution (v0.3.0)
26+
- Multi-kernel dispatch with domain-based routing (v0.3.0)
27+
- Queue tiering for throughput-based capacity selection (v0.3.0)
2428
- 20+ working examples
2529
- 5 showcase applications: WaveSim, WaveSim3D, TxMon, AccNet, ProcInt
26-
- 520+ tests across the workspace
30+
- 825+ tests across the workspace
2731

2832
**In progress:**
2933
- Metal backend (scaffolded)

docs/02-crate-structure.md

Lines changed: 60 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -25,14 +25,20 @@ RustCompute/
2525
│ │ └── src/
2626
│ │ ├── lib.rs
2727
│ │ ├── message.rs # RingMessage trait, priority constants
28-
│ │ ├── queue.rs # MessageQueue trait
28+
│ │ ├── queue.rs # MessageQueue trait, QueueTier, QueueFactory, QueueMonitor (v0.3.0)
2929
│ │ ├── runtime.rs # RingKernel, KernelHandle, LaunchOptions
3030
│ │ ├── context.rs # RingContext struct
3131
│ │ ├── control.rs # ControlBlock struct
3232
│ │ ├── telemetry.rs # TelemetryBuffer, MetricsCollector
3333
│ │ ├── pubsub.rs # PubSubBroker, Topic wildcards
3434
│ │ ├── hlc.rs # HlcTimestamp, HlcClock
35-
│ │ └── error.rs # Error types
35+
│ │ ├── error.rs # Error types
36+
│ │ ├── memory.rs # (v0.3.0) SizeBucket, StratifiedMemoryPool, PressureHandler
37+
│ │ ├── reduction.rs # (v0.3.0) ReductionOp, ReductionScalar, GlobalReduction
38+
│ │ ├── analytics_context.rs # (v0.3.0) AnalyticsContext, AllocationHandle
39+
│ │ ├── dispatcher.rs # (v0.3.0) KernelDispatcher, DispatcherBuilder
40+
│ │ ├── persistent_message.rs # (v0.3.0) PersistentMessage trait, DispatchTable
41+
│ │ └── domain.rs # (v0.3.0) Domain enum (20 business domains)
3642
│ │
3743
│ ├── ringkernel-derive/ # Proc macros (in development)
3844
│ │ └── src/lib.rs
@@ -45,9 +51,12 @@ RustCompute/
4551
│ ├── ringkernel-cuda/ # CUDA backend (working)
4652
│ │ ├── Cargo.toml
4753
│ │ ├── src/
48-
│ │ │ ├── lib.rs
54+
│ │ │ ├── lib.rs # compile_ptx() function (v0.3.0)
4955
│ │ │ ├── runtime.rs # CudaRuntime implementation
50-
│ │ │ └── ptx.rs # PTX template for persistent kernels
56+
│ │ │ ├── ptx.rs # PTX template for persistent kernels
57+
│ │ │ ├── reduction.rs # (v0.3.0) ReductionBuffer, ReductionBufferCache
58+
│ │ │ ├── phases.rs # (v0.3.0) SyncMode, MultiPhaseConfig, MultiPhaseExecutor
59+
│ │ │ └── persistent.rs # (v0.3.0) PersistentSimulation, PersistentControlBlock
5160
│ │ └── tests/
5261
│ │ └── gpu_execution_verify.rs # GPU execution verification
5362
│ │
@@ -93,7 +102,30 @@ RustCompute/
93102
│ │ └── handler.rs # Handler function integration
94103
│ │
95104
│ ├── ringkernel-ecosystem/ # Integration utilities
96-
│ │ └── src/lib.rs
105+
│ │ └── src/
106+
│ │ ├── lib.rs
107+
│ │ ├── actix.rs # GpuPersistentActor for Actix
108+
│ │ ├── tower.rs # PersistentKernelService middleware
109+
│ │ ├── axum.rs # PersistentGpuState, REST/SSE endpoints
110+
│ │ ├── grpc.rs # gRPC streaming server
111+
│ │ └── cuda_bridge.rs # CudaPersistentHandle
112+
│ │
113+
│ ├── ringkernel-montecarlo/ # (v0.3.0) Monte Carlo primitives
114+
│ │ └── src/
115+
│ │ ├── lib.rs
116+
│ │ ├── philox.rs # Philox PRNG
117+
│ │ ├── antithetic.rs # Antithetic variates
118+
│ │ ├── control_variate.rs # Control variates
119+
│ │ └── importance.rs # Importance sampling
120+
│ │
121+
│ ├── ringkernel-graph/ # (v0.3.0) Graph algorithms
122+
│ │ └── src/
123+
│ │ ├── lib.rs
124+
│ │ ├── csr.rs # CSR sparse matrix
125+
│ │ ├── bfs.rs # Breadth-first search
126+
│ │ ├── scc.rs # SCC (Tarjan, Kosaraju)
127+
│ │ ├── union_find.rs # Parallel Union-Find (Shiloach-Vishkin)
128+
│ │ └── spmv.rs # Sparse matrix-vector multiply
97129
│ │
98130
│ ├── ringkernel-audio-fft/ # Example: GPU audio processing
99131
│ │ └── src/lib.rs
@@ -263,7 +295,7 @@ members = [
263295
]
264296

265297
[workspace.package]
266-
version = "0.1.2"
298+
version = "0.3.0"
267299
edition = "2021"
268300
rust-version = "1.75"
269301
license = "MIT OR Apache-2.0"
@@ -273,28 +305,44 @@ categories = ["concurrency", "asynchronous", "science"]
273305

274306
[workspace.dependencies]
275307
# Async runtime
276-
tokio = { version = "1.35", features = ["rt-multi-thread", "sync", "macros"] }
308+
tokio = { version = "1.48", features = ["rt-multi-thread", "sync", "macros"] }
277309
async-trait = "0.1"
278310
futures = "0.3"
311+
rayon = "1.11"
279312

280313
# Serialization (zero-copy)
281314
rkyv = { version = "0.7", features = ["validation", "strict"] }
282315
zerocopy = { version = "0.7", features = ["derive"] }
283316
bytemuck = { version = "1.14", features = ["derive"] }
284317

285318
# Error handling
286-
thiserror = "1.0"
319+
thiserror = "2.0"
287320
anyhow = "1.0"
288321

289322
# Logging
290323
tracing = "0.1"
291324
tracing-subscriber = "0.3"
292325

293326
# GPU backends
294-
cudarc = { version = "0.10", optional = true } # CUDA
295-
metal = { version = "0.27", optional = true } # Metal
296-
wgpu = { version = "0.19", optional = true } # WebGPU
297-
ash = { version = "0.37", optional = true } # Vulkan
327+
cudarc = { version = "0.18.2", optional = true } # CUDA (updated API)
328+
metal = { version = "0.31", optional = true } # Metal
329+
wgpu = { version = "27.0", optional = true } # WebGPU (Arc-based)
330+
331+
# Web frameworks
332+
axum = { version = "0.8", optional = true }
333+
tower = { version = "0.5", optional = true }
334+
tonic = { version = "0.14", optional = true } # gRPC
335+
prost = { version = "0.14", optional = true } # Protobuf
336+
337+
# GUI
338+
iced = { version = "0.13", optional = true }
339+
egui = { version = "0.31", optional = true }
340+
winit = { version = "0.30", optional = true }
341+
342+
# Data
343+
arrow = { version = "54", optional = true }
344+
polars = { version = "0.46", optional = true }
345+
glam = "0.29"
298346

299347
# Proc macros
300348
syn = { version = "2.0", features = ["full", "parsing"] }

0 commit comments

Comments
 (0)