Skip to content

Commit 0dd4aca

Browse files
Add mojo format to pre-commit/CI and parallelize update_challenges.py (#239)
Wires mojo format into the lint pipeline and applies it repo-wide (78 starter files reformatted). Also parallelizes the HTTP uploads in scripts/update_challenges.py so deploys fan out instead of running serially.

- .pre-commit-config.yaml: local mojo-format hook running `mojo format -q`
- .github/workflows/lint.yml: lint-mojo job now installs `modular` via pip and enforces formatting via `mojo format` + `git diff --exit-code`
- .github/workflows/create-challenge.yml: adds `modular` to the cron's pip install so Claude's `pre-commit run --all-files` step has `mojo` on PATH
- pyproject.toml: drop `target-version = ['py312']` from [tool.black] — mblack (which `mojo format` wraps) reads the same section and rejects any target above py311; black auto-detects target fine without it
- scripts/update_challenges.py: upload via ThreadPoolExecutor(max_workers=16). Loading stays sequential because importlib with a shared "challenge" module name and sys.path mutation is not thread-safe.

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9a9c134 commit 0dd4aca

83 files changed

Lines changed: 793 additions & 193 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/workflows/create-challenge.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ jobs:
2929

3030
- name: Install dependencies
3131
run: |
32-
pip install pre-commit requests websocket-client
32+
pip install pre-commit requests websocket-client modular
3333
pre-commit install
3434
3535
- name: Fetch open PRs

.github/workflows/lint.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,21 @@ jobs:
7676
- name: Checkout code
7777
uses: actions/checkout@v4
7878

79+
- name: Set up Python
80+
uses: actions/setup-python@v5
81+
with:
82+
python-version: '3.12'
83+
84+
- name: Install Modular (mojo)
85+
run: |
86+
python -m pip install --upgrade pip
87+
pip install modular
88+
89+
- name: Check Mojo formatting with mojo format
90+
run: |
91+
find challenges -name "*.mojo" -type f -print0 | xargs -0 mojo format -q
92+
git diff --exit-code -- '*.mojo'
93+
7994
- name: Check Mojo files exist and are valid
8095
run: |
8196
echo "Checking Mojo files for basic syntax issues..."

.pre-commit-config.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,15 @@ repos:
3333
types_or: [c++, c, cuda]
3434
files: \.(cu|cpp|h)$
3535

36+
# Mojo formatting
37+
- repo: local
38+
hooks:
39+
- id: mojo-format
40+
name: mojo format
41+
entry: mojo format -q
42+
language: system
43+
files: \.mojo$
44+
3645
# General file checks
3746
- repo: https://github.com/pre-commit/pre-commit-hooks
3847
rev: v4.5.0

challenges/easy/19_reverse_array/starter/starter.mojo

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@ from std.gpu import block_dim, block_idx, thread_idx
33
from std.memory import UnsafePointer
44
from std.math import ceildiv
55

6+
67
fn reverse_array_kernel(input: UnsafePointer[Float32, MutExternalOrigin], N: Int32):
78
pass
89

10+
911
# input is a device pointer (i.e. pointer to memory on the GPU)
1012
@export
1113
fn solve(input: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises:
@@ -15,10 +17,6 @@ fn solve(input: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises:
1517
var blocksPerGrid = ceildiv(N, threadsPerBlock)
1618

1719
var _kernel = ctx.compile_function[reverse_array_kernel, reverse_array_kernel]()
18-
ctx.enqueue_function(_kernel,
19-
input, N,
20-
grid_dim = blocksPerGrid,
21-
block_dim = threadsPerBlock
22-
)
20+
ctx.enqueue_function(_kernel, input, N, grid_dim=blocksPerGrid, block_dim=threadsPerBlock)
2321

2422
ctx.synchronize()

challenges/easy/1_vector_add/starter/starter.mojo

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,29 @@ from std.gpu import block_dim, block_idx, thread_idx
33
from std.memory import UnsafePointer
44
from std.math import ceildiv
55

6-
fn vector_add_kernel(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], N: Int32):
6+
7+
fn vector_add_kernel(
8+
A: UnsafePointer[Float32, MutExternalOrigin],
9+
B: UnsafePointer[Float32, MutExternalOrigin],
10+
C: UnsafePointer[Float32, MutExternalOrigin],
11+
N: Int32,
12+
):
713
pass
814

15+
916
# A, B, C are device pointers (i.e. pointers to memory on the GPU)
1017
@export
11-
fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises:
18+
fn solve(
19+
A: UnsafePointer[Float32, MutExternalOrigin],
20+
B: UnsafePointer[Float32, MutExternalOrigin],
21+
C: UnsafePointer[Float32, MutExternalOrigin],
22+
N: Int32,
23+
) raises:
1224
var BLOCK_SIZE: Int32 = 256
1325
var ctx = DeviceContext()
1426
var num_blocks = ceildiv(N, BLOCK_SIZE)
1527

1628
var _kernel = ctx.compile_function[vector_add_kernel, vector_add_kernel]()
17-
ctx.enqueue_function(_kernel,
18-
A, B, C, N,
19-
grid_dim = num_blocks,
20-
block_dim = BLOCK_SIZE
21-
)
29+
ctx.enqueue_function(_kernel, A, B, C, N, grid_dim=num_blocks, block_dim=BLOCK_SIZE)
2230

2331
ctx.synchronize()

challenges/easy/21_relu/starter/starter.mojo

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,30 @@ from std.gpu import block_dim, block_idx, thread_idx
33
from std.memory import UnsafePointer
44
from std.math import ceildiv
55

6-
fn relu_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32):
6+
7+
fn relu_kernel(
8+
input: UnsafePointer[Float32, MutExternalOrigin],
9+
output: UnsafePointer[Float32, MutExternalOrigin],
10+
N: Int32,
11+
):
712
pass
813

14+
915
# input, output are device pointers (i.e. pointers to memory on the GPU)
1016
@export
11-
fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises:
17+
fn solve(
18+
input: UnsafePointer[Float32, MutExternalOrigin],
19+
output: UnsafePointer[Float32, MutExternalOrigin],
20+
N: Int32,
21+
) raises:
1222
var threadsPerBlock: Int32 = 256
1323
var ctx = DeviceContext()
1424

1525
var blocksPerGrid = ceildiv(N, threadsPerBlock)
1626

1727
var _kernel = ctx.compile_function[relu_kernel, relu_kernel]()
18-
ctx.enqueue_function(_kernel,
19-
input, output, N,
20-
grid_dim = blocksPerGrid,
21-
block_dim = threadsPerBlock
28+
ctx.enqueue_function(
29+
_kernel, input, output, N, grid_dim=blocksPerGrid, block_dim=threadsPerBlock
2230
)
2331

2432
ctx.synchronize()

challenges/easy/23_leaky_relu/starter/starter.mojo

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,30 @@ from std.gpu import block_dim, block_idx, thread_idx
33
from std.memory import UnsafePointer
44
from std.math import ceildiv
55

6-
fn leaky_relu_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32):
6+
7+
fn leaky_relu_kernel(
8+
input: UnsafePointer[Float32, MutExternalOrigin],
9+
output: UnsafePointer[Float32, MutExternalOrigin],
10+
N: Int32,
11+
):
712
pass
813

14+
915
# input, output are device pointers (i.e. pointers to memory on the GPU)
1016
@export
11-
fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises:
17+
fn solve(
18+
input: UnsafePointer[Float32, MutExternalOrigin],
19+
output: UnsafePointer[Float32, MutExternalOrigin],
20+
N: Int32,
21+
) raises:
1222
var threadsPerBlock: Int32 = 256
1323
var ctx = DeviceContext()
1424

1525
var blocksPerGrid = ceildiv(N, threadsPerBlock)
1626

1727
var _kernel = ctx.compile_function[leaky_relu_kernel, leaky_relu_kernel]()
18-
ctx.enqueue_function(_kernel,
19-
input, output, N,
20-
grid_dim = blocksPerGrid,
21-
block_dim = threadsPerBlock
28+
ctx.enqueue_function(
29+
_kernel, input, output, N, grid_dim=blocksPerGrid, block_dim=threadsPerBlock
2230
)
2331

2432
ctx.synchronize()

challenges/easy/24_rainbow_table/starter/starter.mojo

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ from std.gpu import block_dim, block_idx, thread_idx
33
from std.memory import UnsafePointer
44
from std.math import ceildiv
55

6+
67
fn fnv1a_hash(input: Int32) -> UInt32:
78
alias FNV_PRIME: UInt32 = 16777619
89
alias OFFSET_BASIS: UInt32 = 2166136261
@@ -15,23 +16,32 @@ fn fnv1a_hash(input: Int32) -> UInt32:
1516

1617
return hash
1718

18-
fn fnv1a_hash_kernel(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[UInt32, MutExternalOrigin],
19-
N: Int32, R: Int32):
19+
20+
fn fnv1a_hash_kernel(
21+
input: UnsafePointer[Int32, MutExternalOrigin],
22+
output: UnsafePointer[UInt32, MutExternalOrigin],
23+
N: Int32,
24+
R: Int32,
25+
):
2026
pass
2127

28+
2229
# input, output are device pointers (i.e. pointers to memory on the GPU)
2330
@export
24-
fn solve(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[UInt32, MutExternalOrigin], N: Int32, R: Int32) raises:
31+
fn solve(
32+
input: UnsafePointer[Int32, MutExternalOrigin],
33+
output: UnsafePointer[UInt32, MutExternalOrigin],
34+
N: Int32,
35+
R: Int32,
36+
) raises:
2537
var threadsPerBlock: Int32 = 256
2638
var ctx = DeviceContext()
2739

2840
var blocksPerGrid = ceildiv(N, threadsPerBlock)
2941

3042
var _kernel = ctx.compile_function[fnv1a_hash_kernel, fnv1a_hash_kernel]()
31-
ctx.enqueue_function(_kernel,
32-
input, output, N, R,
33-
grid_dim = blocksPerGrid,
34-
block_dim = threadsPerBlock
43+
ctx.enqueue_function(
44+
_kernel, input, output, N, R, grid_dim=blocksPerGrid, block_dim=threadsPerBlock
3545
)
3646

3747
ctx.synchronize()

challenges/easy/2_matrix_multiplication/starter/starter.mojo

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,45 @@ from std.gpu import block_dim, block_idx, thread_idx
33
from std.memory import UnsafePointer
44
from std.math import ceildiv
55

6-
fn matrix_multiplication_kernel(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32, K: Int32):
6+
7+
fn matrix_multiplication_kernel(
8+
A: UnsafePointer[Float32, MutExternalOrigin],
9+
B: UnsafePointer[Float32, MutExternalOrigin],
10+
C: UnsafePointer[Float32, MutExternalOrigin],
11+
M: Int32,
12+
N: Int32,
13+
K: Int32,
14+
):
715
pass
816

17+
918
# A, B, C are device pointers (i.e. pointers to memory on the GPU)
1019
@export
11-
fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32, K: Int32) raises:
20+
fn solve(
21+
A: UnsafePointer[Float32, MutExternalOrigin],
22+
B: UnsafePointer[Float32, MutExternalOrigin],
23+
C: UnsafePointer[Float32, MutExternalOrigin],
24+
M: Int32,
25+
N: Int32,
26+
K: Int32,
27+
) raises:
1228
var BLOCK_SIZE: Int32 = 16
1329
var ctx = DeviceContext()
1430

1531
var grid_dim_x = ceildiv(K, BLOCK_SIZE)
1632
var grid_dim_y = ceildiv(M, BLOCK_SIZE)
1733

1834
var _kernel = ctx.compile_function[matrix_multiplication_kernel, matrix_multiplication_kernel]()
19-
ctx.enqueue_function(_kernel,
20-
A, B, C, M, N, K,
21-
grid_dim = (grid_dim_x, grid_dim_y),
22-
block_dim = (BLOCK_SIZE, BLOCK_SIZE)
35+
ctx.enqueue_function(
36+
_kernel,
37+
A,
38+
B,
39+
C,
40+
M,
41+
N,
42+
K,
43+
grid_dim=(grid_dim_x, grid_dim_y),
44+
block_dim=(BLOCK_SIZE, BLOCK_SIZE),
2345
)
2446

2547
ctx.synchronize()

challenges/easy/31_matrix_copy/starter/starter.mojo

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,29 @@ from std.gpu import block_dim, block_idx, thread_idx
33
from std.memory import UnsafePointer
44
from std.math import ceildiv
55

6-
fn copy_matrix_kernel(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], N: Int32):
6+
7+
fn copy_matrix_kernel(
8+
A: UnsafePointer[Float32, MutExternalOrigin],
9+
B: UnsafePointer[Float32, MutExternalOrigin],
10+
N: Int32,
11+
):
712
pass
813

14+
915
# A, B are device pointers (i.e. pointers to memory on the GPU)
1016
@export
11-
fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises:
17+
fn solve(
18+
A: UnsafePointer[Float32, MutExternalOrigin],
19+
B: UnsafePointer[Float32, MutExternalOrigin],
20+
N: Int32,
21+
) raises:
1222
var total = N * N
1323
var threadsPerBlock: Int32 = 256
1424
var ctx = DeviceContext()
1525

1626
var blocksPerGrid = ceildiv(total, threadsPerBlock)
1727

1828
var _kernel = ctx.compile_function[copy_matrix_kernel, copy_matrix_kernel]()
19-
ctx.enqueue_function(_kernel,
20-
A, B, N,
21-
grid_dim = blocksPerGrid,
22-
block_dim = threadsPerBlock
23-
)
29+
ctx.enqueue_function(_kernel, A, B, N, grid_dim=blocksPerGrid, block_dim=threadsPerBlock)
2430

2531
ctx.synchronize()

0 commit comments

Comments
 (0)