Skip to content

Commit 9a9c134

Browse files
Upgrade Mojo starters to 26.2 (#238)
1 parent 1044bc2 commit 9a9c134

78 files changed

Lines changed: 459 additions & 441 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

challenges/easy/19_reverse_array/starter/starter.mojo

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,21 @@
1-
from gpu.host import DeviceContext
2-
from gpu.id import block_dim, block_idx, thread_idx
3-
from memory import UnsafePointer
4-
from math import ceildiv
1+
from std.gpu.host import DeviceContext
2+
from std.gpu import block_dim, block_idx, thread_idx
3+
from std.memory import UnsafePointer
4+
from std.math import ceildiv
55

6-
fn reverse_array_kernel(input: UnsafePointer[Float32], N: Int32):
6+
fn reverse_array_kernel(input: UnsafePointer[Float32, MutExternalOrigin], N: Int32):
77
pass
88

99
# input is a device pointer (i.e. pointer to memory on the GPU)
1010
@export
11-
def solve(input: UnsafePointer[Float32], N: Int32):
11+
fn solve(input: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises:
1212
var threadsPerBlock: Int32 = 256
1313
var ctx = DeviceContext()
1414

1515
var blocksPerGrid = ceildiv(N, threadsPerBlock)
1616

17-
ctx.enqueue_function[reverse_array_kernel](
17+
var _kernel = ctx.compile_function[reverse_array_kernel, reverse_array_kernel]()
18+
ctx.enqueue_function(_kernel,
1819
input, N,
1920
grid_dim = blocksPerGrid,
2021
block_dim = threadsPerBlock

challenges/easy/1_vector_add/starter/starter.mojo

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,20 @@
1-
from gpu.host import DeviceContext
2-
from gpu.id import block_dim, block_idx, thread_idx
3-
from memory import UnsafePointer
4-
from math import ceildiv
1+
from std.gpu.host import DeviceContext
2+
from std.gpu import block_dim, block_idx, thread_idx
3+
from std.memory import UnsafePointer
4+
from std.math import ceildiv
55

6-
fn vector_add_kernel(A: UnsafePointer[Float32], B: UnsafePointer[Float32], C: UnsafePointer[Float32], N: Int32):
6+
fn vector_add_kernel(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], N: Int32):
77
pass
88

99
# A, B, C are device pointers (i.e. pointers to memory on the GPU)
1010
@export
11-
def solve(A: UnsafePointer[Float32], B: UnsafePointer[Float32], C: UnsafePointer[Float32], N: Int32):
11+
fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises:
1212
var BLOCK_SIZE: Int32 = 256
1313
var ctx = DeviceContext()
1414
var num_blocks = ceildiv(N, BLOCK_SIZE)
1515

16-
ctx.enqueue_function[vector_add_kernel](
16+
var _kernel = ctx.compile_function[vector_add_kernel, vector_add_kernel]()
17+
ctx.enqueue_function(_kernel,
1718
A, B, C, N,
1819
grid_dim = num_blocks,
1920
block_dim = BLOCK_SIZE

challenges/easy/21_relu/starter/starter.mojo

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,21 @@
1-
from gpu.host import DeviceContext
2-
from gpu.id import block_dim, block_idx, thread_idx
3-
from memory import UnsafePointer
4-
from math import ceildiv
1+
from std.gpu.host import DeviceContext
2+
from std.gpu import block_dim, block_idx, thread_idx
3+
from std.memory import UnsafePointer
4+
from std.math import ceildiv
55

6-
fn relu_kernel(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32):
6+
fn relu_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32):
77
pass
88

99
# input, output are device pointers (i.e. pointers to memory on the GPU)
1010
@export
11-
def solve(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32):
11+
fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises:
1212
var threadsPerBlock: Int32 = 256
1313
var ctx = DeviceContext()
1414

1515
var blocksPerGrid = ceildiv(N, threadsPerBlock)
1616

17-
ctx.enqueue_function[relu_kernel](
17+
var _kernel = ctx.compile_function[relu_kernel, relu_kernel]()
18+
ctx.enqueue_function(_kernel,
1819
input, output, N,
1920
grid_dim = blocksPerGrid,
2021
block_dim = threadsPerBlock

challenges/easy/23_leaky_relu/starter/starter.mojo

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,21 @@
1-
from gpu.host import DeviceContext
2-
from gpu.id import block_dim, block_idx, thread_idx
3-
from memory import UnsafePointer
4-
from math import ceildiv
1+
from std.gpu.host import DeviceContext
2+
from std.gpu import block_dim, block_idx, thread_idx
3+
from std.memory import UnsafePointer
4+
from std.math import ceildiv
55

6-
fn leaky_relu_kernel(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32):
6+
fn leaky_relu_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32):
77
pass
88

99
# input, output are device pointers (i.e. pointers to memory on the GPU)
1010
@export
11-
def solve(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32):
11+
fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises:
1212
var threadsPerBlock: Int32 = 256
1313
var ctx = DeviceContext()
1414

1515
var blocksPerGrid = ceildiv(N, threadsPerBlock)
1616

17-
ctx.enqueue_function[leaky_relu_kernel](
17+
var _kernel = ctx.compile_function[leaky_relu_kernel, leaky_relu_kernel]()
18+
ctx.enqueue_function(_kernel,
1819
input, output, N,
1920
grid_dim = blocksPerGrid,
2021
block_dim = threadsPerBlock

challenges/easy/24_rainbow_table/starter/starter.mojo

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
from gpu.host import DeviceContext
2-
from gpu.id import block_dim, block_idx, thread_idx
3-
from memory import UnsafePointer
4-
from math import ceildiv
1+
from std.gpu.host import DeviceContext
2+
from std.gpu import block_dim, block_idx, thread_idx
3+
from std.memory import UnsafePointer
4+
from std.math import ceildiv
55

66
fn fnv1a_hash(input: Int32) -> UInt32:
77
alias FNV_PRIME: UInt32 = 16777619
@@ -15,19 +15,20 @@ fn fnv1a_hash(input: Int32) -> UInt32:
1515

1616
return hash
1717

18-
fn fnv1a_hash_kernel(input: UnsafePointer[Int32], output: UnsafePointer[UInt32],
18+
fn fnv1a_hash_kernel(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[UInt32, MutExternalOrigin],
1919
N: Int32, R: Int32):
2020
pass
2121

2222
# input, output are device pointers (i.e. pointers to memory on the GPU)
2323
@export
24-
def solve(input: UnsafePointer[Int32], output: UnsafePointer[UInt32], N: Int32, R: Int32):
24+
fn solve(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[UInt32, MutExternalOrigin], N: Int32, R: Int32) raises:
2525
var threadsPerBlock: Int32 = 256
2626
var ctx = DeviceContext()
2727

2828
var blocksPerGrid = ceildiv(N, threadsPerBlock)
2929

30-
ctx.enqueue_function[fnv1a_hash_kernel](
30+
var _kernel = ctx.compile_function[fnv1a_hash_kernel, fnv1a_hash_kernel]()
31+
ctx.enqueue_function(_kernel,
3132
input, output, N, R,
3233
grid_dim = blocksPerGrid,
3334
block_dim = threadsPerBlock

challenges/easy/2_matrix_multiplication/starter/starter.mojo

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,22 @@
1-
from gpu.host import DeviceContext
2-
from gpu.id import block_dim, block_idx, thread_idx
3-
from memory import UnsafePointer
4-
from math.math import ceildiv
1+
from std.gpu.host import DeviceContext
2+
from std.gpu import block_dim, block_idx, thread_idx
3+
from std.memory import UnsafePointer
4+
from std.math import ceildiv
55

6-
fn matrix_multiplication_kernel(A: UnsafePointer[Float32], B: UnsafePointer[Float32], C: UnsafePointer[Float32], M: Int32, N: Int32, K: Int32):
6+
fn matrix_multiplication_kernel(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32, K: Int32):
77
pass
88

99
# A, B, C are device pointers (i.e. pointers to memory on the GPU)
1010
@export
11-
def solve(A: UnsafePointer[Float32], B: UnsafePointer[Float32], C: UnsafePointer[Float32], M: Int32, N: Int32, K: Int32):
11+
fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32, K: Int32) raises:
1212
var BLOCK_SIZE: Int32 = 16
1313
var ctx = DeviceContext()
1414

1515
var grid_dim_x = ceildiv(K, BLOCK_SIZE)
1616
var grid_dim_y = ceildiv(M, BLOCK_SIZE)
1717

18-
ctx.enqueue_function[matrix_multiplication_kernel](
18+
var _kernel = ctx.compile_function[matrix_multiplication_kernel, matrix_multiplication_kernel]()
19+
ctx.enqueue_function(_kernel,
1920
A, B, C, M, N, K,
2021
grid_dim = (grid_dim_x, grid_dim_y),
2122
block_dim = (BLOCK_SIZE, BLOCK_SIZE)

challenges/easy/31_matrix_copy/starter/starter.mojo

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,22 @@
1-
from gpu.host import DeviceContext
2-
from gpu.id import block_dim, block_idx, thread_idx
3-
from memory import UnsafePointer
4-
from math import ceildiv
1+
from std.gpu.host import DeviceContext
2+
from std.gpu import block_dim, block_idx, thread_idx
3+
from std.memory import UnsafePointer
4+
from std.math import ceildiv
55

6-
fn copy_matrix_kernel(A: UnsafePointer[Float32], B: UnsafePointer[Float32], N: Int32):
6+
fn copy_matrix_kernel(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], N: Int32):
77
pass
88

99
# A, B are device pointers (i.e. pointers to memory on the GPU)
1010
@export
11-
def solve(A: UnsafePointer[Float32], B: UnsafePointer[Float32], N: Int32):
11+
fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises:
1212
var total = N * N
1313
var threadsPerBlock: Int32 = 256
1414
var ctx = DeviceContext()
1515

1616
var blocksPerGrid = ceildiv(total, threadsPerBlock)
1717

18-
ctx.enqueue_function[copy_matrix_kernel](
18+
var _kernel = ctx.compile_function[copy_matrix_kernel, copy_matrix_kernel]()
19+
ctx.enqueue_function(_kernel,
1920
A, B, N,
2021
grid_dim = blocksPerGrid,
2122
block_dim = threadsPerBlock

challenges/easy/3_matrix_transpose/starter/starter.mojo

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,22 @@
1-
from gpu.host import DeviceContext
2-
from gpu.id import block_dim, block_idx, thread_idx
3-
from memory import UnsafePointer
4-
from math import ceildiv
1+
from std.gpu.host import DeviceContext
2+
from std.gpu import block_dim, block_idx, thread_idx
3+
from std.memory import UnsafePointer
4+
from std.math import ceildiv
55

6-
fn matrix_transpose_kernel(input: UnsafePointer[Float32], output: UnsafePointer[Float32], rows: Int32, cols: Int32):
6+
fn matrix_transpose_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], rows: Int32, cols: Int32):
77
pass
88

99
# input, output are device pointers (i.e. pointers to memory on the GPU)
1010
@export
11-
def solve(input: UnsafePointer[Float32], output: UnsafePointer[Float32], rows: Int32, cols: Int32):
11+
fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], rows: Int32, cols: Int32) raises:
1212
var BLOCK_SIZE: Int32 = 32
1313
var ctx = DeviceContext()
1414

1515
var grid_dim_x = ceildiv(cols, BLOCK_SIZE)
1616
var grid_dim_y = ceildiv(rows, BLOCK_SIZE)
1717

18-
ctx.enqueue_function[matrix_transpose_kernel](
18+
var _kernel = ctx.compile_function[matrix_transpose_kernel, matrix_transpose_kernel]()
19+
ctx.enqueue_function(_kernel,
1920
input, output, rows, cols,
2021
grid_dim = (grid_dim_x, grid_dim_y),
2122
block_dim = (BLOCK_SIZE, BLOCK_SIZE)

challenges/easy/52_silu/starter/starter.mojo

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,21 @@
1-
from gpu.host import DeviceContext
2-
from gpu.id import block_dim, block_idx, thread_idx
3-
from memory import UnsafePointer
4-
from math import ceildiv
1+
from std.gpu.host import DeviceContext
2+
from std.gpu import block_dim, block_idx, thread_idx
3+
from std.memory import UnsafePointer
4+
from std.math import ceildiv
55

6-
fn silu_kernel(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32):
6+
fn silu_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32):
77
pass
88

99
# input, output are device pointers
1010
@export
11-
def solve(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32):
11+
fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises:
1212
var threadsPerBlock: Int32 = 256
1313
var ctx = DeviceContext()
1414

1515
var blocksPerGrid = ceildiv(N, threadsPerBlock)
1616

17-
ctx.enqueue_function[silu_kernel](
17+
var _kernel = ctx.compile_function[silu_kernel, silu_kernel]()
18+
ctx.enqueue_function(_kernel,
1819
input, output, N,
1920
grid_dim = blocksPerGrid,
2021
block_dim = threadsPerBlock

challenges/easy/54_swiglu/starter/starter.mojo

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,22 @@
1-
from gpu.host import DeviceContext
2-
from gpu.id import block_dim, block_idx, thread_idx
3-
from memory import UnsafePointer
4-
from math import ceildiv
1+
from std.gpu.host import DeviceContext
2+
from std.gpu import block_dim, block_idx, thread_idx
3+
from std.memory import UnsafePointer
4+
from std.math import ceildiv
55

66

7-
fn swiglu_kernel(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32):
7+
fn swiglu_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32):
88
pass
99

1010

1111
# input, output are device pointers
1212
@export
13-
def solve(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32):
13+
fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises:
1414
var BLOCK_SIZE: Int32 = 256
1515
var ctx = DeviceContext()
1616
var num_blocks = ceildiv(N // 2, BLOCK_SIZE)
1717

18-
ctx.enqueue_function[swiglu_kernel](
18+
var _kernel = ctx.compile_function[swiglu_kernel, swiglu_kernel]()
19+
ctx.enqueue_function(_kernel,
1920
input, output, N,
2021
grid_dim = num_blocks,
2122
block_dim = BLOCK_SIZE

0 commit comments

Comments
 (0)