# challenge.py
import ctypes
from typing import Any, Dict, List

import torch

from core.challenge_base import ChallengeBase


class Challenge(ChallengeBase):
    def __init__(self):
        super().__init__(
            name="Max 2D Subarray Sum",
            atol=1e-05,
            rtol=1e-05,
            num_gpus=1,
            access_tier="free"
        )

    def reference_impl(self, input: torch.Tensor, output: torch.Tensor, N: int, window_size: int):
        # Validate input shapes and dtypes
        assert input.shape == (N, N)
        assert output.shape == (1,)
        assert input.dtype == torch.int32
        assert output.dtype == torch.int32

        # Build a summed-area table: psum[i, j] is the sum of the
        # (i+1) x (j+1) block whose top-left corner is (0, 0).
        psum = input.cumsum(dim=0).cumsum(dim=1)

        # Zero-pad the top row and left column so every window sum reduces to
        # a single inclusion-exclusion expression with no boundary cases.
        # Allocate on the input's device to avoid cross-device copies.
        padded = torch.zeros((N + 1, N + 1), dtype=torch.int32, device=input.device)
        padded[1:, 1:] = psum

        # Sum of each window_size x window_size window via inclusion-exclusion:
        # bottom_right - top_right - bottom_left + top_left.
        top_left = padded[:-window_size, :-window_size]
        top_right = padded[:-window_size, window_size:]
        bottom_left = padded[window_size:, :-window_size]
        bottom_right = padded[window_size:, window_size:]
        window_sums = bottom_right - top_right - bottom_left + top_left

        max_sum = torch.max(window_sums)
        output[0] = max_sum
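
    # The window-sum identity used above: with S the zero-padded prefix-sum
    # table and k = window_size, the sum of the k x k window whose top-left
    # input element is (r, c) is
    #   S[r + k][c + k] - S[r][c + k] - S[r + k][c] + S[r][c].
    # As a worked check against the 3x3 example below with window_size=2,
    # the four window sums are 12, 11, 15, and 14, so output[0] becomes 15.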

    def get_solve_signature(self) -> Dict[str, Any]:
        return {
            "input": ctypes.POINTER(ctypes.c_int),
            "output": ctypes.POINTER(ctypes.c_int),
            "N": ctypes.c_int,
            "window_size": ctypes.c_int
        }
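
    # The ctypes mapping above suggests a C entry point shaped roughly like
    # the hypothetical prototype below (a sketch inferred from the signature;
    # the harness's actual declaration may differ):
    #   void solve(const int* input, int* output, int N, int window_size);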

    def generate_example_test(self) -> Dict[str, Any]:
        dtype = torch.int32
        input = torch.tensor([[1, 2, 3], [4, 5, 1], [5, 1, 7]], device="cuda", dtype=dtype)
        output = torch.empty(1, device="cuda", dtype=dtype)
        return {
            "input": input,
            "output": output,
            "N": 3,
            "window_size": 2
        }

    def generate_functional_test(self) -> List[Dict[str, Any]]:
        dtype = torch.int32
        tests = []
        # basic example, all-negative values
        tests.append({
            "input": torch.tensor([[-1, -2, -3], [-4, -5, -1], [-5, -1, -7]], device="cuda", dtype=dtype),
            "output": torch.empty(1, device="cuda", dtype=dtype),
            "N": 3,
            "window_size": 2
        })
        # all same value; window covering the full matrix, nearly full, and 1x1
        tests.append({
            "input": torch.tensor([[2] * 16] * 16, device="cuda", dtype=dtype),
            "output": torch.empty(1, device="cuda", dtype=dtype),
            "N": 16,
            "window_size": 16
        })
        tests.append({
            "input": torch.tensor([[2] * 16] * 16, device="cuda", dtype=dtype),
            "output": torch.empty(1, device="cuda", dtype=dtype),
            "N": 16,
            "window_size": 15
        })
        tests.append({
            "input": torch.tensor([[2] * 16] * 16, device="cuda", dtype=dtype),
            "output": torch.empty(1, device="cuda", dtype=dtype),
            "N": 16,
            "window_size": 1
        })
        # all-negative constant value
        tests.append({
            "input": torch.tensor([[-10] * 10] * 10, device="cuda", dtype=dtype),
            "output": torch.empty(1, device="cuda", dtype=dtype),
            "N": 10,
            "window_size": 5
        })
        # random all-negative values
        tests.append({
            "input": torch.randint(-10, 0, (123, 123), device="cuda", dtype=dtype),
            "output": torch.empty(1, device="cuda", dtype=dtype),
            "N": 123,
            "window_size": 7
        })
        # random mixed-sign values
        tests.append({
            "input": torch.randint(-10, 11, (123, 123), device="cuda", dtype=dtype),
            "output": torch.empty(1, device="cuda", dtype=dtype),
            "N": 123,
            "window_size": 7
        })
        # medium size
        tests.append({
            "input": torch.randint(-10, 11, (1000, 1000), device="cuda", dtype=dtype),
            "output": torch.empty(1, device="cuda", dtype=dtype),
            "N": 1000,
            "window_size": 476
        })
        # large size
        tests.append({
            "input": torch.randint(-10, 11, (3000, 3000), device="cuda", dtype=dtype),
            "output": torch.empty(1, device="cuda", dtype=dtype),
            "N": 3000,
            "window_size": 2011
        })
        return tests

    def generate_performance_test(self) -> Dict[str, Any]:
        dtype = torch.int32
        input = torch.randint(-10, 11, (5000, 5000), device="cuda", dtype=dtype)
        output = torch.empty(1, device="cuda", dtype=dtype)
        return {
            "input": input,
            "output": output,
            "N": 5000,
            "window_size": 2500
        }
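

# A minimal sanity-check sketch (hypothetical usage, outside the challenge
# harness): it runs the reference implementation on the example test and
# prints the result. It assumes a CUDA device is available, since the test
# tensors are allocated with device="cuda".
if __name__ == "__main__":
    challenge = Challenge()
    test = challenge.generate_example_test()
    challenge.reference_impl(**test)
    print(test["output"].item())  # expected: 15 for the 3x3 example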