Skip to content

Commit 2ae3b59

Browse files
committed
Finish T1-1-4: sum, topk, var, var_mean, all
1 parent b8e214f commit 2ae3b59

32 files changed

Lines changed: 2123 additions & 15 deletions

File tree

include/infinicore/ops/all.hpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Dispatch entry for the global "all" reduction (logical AND over every
/// element of the input, mirroring the Python-level `all()` wrapper).
class AllGlobal {
public:
    /// Backend kernel signature: (input, output).
    using schema = void (*)(Tensor, Tensor);
    /// Runs the implementation registered for the current device.
    static void execute(Tensor input, Tensor output);
    /// Per-device registry of backend implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

/// Reduces every element of `input` with logical AND; returns a new tensor.
Tensor all_global(Tensor input);
/// Out-parameter variant: writes the global reduction of `input` into `output`.
void all_global_(Tensor input, Tensor output);

/// Dispatch entry for the "all" reduction along a single dimension.
class AllReduce {
public:
    /// Backend kernel signature: (input, output, dim, keepdim).
    using schema = void (*)(Tensor, Tensor, int, bool);
    /// Runs the implementation registered for the current device.
    static void execute(Tensor input, Tensor output, int dim, bool keepdim);
    /// Per-device registry of backend implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

/// Reduces `input` with logical AND along `dim`; when `keepdim` is true the
/// reduced dimension is retained with size 1. Returns a new tensor.
Tensor all_reduce(Tensor input, int dim, bool keepdim);
/// Out-parameter variant: writes the reduction result into `output`.
void all_reduce_(Tensor input, Tensor output, int dim, bool keepdim);

} // namespace infinicore::op

include/infinicore/ops/sum.hpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Dispatch entry for the global sum reduction (adds every element of the
/// input into a single result, mirroring the Python-level `sum()` wrapper).
class SumGlobal {
public:
    /// Backend kernel signature: (input, output).
    using schema = void (*)(Tensor, Tensor);
    /// Runs the implementation registered for the current device.
    static void execute(Tensor input, Tensor output);
    /// Per-device registry of backend implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

/// Sums every element of `input`; returns a new tensor.
Tensor sum_global(Tensor input);
/// Out-parameter variant: writes the global sum of `input` into `output`.
void sum_global_(Tensor input, Tensor output);

/// Dispatch entry for the sum reduction along a single dimension.
class SumReduce {
public:
    /// Backend kernel signature: (input, output, dim, keepdim).
    using schema = void (*)(Tensor, Tensor, int, bool);
    /// Runs the implementation registered for the current device.
    static void execute(Tensor input, Tensor output, int dim, bool keepdim);
    /// Per-device registry of backend implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

/// Sums `input` along `dim`; when `keepdim` is true the reduced dimension is
/// retained with size 1. Returns a new tensor.
Tensor sum_reduce(Tensor input, int dim, bool keepdim);
/// Out-parameter variant: writes the reduction result into `output`.
void sum_reduce_(Tensor input, Tensor output, int dim, bool keepdim);

} // namespace infinicore::op

include/infinicore/ops/topk.hpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <tuple>

namespace infinicore::op {

/// Dispatch entry for the top-k selection op (k largest or smallest elements
/// along a dimension, mirroring the Python-level `topk()` wrapper).
class TopK {
public:
    /// Backend kernel signature:
    /// (input, values, indices, k, dim, largest, sorted).
    using schema = void (*)(Tensor, Tensor, Tensor, int, int, bool, bool);
    /// Runs the implementation registered for the current device, writing the
    /// selected values and their indices into `values` / `indices`.
    static void execute(Tensor input, Tensor values, Tensor indices, int k, int dim, bool largest, bool sorted);
    /// Per-device registry of backend implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

/// Selects the `k` largest (or smallest when `largest` is false) elements of
/// `input` along `dim`; `sorted` requests the results in sorted order.
/// Returns (values, indices) as newly allocated tensors.
std::tuple<Tensor, Tensor> topk(Tensor input, int k, int dim, bool largest, bool sorted);
/// Out-parameter variant: writes results into `values` and `indices`.
void topk_(Tensor input, Tensor values, Tensor indices, int k, int dim, bool largest, bool sorted);

} // namespace infinicore::op

include/infinicore/ops/var.hpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Dispatch entry for the global variance reduction (variance of every
/// element of the input, mirroring the Python-level `var()` wrapper).
class VarGlobal {
public:
    /// Backend kernel signature: (input, output, correction).
    /// `correction` is the degrees-of-freedom adjustment (the Python wrapper
    /// maps unbiased=True to 1 and unbiased=False to 0).
    using schema = void (*)(Tensor, Tensor, int);
    /// Runs the implementation registered for the current device.
    static void execute(Tensor input, Tensor output, int correction);
    /// Per-device registry of backend implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

/// Computes the variance of every element of `input`; returns a new tensor.
Tensor var_global(Tensor input, int correction);
/// Out-parameter variant: writes the global variance of `input` into `output`.
void var_global_(Tensor input, Tensor output, int correction);

/// Dispatch entry for the variance reduction along a single dimension.
class VarReduce {
public:
    /// Backend kernel signature: (input, output, dim, correction, keepdim).
    using schema = void (*)(Tensor, Tensor, int, int, bool);
    /// Runs the implementation registered for the current device.
    static void execute(Tensor input, Tensor output, int dim, int correction, bool keepdim);
    /// Per-device registry of backend implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

/// Computes the variance of `input` along `dim` with the given
/// degrees-of-freedom `correction`; `keepdim` retains the reduced dimension
/// with size 1. Returns a new tensor.
Tensor var_reduce(Tensor input, int dim, int correction, bool keepdim);
/// Out-parameter variant: writes the reduction result into `output`.
void var_reduce_(Tensor input, Tensor output, int dim, int correction, bool keepdim);

} // namespace infinicore::op
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#pragma once

#include "../device.hpp"
#include "common/op.hpp"
#include <tuple>

namespace infinicore::op {

/// Dispatch entry for the fused global variance+mean reduction, producing
/// both statistics over every element of the input in one pass
/// (mirrors the Python-level `var_mean()` wrapper).
class VarMeanGlobal {
public:
    /// Backend kernel signature: (input, out_var, out_mean, correction).
    /// `correction` is the degrees-of-freedom adjustment applied to the
    /// variance (it does not affect the mean).
    using schema = void (*)(Tensor, Tensor, Tensor, int);
    /// Runs the implementation registered for the current device.
    static void execute(Tensor input, Tensor out_var, Tensor out_mean, int correction);
    /// Per-device registry of backend implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

/// Computes (variance, mean) of every element of `input`; returns new tensors.
std::tuple<Tensor, Tensor> var_mean_global(Tensor input, int correction);
/// Out-parameter variant: writes into `out_var` / `out_mean`.
void var_mean_global_(Tensor input, Tensor out_var, Tensor out_mean, int correction);

/// Dispatch entry for the fused variance+mean reduction along one dimension.
class VarMeanReduce {
public:
    /// Backend kernel signature:
    /// (input, out_var, out_mean, dim, correction, keepdim).
    using schema = void (*)(Tensor, Tensor, Tensor, int, int, bool);
    /// Runs the implementation registered for the current device.
    static void execute(Tensor input, Tensor out_var, Tensor out_mean, int dim, int correction, bool keepdim);
    /// Per-device registry of backend implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

/// Computes (variance, mean) of `input` along `dim`; `keepdim` retains the
/// reduced dimension with size 1. Returns new tensors.
std::tuple<Tensor, Tensor> var_mean_reduce(Tensor input, int dim, int correction, bool keepdim);
/// Out-parameter variant: writes into `out_var` / `out_mean`.
void var_mean_reduce_(Tensor input, Tensor out_var, Tensor out_mean, int dim, int correction, bool keepdim);

} // namespace infinicore::op

python/infinicore/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,18 @@
4040
uint8,
4141
)
4242
from infinicore.ops.add import add
43+
from infinicore.ops.all import all
4344
from infinicore.ops.attention import attention
4445
from infinicore.ops.matmul import matmul
4546
from infinicore.ops.mul import mul
4647
from infinicore.ops.narrow import narrow
4748
from infinicore.ops.rearrange import rearrange
4849
from infinicore.ops.squeeze import squeeze
50+
from infinicore.ops.sum import sum
51+
from infinicore.ops.topk import topk
4952
from infinicore.ops.unsqueeze import unsqueeze
53+
from infinicore.ops.var import var
54+
from infinicore.ops.var_mean import var_mean
5055
from infinicore.tensor import (
5156
Tensor,
5257
empty,
@@ -119,6 +124,11 @@
119124
"strided_empty",
120125
"strided_from_blob",
121126
"zeros",
127+
"sum",
128+
"topk",
129+
"var",
130+
"var_mean",
131+
"all",
122132
]
123133

124134
use_ntops = False

python/infinicore/ops/all.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import infinicore
2+
from infinicore.lib import _infinicore
3+
from infinicore.tensor import Tensor
4+
5+
6+
def all(
    input: Tensor,
    dim: int | tuple[int] | list[int] | None = None,
    keepdim: bool = False,
    *,
    out=None,
) -> Tensor:
    r"""Computes the logical AND of all elements.

    Args:
        input: Tensor to reduce.
        dim: Dimension(s) to reduce over. ``None`` reduces every element to
            a single result.
        keepdim: Keep reduced dimensions with size 1.
        out: Optional pre-allocated output tensor written in place.

    Returns:
        The reduced tensor (``out`` when it was supplied).
    """
    # Delegate to the ntops backend when it is enabled for this device.
    if infinicore.use_ntops and input.device.type in ("cuda", "musa"):
        return infinicore.ntops.torch.all(input, dim=dim, keepdim=keepdim, out=out)

    if dim is None:
        # Global reduction over every element.
        if out is None:
            return Tensor(_infinicore.all_global(input._underlying))
        _infinicore.all_global_(input._underlying, out._underlying)
        return out

    dims = [dim] if isinstance(dim, int) else list(dim)
    ndim = input.ndim
    # Normalize negative dims, then reduce the highest dim first so earlier
    # reductions cannot shift the indices of the dims still to be reduced.
    normalized_dims = sorted((d if d >= 0 else d + ndim for d in dims), reverse=True)

    # The previous revision special-cased `len(normalized_dims) == 1 and
    # out is not None` before the loop; that branch was identical to the
    # loop's first-iteration "last step" path, so it has been removed.
    current_input = input
    for i, target_dim in enumerate(normalized_dims):
        is_last_step = i == len(normalized_dims) - 1

        if is_last_step and out is not None:
            # Final step writes directly into the caller-provided output.
            _infinicore.all_reduce_(
                current_input._underlying, out._underlying, target_dim, keepdim
            )
            return out

        current_input = Tensor(
            _infinicore.all_reduce(current_input._underlying, target_dim, keepdim)
        )

    return current_input

python/infinicore/ops/sum.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import infinicore
2+
from infinicore.lib import _infinicore
3+
from infinicore.tensor import Tensor
4+
5+
6+
def sum(
    input: Tensor,
    dim: int | tuple[int] | list[int] | None = None,
    keepdim=False,
    *,
    dtype=None,
    out=None,
) -> Tensor:
    r"""Apply the sum function.

    Args:
        input: Tensor to reduce.
        dim: Dimension(s) to reduce over. ``None`` sums every element to a
            single result. Multi-dimension reduction is performed as a
            sequence of single-dimension sums (summation is associative, so
            the result equals the joint reduction).
        keepdim: Keep reduced dimensions with size 1.
        dtype: Forwarded to the ntops backend only.
        out: Optional pre-allocated output tensor written in place.

    Returns:
        The reduced tensor (``out`` when it was supplied).
    """
    # Delegate to the ntops backend when it is enabled for this device.
    if infinicore.use_ntops and input.device.type in ("cuda", "musa"):
        return infinicore.ntops.torch.sum(
            input, dim, keepdim=keepdim, dtype=dtype, out=out
        )

    # NOTE(review): `dtype` is silently ignored on the native path — the
    # _infinicore bindings take no dtype argument. Confirm whether the
    # backend should honor it or the wrapper should reject non-None values.

    if dim is None:
        # Global reduction over every element.
        if out is None:
            return Tensor(_infinicore.sum_global(input._underlying))
        _infinicore.sum_global_(input._underlying, out._underlying)
        return out

    dims = [dim] if isinstance(dim, int) else list(dim)
    ndim = input.ndim
    # Normalize negative dims, then reduce the highest dim first so earlier
    # reductions cannot shift the indices of the dims still to be reduced.
    # (The previous revision passed a multi-element tuple/list straight into
    # the int-typed `sum_reduce` binding, which failed; this loop matches
    # the sibling `all()` implementation.)
    normalized_dims = sorted((d if d >= 0 else d + ndim for d in dims), reverse=True)

    current_input = input
    for i, target_dim in enumerate(normalized_dims):
        is_last_step = i == len(normalized_dims) - 1

        if is_last_step and out is not None:
            # Final step writes directly into the caller-provided output.
            _infinicore.sum_reduce_(
                current_input._underlying, out._underlying, target_dim, keepdim
            )
            return out

        current_input = Tensor(
            _infinicore.sum_reduce(current_input._underlying, target_dim, keepdim)
        )

    return current_input

python/infinicore/ops/topk.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import infinicore
2+
from infinicore.lib import _infinicore
3+
from infinicore.tensor import Tensor
4+
5+
6+
def topk(
    input: Tensor,
    k: int,
    dim: int = -1,
    largest: bool = True,
    sorted: bool = True,
    *,
    out=None,
):
    r"""Returns the k largest elements of the given input tensor along a given dimension.

    Args:
        input: Tensor to select from.
        k: Number of elements to select.
        dim: Dimension to select along (default: last).
        largest: Select the largest elements when true, smallest otherwise.
        sorted: Return the selected elements in sorted order.
        out: Optional ``(values, indices)`` pair of pre-allocated tensors.

    Returns:
        A ``(values, indices)`` pair of tensors.
    """
    # Delegate to the ntops backend when it is enabled for this device.
    if infinicore.use_ntops and input.device.type in ("cuda", "musa"):
        return infinicore.ntops.torch.topk(input, k, dim, largest, sorted, out=out)

    if out is not None:
        # In-place path: validate and unpack the caller-provided pair.
        if not isinstance(out, (tuple, list)) or len(out) != 2:
            raise ValueError("out argument must be a tuple of (values, indices)")

        values_out, indices_out = out
        _infinicore.topk_(
            input._underlying,
            values_out._underlying,
            indices_out._underlying,
            k,
            dim,
            largest,
            sorted,
        )
        return values_out, indices_out

    # Allocating path: the binding returns raw handles; wrap them as Tensors.
    raw_values, raw_indices = _infinicore.topk(
        input._underlying, k, dim, largest, sorted
    )
    return Tensor(raw_values), Tensor(raw_indices)

python/infinicore/ops/var.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import infinicore
2+
from infinicore.lib import _infinicore
3+
from infinicore.tensor import Tensor
4+
5+
6+
def var(
    input: Tensor,
    dim: int | tuple[int] | list[int] | None = None,
    unbiased: bool | None = None,
    correction: int | None = None,
    keepdim: bool = False,
    *,
    dtype=None,
    out=None,
) -> Tensor:
    r"""Returns the variance of the input tensor.

    Args:
        input: Tensor to reduce.
        dim: Dimension to reduce over. ``None`` reduces every element.
            Only a single dimension is supported on the native path.
        unbiased: Legacy flag; ``True`` maps to ``correction=1``, ``False``
            to ``correction=0``. May not conflict with ``correction``.
        correction: Degrees-of-freedom adjustment (default 1, i.e. Bessel's
            correction).
        keepdim: Keep the reduced dimension with size 1.
        dtype: Forwarded to the ntops backend only.
        out: Optional pre-allocated output tensor written in place.

    Returns:
        The variance tensor (``out`` when it was supplied).

    Raises:
        ValueError: If ``unbiased`` and ``correction`` conflict.
        NotImplementedError: If ``dim`` names more than one dimension.
    """
    # Resolve the legacy `unbiased` flag and `correction` into one value.
    if unbiased is not None:
        if correction is not None and correction != (1 if unbiased else 0):
            raise ValueError(
                "Cannot specify both 'unbiased' and 'correction' with conflicting values."
            )
        final_correction = 1 if unbiased else 0
    else:
        final_correction = correction if correction is not None else 1

    # Delegate to the ntops backend when it is enabled for this device.
    if infinicore.use_ntops and input.device.type in ("cuda", "musa"):
        return infinicore.ntops.torch.var(
            input,
            dim=dim,
            correction=final_correction,
            keepdim=keepdim,
            dtype=dtype,
            out=out,
        )

    # NOTE(review): `dtype` is silently ignored on the native path — the
    # _infinicore bindings take no dtype argument. Confirm intended behavior.

    if dim is None:
        # Global reduction over every element.
        if out is None:
            return Tensor(_infinicore.var_global(input._underlying, final_correction))
        _infinicore.var_global_(input._underlying, out._underlying, final_correction)
        return out

    target_dim = dim
    if isinstance(target_dim, (tuple, list)):
        if len(target_dim) != 1:
            # Variance over multiple dims is not decomposable into
            # per-dimension passes, and the previous revision passed the
            # list straight into the int-typed binding (opaque TypeError).
            # Fail explicitly instead.
            raise NotImplementedError(
                "var currently supports reduction over a single dimension only."
            )
        target_dim = target_dim[0]

    if out is None:
        return Tensor(
            _infinicore.var_reduce(
                input._underlying, target_dim, final_correction, keepdim
            )
        )

    _infinicore.var_reduce_(
        input._underlying, out._underlying, target_dim, final_correction, keepdim
    )
    return out

0 commit comments

Comments
 (0)