Commit 20713e1

Susskind115 authored and wooway777 committed
Add LayerNorm and vision ops wrappers
1 parent a836991 · commit 20713e1

25 files changed: 944 additions & 0 deletions

include/infinicore/nn.hpp

Lines changed: 1 addition & 0 deletions
@@ -1,5 +1,6 @@
 #pragma once

 #include "nn/embedding.hpp"
+#include "nn/layernorm.hpp"
 #include "nn/linear.hpp"
 #include "nn/rmsnorm.hpp"
include/infinicore/nn/layernorm.hpp

Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
#pragma once

#include "../ops.hpp"
#include "module.hpp"

namespace infinicore::nn {

/**
 * @brief Layer Normalization
 *
 * Applies LayerNorm over the last dimension.
 *
 * Formula: y = (x - mean) / sqrt(var + eps) * weight + bias
 */
class LayerNorm : public Module {
public:
    /**
     * @brief Construct a LayerNorm layer
     *
     * @param normalized_shape Size of the feature dimension to normalize (typically hidden_size)
     * @param eps Small constant for numerical stability (default: 1e-5)
     * @param dtype Data type for the weight/bias (default: DataType::F32)
     * @param device Device to create the parameters on
     */
    LayerNorm(size_t normalized_shape,
              double eps = 1e-5,
              const DataType &dtype = DataType::F32,
              const Device &device = Device());

    /**
     * @brief Forward pass: apply LayerNorm
     *
     * @param x Input tensor of shape (*, normalized_shape)
     * @return Normalized tensor with same shape as input
     */
    Tensor forward(const Tensor &x) const;

    // Module information
    size_t normalized_shape() const { return normalized_shape_; }
    double eps() const { return eps_; }
    DataType dtype() const { return dtype_; }

    // String representation
    std::string extra_repr() const;

    // Accessors for parameters
    Tensor weight() const { return weight_; }
    Tensor bias() const { return bias_; }

protected:
    INFINICORE_NN_PARAMETER(weight);
    INFINICORE_NN_PARAMETER(bias);

private:
    size_t normalized_shape_;
    double eps_;
    DataType dtype_;
};

} // namespace infinicore::nn
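Usage note (not part of the diff): a minimal sketch of how the wrapper above might be used, assuming the umbrella header <infinicore/nn.hpp> maps to include/infinicore/nn.hpp and that the input tensor x is produced elsewhere; only the constructor and forward() signatures are taken from this header.

#include <infinicore/nn.hpp> // assumed install path for include/infinicore/nn.hpp

using namespace infinicore;

// Normalize a hidden-state tensor of shape (*, 4096) with a freshly built layer.
Tensor apply_layernorm(const Tensor &x, const Device &device) {
    nn::LayerNorm norm(/*normalized_shape=*/4096,
                       /*eps=*/1e-5,
                       /*dtype=*/DataType::F32,
                       /*device=*/device);
    return norm.forward(x); // same shape as x
}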

include/infinicore/ops.hpp

Lines changed: 8 additions & 0 deletions
@@ -14,26 +14,34 @@
 #include "ops/binary_cross_entropy_with_logits.hpp"
 #include "ops/causal_softmax.hpp"
 #include "ops/cdist.hpp"
+#include "ops/conv2d.hpp"
 #include "ops/cross_entropy.hpp"
 #include "ops/embedding.hpp"
 #include "ops/flash_attention.hpp"
 #include "ops/fmin.hpp"
 #include "ops/fmod.hpp"
+#include "ops/gelu.hpp"
+#include "ops/gelutanh.hpp"
 #include "ops/hardswish.hpp"
 #include "ops/hardtanh.hpp"
 #include "ops/kv_caching.hpp"
+#include "ops/layer_norm.hpp"
+#include "ops/linear.hpp"
 #include "ops/matmul.hpp"
 #include "ops/ones.hpp"
 #include "ops/paged_attention.hpp"
 #include "ops/paged_attention_prefill.hpp"
 #include "ops/paged_caching.hpp"
 #include "ops/per_tensor_dequant_i8.hpp"
 #include "ops/per_tensor_quant_i8.hpp"
+#include "ops/quickgelu.hpp"
 #include "ops/random_sample.hpp"
 #include "ops/rearrange.hpp"
 #include "ops/reciprocal.hpp"
+#include "ops/relu.hpp"
 #include "ops/rms_norm.hpp"
 #include "ops/rope.hpp"
 #include "ops/silu.hpp"
 #include "ops/silu_and_mul.hpp"
+#include "ops/softmax.hpp"
 #include "ops/swiglu.hpp"
include/infinicore/ops/conv2d.hpp

Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

#include <cstddef>
#include <vector>

namespace infinicore::op {
class Conv2d {
public:
    using schema = void (*)(Tensor, Tensor, Tensor, Tensor,
                            const size_t *, const size_t *, const size_t *, size_t);
    static void execute(Tensor output,
                        Tensor input,
                        Tensor weight,
                        Tensor bias,
                        const size_t *pads,
                        const size_t *strides,
                        const size_t *dilations,
                        size_t n);
    static common::OpDispatcher<schema> &dispatcher();
};

Tensor conv2d(Tensor input,
              Tensor weight,
              Tensor bias,
              const std::vector<size_t> &pads,
              const std::vector<size_t> &strides,
              const std::vector<size_t> &dilations);
void conv2d_(Tensor output,
             Tensor input,
             Tensor weight,
             Tensor bias,
             const std::vector<size_t> &pads,
             const std::vector<size_t> &strides,
             const std::vector<size_t> &dilations);
} // namespace infinicore::op
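Usage note (not part of the diff): a hedged call sketch for the conv2d free function above. It assumes pads, strides, and dilations each carry one value per spatial dimension (two for a 2D convolution), and that input, weight, and bias are tensors created elsewhere.

#include <infinicore/ops.hpp> // umbrella header; pulls in ops/conv2d.hpp per the diff above
#include <vector>

using namespace infinicore;

// Convolution call with padding 1, stride 1, dilation 1 on both spatial dims.
Tensor conv_block(Tensor input, Tensor weight, Tensor bias) {
    std::vector<size_t> pads      = {1, 1};
    std::vector<size_t> strides   = {1, 1};
    std::vector<size_t> dilations = {1, 1};
    return op::conv2d(input, weight, bias, pads, strides, dilations);
}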

include/infinicore/ops/gelu.hpp

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class Gelu {
public:
    using schema = void (*)(Tensor, Tensor);
    static void execute(Tensor output, Tensor input);
    static common::OpDispatcher<schema> &dispatcher();
};

Tensor gelu(Tensor input);
void gelu_(Tensor output, Tensor input);
} // namespace infinicore::op
include/infinicore/ops/gelutanh.hpp

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class GeluTanh {
public:
    using schema = void (*)(Tensor, Tensor);
    static void execute(Tensor output, Tensor input);
    static common::OpDispatcher<schema> &dispatcher();
};

Tensor gelu_tanh(Tensor input);
void gelu_tanh_(Tensor output, Tensor input);
} // namespace infinicore::op
include/infinicore/ops/layer_norm.hpp

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class LayerNorm {
public:
    using schema = void (*)(Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, float);
    static void execute(Tensor output,
                        Tensor input_standardization,
                        Tensor input_std_deviation,
                        Tensor input,
                        Tensor weight,
                        Tensor bias,
                        float epsilon);
    static common::OpDispatcher<schema> &dispatcher();
};

Tensor layer_norm(Tensor input, Tensor weight, Tensor bias, float epsilon = 1e-5f);
void layer_norm_(Tensor output,
                 Tensor input_standardization,
                 Tensor input_std_deviation,
                 Tensor input,
                 Tensor weight,
                 Tensor bias,
                 float epsilon = 1e-5f);
} // namespace infinicore::op
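Usage note (not part of the diff): the functional layer_norm allocates and returns the normalized tensor, while layer_norm_ additionally writes the standardized input and the per-row standard deviation into caller-provided buffers (presumably so they can be reused later, e.g. by a backward pass). A minimal sketch of the functional form, with weight and bias assumed to match the last dimension of x:

#include <infinicore/ops.hpp>

using namespace infinicore;

// y = (x - mean) / sqrt(var + eps) * weight + bias, per the nn::LayerNorm docs above.
Tensor normalize(Tensor x, Tensor weight, Tensor bias) {
    return op::layer_norm(x, weight, bias, /*epsilon=*/1e-5f);
}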
include/infinicore/ops/quickgelu.hpp

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class QuickGelu {
public:
    using schema = void (*)(Tensor, Tensor);
    static void execute(Tensor output, Tensor input);
    static common::OpDispatcher<schema> &dispatcher();
};

Tensor quick_gelu(Tensor input);
void quick_gelu_(Tensor output, Tensor input);
} // namespace infinicore::op

include/infinicore/ops/relu.hpp

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class Relu {
public:
    using schema = void (*)(Tensor, Tensor);
    static void execute(Tensor output, Tensor input);
    static common::OpDispatcher<schema> &dispatcher();
};

Tensor relu(Tensor input);
void relu_(Tensor output, Tensor input);
} // namespace infinicore::op

include/infinicore/ops/softmax.hpp

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class Softmax {
public:
    using schema = void (*)(Tensor, Tensor, int);
    static void execute(Tensor output, Tensor input, int axis);
    static common::OpDispatcher<schema> &dispatcher();
};

Tensor softmax(Tensor input, int axis = -1);
void softmax_(Tensor output, Tensor input, int axis = -1);
} // namespace infinicore::op
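Usage note (not part of the diff): the new unary activations (gelu, gelu_tanh, quick_gelu, relu) and softmax all follow the same out-of-place/in-place pattern. A small sketch chaining two of them, assuming a logits tensor produced elsewhere; axis = -1 normalizes over the last dimension per the default above:

#include <infinicore/ops.hpp>

using namespace infinicore;

// Element-wise ReLU followed by a softmax over the last dimension.
Tensor classify(Tensor logits) {
    Tensor activated = op::relu(logits);
    return op::softmax(activated, /*axis=*/-1);
}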
