Skip to content

Commit 2b38d34

Browse files
committed
optimization on contiguous parameters
1 parent 85c8cbe commit 2b38d34

6 files changed

Lines changed: 112 additions & 24 deletions

File tree

madspace/include/madspace/driver/adam_optimizer.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,10 @@ class AdamOptimizer {
3737
double _beta1;
3838
double _beta2;
3939
double _eps;
40-
TensorVec _parameters;
41-
TensorVec _exp_avgs;
42-
TensorVec _exp_avg_sqs;
40+
Tensor _one;
41+
Tensor _parameter;
42+
Tensor _exp_avg;
43+
Tensor _exp_avg_sq;
4344
TypeVec _input_types;
4445
};
4546

madspace/include/madspace/driver/context.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ class Context {
128128
std::vector<std::string> global_names() const;
129129
void delete_global(const std::string& name);
130130
void copy_globals_from(Context& context);
131+
Tensor reallocate_globals_contiguously(const std::vector<std::string>& names);
131132
const MatrixElementApi& matrix_element(std::size_t index) const;
132133
void save_globals(const std::string& dir) const;
133134
void load_globals(const std::string& dir);

madspace/include/madspace/driver/tensor.h

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,13 @@ class Sizes {
5151
const std::size_t* data() const { return &_values[0]; }
5252
std::size_t& back() { return _values[_size - 1]; }
5353
const std::size_t& back() const { return _values[_size - 1]; }
54+
std::size_t product() const {
55+
std::size_t size = 1;
56+
for (std::size_t dim_size : *this) {
57+
size *= dim_size;
58+
}
59+
return size;
60+
}
5461

5562
private:
5663
std::size_t _values[max_size];
@@ -485,14 +492,7 @@ class Tensor {
485492
}
486493
}
487494

488-
std::size_t byte_size() const {
489-
check_impl();
490-
std::size_t size = dtype_size();
491-
for (auto dim_size : impl->shape) {
492-
size *= dim_size;
493-
}
494-
return size;
495-
}
495+
std::size_t byte_size() const {
    // Bytes occupied by the tensor's data: element count times the width of
    // one element of this dtype.
    return shape().product() * dtype_size();
}
496496

497497
void reset() {
498498
if (impl == nullptr) {
@@ -517,7 +517,9 @@ class Tensor {
517517
std::vector<Tensor> unstack(std::size_t axis) const;
518518
Tensor unsqueeze(std::size_t axis) const;
519519
Tensor expand(const Sizes& shape) const;
520+
Tensor reshape(const Sizes& shape) const;
520521
Tensor factor_dim(std::size_t axis, std::size_t factor);
522+
std::vector<Tensor> split_and_reshape(const std::vector<Sizes>& shapes) const;
521523

522524
template <typename D>
523525
Tensor cpu(const D& device) const {

madspace/src/driver/adam_optimizer.cpp

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,21 +15,26 @@ AdamOptimizer::AdamOptimizer(
1515
double eps
1616
) :
1717
_context(context),
18-
_runtime(build_runtime(function, context)),
1918
_learning_rate(learning_rate),
2019
_schedule(schedule),
2120
_step(0),
2221
_step_count(step_count),
2322
_beta1(beta1),
2423
_beta2(beta2),
25-
_eps(eps) {
24+
_eps(eps),
25+
_one(1.0, context->device()) {
2626
DevicePtr device = context->device();
27+
std::vector<std::string> param_names;
2728
for (auto& [name, value] : function.globals()) {
28-
Tensor global = context->global(name);
29-
_parameters.push_back(global);
30-
_exp_avgs.emplace_back(global.dtype(), global.shape(), device).zero();
31-
_exp_avg_sqs.emplace_back(global.dtype(), global.shape(), device).zero();
29+
if (context->global_requires_grad(name)) {
30+
param_names.push_back(name);
31+
}
3232
}
33+
_parameter = context->reallocate_globals_contiguously(param_names);
34+
_runtime = build_runtime(function, context);
35+
_parameter = Tensor(_parameter.dtype(), _parameter.shape(), _parameter.device());
36+
_exp_avg = Tensor(_parameter.dtype(), _parameter.shape(), _parameter.device());
37+
_exp_avg_sq = Tensor(_parameter.dtype(), _parameter.shape(), _parameter.device());
3338
_input_types.reserve(function.inputs().size());
3439
for (auto& input : function.inputs()) {
3540
_input_types.push_back(input.type);
@@ -47,20 +52,20 @@ TensorVec AdamOptimizer::step(const TensorVec& inputs) {
4752
_runtime->run_with_grad(inputs, std::vector<bool>(inputs.size(), false));
4853
TensorVec output_grads(outputs.size());
4954
DevicePtr device = _context->device();
50-
output_grads.at(0) = Tensor(1.0, device);
55+
output_grads.at(0) = _one;
5156
auto [input_grads, global_grads] =
5257
_runtime->run_backward(output_grads, stored_locals, eval_grad);
53-
/*device->adam_step(
54-
global_grads,
55-
_parameters,
56-
_exp_avgs,
57-
_exp_avg_sqs,
58+
device->adam_step(
59+
global_grads.at(0),
60+
_parameter,
61+
_exp_avg,
62+
_exp_avg_sq,
5863
step_size,
5964
_beta1,
6065
_beta2,
6166
_eps,
6267
bias_corr2_sqrt
63-
);*/
68+
);
6469
return outputs;
6570
}
6671

madspace/src/driver/context.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,38 @@ void Context::copy_globals_from(Context& context) {
195195
}
196196
}
197197

198+
Tensor Context::reallocate_globals_contiguously(const std::vector<std::string>& names) {
    // Replaces the listed globals with views into a single, freshly allocated
    // contiguous buffer so they can all be updated by one fused kernel call
    // (e.g. a single adam_step). Returns the flat parent tensor that owns the
    // storage.
    //
    // NOTE(review): the previous contents of the globals are NOT copied into
    // the new buffer — callers must re-initialize the returned tensor. Confirm
    // this is intended before relying on old values surviving the call.
    //
    // Throws std::runtime_error when `names` is empty, when a global is
    // externally referenced (reallocation would invalidate the outside view),
    // or when the globals disagree on dtype.
    if (names.empty()) {
        // Without this guard, `dtype` below would be read uninitialized.
        throw std::runtime_error(
            "reallocate_globals_contiguously: at least one global is required"
        );
    }
    std::vector<Sizes> shapes;
    shapes.reserve(names.size());
    std::size_t total_size = 0;
    DataType dtype;
    for (bool first = true; auto& name : names) {
        auto& glob = _globals.at(name).first;
        if (!glob.is_only_reference()) {
            throw std::runtime_error(
                std::format(
                    "Global {}: cannot reallocate as it is externally referenced", name
                )
            );
        }
        if (first) {
            dtype = glob.dtype();
            first = false;
        } else if (dtype != glob.dtype()) {
            // All globals must share one dtype to live in the same buffer.
            throw std::runtime_error(
                std::format("Global {}: incompatible dtype", name)
            );
        }
        shapes.push_back(glob.shape());
        total_size += glob.shape().product();
    }
    // One flat parent tensor holding all listed globals back to back.
    Tensor parent(dtype, {total_size}, device());
    // Bind by reference to avoid copying the name string and tensor handle on
    // every iteration; each global is repointed at its slice of the parent.
    for (auto&& [name, tensor] : zip(names, parent.split_and_reshape(shapes))) {
        _globals.at(name).first = tensor;
    }
    return parent;
}
229+
198230
const MatrixElementApi& Context::matrix_element(std::size_t index) const {
199231
if (index >= _matrix_elements.size()) {
200232
throw std::runtime_error("Matrix element index out of bounds");

madspace/src/driver/tensor.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,31 @@ Tensor Tensor::expand(const Sizes& shape) const {
120120
});
121121
}
122122

123+
Tensor Tensor::reshape(const Sizes& new_shape) const {
    // Returns a view of this tensor with the same underlying data
    // reinterpreted under `new_shape`. Only valid for contiguous tensors, and
    // only when the new shape describes exactly as many elements as the
    // current one; no data is copied.
    check_impl();
    if (!is_contiguous()) {
        throw std::runtime_error("Tensor must be contiguous");
    }
    if (new_shape.product() != shape().product()) {
        throw std::runtime_error("Incompatible shapes");
    }
    // Share storage with this tensor's impl; only the shape (and the strides
    // derived from it below) differ. The impl fields are positional — keep
    // this aggregate in the order TensorImpl declares them.
    Tensor view(new Tensor::TensorImpl{
        impl->dtype,
        new_shape,
        impl->device,
        impl->data,
        false,
        std::nullopt,
        impl,
        1,
        {},
        impl->offset,
        0
    });
    view.init_stride();
    return view;
}
147+
123148
Tensor Tensor::factor_dim(std::size_t axis, std::size_t factor) {
124149
check_impl();
125150
auto new_dim = impl->shape.size() + 1;
@@ -157,6 +182,28 @@ Tensor Tensor::factor_dim(std::size_t axis, std::size_t factor) {
157182
});
158183
}
159184

185+
std::vector<Tensor> Tensor::split_and_reshape(const std::vector<Sizes>& shapes) const {
186+
check_impl();
187+
if (!is_contiguous() || shape().size() != 1) {
188+
throw std::runtime_error(
189+
"split_and_reshape is only available for single-dimensional contiguous "
190+
"tensors"
191+
);
192+
}
193+
SizeVec size_prods;
194+
size_prods.reserve(shapes.size());
195+
for (auto& shape : shapes) {
196+
size_prods.push_back(shape.product());
197+
}
198+
TensorVec split_tensors = split(0, size_prods);
199+
TensorVec ret;
200+
ret.reserve(shapes.size());
201+
for (auto [tensor, shape] : zip(split_tensors, shapes)) {
202+
ret.push_back(tensor.reshape(shape));
203+
}
204+
return ret;
205+
}
206+
160207
std::size_t Tensor::init_stride() {
161208
std::size_t stride_prod = 1;
162209
bool first = true;

0 commit comments

Comments
 (0)