
Commit 195a4b0

Transurgeon and claude authored
adds constant offset argument in linear op (#34)
* adds constant offset argument in linear op

* Remove has_offset parameter from new_linear, use null check instead

  Address PR feedback:
  - Remove license header from test file (not used elsewhere in diff engine)
  - Remove redundant has_offset parameter - just check if b is NULL
  - More idiomatic C pattern for optional parameters

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 9571bc5 commit 195a4b0

15 files changed

Lines changed: 297 additions & 26 deletions


include/affine.h

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 #include "subexpr.h"
 #include "utils/CSR_Matrix.h"

-expr *new_linear(expr *u, const CSR_Matrix *A);
+expr *new_linear(expr *u, const CSR_Matrix *A, const double *b);

 expr *new_add(expr *left, expr *right);
 expr *new_neg(expr *child);

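Existing callers stay correct by passing NULL for the new argument, as the test updates at the bottom of this diff show. A minimal sketch of both call forms, with made-up offset values and a hypothetical helper name:

/* Sketch: both call forms of the updated API. Assumes var is a
 * previously created variable node and A a CSR matrix with 2 rows;
 * the offset values are made up for illustration. */
void build_both(expr *var, const CSR_Matrix *A)
{
    double b[2] = {1.0, 2.0};  /* offset; length must equal A->m */

    expr *plain   = new_linear(var, A, NULL);  /* y = A * x      */
    expr *shifted = new_linear(var, A, b);     /* y = A * x + b  */
    (void) plain;
    (void) shifted;
}

Since new_linear copies b into the node (see src/affine/linear_op.c below), a stack array is safe here.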
include/subexpr.h

Lines changed: 2 additions & 1 deletion
@@ -10,12 +10,13 @@ struct int_double_pair;

 /* Type-specific expression structures that "inherit" from expr */

-/* Linear operator: y = A * x */
+/* Linear operator: y = A * x + b */
 typedef struct linear_op_expr
 {
     expr base;
     CSC_Matrix *A_csc;
     CSR_Matrix *A_csr;
+    double *b; /* constant offset vector (NULL if no offset) */
 } linear_op_expr;

 /* Power: y = x^p */

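The expr base first member is what the "inherit" comment refers to: because base sits at offset zero, a linear_op_expr pointer and an expr pointer to the same node alias each other, which is how forward in src/affine/linear_op.c recovers b after being handed a plain expr *. A self-contained sketch of the idiom, with simplified stand-in types:

#include <stdio.h>

/* Self-contained sketch of the first-member "inheritance" idiom,
 * with simplified stand-in types (expr_t plays the role of expr). */
typedef struct { int size; } expr_t;

typedef struct
{
    expr_t base;  /* must be first: makes the two pointer types alias */
    double *b;    /* NULL if no offset, as in linear_op_expr */
} linear_t;

static void use_as_base(expr_t *node)
{
    /* Downcast is valid because base is the first member. */
    linear_t *lin = (linear_t *) node;
    printf("size=%d, offset: %s\n", node->size, lin->b ? "yes" : "none");
}

int main(void)
{
    linear_t lin = { .base = { .size = 2 }, .b = NULL };
    use_as_base(&lin.base);  /* upcast: pointer to first member */
    return 0;
}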
python/atoms/linear.h

Lines changed: 24 additions & 3 deletions
@@ -7,9 +7,12 @@ static PyObject *py_make_linear(PyObject *self, PyObject *args)
 {
     PyObject *child_capsule;
     PyObject *data_obj, *indices_obj, *indptr_obj;
+    PyObject *b_obj = Py_None; /* Optional offset vector */
     int m, n;
-    if (!PyArg_ParseTuple(args, "OOOOii", &child_capsule, &data_obj, &indices_obj,
-                          &indptr_obj, &m, &n))
+
+    /* Accept optional b array: (child, data, indices, indptr, m, n[, b]) */
+    if (!PyArg_ParseTuple(args, "OOOOii|O", &child_capsule, &data_obj, &indices_obj,
+                          &indptr_obj, &m, &n, &b_obj))
     {
         return NULL;
     }
@@ -46,8 +49,26 @@ static PyObject *py_make_linear(PyObject *self, PyObject *args)
     Py_DECREF(indices_array);
     Py_DECREF(indptr_array);

-    expr *node = new_linear(child, A);
+    /* Parse optional b offset vector */
+    double *b_data = NULL;
+    PyArrayObject *b_array = NULL;
+
+    if (b_obj != Py_None)
+    {
+        b_array = (PyArrayObject *) PyArray_FROM_OTF(b_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+        if (!b_array)
+        {
+            free_csr_matrix(A);
+            return NULL;
+        }
+        b_data = (double *) PyArray_DATA(b_array);
+    }
+
+    expr *node = new_linear(child, A, b_data);
+
+    /* Clean up */
     free_csr_matrix(A);
+    Py_XDECREF(b_array);

     if (!node)
     {
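The | in the "OOOOii|O" format string marks every following argument as optional: when the caller omits b, PyArg_ParseTuple leaves b_obj at its Py_None default, giving a cheap sentinel to test. A stripped-down sketch of the same pattern in isolation (function name hypothetical):

#include <Python.h>

/* Hypothetical binding showing the optional-argument idiom used above:
 * arguments after '|' in the format string may be omitted, in which
 * case b_obj keeps its Py_None default and is easy to test for. */
static PyObject *demo_optional(PyObject *self, PyObject *args)
{
    int m, n;
    PyObject *b_obj = Py_None;  /* default when the caller omits b */

    if (!PyArg_ParseTuple(args, "ii|O", &m, &n, &b_obj))
    {
        return NULL;
    }

    if (b_obj == Py_None)
    {
        return PyUnicode_FromString("no offset");
    }
    return PyUnicode_FromString("offset given");
}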
Lines changed: 220 additions & 0 deletions
@@ -0,0 +1,220 @@
+"""Tests for linear_op with constant offset: A @ x + b.
+
+Tests verify that linear_op correctly handles:
+1. Forward pass: y = A @ x + b
+2. Gradient computation via chain rule
+"""
+
+import numpy as np
+import dnlp_diff_engine as de
+
+
+def test_linear_op_with_offset_forward():
+    """Test linear_op(x, A, b) computes A @ x + b correctly in forward pass.
+
+    Note: linear_op is an internal expression type and must be wrapped in
+    another expression (like log) to be used as an objective.
+    """
+    n_vars = 2
+    x = de.make_variable(n_vars, 1, 0, n_vars)  # Column vector (n_vars x 1)
+
+    # A @ x + b where A = [[1, 1]], b = [5]
+    # Should compute x[0] + x[1] + 5
+    A_data = np.array([1.0, 1.0])
+    A_indices = np.array([0, 1], dtype=np.int32)
+    A_indptr = np.array([0, 2], dtype=np.int32)
+    b = np.array([5.0])
+
+    linear_with_offset = de.make_linear(x, A_data, A_indices, A_indptr, 1, 2, b)
+    # Wrap in log to create a valid objective
+    log_expr = de.make_log(linear_with_offset)
+
+    prob = de.make_problem(log_expr, [])
+    de.problem_init_derivatives(prob)
+
+    # Test at u = [2.0, 3.0]
+    u = np.array([2.0, 3.0])
+
+    # Forward: log(2 + 3 + 5) = log(10)
+    obj = de.problem_objective_forward(prob, u)
+    assert np.isclose(obj, np.log(10.0)), f"Expected log(10), got {obj}"
+
+
+def test_linear_op_with_offset_gradient():
+    """Test gradient of log(A @ x + b)."""
+    n_vars = 2
+    x = de.make_variable(n_vars, 1, 0, n_vars)  # Column vector (n_vars x 1)
+
+    # A @ x + b where A = [[1, 1]], b = [5]
+    # log(x[0] + x[1] + 5)
+    A_data = np.array([1.0, 1.0])
+    A_indices = np.array([0, 1], dtype=np.int32)
+    A_indptr = np.array([0, 2], dtype=np.int32)
+    b = np.array([5.0])
+
+    linear_with_offset = de.make_linear(x, A_data, A_indices, A_indptr, 1, 2, b)
+    log_expr = de.make_log(linear_with_offset)
+
+    prob = de.make_problem(log_expr, [])
+    de.problem_init_derivatives(prob)
+
+    # Test at u = [2.0, 3.0]
+    u = np.array([2.0, 3.0])
+
+    # Forward: log(2 + 3 + 5) = log(10)
+    obj = de.problem_objective_forward(prob, u)
+    assert np.isclose(obj, np.log(10.0))
+
+    # Gradient: d/dx log(x+y+5) = 1/(x+y+5) for both
+    grad = de.problem_gradient(prob)
+    expected = 1.0 / 10.0
+    np.testing.assert_allclose(grad, [expected, expected], rtol=1e-5)
+
+
+def test_linear_op_without_offset():
+    """Test linear_op(x, A) still works (no b parameter)."""
+    n_vars = 2
+    x = de.make_variable(n_vars, 1, 0, n_vars)  # Column vector (n_vars x 1)
+
+    # A @ x where A = [[2, 3]]
+    A_data = np.array([2.0, 3.0])
+    A_indices = np.array([0, 1], dtype=np.int32)
+    A_indptr = np.array([0, 2], dtype=np.int32)
+
+    # No b parameter - pass None explicitly
+    linear_no_offset = de.make_linear(x, A_data, A_indices, A_indptr, 1, 2, None)
+    log_expr = de.make_log(linear_no_offset)
+
+    prob = de.make_problem(log_expr, [])
+    de.problem_init_derivatives(prob)
+
+    u = np.array([1.0, 1.0])
+    obj = de.problem_objective_forward(prob, u)
+    assert np.isclose(obj, np.log(5.0))  # log(2*1 + 3*1)
+
+    grad = de.problem_gradient(prob)
+    # d/dx log(2x + 3y) = [2, 3] / (2x + 3y) = [2/5, 3/5]
+    np.testing.assert_allclose(grad, [2.0/5.0, 3.0/5.0], rtol=1e-5)
+
+
+def test_linear_op_with_offset_hessian():
+    """Test Hessian of log(A @ x + b)."""
+    n_vars = 2
+    x = de.make_variable(n_vars, 1, 0, n_vars)  # Column vector (n_vars x 1)
+
+    # log(x[0] + x[1] + 5)
+    A_data = np.array([1.0, 1.0])
+    A_indices = np.array([0, 1], dtype=np.int32)
+    A_indptr = np.array([0, 2], dtype=np.int32)
+    b = np.array([5.0])
+
+    linear_with_offset = de.make_linear(x, A_data, A_indices, A_indptr, 1, 2, b)
+    log_expr = de.make_log(linear_with_offset)
+
+    prob = de.make_problem(log_expr, [])
+    de.problem_init_derivatives(prob)
+
+    # Test at u = [2.0, 3.0]
+    u = np.array([2.0, 3.0])
+    de.problem_objective_forward(prob, u)
+
+    # Hessian: d²/dx² log(x+y+5) = -1/(x+y+5)² for all entries
+    obj_factor = 1.0
+    hess_data, hess_indices, hess_indptr, hess_shape = de.problem_hessian(
+        prob, obj_factor, np.array([])
+    )
+
+    # The Hessian should be [[h, h], [h, h]] where h = -1/100
+    expected_h = -1.0 / 100.0
+
+    # Check that entries are correct
+    assert len(hess_data) >= 3, f"Expected at least 3 Hessian entries, got {len(hess_data)}"
+
+    # Check values
+    for val in hess_data:
+        np.testing.assert_allclose(val, expected_h, rtol=1e-5)
+
+
+def test_linear_op_vector_with_offset():
+    """Test linear_op with vector output: y = A @ x + b where y is a vector."""
+    n_vars = 3
+    x = de.make_variable(n_vars, 1, 0, n_vars)  # Column vector (n_vars x 1)
+
+    # A is 2x3, b is 2-vector
+    # A = [[1, 2, 0], [0, 1, 3]]
+    # b = [1, 2]
+    # y[0] = x[0] + 2*x[1] + 1
+    # y[1] = x[1] + 3*x[2] + 2
+    A_data = np.array([1.0, 2.0, 1.0, 3.0])
+    A_indices = np.array([0, 1, 1, 2], dtype=np.int32)
+    A_indptr = np.array([0, 2, 4], dtype=np.int32)
+    b = np.array([1.0, 2.0])
+
+    linear_with_offset = de.make_linear(x, A_data, A_indices, A_indptr, 2, 3, b)
+
+    # sum(log(A @ x + b))
+    log_expr = de.make_log(linear_with_offset)
+    sum_expr = de.make_sum(log_expr, -1)
+
+    prob = de.make_problem(sum_expr, [])
+    de.problem_init_derivatives(prob)
+
+    # Test at u = [1, 1, 1]
+    u = np.array([1.0, 1.0, 1.0])
+
+    # y[0] = 1 + 2 + 1 = 4
+    # y[1] = 1 + 3 + 2 = 6
+    # sum(log(y)) = log(4) + log(6)
+    obj = de.problem_objective_forward(prob, u)
+    expected_obj = np.log(4.0) + np.log(6.0)
+    np.testing.assert_allclose(obj, expected_obj, rtol=1e-5)
+
+    # Gradient:
+    # d/dx[0] = 1/y[0] * 1 = 1/4
+    # d/dx[1] = 1/y[0] * 2 + 1/y[1] * 1 = 2/4 + 1/6 = 0.5 + 0.1667
+    # d/dx[2] = 1/y[1] * 3 = 3/6 = 0.5
+    grad = de.problem_gradient(prob)
+    expected_grad = np.array([1.0/4.0, 2.0/4.0 + 1.0/6.0, 3.0/6.0])
+    np.testing.assert_allclose(grad, expected_grad, rtol=1e-5)
+
+
+def test_linear_op_sparse_matrix_with_offset():
+    """Test linear_op with sparse matrix and offset."""
+    n_vars = 5
+    x = de.make_variable(n_vars, 1, 0, n_vars)  # Column vector (n_vars x 1)
+
+    # A is 3x5 sparse matrix (only some entries non-zero)
+    # A = [[1, 0, 2, 0, 0],
+    #      [0, 3, 0, 4, 0],
+    #      [0, 0, 0, 0, 5]]
+    # b = [10, 20, 30]
+    A_data = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
+    A_indices = np.array([0, 2, 1, 3, 4], dtype=np.int32)
+    A_indptr = np.array([0, 2, 4, 5], dtype=np.int32)
+    b = np.array([10.0, 20.0, 30.0])
+
+    linear_with_offset = de.make_linear(x, A_data, A_indices, A_indptr, 3, 5, b)
+    log_expr = de.make_log(linear_with_offset)
+    sum_expr = de.make_sum(log_expr, -1)
+
+    prob = de.make_problem(sum_expr, [])
+    de.problem_init_derivatives(prob)
+
+    # Test at u = [1, 1, 1, 1, 1]
+    u = np.ones(5)
+
+    # y[0] = 1*1 + 2*1 + 10 = 13
+    # y[1] = 3*1 + 4*1 + 20 = 27
+    # y[2] = 5*1 + 30 = 35
+    obj = de.problem_objective_forward(prob, u)
+    expected_obj = np.log(13.0) + np.log(27.0) + np.log(35.0)
+    np.testing.assert_allclose(obj, expected_obj, rtol=1e-5)
+
+    grad = de.problem_gradient(prob)
+    # d/dx[0] = 1/13
+    # d/dx[1] = 3/27
+    # d/dx[2] = 2/13
+    # d/dx[3] = 4/27
+    # d/dx[4] = 5/35
+    expected_grad = np.array([1.0/13.0, 3.0/27.0, 2.0/13.0, 4.0/27.0, 5.0/35.0])
+    np.testing.assert_allclose(grad, expected_grad, rtol=1e-5)

src/affine/linear_op.c

Lines changed: 31 additions & 2 deletions
@@ -1,16 +1,27 @@
 #include "affine.h"
 #include <assert.h>
 #include <stdlib.h>
+#include <string.h>

 static void forward(expr *node, const double *u)
 {
     expr *x = node->left;
+    linear_op_expr *lin_node = (linear_op_expr *) node;

     /* child's forward pass */
     node->left->forward(node->left, u);

     /* y = A * x */
-    csr_matvec(((linear_op_expr *) node)->A_csr, x->value, node->value, x->var_id);
+    csr_matvec(lin_node->A_csr, x->value, node->value, x->var_id);
+
+    /* y += b (if offset exists) */
+    if (lin_node->b != NULL)
+    {
+        for (int i = 0; i < node->size; i++)
+        {
+            node->value[i] += lin_node->b[i];
+        }
+    }
 }

 static bool is_affine(const expr *node)
@@ -30,6 +41,13 @@ static void free_type_data(expr *node)
     }

     free_csc_matrix(lin_node->A_csc);
+
+    if (lin_node->b != NULL)
+    {
+        free(lin_node->b);
+        lin_node->b = NULL;
+    }
+
     lin_node->A_csr = NULL;
     lin_node->A_csc = NULL;
 }
@@ -39,7 +57,7 @@ static void jacobian_init(expr *node)
     node->jacobian = ((linear_op_expr *) node)->A_csr;
 }

-expr *new_linear(expr *u, const CSR_Matrix *A)
+expr *new_linear(expr *u, const CSR_Matrix *A, const double *b)
 {
     assert(u->d2 == 1);
     /* Allocate the type-specific struct */
@@ -55,5 +73,16 @@ expr *new_linear(expr *u, const CSR_Matrix *A, const double *b)
     copy_csr_matrix(A, lin_node->A_csr);
     lin_node->A_csc = csr_to_csc(A);

+    /* Initialize offset (copy b if provided, otherwise NULL) */
+    if (b != NULL)
+    {
+        lin_node->b = (double *) malloc(A->m * sizeof(double));
+        memcpy(lin_node->b, b, A->m * sizeof(double));
+    }
+    else
+    {
+        lin_node->b = NULL;
+    }
+
     return node;
 }
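Note that only forward, construction, and teardown change: since b is constant, the Jacobian of A x + b is still A, so jacobian_init keeps pointing at A_csr. For intuition, a standalone sketch of the computation forward now performs; the CSR field names used here (p, i, x) are assumptions, since only A->p appears verbatim elsewhere in this diff:

/* Standalone sketch of y = A * x + b over a CSR matrix. Field names
 * (p = row pointers, i = column indices, x = values) are assumptions.
 * The committed code adds b in a second loop after csr_matvec; the
 * result is the same. */
typedef struct
{
    int m, n;   /* rows, cols */
    int *p;     /* row pointers, length m + 1 */
    int *i;     /* column index of each nonzero */
    double *x;  /* nonzero values */
} csr_sketch;

static void matvec_with_offset(const csr_sketch *A, const double *x,
                               const double *b, double *y)
{
    for (int row = 0; row < A->m; row++)
    {
        double acc = (b != NULL) ? b[row] : 0.0;  /* fold in offset */
        for (int k = A->p[row]; k < A->p[row + 1]; k++)
        {
            acc += A->x[k] * x[A->i[k]];
        }
        y[row] = acc;
    }
}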

tests/forward_pass/affine/test_linear_op.h

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ const char *test_linear_op()
     memcpy(A->p, Ap, 4 * sizeof(int));

     expr *var = new_variable(3, 1, 2, 6);
-    expr *linear_node = new_linear(var, A);
+    expr *linear_node = new_linear(var, A, NULL);
     double x[6] = {0, 0, 1, 2, 3, 0};
     linear_node->forward(linear_node, x);

tests/jacobian_tests/test_composite.h

Lines changed: 3 additions & 3 deletions
@@ -18,7 +18,7 @@ const char *test_jacobian_composite_log()
     memcpy(A->p, Ap, 3 * sizeof(int));

     expr *u = new_variable(3, 1, 2, 6);
-    expr *Au = new_linear(u, A);
+    expr *Au = new_linear(u, A, NULL);
     expr *log_node = new_log(Au);
     log_node->forward(log_node, u_vals);
     log_node->jacobian_init(log_node);
@@ -69,8 +69,8 @@ const char *test_jacobian_composite_log_add()

     expr *x = new_variable(3, 1, 2, 7);
     expr *y = new_variable(2, 1, 5, 7);
-    expr *Ax_expr = new_linear(x, A);
-    expr *By_expr = new_linear(y, B);
+    expr *Ax_expr = new_linear(x, A, NULL);
+    expr *By_expr = new_linear(y, B, NULL);
     expr *log_Ax = new_log(Ax_expr);
     expr *log_By = new_log(By_expr);
     expr *sum = new_add(log_Ax, log_By);
