|
| 1 | +"""Tests for linear_op with constant offset: A @ x + b. |
| 2 | +
|
| 3 | +Tests verify that linear_op correctly handles: |
| 4 | +1. Forward pass: y = A @ x + b |
| 5 | +2. Gradient computation via chain rule |
| 6 | +""" |
| 7 | + |
| 8 | +import numpy as np |
| 9 | +import dnlp_diff_engine as de |
| 10 | + |
| 11 | + |
def test_linear_op_with_offset_forward():
    """Forward pass of linear_op with a constant offset: y = A @ x + b.

    linear_op is an internal expression node, so it is wrapped in another
    expression (log here) to form a valid scalar objective.
    """
    dim = 2
    # Column vector of decision variables (dim x 1).
    var = de.make_variable(dim, 1, 0, dim)

    # CSR data for A = [[1, 1]] with offset b = [5]:
    # the affine map evaluates to x[0] + x[1] + 5.
    data = np.array([1.0, 1.0])
    cols = np.array([0, 1], dtype=np.int32)
    row_ptr = np.array([0, 2], dtype=np.int32)
    offset = np.array([5.0])

    affine = de.make_linear(var, data, cols, row_ptr, 1, 2, offset)
    # Wrap in log so the expression is a usable objective.
    objective = de.make_log(affine)

    prob = de.make_problem(objective, [])
    de.problem_init_derivatives(prob)

    # Evaluate at u = [2, 3]: log(2 + 3 + 5) = log(10).
    obj = de.problem_objective_forward(prob, np.array([2.0, 3.0]))
    assert np.isclose(obj, np.log(10.0)), f"Expected log(10), got {obj}"
| 41 | + |
| 42 | + |
def test_linear_op_with_offset_gradient():
    """Gradient of log(A @ x + b) via the engine's reverse pass."""
    dim = 2
    # Column vector of decision variables (dim x 1).
    var = de.make_variable(dim, 1, 0, dim)

    # CSR data for A = [[1, 1]], b = [5]: objective is log(x[0] + x[1] + 5).
    data = np.array([1.0, 1.0])
    cols = np.array([0, 1], dtype=np.int32)
    row_ptr = np.array([0, 2], dtype=np.int32)
    offset = np.array([5.0])

    affine = de.make_linear(var, data, cols, row_ptr, 1, 2, offset)
    objective = de.make_log(affine)

    prob = de.make_problem(objective, [])
    de.problem_init_derivatives(prob)

    # Forward pass at u = [2, 3]: log(2 + 3 + 5) = log(10).
    assert np.isclose(de.problem_objective_forward(prob, np.array([2.0, 3.0])), np.log(10.0))

    # d/dx[i] log(x[0] + x[1] + 5) = 1 / (x[0] + x[1] + 5) = 1/10 for both.
    grad = de.problem_gradient(prob)
    np.testing.assert_allclose(grad, [0.1, 0.1], rtol=1e-5)
| 72 | + |
| 73 | + |
def test_linear_op_without_offset():
    """linear_op with b=None still computes a plain A @ x (no offset)."""
    dim = 2
    # Column vector of decision variables (dim x 1).
    var = de.make_variable(dim, 1, 0, dim)

    # CSR data for A = [[2, 3]].
    data = np.array([2.0, 3.0])
    cols = np.array([0, 1], dtype=np.int32)
    row_ptr = np.array([0, 2], dtype=np.int32)

    # Pass None for the offset explicitly: y = A @ x only.
    affine = de.make_linear(var, data, cols, row_ptr, 1, 2, None)
    objective = de.make_log(affine)

    prob = de.make_problem(objective, [])
    de.problem_init_derivatives(prob)

    # At u = [1, 1]: log(2*1 + 3*1) = log(5).
    obj = de.problem_objective_forward(prob, np.array([1.0, 1.0]))
    assert np.isclose(obj, np.log(5.0))

    # d/dx log(2x + 3y) = [2, 3] / (2x + 3y) = [2/5, 3/5] at u = [1, 1].
    grad = de.problem_gradient(prob)
    np.testing.assert_allclose(grad, [0.4, 0.6], rtol=1e-5)
| 98 | + |
| 99 | + |
def test_linear_op_with_offset_hessian():
    """Test Hessian of log(A @ x + b).

    Objective: log(x[0] + x[1] + 5). Its Hessian is the rank-one matrix
    with every entry equal to -1 / (x[0] + x[1] + 5)**2.
    """
    n_vars = 2
    x = de.make_variable(n_vars, 1, 0, n_vars)  # Column vector (n_vars x 1)

    # CSR data for A = [[1, 1]], b = [5]: log(x[0] + x[1] + 5).
    A_data = np.array([1.0, 1.0])
    A_indices = np.array([0, 1], dtype=np.int32)
    A_indptr = np.array([0, 2], dtype=np.int32)
    b = np.array([5.0])

    linear_with_offset = de.make_linear(x, A_data, A_indices, A_indptr, 1, 2, b)
    log_expr = de.make_log(linear_with_offset)

    prob = de.make_problem(log_expr, [])
    de.problem_init_derivatives(prob)

    # Forward pass at u = [2, 3] to populate the tape before the Hessian call.
    u = np.array([2.0, 3.0])
    de.problem_objective_forward(prob, u)

    # Hessian: d²/dx² log(x+y+5) = -1/(x+y+5)² = -1/100 for all entries.
    # Only the values are checked here, so the sparsity arrays are discarded.
    obj_factor = 1.0
    hess_data, _, _, _ = de.problem_hessian(prob, obj_factor, np.array([]))

    expected_h = -1.0 / 100.0

    # A symmetric 2x2 Hessian stores at least 3 unique entries.
    assert len(hess_data) >= 3, f"Expected at least 3 Hessian entries, got {len(hess_data)}"

    # Every stored entry must equal -1/100; compare in one vectorized call
    # instead of a per-element Python loop.
    np.testing.assert_allclose(hess_data, expected_h, rtol=1e-5)
| 136 | + |
| 137 | + |
def test_linear_op_vector_with_offset():
    """linear_op producing a vector output: y = A @ x + b with y in R^2."""
    dim = 3
    # Column vector of decision variables (dim x 1).
    var = de.make_variable(dim, 1, 0, dim)

    # A (2x3) and b (2-vector) in CSR form:
    #   A = [[1, 2, 0],
    #        [0, 1, 3]],  b = [1, 2]
    # so y[0] = x[0] + 2*x[1] + 1 and y[1] = x[1] + 3*x[2] + 2.
    data = np.array([1.0, 2.0, 1.0, 3.0])
    cols = np.array([0, 1, 1, 2], dtype=np.int32)
    row_ptr = np.array([0, 2, 4], dtype=np.int32)
    offset = np.array([1.0, 2.0])

    affine = de.make_linear(var, data, cols, row_ptr, 2, 3, offset)

    # Objective: sum(log(A @ x + b)).
    summed = de.make_sum(de.make_log(affine), -1)

    prob = de.make_problem(summed, [])
    de.problem_init_derivatives(prob)

    # At u = [1, 1, 1]: y = [4, 6], so objective = log(4) + log(6).
    obj = de.problem_objective_forward(prob, np.array([1.0, 1.0, 1.0]))
    np.testing.assert_allclose(obj, np.log(4.0) + np.log(6.0), rtol=1e-5)

    # Gradient via chain rule (columns of A weighted by 1/y):
    #   d/dx[0] = 1/4
    #   d/dx[1] = 2/4 + 1/6
    #   d/dx[2] = 3/6
    grad = de.problem_gradient(prob)
    np.testing.assert_allclose(
        grad, np.array([1.0 / 4.0, 2.0 / 4.0 + 1.0 / 6.0, 3.0 / 6.0]), rtol=1e-5
    )
| 179 | + |
| 180 | + |
def test_linear_op_sparse_matrix_with_offset():
    """linear_op with a genuinely sparse matrix plus offset."""
    dim = 5
    # Column vector of decision variables (dim x 1).
    var = de.make_variable(dim, 1, 0, dim)

    # A (3x5, sparse) and b in CSR form:
    #   A = [[1, 0, 2, 0, 0],
    #        [0, 3, 0, 4, 0],
    #        [0, 0, 0, 0, 5]],  b = [10, 20, 30]
    data = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    cols = np.array([0, 2, 1, 3, 4], dtype=np.int32)
    row_ptr = np.array([0, 2, 4, 5], dtype=np.int32)
    offset = np.array([10.0, 20.0, 30.0])

    affine = de.make_linear(var, data, cols, row_ptr, 3, 5, offset)
    summed = de.make_sum(de.make_log(affine), -1)

    prob = de.make_problem(summed, [])
    de.problem_init_derivatives(prob)

    # At u = ones(5):
    #   y[0] = 1 + 2 + 10 = 13
    #   y[1] = 3 + 4 + 20 = 27
    #   y[2] = 5 + 30     = 35
    obj = de.problem_objective_forward(prob, np.ones(5))
    np.testing.assert_allclose(obj, np.log(13.0) + np.log(27.0) + np.log(35.0), rtol=1e-5)

    # Gradient: each A entry divided by its row's y value, gathered per column:
    #   [1/13, 3/27, 2/13, 4/27, 5/35]
    grad = de.problem_gradient(prob)
    np.testing.assert_allclose(
        grad,
        np.array([1.0 / 13.0, 3.0 / 27.0, 2.0 / 13.0, 4.0 / 27.0, 5.0 / 35.0]),
        rtol=1e-5,
    )
0 commit comments