11#include "affine.h"
22#include "utils/int_double_pair.h"
3+ #include "utils/utils.h"
34#include <assert.h>
5+ #include <stdio.h>
46#include <stdlib.h>
57#include <string.h>
8+ #include <utils/iVec.h>
69
710static void forward (expr * node , const double * u )
811{
@@ -25,21 +28,39 @@ static void forward(expr *node, const double *u)
2528static void jacobian_init (expr * node )
2629{
2730 expr * x = node -> left ;
31+ assert (x -> d1 == x -> d2 );
2832
2933 /* initialize child's jacobian */
3034 x -> jacobian_init (x );
3135
32- /* count total nnz in the rows of the jacobian that should be summed */
36+ // ---------------------------------------------------------------
37+ // count total nnz and allocate matrix with sufficient space
38+ // ---------------------------------------------------------------
3339 const CSR_Matrix * A = x -> jacobian ;
3440 int total_nnz = 0 ;
3541 int row_spacing = x -> d1 + 1 ;
42+
3643 for (int row = 0 ; row < A -> m ; row += row_spacing )
3744 {
38- total_nnz += ( A -> p [row + 1 ] - A -> p [row ]) ;
45+ total_nnz += A -> p [row + 1 ] - A -> p [row ];
3946 }
4047
4148 node -> jacobian = new_csr_matrix (1 , node -> n_vars , total_nnz );
42- ((trace_expr * ) node )-> int_double_pairs = new_int_double_pair_array (total_nnz );
49+
50+ // ---------------------------------------------------------------
51+ // fill sparsity pattern and idx_map
52+ // ---------------------------------------------------------------
53+ trace_expr * tnode = (trace_expr * ) node ;
54+ node -> iwork = malloc (MAX (node -> jacobian -> n , total_nnz ) * sizeof (int ));
55+
56+ /* the idx_map array maps each nonzero entry j in the original matrix A (from the
57+ selected, evenly spaced rows) to the corresponding index in the output row
58+ matrix C. Specifically, for each nonzero entry j in A (from the selected
59+ rows), idx_map[j] gives the position in C->x where the value from A->x[j]
60+ should be accumulated. */
61+ tnode -> idx_map = malloc (x -> jacobian -> nnz * sizeof (int ));
62+ sum_spaced_rows_into_row_csr_fill_sparsity_and_idx_map (
63+ A , node -> jacobian , row_spacing , node -> iwork , tnode -> idx_map );
4364}
4465
4566static void eval_jacobian (expr * node )
@@ -51,8 +72,9 @@ static void eval_jacobian(expr *node)
5172 x -> eval_jacobian (x );
5273
5374 /* local jacobian */
54- sum_spaced_rows_into_row_csr (x -> jacobian , node -> jacobian ,
55- tnode -> int_double_pairs , 0 , x -> d1 + 1 );
75+ memset (node -> jacobian -> x , 0 , node -> jacobian -> nnz * sizeof (double ));
76+ idx_map_accumulator_with_spacing (x -> jacobian , tnode -> idx_map , node -> jacobian -> x ,
77+ x -> d1 + 1 );
5678}
5779
5880/* Placeholders for Hessian-related functions */
@@ -66,16 +88,12 @@ static void wsum_hess_init(expr *node)
6688 node -> wsum_hess = new_csr_matrix (node -> n_vars , node -> n_vars , x -> wsum_hess -> nnz );
6789 node -> dwork = (double * ) calloc (x -> size , sizeof (double ));
6890
69- /* TODO: here we could copy over sparsity pattern once we have checked
70- that all atoms fill their sparsity pattern in the init functions. Perhaps
71- we should only take sparsity pattern of rows that are summed? Not the rows
72- which will get zero weight in the hessian. That would be very cool.
73- But must eval_wsum_hess then also ignore contributions with zero weight? that
74- would be bad. */
75-
76- /* lets conclude that the hessian can be made more sophisticated */
77-
78- /* but perhaps let's keep it as simple as possible! */
91+ /* We copy over the sparsity pattern from the child. This also includes the
92+ contribution to wsum_hess of entries of the child that will always have
93+ zero weight in eval_wsum_hess. We do this for simplicity. But the Hessian
94+ can for sure be made more sophisticated. */
95+ memcpy (node -> wsum_hess -> p , x -> wsum_hess -> p , (x -> n_vars + 1 ) * sizeof (int ));
96+ memcpy (node -> wsum_hess -> i , x -> wsum_hess -> i , x -> wsum_hess -> nnz * sizeof (int ));
7997}
8098
8199static void eval_wsum_hess (expr * node , const double * w )
@@ -90,14 +108,7 @@ static void eval_wsum_hess(expr *node, const double *w)
90108
91109 x -> eval_wsum_hess (x , node -> dwork );
92110
93- /* TODO: here we only need to copy over values once we have filled the sparsity
94- * pattern in wsum_hess_init*/
95- node -> wsum_hess -> nnz = x -> wsum_hess -> nnz ;
96- memcpy (node -> wsum_hess -> p , x -> wsum_hess -> p , sizeof (int ) * (node -> n_vars + 1 ));
97- memcpy (node -> wsum_hess -> i , x -> wsum_hess -> i , sizeof (int ) * x -> wsum_hess -> nnz );
98111 memcpy (node -> wsum_hess -> x , x -> wsum_hess -> x , sizeof (double ) * x -> wsum_hess -> nnz );
99-
100- /* This might contain many many zeros actually! Hmm...*/
101112}
102113
103114static bool is_affine (const expr * node )
@@ -107,27 +118,21 @@ static bool is_affine(const expr *node)
107118
108119static void free_type_data (expr * node )
109120{
110- trace_expr * tnode = (trace_expr * ) node ;
111- free_int_double_pair_array (tnode -> int_double_pairs );
121+ if (node )
122+ {
123+ trace_expr * tnode = (trace_expr * ) node ;
124+ free (tnode -> idx_map );
125+ }
112126}
113127
114128expr * new_trace (expr * child )
115129{
116- /* Output is a single scalar */
117- int d1 = 1 ;
118-
119130 trace_expr * tnode = (trace_expr * ) calloc (1 , sizeof (trace_expr ));
120131 expr * node = & tnode -> base ;
121- init_expr (node , d1 , 1 , child -> n_vars , forward , jacobian_init , eval_jacobian ,
132+ init_expr (node , 1 , 1 , child -> n_vars , forward , jacobian_init , eval_jacobian ,
122133 is_affine , wsum_hess_init , eval_wsum_hess , free_type_data );
123134 node -> left = child ;
124135 expr_retain (child );
125136
126- /* Initialize type-specific fields */
127- tnode -> int_double_pairs = NULL ;
128-
129- // just for debugging, should be removed
130- strcpy (node -> name , "trace" );
131-
132137 return node ;
133138}
0 commit comments