|
| 1 | +\documentclass[aspectratio=169]{beamer} |
| 2 | + |
| 3 | +\usetheme{Madrid} |
| 4 | +\usecolortheme{default} |
| 5 | + |
| 6 | +\usepackage{amsmath,amssymb,amsfonts,bm} |
| 7 | +\usepackage{mathtools} |
| 8 | + |
| 9 | +\title{The Frobenius Norm and the Trace Identity} |
| 10 | +\author{Morten Hjorth-Jensen} |
| 11 | +\date{Spring 2026} |
| 12 | + |
| 13 | +\begin{document} |
| 14 | + |
| 15 | +\frame{\titlepage} |
| 16 | + |
| 17 | +\begin{frame}{Goal} |
| 18 | +For a real matrix \(A\in\mathbb{R}^{m\times n}\), we want to show that |
| 19 | +\[ |
| 20 | +\|A\|_F^2=\operatorname{Tr}(A^T A), |
| 21 | +\qquad |
| 22 | +\text{equivalently} |
| 23 | +\qquad |
| 24 | +\|A\|_F=\sqrt{\operatorname{Tr}(A^T A)}. |
| 25 | +\] |
| 26 | + |
| 27 | +We will present two proofs: |
| 28 | +\begin{enumerate} |
| 29 | +\item a direct component-wise proof, |
| 30 | +\item a proof using vectorization and inner products. |
| 31 | +\end{enumerate} |
| 32 | +\end{frame} |
| 33 | + |
| 34 | +\begin{frame}{Definition of the Frobenius norm} |
| 35 | +Let |
| 36 | +\[ |
| 37 | +A=(a_{ij}) \in \mathbb{R}^{m\times n}. |
| 38 | +\] |
| 39 | + |
| 40 | +The Frobenius norm is defined by |
| 41 | +\[ |
| 42 | +\|A\|_F |
| 43 | += |
| 44 | +\sqrt{\sum_{i=1}^{m}\sum_{j=1}^{n} a_{ij}^2 }. |
| 45 | +\] |
| 46 | + |
| 47 | +Therefore, |
| 48 | +\[ |
| 49 | +\|A\|_F^2 |
| 50 | += |
| 51 | +\sum_{i=1}^{m}\sum_{j=1}^{n} a_{ij}^2. |
| 52 | +\] |
| 53 | + |
| 54 | +So the Frobenius norm is simply the Euclidean norm of the matrix entries viewed as one long vector. |
| 55 | +\end{frame} |
| 56 | + |
| 57 | +\begin{frame}{Step 1: Entries of \(A^T A\)} |
| 58 | +The matrix \(A^T A\) is an \(n\times n\) matrix. |
| 59 | + |
| 60 | +Its \((j,k)\)-entry is |
| 61 | +\[ |
| 62 | +(A^T A)_{jk} |
| 63 | += |
| 64 | +\sum_{i=1}^{m} a_{ij}a_{ik}. |
| 65 | +\] |
| 66 | + |
| 67 | +In particular, the diagonal entries are |
| 68 | +\[ |
| 69 | +(A^T A)_{jj} |
| 70 | += |
| 71 | +\sum_{i=1}^{m} a_{ij}^2. |
| 72 | +\] |
| 73 | +\end{frame} |
| 74 | + |
| 75 | +\begin{frame}{Step 2: Take the trace} |
| 76 | +By definition, the trace is the sum of the diagonal entries: |
| 77 | +\[ |
| 78 | +\operatorname{Tr}(A^T A) |
| 79 | += |
| 80 | +\sum_{j=1}^{n}(A^T A)_{jj}. |
| 81 | +\] |
| 82 | + |
| 83 | +Using the previous expression, |
| 84 | +\[ |
| 85 | +\operatorname{Tr}(A^T A) |
| 86 | += |
| 87 | +\sum_{j=1}^{n}\sum_{i=1}^{m} a_{ij}^2. |
| 88 | +\] |
| 89 | + |
| 90 | +Reordering the sums gives |
| 91 | +\[ |
| 92 | +\operatorname{Tr}(A^T A) |
| 93 | += |
| 94 | +\sum_{i=1}^{m}\sum_{j=1}^{n} a_{ij}^2. |
| 95 | +\] |
| 96 | +\end{frame} |
| 97 | + |
| 98 | +\begin{frame}{Conclusion of the direct proof} |
| 99 | +But |
| 100 | +\[ |
| 101 | +\sum_{i=1}^{m}\sum_{j=1}^{n} a_{ij}^2 |
| 102 | += |
| 103 | +\|A\|_F^2. |
| 104 | +\] |
| 105 | + |
| 106 | +Hence |
| 107 | +\[ |
| 108 | +\boxed{\|A\|_F^2=\operatorname{Tr}(A^T A)}. |
| 109 | +\] |
| 110 | + |
| 111 | +Since both sides are nonnegative, taking square roots yields |
| 112 | +\[ |
| 113 | +\boxed{\|A\|_F=\sqrt{\operatorname{Tr}(A^T A)}}. |
| 114 | +\] |
| 115 | +\end{frame} |
| 116 | + |
| 117 | +\begin{frame}{Frobenius inner product} |
| 118 | +Define the Frobenius inner product of two matrices \(A,B\in\mathbb{R}^{m\times n}\) by |
| 119 | +\[ |
| 120 | +\langle A,B\rangle_F |
| 121 | += |
| 122 | +\operatorname{Tr}(A^T B). |
| 123 | +\] |
| 124 | + |
| 125 | +In components, |
| 126 | +\[ |
| 127 | +\operatorname{Tr}(A^T B) |
| 128 | += |
| 129 | +\sum_{i=1}^{m}\sum_{j=1}^{n} a_{ij}b_{ij}. |
| 130 | +\] |
| 131 | + |
| 132 | +So this is exactly the standard Euclidean inner product of the entries of the two matrices. |
| 133 | +\end{frame} |
| 134 | + |
| 135 | +\begin{frame}{The norm induced by the Frobenius inner product} |
| 136 | +Every inner product induces a norm: |
| 137 | +\[ |
| 138 | +\|A\|=\sqrt{\langle A,A\rangle}. |
| 139 | +\] |
| 140 | + |
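| 141 | +For the Frobenius inner product, \(\langle A,A\rangle_F=\sum_{i,j}a_{ij}^2\) by the component formula, so the induced norm is exactly the Frobenius norm: |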
| 142 | +\[ |
| 143 | +\|A\|_F |
| 144 | += |
| 145 | +\sqrt{\langle A,A\rangle_F} |
| 146 | += |
| 147 | +\sqrt{\operatorname{Tr}(A^T A)}. |
| 148 | +\] |
| 149 | + |
| 150 | +Thus the identity |
| 151 | +\[ |
| 152 | +\|A\|_F^2=\operatorname{Tr}(A^T A) |
| 153 | +\] |
| 154 | +can be viewed as a direct consequence of the fact that the Frobenius norm is the norm induced by the Frobenius inner product. |
| 155 | +\end{frame} |
| 156 | + |
| 157 | +\begin{frame}{Vectorization of a matrix} |
| 158 | +Define the vectorization map |
| 159 | +\[ |
| 160 | +\operatorname{vec}\colon \mathbb{R}^{m\times n}\to \mathbb{R}^{mn} |
| 161 | +\] |
| 162 | +by stacking the columns of \(A\) into one long vector: |
| 163 | +\[ |
| 164 | +\operatorname{vec}(A) |
| 165 | += |
| 166 | +\begin{pmatrix} |
| 167 | +a_{11}\\ |
| 168 | +a_{21}\\ |
| 169 | +\vdots\\ |
| 170 | +a_{m1}\\ |
| 171 | +a_{12}\\ |
| 172 | +a_{22}\\ |
| 173 | +\vdots\\ |
| 174 | +a_{mn} |
| 175 | +\end{pmatrix}. |
| 176 | +\] |
| 177 | + |
| 178 | +The precise ordering is not important for the norm, as long as every matrix entry appears exactly once. |
| 179 | +\end{frame} |
| 180 | + |
| 181 | +\begin{frame}{Proof using vectorization} |
| 182 | +The Euclidean norm of \(\operatorname{vec}(A)\) is |
| 183 | +\[ |
| 184 | +\|\operatorname{vec}(A)\|_2^2 |
| 185 | += |
| 186 | +\sum_{i=1}^{m}\sum_{j=1}^{n} a_{ij}^2. |
| 187 | +\] |
| 188 | + |
| 189 | +But by definition, this is exactly the Frobenius norm squared: |
| 190 | +\[ |
| 191 | +\|\operatorname{vec}(A)\|_2^2=\|A\|_F^2. |
| 192 | +\] |
| 193 | + |
| 194 | +On the other hand, the Euclidean inner product of \(\operatorname{vec}(A)\) with itself is |
| 195 | +\[ |
| 196 | +\operatorname{vec}(A)^T\operatorname{vec}(A). |
| 197 | +\] |
| 198 | + |
| 199 | +So |
| 200 | +\[ |
| 201 | +\|A\|_F^2=\operatorname{vec}(A)^T\operatorname{vec}(A). |
| 202 | +\] |
| 203 | +\end{frame} |
| 204 | + |
| 205 | +\begin{frame}{Vectorization and the trace} |
| 206 | +A standard identity (verified on the next slide) is |
| 207 | +\[ |
| 208 | +\operatorname{vec}(A)^T\operatorname{vec}(B)=\operatorname{Tr}(A^T B). |
| 209 | +\] |
| 210 | + |
| 211 | +Setting \(B=A\), we get |
| 212 | +\[ |
| 213 | +\operatorname{vec}(A)^T\operatorname{vec}(A)=\operatorname{Tr}(A^T A). |
| 214 | +\] |
| 215 | + |
| 216 | +Since |
| 217 | +\[ |
| 218 | +\|A\|_F^2=\operatorname{vec}(A)^T\operatorname{vec}(A), |
| 219 | +\] |
| 220 | +it follows immediately that |
| 221 | +\[ |
| 222 | +\boxed{\|A\|_F^2=\operatorname{Tr}(A^T A)}. |
| 223 | +\] |
| 224 | +\end{frame} |
| 225 | + |
| 226 | +\begin{frame}{Why the vectorization identity is true} |
| 227 | +Let \(A=(a_{ij})\) and \(B=(b_{ij})\). Then |
| 228 | +\[ |
| 229 | +\operatorname{vec}(A)^T\operatorname{vec}(B) |
| 230 | += |
| 231 | +\sum_{i=1}^{m}\sum_{j=1}^{n} a_{ij}b_{ij}. |
| 232 | +\] |
| 233 | + |
| 234 | +But from the definition of the trace, |
| 235 | +\[ |
| 236 | +\operatorname{Tr}(A^T B) |
| 237 | += |
| 238 | +\sum_{j=1}^{n}(A^T B)_{jj} |
| 239 | += |
| 240 | +\sum_{j=1}^{n}\sum_{i=1}^{m} a_{ij}b_{ij}. |
| 241 | +\] |
| 242 | + |
| 243 | +Hence |
| 244 | +\[ |
| 245 | +\operatorname{vec}(A)^T\operatorname{vec}(B)=\operatorname{Tr}(A^T B). |
| 246 | +\] |
| 247 | + |
| 248 | +So vectorization turns the Frobenius inner product into the standard Euclidean inner product in \(\mathbb{R}^{mn}\). |
| 249 | +\end{frame} |
| 250 | + |
| 251 | +\begin{frame}{Geometric interpretation} |
| 252 | +Equipped with the Frobenius inner product, the matrix space \(\mathbb{R}^{m\times n}\) is itself a Euclidean vector space of dimension \(mn\). |
| 253 | + |
| 254 | +Under vectorization, |
| 255 | +\[ |
| 256 | +A \longmapsto \operatorname{vec}(A), |
| 257 | +\] |
| 258 | +the Frobenius norm becomes the ordinary Euclidean norm: |
| 259 | +\[ |
| 260 | +\|A\|_F=\|\operatorname{vec}(A)\|_2. |
| 261 | +\] |
| 262 | + |
| 263 | +Thus |
| 264 | +\[ |
| 265 | +\operatorname{Tr}(A^T A) |
| 266 | +\] |
| 267 | +is nothing but the squared Euclidean length of the matrix when viewed as a vector of all its entries. |
| 268 | +\end{frame} |
| 269 | + |
| 270 | +\begin{frame}{Summary} |
| 271 | +We have shown that for any real matrix \(A\in\mathbb{R}^{m\times n}\), |
| 272 | +\[ |
| 273 | +\boxed{\|A\|_F^2=\operatorname{Tr}(A^T A)}. |
| 274 | +\] |
| 275 | + |
| 276 | +Two proofs were given: |
| 277 | +\begin{itemize} |
| 278 | +\item a direct component-wise proof using the diagonal entries of \(A^T A\), |
| 279 | +\item a conceptual proof using |
| 280 | +\[ |
| 281 | +\operatorname{vec}(A)^T\operatorname{vec}(B)=\operatorname{Tr}(A^T B). |
| 282 | +\] |
| 283 | +\end{itemize} |
| 284 | + |
| 285 | +Equivalently, |
| 286 | +\[ |
| 287 | +\boxed{\|A\|_F=\sqrt{\operatorname{Tr}(A^T A)}}. |
| 288 | +\] |
| 289 | +\end{frame} |
| 290 | + |
| 291 | +\end{document} |
0 commit comments