\chapter{Matrix decomposition}
\section{Eigendecomposition}
\marginnote{Eigendecomposition}
Let $\matr{A} \in \mathbb{R}^{n \times n}$.
If the eigenvectors of $\matr{A}$ form a basis of $\mathbb{R}^n$,
then $\matr{A}$ can be decomposed into:
\[ \matr{A} = \matr{P}\matr{D}\matr{P}^{-1} \]
where $\matr{P} \in \mathbb{R}^{n \times n}$ contains the eigenvectors of $\matr{A}$ as its columns and
$\matr{D}$ is a diagonal matrix whose diagonal contains the eigenvalues of $\matr{A}$.
Note that a symmetric matrix can always be decomposed in this way (\Cref{th:spectral_theorem}).
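As a quick numerical check (a minimal sketch using NumPy; the example matrix is an arbitrary choice), we can verify the factorization $\matr{A} = \matr{P}\matr{D}\matr{P}^{-1}$:
\begin{verbatim}
import numpy as np

# Arbitrary symmetric example matrix: by the spectral theorem its
# eigenvectors form a basis, so the eigendecomposition exists.
A = np.array([[4.0, 1.0],
              [1.0, 3.0]])

eigvals, P = np.linalg.eig(A)    # columns of P are the eigenvectors
D = np.diag(eigvals)             # eigenvalues on the diagonal

print(np.allclose(A, P @ D @ np.linalg.inv(P)))   # True: A = P D P^{-1}
\end{verbatim}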
\section{Singular value decomposition}
\marginnote{Singular value decomposition}
Let $\matr{A} \in \mathbb{R}^{m \times n}$ be a matrix of rank $r \in [0, \min\{m, n\}]$.
The singular value decomposition (SVD) of $\matr{A}$ always exists and has the form:
\[
\matr{A} = \matr{U}\matr{\Sigma}\matr{V}^T
\]
\[
=
\begin{pmatrix}
\begin{pmatrix} \\ \vec{u}_1 \\ \\ \end{pmatrix} &
\dots &
\begin{pmatrix} \\ \vec{u}_m \\ \\ \end{pmatrix}
\end{pmatrix}
\begin{pmatrix}
\sigma_1 & 0 & 0 \\
0 & \ddots & 0 \\
0 & 0 & \sigma_{\min\{m, n\}} \\
\end{pmatrix}
\begin{pmatrix}
\begin{pmatrix} & \vec{v}_1^T & \end{pmatrix} \\
\vdots \\
\begin{pmatrix} & \vec{v}_n^T & \end{pmatrix} \\
\end{pmatrix}
\]
where:
\begin{itemize}
\item
$\matr{U} \in \mathbb{R}^{m \times m}$ is an orthogonal matrix whose columns $\vec{u}_i$ are called left-singular vectors.
\item
$\matr{V} \in \mathbb{R}^{n \times n}$ is an orthogonal matrix whose columns $\vec{v}_i$ are called right-singular vectors.
\item
$\matr{\Sigma} \in \mathbb{R}^{m \times n}$ is a matrix with $\matr{\Sigma}_{i,j} = 0$ for $i \neq j$ (i.e. it would be diagonal if it were square) and
the singular values $\sigma_i$, $i = 1 \dots \min\{m, n\}$, on the diagonal.
By convention $\sigma_1 \geq \sigma_2 \geq \dots \geq \sigma_r \geq 0$.
Note that singular values $\sigma_j = 0$ for $(r + 1) \leq j \leq \min\{m, n\}$
(i.e. singular values at indexes after $\text{rank}(\matr{A})$ are always 0).
\end{itemize}
\marginnote{Singular value equation}
We can also represent SVD as a \textbf{singular value equation}, which resembles the eigenvalue equation:
\[ \matr{A}\vec{v}_i = \sigma_i\vec{u}_i \text{ for } i = 1, \dots, r \]
This is derived from:
\[
\matr{A} = \matr{U}\matr{\Sigma}\matr{V}^T
\iff \matr{A}\matr{V} = \matr{U}\matr{\Sigma}\matr{V}^T\matr{V}
\iff \matr{A}\matr{V} = \matr{U}\matr{\Sigma}
\]
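As a minimal sketch (NumPy, on an arbitrary random matrix), we can verify both the factorization and the singular value equation:
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((4, 3))    # arbitrary example matrix (m = 4, n = 3)

U, s, Vt = np.linalg.svd(A)        # s: singular values (descending), Vt = V^T
V = Vt.T

print(np.allclose(A, U[:, :3] @ np.diag(s) @ Vt))    # A = U Sigma V^T
for i in range(len(s)):                              # A v_i = sigma_i u_i
    print(np.allclose(A @ V[:, i], s[i] * U[:, i]))
\end{verbatim}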
\subsection{Singular values and eigenvalues}
\marginnote{Eigendecomposition of $\matr{A}^T\matr{A}$ and $\matr{A}\matr{A}^T$}
Given $\matr{A} \in \mathbb{R}^{m \times n}$, we can obtain the eigenvalues and eigenvectors
of $\matr{A}^T\matr{A}$ and $\matr{A}\matr{A}^T$ through SVD.
For $\matr{A}^T\matr{A}$, we can compute:
\[
\begin{split}
\matr{A}^T\matr{A} & = (\matr{U}\matr{\Sigma}\matr{V}^T)^T(\matr{U}\matr{\Sigma}\matr{V}^T) \text{ using } (\matr{A}\matr{B})^T = \matr{B}^T\matr{A}^T \\
& = (\matr{V}\matr{\Sigma}^T\matr{U}^T)(\matr{U}\matr{\Sigma}\matr{V}^T) \\
& = \matr{V}\matr{\Sigma}^T\matr{\Sigma}\matr{V}^T \\
& = \matr{V}\matr{\Sigma}^2\matr{V}^T
\end{split}
\]
As $\matr{V}$ is orthogonal ($\matr{V}^T = \matr{V}^{-1}$), we can apply the eigendecomposition theorem:
\begin{itemize}
\item The diagonal of $\matr{\Sigma}^2$ (i.e. the square of the singular values of $A$) are the eigenvalues of $\matr{A}^T\matr{A}$.
\item The columns of $\matr{V}$ (right-singular vectors) are the eigenvectors of $\matr{A}^T\matr{A}$.
\end{itemize}
The same process holds for $\matr{A}\matr{A}^T$. In this case, the columns of $\matr{U}$ (left-singular vectors) are the eigenvectors.
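A short numerical check (NumPy, arbitrary random matrix) that the eigenvalues of $\matr{A}^T\matr{A}$ are the squared singular values of $\matr{A}$:
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((5, 3))             # arbitrary example matrix

s = np.linalg.svd(A, compute_uv=False)      # singular values, descending
eigvals = np.linalg.eigvalsh(A.T @ A)       # eigenvalues of A^T A, ascending

# Eigenvalues of A^T A coincide with the squared singular values of A
print(np.allclose(np.sort(eigvals)[::-1], s**2))   # True
\end{verbatim}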
\subsection{Singular values and 2-norm}
Given a symmetric matrix $\matr{A} \in \mathbb{R}^{n \times n}$,
we have that $\matr{A}^T\matr{A} = \matr{A}^2 = \matr{A}\matr{A}^T$ (as $\matr{A}^T = \matr{A}$).
The eigenvalues of $\matr{A}^2$ are $\lambda_1^2, \dots,\lambda_n^2$, where $\lambda_i$ are eigenvalues of $\matr{A}$.
Equivalently, the eigenvalues of $\matr{A}^2$ are the squared singular values of $\matr{A}$: $\lambda_i^2 = \sigma_i^2$.
Moreover, the eigenvalues of $\matr{A}^{-1}$ are $\frac{1}{\lambda_1}, \dots, \frac{1}{\lambda_n}$.
\marginnote{2-norm using SVD}
We can compute the 2-norm as:
\[ \Vert \matr{A} \Vert_2 = \sqrt{\rho(\matr{A}^T\matr{A})} = \sqrt{\rho(\matr{A}^2)} = \sqrt{\max\{\sigma_1^2, \dots, \sigma_r^2\}} = \sigma_1 \]
\[
\Vert \matr{A}^{-1} \Vert_2 = \sqrt{\rho((\matr{A}^{-1})^T(\matr{A}^{-1}))} =
\sqrt{\rho((\matr{A}\matr{A}^T)^{-1})} = \sqrt{\rho((\matr{A}^2)^{-1})} =
\sqrt{\max \left\{\frac{1}{\sigma_1^2}, \dots, \frac{1}{\sigma_r^2} \right\}} = \frac{1}{\sigma_r}
\]
Furthermore, we can compute the condition number of $\matr{A}$ as:
\[ K(\matr{A}) = \Vert \matr{A} \Vert_2 \cdot \Vert \matr{A}^{-1} \Vert_2 = \sigma_1 \cdot \frac{1}{\sigma_r} \]
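The following sketch (NumPy, with an arbitrary symmetric invertible matrix) checks these identities numerically:
\begin{verbatim}
import numpy as np

# Arbitrary symmetric, invertible example matrix
A = np.array([[4.0, 1.0, 0.0],
              [1.0, 3.0, 1.0],
              [0.0, 1.0, 2.0]])

s = np.linalg.svd(A, compute_uv=False)   # sigma_1 >= ... >= sigma_n > 0

print(np.isclose(np.linalg.norm(A, 2), s[0]))                        # ||A||_2 = sigma_1
print(np.isclose(np.linalg.norm(np.linalg.inv(A), 2), 1.0 / s[-1]))  # ||A^-1||_2 = 1/sigma_n
print(np.isclose(np.linalg.cond(A, 2), s[0] / s[-1]))                # K(A) = sigma_1 / sigma_n
\end{verbatim}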
\subsection{Application: Matrix approximation}
Given a matrix $\matr{A} \in \mathbb{R}^{m \times n}$ and its SVD decomposition $\matr{A} = \matr{U}\matr{\Sigma}\matr{V}^T$,
we can construct a rank-1 matrix (dyad) $\matr{A}_i \in \mathbb{R}^{m \times n}$ as: \marginnote{Dyad}
\[ \matr{A}_i = \vec{u}_i \vec{v}_i^T \]
where $\vec{u}_i \in \mathbb{R}^m$ is the $i$-th column of $\matr{U}$ and
$\vec{v}_i \in \mathbb{R}^n$ is the $i$-th column of $\matr{V}$.
Then, we can write $\matr{A}$ as a sum of dyads:
\[ \matr{A} = \sum_{i=1}^{r} \sigma_i \vec{u}_i \vec{v}_i^T = \sum_{i=1}^{r} \sigma_i \matr{A}_i \]
\marginnote{Rank-$k$ approximation}
By considering only the first $k < r$ singular values, we can obtain a rank-$k$ approximation of $\matr{A}$:
\[ \hat{\matr{A}}(k) = \sum_{i=1}^{k} \sigma_i \vec{u}_i \vec{v}_i^T = \sum_{i=1}^{k} \sigma_i \matr{A}_i \]
\begin{theorem}[Eckart-Young]
Given $\matr{A} \in \mathbb{R}^{m \times n}$ of rank $r$.
For any $k \leq r$ (the interesting case is $k < r$), the rank-$k$ approximation satisfies:
\[
\hat{\matr{A}}(k) = \arg \min_{\matr{B} \in \mathbb{R}^{m \times n}, \text{rank}(\matr{B}) = k} \Vert \matr{A} - \matr{B} \Vert_2
\]
\end{theorem}
In other words, among all matrices of rank $k$, $\hat{\matr{A}}(k)$ is the closest one to $\matr{A}$ in the 2-norm.
Moreover, the error of the rank-$k$ approximation is:
\[
\Vert \matr{A} - \hat{\matr{A}}(k) \Vert_2 =
\left\Vert \sum_{i=1}^{r} \sigma_i \matr{A}_i - \sum_{j=1}^{k} \sigma_j \matr{A}_j \right\Vert_2 =
\left\Vert \sum_{i=k+1}^{r} \sigma_i \matr{A}_i \right\Vert_2 =
\sigma_{k+1}
\]
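A minimal sketch (NumPy, with an arbitrary random matrix and an arbitrary choice of $k$) of the rank-$k$ approximation and its 2-norm error:
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((6, 4))                    # arbitrary example matrix
k = 2                                              # arbitrary target rank

U, s, Vt = np.linalg.svd(A, full_matrices=False)

# Keep only the first k dyads: A_hat(k) = sum_{i=1}^{k} sigma_i u_i v_i^T
A_k = U[:, :k] @ np.diag(s[:k]) @ Vt[:k, :]

# The 2-norm error is the first discarded singular value sigma_{k+1}
print(np.isclose(np.linalg.norm(A - A_k, 2), s[k]))   # True
\end{verbatim}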
\subsubsection{Image compression}
Each dyad requires storing $1 + m + n$ numbers ($1$ for $\sigma_i$, $m$ for $\vec{u}_i$ and $n$ for $\vec{v}_i$).
A rank-$k$ approximation therefore requires storing $k(1 + m + n)$ numbers.
Therefore, the compression factor is given by: \marginnote{Compression factor}
\[
c_k = 1 - \frac{k(1 + m + n)}{mn}
\]
\begin{figure}[h]
\centering
\includegraphics[width=0.60\textwidth]{img/_rank_k_approx.pdf}
\caption{Approximation of an image}
\end{figure}
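For instance, assuming a hypothetical $512 \times 512$ grayscale image and $k = 20$ (numbers chosen only for illustration):
\[ c_{20} = 1 - \frac{20(1 + 512 + 512)}{512 \cdot 512} \approx 0.92 \]
i.e. roughly $92\%$ of the storage is saved.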
\subsection{Application: Linear least squares problem} \label{sec:lls}
Given a least squares problem:
\[
\tilde{\vec{x}} = \arg\min_{\vec{x} \in \mathbb{R}^n} \Vert \matr{A}\vec{x} - \vec{b} \Vert_2^2
\]
When $\text{rank}(\matr{A}) < n$, the problem admits infinitely many solutions.
Among the set of solutions $S$, we are interested in the one with minimum norm:
\[ \vec{x}^* = \arg\min_{\vec{x} \in S} \Vert \vec{x} \Vert_2 \]
This problem can be solved using SVD:
\[ \vec{x}^* = \sum_{i=1}^{\text{rank}(\matr{A})} \frac{\vec{u}_i^T\vec{b}}{\sigma_i}\vec{v}_i \]
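As a minimal sketch (NumPy, with an artificially rank-deficient random matrix), the minimum-norm solution computed from the SVD matches the one returned by \texttt{numpy.linalg.lstsq}:
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((6, 4))
A[:, 3] = A[:, 0] + A[:, 1]          # force rank(A) = 3 < n = 4
b = rng.standard_normal(6)

U, s, Vt = np.linalg.svd(A, full_matrices=False)
r = int(np.sum(s > 1e-10))           # numerical rank

# Minimum-norm solution: x* = sum_{i=1}^{r} (u_i^T b / sigma_i) v_i
x_star = sum((U[:, i] @ b) / s[i] * Vt[i, :] for i in range(r))

# numpy.linalg.lstsq also returns the minimum-norm least squares solution
print(np.allclose(x_star, np.linalg.lstsq(A, b, rcond=None)[0]))   # True
\end{verbatim}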
\subsection{Application: Polynomial interpolation}
\marginnote{Polynomial interpolation}
Given a set of $m$ data points $(x_i, y_i)$, $i = 1, \dots, m$,
we want to find a polynomial of degree $n$ (with $m > n$) that approximates them.
In other words, we want to find a function:
\[ f(x) = c_0 + c_1 x + c_2 x^2 + \dots + c_n x^n \]
that minimizes the norm of the residual vector $\vec{r} = (r_1, \dots, r_m)$,
where $r_i = y_i - f(x_i)$.
We can formulate this as a linear system:
\[
\vec{r} = \vec{y} - \matr{A}\vec{c} =
\begin{pmatrix}
y_1 \\
\vdots \\
y_m
\end{pmatrix}
-
\begin{pmatrix}
1 & x_1 & x_1^2 & \dots & x_1^n \\
\vdots & \vdots & \vdots & \ddots & \vdots \\
1 & x_m & x_m^2 & \dots & x_m^n
\end{pmatrix}
\begin{pmatrix}
c_0 \\
\vdots \\
c_n
\end{pmatrix}
\]
that can be solved as a linear least squares problem:
\[ \min_{\vec{c} \in \mathbb{R}^{n+1}} \Vert \vec{y} - \matr{A}\vec{c} \Vert_2^2 \]
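A minimal sketch (NumPy, with hypothetical noisy samples of a line, i.e. a degree $n = 1$ fit as in the figure below; the data and noise level are arbitrary choices):
\begin{verbatim}
import numpy as np

# Hypothetical noisy samples of the line y = 1 + 2x
rng = np.random.default_rng(0)
x = np.linspace(0.0, 1.0, 20)
y = 1.0 + 2.0 * x + 0.1 * rng.standard_normal(x.size)

n = 1
A = np.vander(x, n + 1, increasing=True)    # rows: 1, x_i, ..., x_i^n
c, *_ = np.linalg.lstsq(A, y, rcond=None)   # least squares coefficients c_0, ..., c_n

print(c)   # roughly [1.0, 2.0]: c_0 close to 1, c_1 close to 2
\end{verbatim}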
\begin{figure}[h]
\centering
\includegraphics[width=0.40\textwidth]{img/linear_regression.png}
\caption{Interpolation using a polynomial of degree 1}
\end{figure}
\section{Eigendecomposition vs SVD}
\begin{center}
\begin{tabular}{m{16em} | m{16em}}
\hline
\multicolumn{1}{c|}{\textbf{Eigendecomposition}} & \multicolumn{1}{c}{\textbf{SVD}} \\
\multicolumn{1}{c|}{$\matr{A} = \matr{P}\matr{D}\matr{P}^{-1}$} & \multicolumn{1}{c}{$\matr{A}=\matr{U}\matr{\Sigma}\matr{V}^T$} \\
\hline
Only defined for square matrices $\matr{A} \in \mathbb{R}^{n \times n}$ with eigenvectors that form a basis of $\mathbb{R}^n$
& Always exists \\
\hline
$\matr{P}$ is not necessarily orthogonal & $\matr{U}$ and $\matr{V}$ are orthogonal \\
\hline
The elements on the diagonal of $\matr{D}$ may be in $\mathbb{C}$
& The elements on the diagonal of $\matr{\Sigma}$ are all non-negative reals \\
\hline
\multicolumn{2}{c}{For symmetric positive semi-definite matrices, eigendecomposition and SVD coincide} \\
\hline
\end{tabular}
\end{center}