\chapter{Matrix decomposition}
\section{Eigendecomposition}
\marginnote{Eigendecomposition}
Given a matrix $\matr{A} \in \mathbb{R}^{n \times n}$.
If the eigenvectors of $\matr{A}$ form a basis of $\mathbb{R}^n$,
then $\matr{A}$ can be decomposed into:
\[ \matr{A} = \matr{P}\matr{D}\matr{P}^{-1} \]
where $\matr{P} \in \mathbb{R}^{n \times n}$ contains the eigenvectors of $\matr{A}$ as its columns and
$\matr{D}$ is a diagonal matrix whose diagonal contains the eigenvalues of $\matr{A}$.
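As a small worked example (with an arbitrarily chosen matrix), take
$\matr{A} = \begin{pmatrix} 2 & 1 \\ 1 & 2 \end{pmatrix}$,
whose eigenvalues are $3$ and $1$ with eigenvectors $(1, 1)^T$ and $(1, -1)^T$:
\[
    \matr{A} =
    \underbrace{\begin{pmatrix} 1 & 1 \\ 1 & -1 \end{pmatrix}}_{\matr{P}}
    \underbrace{\begin{pmatrix} 3 & 0 \\ 0 & 1 \end{pmatrix}}_{\matr{D}}
    \underbrace{\frac{1}{2}\begin{pmatrix} 1 & 1 \\ 1 & -1 \end{pmatrix}}_{\matr{P}^{-1}}
\]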
\section{Singular value decomposition}
\marginnote{Singular value decomposition}
Given a matrix $\matr{A} \in \mathbb{R}^{m \times n}$ of rank $r \in [0, \min\{m, n\}]$.
The singular value decomposition (SVD) of $\matr{A}$ is always possible and has the form:
\[
    \matr{A} = \matr{U}\matr{\Sigma}\matr{V}^T
\]
\[
    =
    \begin{pmatrix}
        \begin{pmatrix} \\ \vec{u}_1 \\ \\ \end{pmatrix} &
        \dots &
        \begin{pmatrix} \\ \vec{u}_m \\ \\ \end{pmatrix}
    \end{pmatrix}
    \begin{pmatrix}
        \sigma_1 & 0 & 0 \\
        0 & \ddots & 0 \\
        0 & 0 & \sigma_{\min\{m, n\}} \\
    \end{pmatrix}
    \begin{pmatrix}
        \begin{pmatrix} & \vec{v}_1 & \end{pmatrix} \\
        \vdots \\
        \begin{pmatrix} & \vec{v}_n & \end{pmatrix} \\
    \end{pmatrix}
\]
where:
\begin{itemize}
    \item
        $\matr{U} \in \mathbb{R}^{m \times m}$ is an orthogonal matrix with columns $\vec{u}_i$ called left-singular vectors.

    \item
        $\matr{V} \in \mathbb{R}^{n \times n}$ is an orthogonal matrix with columns $\vec{v}_i$ called right-singular vectors.

    \item
        $\matr{\Sigma} \in \mathbb{R}^{m \times n}$ is a matrix with $\matr{\Sigma}_{i,j} = 0$ for $i \neq j$ (i.e. diagonal if it were a square matrix) and
        the singular values $\sigma_i, i = 1 \dots \min\{m, n\}$ on the diagonal.
        By convention $\sigma_1 \geq \sigma_2 \geq \dots \geq \sigma_r \geq 0$.
        Note that $\sigma_j = 0$ for $(r + 1) \leq j \leq \min\{m, n\}$
        (i.e. singular values at indices after $\text{rank}(\matr{A})$ are always 0).
\end{itemize}
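As an illustration of the shape of $\matr{\Sigma}$ (for hypothetical dimensions $m = 3$, $n = 2$), the extra rows below the diagonal are filled with zeros:
\[
    \matr{\Sigma} =
    \begin{pmatrix}
        \sigma_1 & 0 \\
        0 & \sigma_2 \\
        0 & 0 \\
    \end{pmatrix}
    \in \mathbb{R}^{3 \times 2}
\]
while for $m < n$ the extra zeros appear as additional columns to the right.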
\marginnote{Singular value equation}
We can also represent SVD as a \textbf{singular value equation}, which resembles the eigenvalue equation:
\[ \matr{A}\vec{v}_i = \sigma_i\vec{u}_i \text{ for } i = 1, \dots, r \]
This is derived from:
\[
    \matr{A} = \matr{U}\matr{\Sigma}\matr{V}^T
    \iff \matr{A}\matr{V} = \matr{U}\matr{\Sigma}\matr{V}^T\matr{V}
    \iff \matr{A}\matr{V} = \matr{U}\matr{\Sigma}
\]
by reading the equality $\matr{A}\matr{V} = \matr{U}\matr{\Sigma}$ column by column:
the $i$-th column of $\matr{A}\matr{V}$ is $\matr{A}\vec{v}_i$ and the $i$-th column of $\matr{U}\matr{\Sigma}$ is $\sigma_i\vec{u}_i$.
\subsection{Singular values and eigenvalues}
\marginnote{Eigendecomposition of $\matr{A}^T\matr{A}$ and $\matr{A}\matr{A}^T$}
Given $\matr{A} \in \mathbb{R}^{m \times n}$, we can obtain the eigenvalues and eigenvectors
of $\matr{A}^T\matr{A}$ and $\matr{A}\matr{A}^T$ through SVD.

For $\matr{A}^T\matr{A}$, we can compute:
\[
    \begin{split}
        \matr{A}^T\matr{A} & = (\matr{U}\matr{\Sigma}\matr{V}^T)^T(\matr{U}\matr{\Sigma}\matr{V}^T) \text{ using } (\matr{A}\matr{B})^T = \matr{B}^T\matr{A}^T \\
        & = (\matr{V}\matr{\Sigma}^T\matr{U}^T)(\matr{U}\matr{\Sigma}\matr{V}^T) \\
        & = \matr{V}\matr{\Sigma}^T\matr{\Sigma}\matr{V}^T \\
        & = \matr{V}\matr{\Sigma}^2\matr{V}^T
    \end{split}
\]
where $\matr{\Sigma}^2$ is a shorthand for the $n \times n$ diagonal matrix $\matr{\Sigma}^T\matr{\Sigma}$.
As $\matr{V}$ is orthogonal ($\matr{V}^T = \matr{V}^{-1}$), we can apply the eigendecomposition theorem:
\begin{itemize}
    \item The diagonal entries of $\matr{\Sigma}^2$ (i.e. the squared singular values of $\matr{A}$) are the eigenvalues of $\matr{A}^T\matr{A}$.
    \item The columns of $\matr{V}$ (right-singular vectors) are the eigenvectors of $\matr{A}^T\matr{A}$.
\end{itemize}

The same process holds for $\matr{A}\matr{A}^T$. In this case, the columns of $\matr{U}$ (left-singular vectors) are the eigenvectors.
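For completeness, spelling out the analogous computation for $\matr{A}\matr{A}^T$ (same reasoning, with the roles of $\matr{U}$ and $\matr{V}$ swapped):
\[
    \matr{A}\matr{A}^T
    = (\matr{U}\matr{\Sigma}\matr{V}^T)(\matr{U}\matr{\Sigma}\matr{V}^T)^T
    = \matr{U}\matr{\Sigma}\matr{V}^T\matr{V}\matr{\Sigma}^T\matr{U}^T
    = \matr{U}\matr{\Sigma}\matr{\Sigma}^T\matr{U}^T
\]
so the eigenvalues of $\matr{A}\matr{A}^T$ are again the squared singular values (padded with zeros when $m > n$).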
\subsection{Singular values and 2-norm}
Given a symmetric matrix $\matr{A} \in \mathbb{R}^{n \times n}$,
we have that $\matr{A}^T\matr{A} = \matr{A}^2 = \matr{A}\matr{A}^T$ (as $\matr{A}^T = \matr{A}$).

The eigenvalues of $\matr{A}^2$ are $\lambda_1^2, \dots, \lambda_n^2$, where $\lambda_i$ are the eigenvalues of $\matr{A}$.
Equivalently, the eigenvalues of $\matr{A}^2$ are the squared singular values of $\matr{A}$: $\lambda_i^2 = \sigma_i^2$.
Moreover, if $\matr{A}$ is invertible, the eigenvalues of $\matr{A}^{-1}$ are $\frac{1}{\lambda_1}, \dots, \frac{1}{\lambda_n}$.

\marginnote{2-norm using SVD}
We can compute the 2-norm as:
\[ \Vert \matr{A} \Vert_2 = \sqrt{\rho(\matr{A}^T\matr{A})} = \sqrt{\rho(\matr{A}^2)} = \sqrt{\max\{\sigma_1^2, \dots, \sigma_r^2\}} = \sigma_1 \]
\[
    \Vert \matr{A}^{-1} \Vert_2 = \sqrt{\rho((\matr{A}^{-1})^T(\matr{A}^{-1}))} =
    \sqrt{\rho((\matr{A}\matr{A}^T)^{-1})} = \sqrt{\rho((\matr{A}^2)^{-1})} = \sqrt{\max\left\{\frac{1}{\sigma_1^2}, \dots, \frac{1}{\sigma_r^2}\right\}} = \frac{1}{\sigma_r}
\]
Furthermore, we can compute the condition number of $\matr{A}$ as:
\[ K(\matr{A}) = \Vert \matr{A} \Vert_2 \cdot \Vert \matr{A}^{-1} \Vert_2 = \sigma_1 \cdot \frac{1}{\sigma_r} = \frac{\sigma_1}{\sigma_r} \]
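As a quick numeric check, take again the (arbitrarily chosen) symmetric matrix
$\matr{A} = \begin{pmatrix} 2 & 1 \\ 1 & 2 \end{pmatrix}$,
whose singular values are $\sigma_1 = 3$ and $\sigma_2 = 1$:
\[
    \Vert \matr{A} \Vert_2 = \sigma_1 = 3
    \qquad
    \Vert \matr{A}^{-1} \Vert_2 = \frac{1}{\sigma_2} = 1
    \qquad
    K(\matr{A}) = \frac{\sigma_1}{\sigma_2} = 3
\]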
\subsection{Application: Matrix approximation}
Given a matrix $\matr{A} \in \mathbb{R}^{m \times n}$ and its SVD $\matr{A} = \matr{U}\matr{\Sigma}\matr{V}^T$,
we can construct a rank-1 matrix (dyad) $\matr{A}_i \in \mathbb{R}^{m \times n}$ as: \marginnote{Dyad}
\[ \matr{A}_i = \vec{u}_i \vec{v}_i^T \]
where $\vec{u}_i \in \mathbb{R}^m$ is the $i$-th column of $\matr{U}$ and
$\vec{v}_i \in \mathbb{R}^n$ is the $i$-th column of $\matr{V}$.
Then, we can write $\matr{A}$ as a sum of dyads:
\[ \matr{A} = \sum_{i=1}^{r} \sigma_i \vec{u}_i \vec{v}_i^T = \sum_{i=1}^{r} \sigma_i \matr{A}_i \]

\marginnote{Rank-$k$ approximation}
By considering only the first $k < r$ singular values, we can obtain a rank-$k$ approximation of $\matr{A}$:
\[ \hat{\matr{A}}(k) = \sum_{i=1}^{k} \sigma_i \vec{u}_i \vec{v}_i^T = \sum_{i=1}^{k} \sigma_i \matr{A}_i \]

\begin{theorem}[Eckart-Young]
    Given $\matr{A} \in \mathbb{R}^{m \times n}$ of rank $r$.
    For any $k \leq r$ (this theorem is interesting for $k < r$), the rank-$k$ approximation satisfies:
    \[
        \hat{\matr{A}}(k) = \arg \min_{\matr{B} \in \mathbb{R}^{m \times n}, \text{rank}(\matr{B}) = k} \Vert \matr{A} - \matr{B} \Vert_2
    \]
\end{theorem}
In other words, among all rank-$k$ matrices, $\hat{\matr{A}}(k)$ is the closest one to $\matr{A}$ in the 2-norm.
Moreover, the error of the rank-$k$ approximation is:
\[
    \Vert \matr{A} - \hat{\matr{A}}(k) \Vert_2 =
    \left\Vert \sum_{i=1}^{r} \sigma_i \matr{A}_i - \sum_{j=1}^{k} \sigma_j \matr{A}_j \right\Vert_2 =
    \left\Vert \sum_{i=k+1}^{r} \sigma_i \matr{A}_i \right\Vert_2 =
    \sigma_{k+1}
\]
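As a minimal hand-picked example, for the diagonal matrix below the SVD is immediate and the rank-1 truncation drops exactly the smallest singular value:
\[
    \matr{A} = \begin{pmatrix} 3 & 0 \\ 0 & 1 \end{pmatrix}
    = 3 \begin{pmatrix} 1 \\ 0 \end{pmatrix}\begin{pmatrix} 1 & 0 \end{pmatrix}
    + 1 \begin{pmatrix} 0 \\ 1 \end{pmatrix}\begin{pmatrix} 0 & 1 \end{pmatrix}
    \qquad
    \hat{\matr{A}}(1) = \begin{pmatrix} 3 & 0 \\ 0 & 0 \end{pmatrix}
    \qquad
    \Vert \matr{A} - \hat{\matr{A}}(1) \Vert_2 = \sigma_2 = 1
\]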
\subsubsection{Image compression}
Each dyad requires $1 + m + n$ numbers to be stored (respectively for $\sigma_i$, $\vec{u}_i$ and $\vec{v}_i$).
A rank-$k$ approximation therefore requires storing $k(1 + m + n)$ numbers.
The compression factor is given by: \marginnote{Compression factor}
\[
    c_k = 1 - \frac{k(1 + m + n)}{mn}
\]
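For example, for a hypothetical $512 \times 512$ grayscale image approximated with $k = 20$ dyads:
\[
    c_{20} = 1 - \frac{20 \cdot (1 + 512 + 512)}{512 \cdot 512} = 1 - \frac{20500}{262144} \approx 0.92
\]
i.e. only about $8\%$ of the original storage is needed.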
\begin{figure}[h]
    \centering
    \includegraphics[width=0.60\textwidth]{img/_rank_k_approx.pdf}
    \caption{Approximation of an image}
\end{figure}
\subsection{Application: Linear least squares problem} \label{sec:lls}
A system $\matr{A}\vec{x} = \vec{b}$ with $\matr{A} \in \mathbb{R}^{m \times n} \text{, } m > n$
does not generally have a solution.
\marginnote{Linear least squares}
Therefore, instead of finding the exact solution, it is possible to search for an $\tilde{\vec{x}}$ such that:
\[ \matr{A}\tilde{\vec{x}} - \vec{b} \approx \nullvec \]
In other words, we aim to find an $\tilde{\vec{x}}$ that is close enough to solving the system.
This problem is usually formulated as:
\[
    \tilde{\vec{x}} = \arg\min_{\vec{x} \in \mathbb{R}^n} \Vert \matr{A}\vec{x} - \vec{b} \Vert_2^2
\]
It always admits a solution and, depending on $\text{rank}(\matr{A})$, there are two possible cases:
\begin{descriptionlist}
    \item[$\text{rank}(\matr{A}) = n$]
        The solution is unique for each $\vec{b} \in \mathbb{R}^m$.
        \marginnote{Normal equation}
        It is found by solving the normal equation:
        \[ \matr{A}^T\matr{A}\vec{x} = \matr{A}^T\vec{b} \]
        $\matr{A}^T\matr{A}$ is symmetric positive definite and the system can be solved using the Cholesky factorization
        (a small worked example is shown after this list).

    \item[$\text{rank}(\matr{A}) < n$] \marginnote{Least squares using SVD}
        The system admits infinitely many solutions.
        Of all the solutions $S$, we are interested in the one with minimum norm:
        \[ \vec{x}^* = \arg\min_{\vec{x} \in S} \Vert \vec{x} \Vert_2 \]
        This problem can be solved using SVD:
        \[ \vec{x}^* = \sum_{i=1}^{\text{rank}(\matr{A})} \frac{\vec{u}_i^T\vec{b}}{\sigma_i}\vec{v}_i \]
\end{descriptionlist}
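As a minimal worked example of the full-rank case (with arbitrarily chosen data), fitting a single constant to two measurements amounts to solving the normal equation:
\[
    \matr{A} = \begin{pmatrix} 1 \\ 1 \end{pmatrix}
    \quad
    \vec{b} = \begin{pmatrix} 1 \\ 2 \end{pmatrix}
    \quad\Rightarrow\quad
    \matr{A}^T\matr{A} = 2
    \quad
    \matr{A}^T\vec{b} = 3
    \quad\Rightarrow\quad
    \tilde{\vec{x}} = \frac{3}{2}
\]
i.e. the least squares solution is the mean of the two measurements.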
\subsection{Application: Polynomial interpolation}
\marginnote{Polynomial interpolation}
Given a set of $m$ data points $(x_i, y_i), i=1, \dots, m$,
we want to find a polynomial of degree $n$ ($m > n$) that approximates them.
In other words, we want to find a function:
\[ f(x) = c_0 + c_1 x + c_2 x^2 + \dots + c_n x^n \]
that minimizes the norm of the residual vector $\vec{r} = (r_1, \dots, r_m)$,
where $r_i = \vert y_i - f(x_i) \vert$.
We can formulate this in matrix form:
\[
    \vec{r} = \vec{y} - \matr{A}\vec{c} =
    \begin{pmatrix}
        y_1 \\
        \vdots \\
        y_m
    \end{pmatrix}
    -
    \begin{pmatrix}
        1 & x_1 & x_1^2 & \dots & x_1^n \\
        \vdots & \vdots & \vdots & \ddots & \vdots \\
        1 & x_m & x_m^2 & \dots & x_m^n
    \end{pmatrix}
    \begin{pmatrix}
        c_0 \\
        \vdots \\
        c_n
    \end{pmatrix}
\]
which can be solved as a linear least squares problem (Section \ref{sec:lls}):
\[ \min_{\vec{c} \in \mathbb{R}^{n+1}} \Vert \vec{y} - \matr{A}\vec{c} \Vert_2^2 \]
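As a small worked example (with arbitrarily chosen points), fitting a degree-1 polynomial to the points $(0, 0)$, $(1, 1)$, $(2, 1)$ via the normal equation gives:
\[
    \matr{A} = \begin{pmatrix} 1 & 0 \\ 1 & 1 \\ 1 & 2 \end{pmatrix}
    \quad
    \vec{y} = \begin{pmatrix} 0 \\ 1 \\ 1 \end{pmatrix}
    \quad\Rightarrow\quad
    \matr{A}^T\matr{A} = \begin{pmatrix} 3 & 3 \\ 3 & 5 \end{pmatrix}
    \quad
    \matr{A}^T\vec{y} = \begin{pmatrix} 2 \\ 3 \end{pmatrix}
    \quad\Rightarrow\quad
    \vec{c} = \begin{pmatrix} 1/6 \\ 1/2 \end{pmatrix}
\]
so the fitted line is $f(x) = \frac{1}{6} + \frac{1}{2}x$.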
\begin{figure}[h]
    \centering
    \includegraphics[width=0.40\textwidth]{img/linear_regression.png}
    \caption{Interpolation using a polynomial of degree 1}
\end{figure}
\section{Eigendecomposition vs SVD}
\begin{center}
    \begin{tabular}{m{16em} | m{16em}}
        \hline
        \multicolumn{1}{c|}{\textbf{Eigendecomposition}} & \multicolumn{1}{c}{\textbf{SVD}} \\
        \multicolumn{1}{c|}{$\matr{A} = \matr{P}\matr{D}\matr{P}^{-1}$} & \multicolumn{1}{c}{$\matr{A} = \matr{U}\matr{\Sigma}\matr{V}^T$} \\
        \hline
        Only defined for square matrices $\matr{A} \in \mathbb{R}^{n \times n}$ with eigenvectors that form a basis of $\mathbb{R}^n$
        & Always exists \\
        \hline
        $\matr{P}$ is not necessarily orthogonal & $\matr{U}$ and $\matr{V}$ are orthogonal \\
        \hline
        The elements on the diagonal of $\matr{D}$ may be in $\mathbb{C}$
        & The elements on the diagonal of $\matr{\Sigma}$ are all non-negative reals \\
        \hline
        \multicolumn{2}{c}{For symmetric positive semi-definite matrices, eigendecomposition and SVD coincide} \\
        \hline
    \end{tabular}
\end{center}