From 4358e7f408030f5412c85b60d14e603bd03adcd9 Mon Sep 17 00:00:00 2001
From: NotXia <35894453+NotXia@users.noreply.github.com>
Date: Mon, 25 Sep 2023 21:54:26 +0200
Subject: [PATCH] Add SMM SVD

---
 .../main.tex                                  |   4 +-
 .../sections/_linear_algebra.tex              |  10 +-
 .../sections/_matrix_decomp.tex               | 131 ++++++++++++++++++
 3 files changed, 134 insertions(+), 11 deletions(-)
 create mode 100644 src/statistical-and-mathematical-methods-for-ai/sections/_matrix_decomp.tex

diff --git a/src/statistical-and-mathematical-methods-for-ai/main.tex b/src/statistical-and-mathematical-methods-for-ai/main.tex
index 7f7d4c0..e7d7d2c 100644
--- a/src/statistical-and-mathematical-methods-for-ai/main.tex
+++ b/src/statistical-and-mathematical-methods-for-ai/main.tex
@@ -8,6 +8,7 @@
 \usepackage[inline]{enumitem}
 \usepackage{marginnote}
 \usepackage[bottom]{footmisc}
+\usepackage{array}
 
 \geometry{ margin=3cm, lmargin=2cm, rmargin=4cm, marginparwidth=3cm }
 \hypersetup{ colorlinks, citecolor=black, filecolor=black, linkcolor=black, urlcolor=black, linktoc=all }
@@ -59,10 +60,9 @@
     \pagenumbering{arabic}
 
     \input{sections/_finite_numbers.tex}
-    \newpage
     \input{sections/_linear_algebra.tex}
-    \newpage
     \input{sections/_linear_systems.tex}
+    \input{sections/_matrix_decomp.tex}
 
 
 \end{document}
\ No newline at end of file
diff --git a/src/statistical-and-mathematical-methods-for-ai/sections/_linear_algebra.tex b/src/statistical-and-mathematical-methods-for-ai/sections/_linear_algebra.tex
index 84245c8..746e2b0 100644
--- a/src/statistical-and-mathematical-methods-for-ai/sections/_linear_algebra.tex
+++ b/src/statistical-and-mathematical-methods-for-ai/sections/_linear_algebra.tex
@@ -209,6 +209,7 @@ Common norms are:
             \frac{\vec{x}^T\vec{y}}{\Vert \vec{x} \Vert \cdot \Vert \vec{y} \Vert}
         \]
     \end{enumerate}
+    Note: an orthogonal matrix represents a rotation (or a reflection, if its determinant is $-1$).
 
 \item[Orthogonal basis] \marginnote{Orthogonal basis}
     Given a $n$-dimensional vector space $V$ and a basis $\beta = \{ \vec{b}_1, \dots, \vec{b}_n \}$ of $V$.
@@ -338,15 +339,6 @@ A matrix $\matr{A} \in \mathbb{R}^{n \times n}$ is diagonalizable if it is simil
 Similar matrices have the same eigenvalues.
 \end{theorem}
 
-\begin{theorem}[Eigendecomposition] \marginnote{Eigendecomposition}
-    Given a matrix $\matr{A} \in \mathbb{R}^{n \times n}$.
-    If the eigenvectors of $\matr{A}$ form a basis of $\mathbb{R}^n$,
-    then $\matr{A} \in \mathbb{R}^{n \times n}$ can be decomposed into:
-    \[ \matr{A} = \matr{P}\matr{D}\matr{P}^{-1} \]
-    where $\matr{P} \in \mathbb{R}^{n \times n}$ contains the eigenvectors of $\matr{A}$ as its columns and
-    $\matr{D}$ is a diagonal matrix whose diagonal contains the eigenvalues of $\matr{A}$.
-\end{theorem}
-
 \begin{theorem} \marginnote{Symmetric matrix diagonalizability}
 A symmetric matrix $\matr{A} \in \mathbb{R}^{n \times n}$ is always diagonalizable.
 \end{theorem}
\ No newline at end of file
diff --git a/src/statistical-and-mathematical-methods-for-ai/sections/_matrix_decomp.tex b/src/statistical-and-mathematical-methods-for-ai/sections/_matrix_decomp.tex
new file mode 100644
index 0000000..1b29623
--- /dev/null
+++ b/src/statistical-and-mathematical-methods-for-ai/sections/_matrix_decomp.tex
@@ -0,0 +1,131 @@
+\chapter{Matrix decomposition}
+
+
+\section{Eigendecomposition}
+\marginnote{Eigendecomposition}
+Given a matrix $\matr{A} \in \mathbb{R}^{n \times n}$.
+If the eigenvectors of $\matr{A}$ form a basis of $\mathbb{R}^n$,
+then $\matr{A}$ can be decomposed into:
+\[ \matr{A} = \matr{P}\matr{D}\matr{P}^{-1} \]
+where $\matr{P} \in \mathbb{R}^{n \times n}$ contains the eigenvectors of $\matr{A}$ as its columns and
+$\matr{D}$ is a diagonal matrix whose diagonal contains the eigenvalues of $\matr{A}$.
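+
+For instance (a small worked example, with values chosen purely for illustration), the symmetric matrix
+\[ \matr{A} = \begin{pmatrix} 2 & 1 \\ 1 & 2 \end{pmatrix} \]
+has eigenvalues $\lambda_1 = 3$ and $\lambda_2 = 1$ with eigenvectors $\frac{1}{\sqrt{2}}(1, 1)^T$ and $\frac{1}{\sqrt{2}}(1, -1)^T$,
+so one possible eigendecomposition is:
+\[
+    \matr{A} =
+    \underbrace{\frac{1}{\sqrt{2}}\begin{pmatrix} 1 & 1 \\ 1 & -1 \end{pmatrix}}_{\matr{P}}
+    \underbrace{\begin{pmatrix} 3 & 0 \\ 0 & 1 \end{pmatrix}}_{\matr{D}}
+    \underbrace{\frac{1}{\sqrt{2}}\begin{pmatrix} 1 & 1 \\ 1 & -1 \end{pmatrix}}_{\matr{P}^{-1}}
+\]
+where $\matr{P}^{-1} = \matr{P}^T = \matr{P}$, since this particular $\matr{P}$ is both orthogonal and symmetric.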
+
+
+\section{Singular value decomposition}
+\marginnote{Singular value decomposition}
+Given a matrix $\matr{A} \in \mathbb{R}^{m \times n}$ of rank $r \in [0, \min\{m, n\}]$.
+The singular value decomposition (SVD) of $\matr{A}$ always exists and has the form:
+\[
+    \matr{A} = \matr{U}\matr{\Sigma}\matr{V}^T
+\]
+where:
+\begin{itemize}
+    \item
+    $\matr{U} \in \mathbb{R}^{m \times m}$ is an orthogonal matrix with columns $\vec{u}_i$ called left-singular vectors.
+
+    \item
+    $\matr{\Sigma} \in \mathbb{R}^{m \times n}$ is a matrix with $\matr{\Sigma}_{i,j} = 0$ for $i \neq j$ (i.e. diagonal if it were a square matrix) and
+    the singular values $\sigma_i$, $i = 1, \dots, r$, on the diagonal.
+    By convention $\sigma_1 \geq \sigma_2 \geq \dots \geq \sigma_r > 0$.
+    Note that the remaining singular values $\sigma_j = 0$ for $(r + 1) \leq j \leq \min\{m, n\}$.
+
+    \item
+    $\matr{V} \in \mathbb{R}^{n \times n}$ is an orthogonal matrix with columns $\vec{v}_i$ called right-singular vectors.
+\end{itemize}
+
+\marginnote{Singular value equation}
+We can also represent the SVD through the \textbf{singular value equation}, which resembles the eigenvalue equation:
+\[ \matr{A}\vec{v}_i = \sigma_i\vec{u}_i \text{ for } i = 1, \dots, r \]
+This is derived from:
+\[
+    \matr{A} = \matr{U}\matr{\Sigma}\matr{V}^T
+    \iff \matr{A}\matr{V} = \matr{U}\matr{\Sigma}\matr{V}^T\matr{V}
+    \iff \matr{A}\matr{V} = \matr{U}\matr{\Sigma}
+\]
+where the last step uses the orthogonality of $\matr{V}$ ($\matr{V}^T\matr{V} = \matr{I}$).
+
+\subsection{Singular values and eigenvalues}
+\marginnote{Eigendecomposition of $\matr{A}^T\matr{A}$ and $\matr{A}\matr{A}^T$}
+Given $\matr{A} \in \mathbb{R}^{m \times n}$, we can obtain the eigenvalues and eigenvectors
+of $\matr{A}^T\matr{A}$ and $\matr{A}\matr{A}^T$ through the SVD.
+
+For $\matr{A}^T\matr{A}$, we can compute:
+\[
+\begin{split}
+    \matr{A}^T\matr{A} & = (\matr{U}\matr{\Sigma}\matr{V}^T)^T(\matr{U}\matr{\Sigma}\matr{V}^T) \text{ using } (\matr{A}\matr{B})^T = \matr{B}^T\matr{A}^T \\
+    & = (\matr{V}\matr{\Sigma}^T\matr{U}^T)(\matr{U}\matr{\Sigma}\matr{V}^T) \\
+    & = \matr{V}\matr{\Sigma}^T\matr{\Sigma}\matr{V}^T \\
+    & = \matr{V}\matr{\Sigma}^2\matr{V}^T
+\end{split}
+\]
+As $\matr{V}$ is orthogonal ($\matr{V}^T = \matr{V}^{-1}$), we can apply the eigendecomposition theorem:
+\begin{itemize}
+    \item The diagonal entries of $\matr{\Sigma}^2$ (i.e. the squared singular values of $\matr{A}$) are the eigenvalues of $\matr{A}^T\matr{A}$.
+    \item The columns of $\matr{V}$ (right-singular vectors) are the eigenvectors of $\matr{A}^T\matr{A}$.
+\end{itemize}
+
+The same process holds for $\matr{A}\matr{A}^T$: in this case, the columns of $\matr{U}$ (left-singular vectors) are the eigenvectors
+and the eigenvalues are again the squared singular values.
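+
+As a small worked sketch of this procedure (the matrix is chosen purely for illustration), consider:
+\[
+    \matr{A} = \begin{pmatrix} 1 & 0 \\ 0 & 1 \\ 1 & 1 \end{pmatrix}
+    \qquad
+    \matr{A}^T\matr{A} = \begin{pmatrix} 2 & 1 \\ 1 & 2 \end{pmatrix}
+\]
+The eigenvalues of $\matr{A}^T\matr{A}$ are $3$ and $1$, so the singular values of $\matr{A}$ are $\sigma_1 = \sqrt{3}$ and $\sigma_2 = 1$.
+The corresponding eigenvectors $\vec{v}_1 = \frac{1}{\sqrt{2}}(1, 1)^T$ and $\vec{v}_2 = \frac{1}{\sqrt{2}}(1, -1)^T$ are the right-singular vectors,
+and the left-singular vectors follow from the singular value equation, $\vec{u}_i = \frac{1}{\sigma_i}\matr{A}\vec{v}_i$:
+\[
+    \vec{u}_1 = \frac{1}{\sqrt{6}}(1, 1, 2)^T
+    \qquad
+    \vec{u}_2 = \frac{1}{\sqrt{2}}(1, -1, 0)^T
+\]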
+
+
+\subsection{Singular values and 2-norm}
+Given a symmetric matrix $\matr{A} \in \mathbb{R}^{n \times n}$,
+we have that $\matr{A}^T\matr{A} = \matr{A}^2 = \matr{A}\matr{A}^T$ (as $\matr{A}^T = \matr{A}$).
+
+The eigenvalues of $\matr{A}^2$ are $\lambda_1^2, \dots, \lambda_n^2$, where $\lambda_i$ are the eigenvalues of $\matr{A}$.
+Equivalently, the eigenvalues of $\matr{A}^2$ are the squared singular values of $\matr{A}$: $\lambda_i^2 = \sigma_i^2$.
+Moreover, if $\matr{A}$ is invertible, the eigenvalues of $\matr{A}^{-1}$ are $\frac{1}{\lambda_1}, \dots, \frac{1}{\lambda_n}$.
+
+\marginnote{2-norm using SVD}
+We can compute the 2-norm as:
+\[ \Vert \matr{A} \Vert_2 = \sqrt{\rho(\matr{A}^T\matr{A})} = \sqrt{\rho(\matr{A}^2)} = \sqrt{\max\{\sigma_1^2, \dots, \sigma_r^2\}} = \sigma_1 \]
+\[
+    \Vert \matr{A}^{-1} \Vert_2 = \sqrt{\rho((\matr{A}^{-1})^T(\matr{A}^{-1}))} =
+    \sqrt{\rho((\matr{A}\matr{A}^T)^{-1})} = \sqrt{\rho((\matr{A}^2)^{-1})} = \sqrt{\max\left\{\frac{1}{\sigma_1^2}, \dots, \frac{1}{\sigma_r^2}\right\}} = \frac{1}{\sigma_r}
+\]
+Furthermore, we can compute the condition number of $\matr{A}$ as:
+\[ K(\matr{A}) = \Vert \matr{A} \Vert_2 \cdot \Vert \matr{A}^{-1} \Vert_2 = \frac{\sigma_1}{\sigma_r} \]
+
+
+
+\subsection{Matrix approximation}
+Given a matrix $\matr{A} \in \mathbb{R}^{m \times n}$ and its SVD $\matr{A} = \matr{U}\matr{\Sigma}\matr{V}^T$,
+we can construct a rank-1 matrix (dyad) $\matr{A}_i \in \mathbb{R}^{m \times n}$ as: \marginnote{Dyad}
+\[ \matr{A}_i = \vec{u}_i \vec{v}_i^T \]
+where $\vec{u}_i \in \mathbb{R}^m$ is the $i$-th column of $\matr{U}$ and
+$\vec{v}_i \in \mathbb{R}^n$ is the $i$-th column of $\matr{V}$.
+Then, we can compose $\matr{A}$ as a sum of dyads:
+\[ \matr{A} = \sum_{i=1}^{r} \sigma_i \vec{u}_i \vec{v}_i^T = \sum_{i=1}^{r} \sigma_i \matr{A}_i \]
+
+\marginnote{Rank-$k$ approximation}
+By considering only the first $k < r$ singular values, we obtain a rank-$k$ approximation of $\matr{A}$:
+\[ \hat{\matr{A}}(k) = \sum_{i=1}^{k} \sigma_i \vec{u}_i \vec{v}_i^T = \sum_{i=1}^{k} \sigma_i \matr{A}_i \]
+Each weighted dyad requires storing $1 + m + n$ numbers (for $\sigma_i$, $\vec{u}_i$ and $\vec{v}_i$, respectively),
+so a rank-$k$ approximation requires storing $k(1 + m + n)$ numbers instead of the $mn$ entries of $\matr{A}$.
+
+\begin{figure}[h]
+    \centering
+    \includegraphics[width=0.60\textwidth]{img/rank_k_approx.pdf}
+    \caption{Approximation of an image}
+\end{figure}
+
+
+
+\section{Eigendecomposition vs SVD}
+\begin{center}
+    \begin{tabular}{m{16em} | m{16em}}
+        \hline
+        \multicolumn{1}{c|}{\textbf{Eigendecomposition}} & \multicolumn{1}{c}{\textbf{SVD}} \\
+        \multicolumn{1}{c|}{$\matr{A} = \matr{P}\matr{D}\matr{P}^{-1}$} & \multicolumn{1}{c}{$\matr{A} = \matr{U}\matr{\Sigma}\matr{V}^T$} \\
+        \hline
+        Only defined for square matrices $\matr{A} \in \mathbb{R}^{n \times n}$ whose eigenvectors form a basis of $\mathbb{R}^n$
+        & Always exists \\
+        \hline
+        $\matr{P}$ is not necessarily orthogonal & $\matr{U}$ and $\matr{V}$ are orthogonal \\
+        \hline
+        The elements on the diagonal of $\matr{D}$ may be in $\mathbb{C}$
+        & The elements on the diagonal of $\matr{\Sigma}$ are all non-negative reals \\
+        \hline
+        \multicolumn{2}{c}{For symmetric positive semi-definite matrices, eigendecomposition and SVD coincide} \\
+        \hline
+    \end{tabular}
+\end{center}
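+
+As a small illustrative example of the last row (again with values chosen purely for illustration), consider the symmetric but not positive semi-definite matrix:
+\[ \matr{A} = \begin{pmatrix} 3 & 0 \\ 0 & -2 \end{pmatrix} \]
+Its eigendecomposition is trivial ($\matr{P} = \matr{I}$, $\matr{D} = \matr{A}$) with eigenvalues $3$ and $-2$,
+while its singular values are $3$ and $2$ (the absolute values of the eigenvalues), with
+\[
+    \matr{A} =
+    \underbrace{\begin{pmatrix} 1 & 0 \\ 0 & -1 \end{pmatrix}}_{\matr{U}}
+    \underbrace{\begin{pmatrix} 3 & 0 \\ 0 & 2 \end{pmatrix}}_{\matr{\Sigma}}
+    \underbrace{\begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix}}_{\matr{V}^T}
+\]
+so the two decompositions coincide only when all the eigenvalues are non-negative.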