mirror of
https://github.com/NotXia/unibo-ai-notes.git
synced 2025-12-15 02:52:22 +01:00
Add SMM least squares problem
Binary file not shown (new image, 11 KiB).
@ -116,6 +116,17 @@ It must be noted that $\matr{P}$ is orthogonal, so $\matr{P}^T = \matr{P}^{-1}$.
The solution to the system ($\matr{P}^T\matr{A}\vec{x} = \matr{P}^T\vec{b}$) can be found as above.
\subsection{Cholesky factorization}
Let $\matr{A} \in \mathbb{R}^{n \times n}$ be a symmetric positive definite matrix.
It is possible to decompose $\matr{A}$ as:
\[ \matr{A} = \matr{L}\matr{L}^T \]
where $\matr{L}$ is lower triangular.
A square system where $\matr{A}$ is symmetric positive definite can be solved as above using the Cholesky factorization.
This method has time complexity $O(n^3)$ (roughly $\frac{n^3}{6}$ operations).
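As a small worked example (ours, for illustration):
\[
	\matr{A} =
	\begin{pmatrix}
		4 & 2 \\
		2 & 3
	\end{pmatrix}
	=
	\begin{pmatrix}
		2 & 0 \\
		1 & \sqrt{2}
	\end{pmatrix}
	\begin{pmatrix}
		2 & 1 \\
		0 & \sqrt{2}
	\end{pmatrix}
	= \matr{L}\matr{L}^T
\]
The system $\matr{A}\vec{x} = \vec{b}$ then reduces to the two triangular systems $\matr{L}\vec{y} = \vec{b}$ (forward substitution) followed by $\matr{L}^T\vec{x} = \vec{y}$ (backward substitution).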
\section{Iterative methods}
\marginnote{Iterative methods}
@ -186,4 +197,8 @@ Finally, we can define the \textbf{condition number} of a matrix $\matr{A}$ as:
A system is \textbf{ill-conditioned} if $K(\matr{A})$ is large \marginnote{Ill-conditioned}
(i.e. a small perturbation of the input causes a large change in the output).
Otherwise it is \textbf{well-conditioned}. \marginnote{Well-conditioned}
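For a concrete illustration (our own example, using the $\infty$-norm): with a small $\varepsilon > 0$,
\[
	\matr{A} =
	\begin{pmatrix}
		1 & 1 \\
		1 & 1 + \varepsilon
	\end{pmatrix}
	\text{,} \quad
	\matr{A}^{-1} = \frac{1}{\varepsilon}
	\begin{pmatrix}
		1 + \varepsilon & -1 \\
		-1 & 1
	\end{pmatrix}
	\text{,} \quad
	K_\infty(\matr{A}) = \Vert \matr{A} \Vert_\infty \Vert \matr{A}^{-1} \Vert_\infty = \frac{(2 + \varepsilon)^2}{\varepsilon}
\]
For $\varepsilon = 10^{-4}$, $K_\infty(\matr{A}) \approx 4 \cdot 10^4$: a small perturbation of $\vec{b}$ can be amplified by roughly that factor in the solution.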
\section{Linear least squares problem}
See \Cref{sec:lls}.
@ -87,7 +87,7 @@ Furthermore, we can compute the condition number of $\matr{A}$ as:
-\subsection{Matrix approximation}
+\subsection{Application: Matrix approximation}
Given a matrix $\matr{A} \in \mathbb{R}^{m \times n}$ and its SVD decomposition $\matr{A} = \matr{U}\matr{\Sigma}\matr{V}^T$,
we can construct a rank-1 matrix (dyad) $\matr{A}_i \in \mathbb{R}^{m \times n}$ as: \marginnote{Dyad}
\[ \matr{A}_i = \vec{u}_i \vec{v}_i^T \]
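For instance (an illustrative pair of vectors, not from the notes), $\vec{u}_i = (1, 2)^T$ and $\vec{v}_i = (3, 1)^T$ give
\[
	\vec{u}_i \vec{v}_i^T =
	\begin{pmatrix}
		3 & 1 \\
		6 & 2
	\end{pmatrix}
\]
whose rows are proportional, hence the matrix has rank 1.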
@ -132,6 +132,74 @@ Therefore, the compression factor is given by: \marginnote{Compression factor}
\subsection{Application: Linear least squares problem} \label{sec:lls}
A system $\matr{A}\vec{x} = \vec{b}$ with $\matr{A} \in \mathbb{R}^{m \times n}$, $m > n$ (i.e. overdetermined),
generally has no exact solution.
\marginnote{Linear least squares}
Therefore, instead of seeking an exact solution, it is possible to search for an $\tilde{\vec{x}}$ such that:
\[ \matr{A}\tilde{\vec{x}} - \vec{b} \approx \nullvec \]
In other words, we aim to find an $\tilde{\vec{x}}$ that approximately solves the system.
This problem is usually formulated as:
\[
\tilde{\vec{x}} = \arg\min_{\vec{x} \in \mathbb{R}^n} \Vert \matr{A}\vec{x} - \vec{b} \Vert_2^2
\]
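A minimal illustration (our own): for $\matr{A} = (1, 1)^T$ and $\vec{b} = (1, 2)^T$, the system ($x = 1$, $x = 2$) has no exact solution, while minimizing $\Vert \matr{A}x - \vec{b} \Vert_2^2 = (x - 1)^2 + (x - 2)^2$ gives $\tilde{x} = \frac{3}{2}$.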
It always admits a solution and, depending on $\text{rank}(\matr{A})$, there are two possible cases:
\begin{descriptionlist}
\item[$\text{rank}(\matr{A}) = n$]
The solution is unique for each $\vec{b} \in \mathbb{R}^m$.
\marginnote{Normal equation}
It is found by solving the normal equation:
\[ \matr{A}^T\matr{A}\vec{x} = \matr{A}^T\vec{b} \]
$\matr{A}^T\matr{A}$ is symmetric positive definite and the system can be solved using the Cholesky factorization (a small worked example follows this list).
\item[$\text{rank}(\matr{A}) < n$] \marginnote{Least squares using SVD}
The system admits infinitely many solutions.
Among the set $S$ of all solutions, we are interested in the one with minimum norm:
\[ \vec{x}^* = \arg\min_{\vec{x} \in S} \Vert \vec{x} \Vert_2 \]
This problem can be solved using SVD:
\[ \vec{x}^* = \sum_{i=1}^{\text{rank}(\matr{A})} \frac{\vec{u}_i^T\vec{b}}{\sigma_i}\vec{v}_i \]
\end{descriptionlist}
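As a worked sketch for the full-rank case (the numbers are ours, not from the notes), let
\[
	\matr{A} =
	\begin{pmatrix}
		1 & 0 \\
		0 & 1 \\
		1 & 1
	\end{pmatrix}
	\text{,} \quad
	\vec{b} =
	\begin{pmatrix}
		1 \\ 1 \\ 1
	\end{pmatrix}
	\quad \Rightarrow \quad
	\matr{A}^T\matr{A} =
	\begin{pmatrix}
		2 & 1 \\
		1 & 2
	\end{pmatrix}
	\text{,} \quad
	\matr{A}^T\vec{b} =
	\begin{pmatrix}
		2 \\ 2
	\end{pmatrix}
\]
The normal equation yields $\tilde{\vec{x}} = (\frac{2}{3}, \frac{2}{3})^T$, with residual $\Vert \matr{A}\tilde{\vec{x}} - \vec{b} \Vert_2^2 = \frac{1}{3}$.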
\subsection{Application: Polynomial interpolation}
\marginnote{Polynomial interpolation}
Given a set of $m$ data points $(x_i, y_i)$, $i = 1, \dots, m$,
we want to find a polynomial of degree $n$ (with $m > n$) that approximates them.
In other words, we want to find a function:
\[ f(x) = c_0 + c_1 x + c_2 x^2 + \dots + c_n x^n \]
that minimizes the norm of the residual vector $\vec{r} = (r_1, \dots, r_m)$,
where $r_i = \vert y_i - f(x_i) \vert$.
We can express the residual in matrix form:
\[
	\vec{r} = \vec{y} - \matr{A}\vec{c} =
	\begin{pmatrix}
		y_1 \\
		\vdots \\
		y_m
	\end{pmatrix}
	-
	\begin{pmatrix}
		1 & x_1 & x_1^2 & \dots & x_1^n \\
		\vdots & \vdots & \vdots & \ddots & \vdots \\
		1 & x_m & x_m^2 & \dots & x_m^n
	\end{pmatrix}
	\begin{pmatrix}
		c_0 \\
		\vdots \\
		c_n
	\end{pmatrix}
\]
This can be solved as a linear least squares problem:
\[ \min_{\vec{c} \in \mathbb{R}^{n+1}} \Vert \vec{y} - \matr{A}\vec{c} \Vert_2^2 \]
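As a worked sketch with made-up data, fit a polynomial of degree $1$ to the points $(0, 1)$, $(1, 2)$, $(2, 2)$:
\[
	\matr{A} =
	\begin{pmatrix}
		1 & 0 \\
		1 & 1 \\
		1 & 2
	\end{pmatrix}
	\text{,} \quad
	\matr{A}^T\matr{A} =
	\begin{pmatrix}
		3 & 3 \\
		3 & 5
	\end{pmatrix}
	\text{,} \quad
	\matr{A}^T\vec{y} =
	\begin{pmatrix}
		5 \\ 6
	\end{pmatrix}
\]
Solving the normal equation gives $\vec{c} = (\frac{7}{6}, \frac{1}{2})^T$, i.e. the fitted line $f(x) = \frac{7}{6} + \frac{1}{2}x$.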
\begin{figure}[h]
	\centering
	\includegraphics[width=0.40\textwidth]{img/linear_regression.png}
	\caption{Interpolation using a polynomial of degree 1}
\end{figure}
\section{Eigendecomposition vs SVD}
\begin{center}
\begin{tabular}{m{16em} | m{16em}}