Mirror of https://github.com/NotXia/unibo-ai-notes.git
Fix typos
@@ -12,7 +12,7 @@
 Propagation of rounding errors in each step of an algorithm.
 
 \item[Truncation error] \marginnote{Truncation error}
-Approximating an infinite procedure into a finite number of iterations.
+Approximating an infinite procedure to a finite number of iterations.
 
 \item[Inherent error] \marginnote{Inherent error}
 Caused by the finite representation of the data (floating-point).
@@ -30,16 +30,16 @@
 Let $x$ be a value and $\hat{x}$ its approximation. Then:
 \begin{descriptionlist}
 \item[Absolute error]
-\begin{equation}
+\[
 E_{a} = \hat{x} - x
 \marginnote{Absolute error}
-\end{equation}
+\]
 Note that, out of context, the absolute error is meaningless.
 \item[Relative error]
-\begin{equation}
+\[
 E_{r} = \frac{\hat{x} - x}{x}
 \marginnote{Relative error}
-\end{equation}
+\]
 \end{descriptionlist}
 
 
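As a quick illustration of the two definitions above (a snippet of mine, not part of the commit):

```python
# Absolute vs. relative error of an approximation x_hat of x.
x = 1 / 3
x_hat = 0.333

E_a = x_hat - x        # absolute error: meaningless without the scale of x
E_r = (x_hat - x) / x  # relative error: puts E_a in proportion to x

print(E_a)  # ~ -3.3e-04
print(E_r)  # ~ -1.0e-03
```

The same absolute error would be negligible for $x \approx 10^6$ and catastrophic for $x \approx 10^{-6}$, which is why the relative error is the meaningful quantity.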
@@ -48,9 +48,9 @@ Let $x$ be a value and $\hat{x}$ its approximation. Then:
 
 Let $\beta \in \mathbb{N}_{> 1}$ be the base.
 Each $x \in \mathbb{R} \smallsetminus \{0\}$ can be uniquely represented as:
-\begin{equation} \label{eq:finnum_b_representation}
-x = \texttt{sign}(x) \cdot (d_1\beta^{-1} + d_2\beta^{-2} + \dots d_n\beta^{-n})\beta^p
-\end{equation}
+\[ \label{eq:finnum_b_representation}
+x = \texttt{sign}(x) \cdot (d_1\beta^{-1} + d_2\beta^{-2} + \dots + d_n\beta^{-n})\beta^p
+\]
 where:
 \begin{itemize}
 \item $0 \leq d_i \leq \beta-1$
@@ -59,9 +59,9 @@ where:
 \end{itemize}
 %
 \Cref{eq:finnum_b_representation} can be represented using the normalized scientific notation as: \marginnote{Normalized scientific notation}
-\begin{equation}
+\[
 x = \pm (0.d_1d_2\dots) \beta^p
-\end{equation}
+\]
 where $0.d_1d_2\dots$ is the \textbf{mantissa} and $\beta^p$ the \textbf{exponent}. \marginnote{Mantissa\\Exponent}
 
 
@@ -73,11 +73,13 @@ A floating-point system $\mathcal{F}(\beta, t, L, U)$ is defined by the paramete
 \item $t$: precision (number of digits in the mantissa)
 \item $[L, U]$: range of the exponent
 \end{itemize}
-%
+
 Each $x \in \mathcal{F}(\beta, t, L, U)$ can be represented in its normalized form:
 \begin{eqnarray}
 x = \pm (0.d_1d_2 \dots d_t) \beta^p & L \leq p \leq U
 \end{eqnarray}
+We denote with $\texttt{fl}(x)$ the representation of $x \in \mathbb{R}$ in a given floating-point system.
 
 \begin{example}
 In $\mathcal{F}(10, 5, -3, 3)$, $x=12.\bar{3}$ is represented as:
 \begin{equation*}
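A minimal Python sketch of such a toy system (the `fl` helper and its flush-to-zero underflow are my assumptions for illustration, not code from the notes):

```python
import math

def fl(x, beta=10, t=5, L=-3, U=3):
    """Round x to t mantissa digits in base beta: toy model of F(beta, t, L, U)."""
    if x == 0:
        return 0.0
    p = math.floor(math.log(abs(x), beta)) + 1  # exponent so the mantissa lies in [1/beta, 1)
    if p < L:
        return 0.0                              # underflow: flushed to zero
    if p > U:
        raise OverflowError("exponent above U")
    m = round(x / beta**p, t)                   # keep t mantissa digits (rounding)
    return m * beta**p

print(fl(12 + 1/3))  # 12.333 -> mantissa 0.12333, exponent 10^2
```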
@@ -101,21 +103,20 @@ It must be noted that there is an underflow area around 0.
 \end{figure}
 
 
-\subsection{Numbers representation}
+\subsection{Number representation}
 Given a floating-point system $\mathcal{F}(\beta, t, L, U)$, the representation of $x \in \mathbb{R}$ can result in:
 \begin{descriptionlist}
 \item[Exact representation]
 if $p \in [L, U]$ and $d_i=0$ for $i>t$.
 
-\item[Approximation]
+\item[Approximation] \marginnote{Truncation\\Rounding}
 if $p \in [L, U]$ but $d_i$ may not be 0 for $i>t$.
 In this case, the representation is obtained by truncating or rounding the value.
-\marginnote{Truncation\\Rounding}
 
-\item[Underflow]
-if $p < L$. In this case, the values is approximated as 0.
+\item[Underflow] \marginnote{Underflow}
+if $p < L$. In this case, the value is approximated to 0.
 
-\item[Overflow]
+\item[Overflow] \marginnote{Overflow}
 if $p > U$. In this case, an exception is usually raised.
 \end{descriptionlist}
 
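IEEE-754 doubles show the same three regimes, except that overflow yields `inf` rather than an exception (a snippet of mine, not part of the commit):

```python
print(1e-320 * 1e-10)  # 0.0: exponent below L, underflow to zero
print(1e308 * 10)      # inf: exponent above U (doubles return inf instead of raising)
```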
@@ -179,16 +180,17 @@ Let:
 %
 To compute $x \oplus y$, a machine:
 \begin{enumerate}
-\item Calculates $x + y$ in a high precision register (still approximated, but more precise than the storing system)
+\item Calculates $x + y$ in a high precision register
+(still approximated, but more precise than the floating-point system used to store the result)
 \item Stores the result as $\texttt{fl}(x + y)$
 \end{enumerate}
 
 A floating-point operation causes a small rounding error:
-\begin{equation}
+\[
 \left\vert \frac{(x \oplus y) - (x + y)}{x+y} \right\vert < \varepsilon_{\text{mach}}
-\end{equation}
+\]
 %
-Although, some operations may be subject to the \textbf{cancellation} problem which causes information loss.
+However, some operations may be subject to the \textbf{cancellation} problem which causes information loss.
 \marginnote{Cancellation}
 \begin{example}
 Given $x = 1$ and $y = 1 \cdot 10^{-16}$, we want to compute $x + y$ in $\mathcal{F}(10, 16, L, U)$.\\
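The example is easy to reproduce with IEEE-754 doubles, which behave roughly like $\mathcal{F}(2, 53, -1021, 1024)$ (my snippet):

```python
x, y = 1.0, 1e-16
print(x + y == x)   # True: y is smaller than half a unit in the last place of x
print((x + y) - x)  # 0.0 instead of 1e-16: the information carried by y is lost
```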
@@ -16,7 +16,8 @@ A vector space has the following properties:
 \item Addition is commutative and associative
 \item A null vector exists: $\exists \nullvec \in V$ s.t. $\forall \vec{u} \in V: \nullvec + \vec{u} = \vec{u} + \nullvec = \vec{u}$
 \item An identity element for scalar multiplication exists: $\forall \vec{u} \in V: 1\vec{u} = \vec{u}$
-\item Each vector has its opposite: $\forall \vec{u} \in V, \exists \vec{a} \in V: \vec{a} + \vec{u} = \vec{u} + \vec{a} = \nullvec$
+\item Each vector has its opposite: $\forall \vec{u} \in V, \exists \vec{a} \in V: \vec{a} + \vec{u} = \vec{u} + \vec{a} = \nullvec$.\\
+$\vec{a}$ is denoted as $-\vec{u}$.
 \item Distributive properties:
 \[ \forall \alpha \in \mathbb{R}, \forall \vec{u}, \vec{w} \in V: \alpha(\vec{u} + \vec{w}) = \alpha \vec{u} + \alpha \vec{w} \]
 \[ \forall \alpha, \beta \in \mathbb{R}, \forall \vec{u} \in V: (\alpha + \beta)\vec{u} = \alpha \vec{u} + \beta \vec{u} \]
@@ -24,7 +25,7 @@ A vector space has the following properties:
 \[ \forall \alpha, \beta \in \mathbb{R}, \forall \vec{u} \in V: (\alpha \beta)\vec{u} = \alpha (\beta \vec{u}) \]
 \end{enumerate}
 %
-A subset $U \subseteq V$ of a vector space $V$, is a \textbf{subspace} iff $U$ is a vector space.
+A subset $U \subseteq V$ of a vector space $V$ is a \textbf{subspace} iff $U$ is a vector space.
 \marginnote{Subspace}
 
 
@@ -95,7 +96,7 @@ The norm of a vector is a function: \marginnote{Vector norm}
 such that for each $\lambda \in \mathbb{R}$ and $\vec{x}, \vec{y} \in \mathbb{R}^n$:
 \begin{itemize}
 \item $\Vert \vec{x} \Vert \geq 0$
-\item $\Vert \vec{x} \Vert = 0 \iff \vec{x} = 0$
+\item $\Vert \vec{x} \Vert = 0 \iff \vec{x} = \nullvec$
 \item $\Vert \lambda \vec{x} \Vert = \vert \lambda \vert \cdot \Vert \vec{x} \Vert$
 \item $\Vert \vec{x} + \vec{y} \Vert \leq \Vert \vec{x} \Vert + \Vert \vec{y} \Vert$
 \end{itemize}
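These axioms are easy to check numerically with NumPy (an illustration of mine, not part of the diff):

```python
import numpy as np

x = np.array([3.0, -4.0])
y = np.array([1.0, 2.0])

print(np.linalg.norm(x))                                # 5.0: Euclidean norm, >= 0
print(np.linalg.norm(-2 * x) == 2 * np.linalg.norm(x))  # True: absolute homogeneity
print(np.linalg.norm(x + y) <= np.linalg.norm(x) + np.linalg.norm(y))  # True: triangle inequality
```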
@@ -110,7 +111,7 @@ Common norms are:
 \end{descriptionlist}
 %
 In general, different norms tend to maintain the same proportion.
-In some cases, unbalanced results may be given when comparing different norms.
+In some cases, unbalanced results may be obtained when comparing different norms.
 \begin{example}
 Let $\vec{x} = (1, 1000)$ and $\vec{y} = (999, 1000)$. Their norms are:
 \begin{center}
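The example can be reproduced with NumPy (my snippet): the $\infty$-norms of the two vectors coincide, while the 1-norms differ by a factor of about 2.

```python
import numpy as np

x = np.array([1.0, 1000.0])
y = np.array([999.0, 1000.0])

for p in (1, 2, np.inf):
    print(p, np.linalg.norm(x, p), np.linalg.norm(y, p))
# 1    1001.0   1999.0   -> very different
# 2   ~1000.0  ~1413.5   -> different
# inf  1000.0   1000.0   -> identical
```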
@@ -130,7 +131,7 @@ The norm of a matrix is a function: \marginnote{Matrix norm}
 such that for each $\lambda \in \mathbb{R}$ and $\matr{A}, \matr{B} \in \mathbb{R}^{m \times n}$:
 \begin{itemize}
 \item $\Vert \matr{A} \Vert \geq 0$
-\item $\Vert \matr{A} \Vert = 0 \iff \matr{A} = \bar{0}$
+\item $\Vert \matr{A} \Vert = 0 \iff \matr{A} = \matr{0}$
 \item $\Vert \lambda \matr{A} \Vert = \vert \lambda \vert \cdot \Vert \matr{A} \Vert$
 \item $\Vert \matr{A} + \matr{B} \Vert \leq \Vert \matr{A} \Vert + \Vert \matr{B} \Vert$
 \end{itemize}
@@ -141,7 +142,7 @@ Common norms are:
 $\Vert \matr{A} \Vert_2 = \sqrt{ \rho(\matr{A}^T\matr{A}) }$,\\
 where $\rho(\matr{X})$ is the largest absolute value of the eigenvalues of $\matr{X}$ (spectral radius).
 
-\item[1-norm] $\Vert \matr{A} \Vert_1 = \max_{1 \leq j \leq n} \sum_{i=1}^{m} \vert a_{i,j} \vert$
+\item[1-norm] $\Vert \matr{A} \Vert_1 = \max_{1 \leq j \leq n} \sum_{i=1}^{m} \vert a_{i,j} \vert$ (i.e. max sum of the columns in absolute value)
 
 \item[Frobenius norm] $\Vert \matr{A} \Vert_F = \sqrt{ \sum_{i=1}^{m} \sum_{j=1}^{n} a_{i,j}^2 }$
 \end{descriptionlist}
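The three norms, and the spectral-radius definition of the 2-norm, can be cross-checked with NumPy (an illustration of mine):

```python
import numpy as np

A = np.array([[1.0, -2.0],
              [3.0,  4.0]])

print(np.linalg.norm(A, 2))                        # spectral norm
print(np.sqrt(np.linalg.eigvalsh(A.T @ A).max()))  # same value: sqrt(rho(A^T A))
print(np.linalg.norm(A, 1))                        # 6.0: max column sum in absolute value
print(np.linalg.norm(A, 'fro'))                    # sqrt(1+4+9+16) ~ 5.477
```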
@@ -210,12 +211,12 @@ Common norms are:
 \end{enumerate}
 
 \item[Orthogonal basis] \marginnote{Orthogonal basis}
-Given an $n$-dimensional vector space $V$ and a basis $\beta = \{ \vec{b}_1, \dots, \vec{b}_n \}$ of $V$.
+Given a $n$-dimensional vector space $V$ and a basis $\beta = \{ \vec{b}_1, \dots, \vec{b}_n \}$ of $V$.
 $\beta$ is an orthogonal basis if:
 \[ \vec{b}_i \perp \vec{b}_j \text{ for } i \neq j \text{ (i.e.} \left\langle \vec{b}_i, \vec{b}_j \right\rangle = 0 \text{)} \]
 
 \item[Orthonormal basis] \marginnote{Orthonormal basis}
-Given an $n$-dimensional vector space $V$ and an orthogonal basis $\beta = \{ \vec{b}_1, \dots, \vec{b}_n \}$ of $V$.
+Given a $n$-dimensional vector space $V$ and an orthogonal basis $\beta = \{ \vec{b}_1, \dots, \vec{b}_n \}$ of $V$.
 $\beta$ is an orthonormal basis if:
 \[ \Vert \vec{b}_i \Vert_2 = 1 \text{ (or} \left\langle \vec{b}_i, \vec{b}_i \right\rangle = 1 \text{)} \]
 
@@ -267,7 +268,7 @@ and is found by minimizing the distance between $\pi_U(\vec{x})$ and $\vec{x}$.
 
 Given a square matrix $\matr{A} \in \mathbb{R}^{n \times n}$,
 $\lambda \in \mathbb{C}$ is an eigenvalue of $\matr{A}$ \marginnote{Eigenvalue}
-with corresponding eigenvector $\vec{x} \in \mathbb{R}^n \smallsetminus \{ \nullvec \}$ if \marginnote{Eigenvector}
+with corresponding eigenvector $\vec{x} \in \mathbb{R}^n \smallsetminus \{ \nullvec \}$ if: \marginnote{Eigenvector}
 \[ \matr{A}\vec{x} = \lambda\vec{x} \]
 
 It is equivalent to say that:
@@ -295,7 +296,7 @@ we can prove that $\forall c \in \mathbb{R} \smallsetminus \{0\}:$ $c\vec{x}$ is
 
 \begin{description}
 \item[Eigenspace] \marginnote{Eigenspace}
-Set of all the eigenvectors of $\matr{A} \in \mathbb{R}^{n \times n}$ associated to an eigenvalues $\lambda$.
+Set of all the eigenvectors of $\matr{A} \in \mathbb{R}^{n \times n}$ associated to an eigenvalue $\lambda$.
 This set is a subspace of $\mathbb{R}^n$.
 
 \item[Eigenspectrum] \marginnote{Eigenspectrum}
@@ -306,7 +307,7 @@ we can prove that $\forall c \in \mathbb{R} \smallsetminus \{0\}:$ $c\vec{x}$ is
 \begin{description}
 \item[Geometric multiplicity] \marginnote{Geometric multiplicity}
 Given an eigenvalue $\lambda$ of a matrix $\matr{A} \in \mathbb{R}^{n \times n}$.
-The geometric multiplicity of $\lambda$ is the number of linearly independent eigenvectors associated with $\lambda$.
+The geometric multiplicity of $\lambda$ is the number of linearly independent eigenvectors associated to $\lambda$.
 \end{description}
 
 
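A small NumPy check of the definition $\matr{A}\vec{x} = \lambda\vec{x}$ (my snippet, not from the notes):

```python
import numpy as np

A = np.array([[2.0, 1.0],
              [1.0, 2.0]])

lam, X = np.linalg.eig(A)   # eigenvalues and eigenvectors (columns of X)
print(lam)                  # e.g. [3. 1.]
for i in range(len(lam)):
    print(np.allclose(A @ X[:, i], lam[i] * X[:, i]))  # True: A x = lambda x
```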
@@ -54,7 +54,7 @@ has an unique solution iff one of the following conditions is satisfied:
 
 The solution can be algebraically determined as \marginnote{Algebraic solution to linear systems}
 \[ \matr{A}\vec{x} = \vec{b} \iff \vec{x} = \matr{A}^{-1}\vec{b} \]
-However this approach requires to compute the inverse of a matrix, which has a time complexity of $O(n^3)$.
+However, this approach requires computing the inverse of a matrix, which has a time complexity of $O(n^3)$.
 
 
 
@@ -74,21 +74,23 @@ the matrix $\matr{A} \in \mathbb{R}^{n \times n}$ is factorized into $\matr{A} =
 \item $\matr{U} \in \mathbb{R}^{n \times n}$ is an upper triangular matrix
 \end{itemize}
 %
-As directly solving a system with a triangular matrix has complexity $O(n^2)$ (forward or backward substitutions),
-the system can be decomposed to:
-\begin{equation}
+The system can be decomposed to:
+\[
 \begin{split}
 \matr{A}\vec{x} = \vec{b} & \iff \matr{LU}\vec{x} = \vec{b} \\
 & \iff \vec{y} = \matr{U}\vec{x} \text{ \& } \matr{L}\vec{y} = \vec{b}
 \end{split}
-\end{equation}
+\]
 To find the solution, it is sufficient to solve in order:
 \begin{enumerate}
 \item $\matr{L}\vec{y} = \vec{b}$ (solved w.r.t. $\vec{y}$)
 \item $\vec{y} = \matr{U}\vec{x}$ (solved w.r.t. $\vec{x}$)
 \end{enumerate}
 
-The overall complexity is $O(\frac{n^3}{3}) + 2 \cdot O(n^2) = O(\frac{n^3}{3})$
+The overall complexity is $O(\frac{n^3}{3}) + 2 \cdot O(n^2) = O(\frac{n^3}{3})$.\\
+$O(\frac{n^3}{3})$ is the time complexity of the LU factorization.
+$O(n^2)$ is the complexity of directly solving a system with a triangular matrix (forward or backward substitutions).
 
 
 \subsection{Gaussian factorization with pivoting}
 \marginnote{Gaussian factorization with pivoting}
@@ -100,12 +102,12 @@ This is achieved by using a permutation matrix $\matr{P}$, which is obtained as
 
 The permuted system becomes $\matr{P}\matr{A}\vec{x} = \matr{P}\vec{b}$ and the factorization is obtained as $\matr{P}\matr{A} = \matr{L}\matr{U}$.
 The system can be decomposed to:
-\begin{equation}
+\[
 \begin{split}
 \matr{P}\matr{A}\vec{x} = \matr{P}\vec{b} & \iff \matr{L}\matr{U}\vec{x} = \matr{P}\vec{b} \\
 & \iff \vec{y} = \matr{U}\vec{x} \text{ \& } \matr{L}\vec{y} = \matr{P}\vec{b}
 \end{split}
-\end{equation}
+\]
 
 An alternative formulation (which is what \texttt{SciPy} uses)
 is defined as:
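A sketch of the whole procedure with SciPy, which implements Gaussian factorization with partial pivoting (the matrix and right-hand side are made up for illustration):

```python
import numpy as np
from scipy.linalg import lu_factor, lu_solve

A = np.array([[2.0, 1.0],
              [4.0, 3.0]])
b = np.array([3.0, 7.0])

lu, piv = lu_factor(A)        # O(n^3/3): LU factorization with partial pivoting
x = lu_solve((lu, piv), b)    # O(n^2): forward then backward substitution
print(x)                      # [1. 1.]
print(np.allclose(A @ x, b))  # True
```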
@@ -132,7 +134,7 @@ The two most common families of iterative methods are:
 compute the sequence as:
 \[ \vec{x}_k = \matr{B}\vec{x}_{k-1} + \vec{d} \]
 where $\matr{B}$ is called iteration matrix and $\vec{d}$ is computed from the $\vec{b}$ vector of the system.
-The time complexity per iteration $O(n^2)$.
+The time complexity per iteration is $O(n^2)$.
 
 \item[Gradient-like methods] \marginnote{Gradient-like methods}
 have the form:
@@ -142,20 +144,20 @@ The two most common families of iterative methods are:
 
 \subsection{Stopping criteria}
 \marginnote{Stopping criteria}
-One ore more stopping criteria are needed to determine when to truncate the sequence (as it is theoretically infinite).
+One or more stopping criteria are needed to determine when to truncate the sequence (as it is theoretically infinite).
 The most common approaches are:
 \begin{descriptionlist}
 \item[Residual based]
 The algorithm is terminated when the current solution is close enough to the exact solution.
 The residual at iteration $k$ is computed as $\vec{r}_k = \vec{b} - \matr{A}\vec{x}_k$.
-Given a tolerance $\varepsilon$, the algorithm stops when:
+Given a tolerance $\varepsilon$, the algorithm may stop when:
 \begin{itemize}
-\item $\Vert \vec{r}_k \Vert \leq \varepsilon$
-\item $\frac{\Vert \vec{r}_k \Vert}{\Vert \vec{b} \Vert} \leq \varepsilon$
+\item $\Vert \vec{r}_k \Vert \leq \varepsilon$ (absolute)
+\item $\frac{\Vert \vec{r}_k \Vert}{\Vert \vec{b} \Vert} \leq \varepsilon$ (relative)
 \end{itemize}
 
 \item[Update based]
-The algorithm is terminated when the change between iterations is very small.
+The algorithm is terminated when the difference between iterations is very small.
 Given a tolerance $\tau$, the algorithm stops when:
 \[ \Vert \vec{x}_{k} - \vec{x}_{k-1} \Vert \leq \tau \]
 \end{descriptionlist}
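A sketch of a stationary method (Jacobi) combining both stopping criteria; everything here (function name, tolerances, test system) is my own illustration, not from the notes:

```python
import numpy as np

def jacobi(A, b, eps=1e-10, tau=1e-12, max_iter=10_000):
    D = np.diag(A)               # diagonal of A
    R = A - np.diagflat(D)       # off-diagonal part: A = D + R
    x = np.zeros_like(b)
    for _ in range(max_iter):
        x_new = (b - R @ x) / D  # one step of x_k = B x_{k-1} + d
        if np.linalg.norm(b - A @ x_new) / np.linalg.norm(b) <= eps:  # relative residual
            return x_new
        if np.linalg.norm(x_new - x) <= tau:                          # update based
            return x_new
        x = x_new
    return x

A = np.array([[4.0, 1.0],
              [2.0, 5.0]])       # strictly diagonally dominant: Jacobi converges
b = np.array([6.0, 12.0])
print(jacobi(A, b))              # ~ [1. 2.]
```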
@@ -183,5 +185,5 @@ Finally, we can define the \textbf{condition number} of a matrix $\matr{A}$ as:
 \[ K(\matr{A}) = \Vert \matr{A} \Vert \cdot \Vert \matr{A}^{-1} \Vert \]
 
 A system is \textbf{ill-conditioned} if $K(\matr{A})$ is large \marginnote{Ill-conditioned}
-(i.e. small perturbation on the input causes large changes in the output).
+(i.e. a small perturbation of the input causes a large change of the output).
 Otherwise it is \textbf{well-conditioned}. \marginnote{Well-conditioned}
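NumPy exposes $K(\matr{A})$ directly; the Hilbert matrix is a classic ill-conditioned example (a snippet of mine):

```python
import numpy as np
from scipy.linalg import hilbert

print(np.linalg.cond(np.eye(4)))   # 1.0: as well-conditioned as it gets
print(np.linalg.cond(hilbert(8)))  # ~1.5e10: ill-conditioned
```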