mirror of
https://github.com/NotXia/unibo-ai-notes.git
synced 2025-12-14 18:51:52 +01:00
Add SMM vector calculus
This commit is contained in:
@ -6,7 +6,7 @@
|
||||
|
||||
\usepackage{geometry}
|
||||
\usepackage{graphicx, xcolor}
|
||||
\usepackage{amsmath, amsfonts, amssymb, amsthm, mathtools, bm}
|
||||
\usepackage{amsmath, amsfonts, amssymb, amsthm, mathtools, bm, upgreek}
|
||||
\usepackage{hyperref}
|
||||
\usepackage[nameinlink]{cleveref}
|
||||
\usepackage[all]{hypcap} % Links hyperref to object top and not caption
|
||||
@ -58,7 +58,7 @@
|
||||
\newtheorem*{definition}{Def}
|
||||
|
||||
\newcommand{\ubar}[1]{\text{\b{$#1$}}}
|
||||
\renewcommand{\vec}[1]{{\bm{#1}}}
|
||||
\renewcommand{\vec}[1]{{\mathbf{#1}}}
|
||||
\newcommand{\nullvec}[0]{\bar{\vec{0}}}
|
||||
\newcommand{\matr}[1]{{\bm{#1}}}
|
||||
|
||||
|
||||
@ -11,5 +11,6 @@
|
||||
\input{sections/_linear_algebra.tex}
|
||||
\input{sections/_linear_systems.tex}
|
||||
\input{sections/_matrix_decomp.tex}
|
||||
\input{sections/_vector_calculus.tex}
|
||||
|
||||
\end{document}
|
||||
@ -0,0 +1,131 @@
|
||||
\chapter{Vector calculus}
|
||||
|
||||
|
||||
\section{Gradient of real-valued multivariate functions}
|
||||
|
||||
\begin{description}
|
||||
\item[Gradient] \marginnote{Gradient}
|
||||
Given a function $f: \mathbb{R}^n \rightarrow \mathbb{R}$,
|
||||
the gradient is a row vector containing the partial derivatives of $f$:
|
||||
\[
|
||||
\nabla f(\vec{x}) =
|
||||
\begin{pmatrix}
|
||||
\frac{\partial f(\vec{x})}{\partial x_1} & \frac{\partial f(\vec{x})}{\partial x_2} & \dots & \frac{\partial f(\vec{x})}{\partial x_n}
|
||||
\end{pmatrix}
|
||||
\in \mathbb{R}^{1 \times n}
|
||||
\]
|
||||
|
||||
\item[Hessian] \marginnote{Hessian matrix}
|
||||
Given a function $f: \mathbb{R}^n \rightarrow \mathbb{R}$,
|
||||
the Hessian matrix $\matr{H} \in \mathbb{R}^{n \times n}$ contains the second derivatives of $f$:
|
||||
\[
|
||||
\matr{H} =
|
||||
\begin{pmatrix}
|
||||
\frac{\partial^2 f}{\partial x_1^2} & \frac{\partial^2 f}{\partial x_1 \partial x_2} & \dots & \frac{\partial^2 f}{\partial x_1 \partial x_n} \\
|
||||
\frac{\partial^2 f}{\partial x_2 \partial x_1} & \frac{\partial^2 f}{\partial x_2^2} & \dots & \vdots \\
|
||||
\vdots & \vdots & \ddots & \vdots \\
|
||||
\frac{\partial^2 f}{\partial x_n \partial x_1} & \dots & \dots & \frac{\partial^2 f}{\partial x_n^2}
|
||||
\end{pmatrix}
|
||||
\]
|
||||
In other words, $H_{i,j} = \frac{\partial^2 f}{\partial x_i \partial x_j}$.
|
||||
Moreover, $\matr{H}$ is symmetric.
|
||||
\end{description}
|
||||
|
||||
\subsection{Partial differentiation rules}
|
||||
\begin{description}
|
||||
\item[Product rule] \marginnote{Product rule}
|
||||
Let $f, g: \mathbb{R}^n \rightarrow \mathbb{R}$:
|
||||
\[
|
||||
\frac{\partial}{\partial \vec{x}} (f(\vec{x})g(\vec{x})) =
|
||||
\frac{\partial f}{\partial \vec{x}} g(\vec{x}) + f(\vec{x}) \frac{\partial g}{\partial \vec{x}}
|
||||
\]
|
||||
\item[Sum rule] \marginnote{Sum rule}
|
||||
Let $f, g: \mathbb{R}^n \rightarrow \mathbb{R}$:
|
||||
\[
|
||||
\frac{\partial}{\partial \vec{x}} (f(\vec{x}) + g(\vec{x})) =
|
||||
\frac{\partial f}{\partial \vec{x}} + \frac{\partial g}{\partial \vec{x}}
|
||||
\]
|
||||
\item[Chain rule] \marginnote{Chain rule}
|
||||
Let $f: \mathbb{R}^n \rightarrow \mathbb{R}$ and $\vec{g}$ a vector of $n$ functions $g_i: \mathbb{R}^m \rightarrow \mathbb{R}$:
|
||||
\[
|
||||
\frac{\partial}{\partial \vec{x}} (f \circ \vec{g})(\vec{x}) =
|
||||
\frac{\partial}{\partial \vec{x}} (f(\vec{g}(\vec{x}))) =
|
||||
\frac{\partial f}{\partial \vec{g}} \frac{\partial \vec{g}}{\partial \vec{x}}
|
||||
\]
|
||||
|
||||
More precisely, consider a function $f: \mathbb{R}^2 \rightarrow \mathbb{R}$ of two variables
|
||||
$g_1(t), g_2(t): \mathbb{R} \rightarrow \mathbb{R}$ that are functions of $t$.
|
||||
The derivative of $f$ with respect to $t$ is:
|
||||
\[
|
||||
\frac{\text{d}f}{\text{d}t} =
|
||||
% \frac{\partial f}{\partial (g_1, g_2)} \frac{\partial (g_1, g_2)}{\partial t} =
|
||||
\begin{pmatrix}
|
||||
\frac{\partial f}{\partial g_1} & \frac{\partial f}{\partial g_2}
|
||||
\end{pmatrix}
|
||||
\begin{pmatrix}
|
||||
\frac{\partial g_1}{\partial t} \\ \frac{\partial g_2}{\partial t}
|
||||
\end{pmatrix}
|
||||
= \frac{\partial f}{\partial g_1} \frac{\partial g_1}{\partial t} + \frac{\partial f}{\partial g_2} \frac{\partial g_2}{\partial t}
|
||||
\]
|
||||
In other words, the first matrix represents the gradient of $f$ w.r.t. its variables and
|
||||
the second matrix contains in the $i$-th row the gradient of $g_i$.
|
||||
|
||||
Therefore, if $g_i$ are in turn multivariate functions $g_1(s, t), g_2(s, t): \mathbb{R}^2 \rightarrow \mathbb{R}$,
|
||||
the chain rule can be applied as:
|
||||
\[
|
||||
\frac{\text{d}f}{\text{d}(s, t)} =
|
||||
\begin{pmatrix}
|
||||
\frac{\partial f}{\partial g_1} & \frac{\partial f}{\partial g_2}
|
||||
\end{pmatrix}
|
||||
\begin{pmatrix}
|
||||
\frac{\partial g_1}{\partial s} & \frac{\partial g_1}{\partial t} \\
|
||||
\frac{\partial g_2}{\partial s} & \frac{\partial g_2}{\partial t}
|
||||
\end{pmatrix}
|
||||
\]
|
||||
|
||||
\begin{example}
|
||||
Let $f(x_1, x_2) = x_1^2 + 2x_2$, where $x_1 = \sin(t)$ and $x_2 = \cos(t)$.
|
||||
\[
|
||||
\begin{split}
|
||||
\frac{\text{d}f}{\text{d}t} & =
|
||||
\frac{\partial f}{\partial x_1}\frac{\partial x_1}{\partial t} + \frac{\partial f}{\partial x_2}\frac{\partial x_2}{\partial t} \\
|
||||
& = (2x_1)(\cos(t)) + (2)(-\sin(t)) \\
|
||||
& = 2\sin(t)\cos(t) - 2\sin(t)
|
||||
\end{split}
|
||||
\]
|
||||
\end{example}
|
||||
\end{description}
|
||||
|
||||
|
||||
|
||||
\section{Gradient of vector-valued multivariate functions}
|
||||
|
||||
\begin{description}
|
||||
\item[Vector-valued function]
|
||||
Function $\vec{f}: \mathbb{R}^n \rightarrow \mathbb{R}^m$ with $n \geq 1$ and $m > 1$.
|
||||
Given $\vec{x} \in \mathbb{R}^n$, the output can be represented as:
|
||||
\[
|
||||
\vec{f}(\vec{x}) =
|
||||
\begin{pmatrix}
|
||||
f_1(\vec{x}) \\ \vdots \\ f_m(\vec{x})
|
||||
\end{pmatrix} \in \mathbb{R}^m
|
||||
\]
|
||||
where $f_i: \mathbb{R}^n \rightarrow \mathbb{R}$.
|
||||
|
||||
\item[Jacobian] \marginnote{Jacobian matrix}
|
||||
Given $\vec{f}: \mathbb{R}^n \rightarrow \mathbb{R}^m$, the Jacobian matrix $\matr{J} \in \mathbb{R}^{m \times n}$
|
||||
contains the first-order derivatives of $\vec{f}$:
|
||||
\[
|
||||
\matr{J} = \nabla\vec{f}(\vec{x}) =
|
||||
\begin{pmatrix}
|
||||
\frac{\partial \vec{f}(\vec{x})}{\partial x_1} & \dots & \frac{\partial \vec{f}(\vec{x})}{\partial x_n}
|
||||
\end{pmatrix} =
|
||||
\begin{pmatrix}
|
||||
\frac{\partial f_1(\vec{x})}{\partial x_1} & \dots & \frac{\partial f_1(\vec{x})}{\partial x_n} \\
|
||||
\vdots & \ddots & \vdots \\
|
||||
\frac{\partial f_m(\vec{x})}{\partial x_1} & \dots & \frac{\partial f_m(\vec{x})}{\partial x_n} \\
|
||||
\end{pmatrix}
|
||||
\]
|
||||
In other words, $J_{i,j} = \frac{\partial f_i}{\partial x_j}$.
|
||||
Note that the Jacobian matrix generalizes the gradient: for $m = 1$ it reduces to the gradient of a real-valued function.
|
||||
\end{description}
|
||||
Reference in New Issue
Block a user