mirror of https://github.com/NotXia/unibo-ai-notes.git, synced 2025-12-14 18:51:52 +01:00
Fix typos <noupdate>

@@ -290,7 +290,7 @@
 \item[Generative adversarial network (GAN)] \marginnote{Generative adversarial network (GAN)}
 Given:
 \begin{itemize}
-\item A generator $G(z; \theta)$ that takes an input latent vector $z_i \sim p_\text{lat}(z)$ and produces an image $\hat{x}_j \sim p_\text{gen}(x)$,
+\item A generator $G(z; \theta)$ that takes as input a latent vector $z_i \sim p_\text{lat}(z)$ and produces an image $\hat{x}_j \sim p_\text{gen}(x)$,
 \item A discriminator $D(x; \phi)$ that determines whether $x_i$ is a real image from $p_\text{real}(x)$.
 \end{itemize}
 A generative adversarial network trains both $D$ and $G$ with the aim of making $p_\text{gen}$ converge to $p_\text{real}$.
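For reference, the training objective this hunk alludes to is the standard GAN minimax game; a minimal statement in the same notation, assuming the notes follow the original Goodfellow et al. formulation:

\[ \min_\theta \max_\phi \; \mathbb{E}_{x \sim p_\text{real}(x)}\left[\log D(x; \phi)\right] + \mathbb{E}_{z \sim p_\text{lat}(z)}\left[\log\left(1 - D(G(z; \theta); \phi)\right)\right] \]

At the equilibrium of this game, $p_\text{gen} = p_\text{real}$ and $D$ outputs $1/2$ everywhere.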
@@ -109,16 +109,16 @@
 \Vert f(x^{(i)}) - f(x^{(j)}) \Vert_2^2 & \text{if $y^{(i, j)} = +1$} \\
 \max\left\{0, m - \Vert f(x^{(i)}) - f(x^{(j)}) \Vert_2\right\}^2 & \text{if $y^{(i, j)} = 0$} \\
 \end{cases} \\
-&= y^{(i, j)} \Vert f(x^{(i)}) - f(x^{(j)}) \Vert_2^2 + (1-y^{(i, j)}) \max\left\{0, m - \Vert f(x^{(i)}) - f(x^{(j)}) \Vert_2\right\}
+&= y^{(i, j)} \Vert f(x^{(i)}) - f(x^{(j)}) \Vert_2^2 + (1-y^{(i, j)}) \max\left\{0, m - \Vert f(x^{(i)}) - f(x^{(j)}) \Vert_2\right\}^2
 \end{split}
 \]

 \begin{remark}
-A margin $m^+$ can also be added to the positive branch to prevent collapsing all embeddings of the same class to the same point.
+A margin $m^+$ can also be included in the positive branch to prevent collapsing all embeddings of the same class to the same point.
 \end{remark}

 \begin{remark}
-The negative branch $\max\left\{0, m - \Vert f(x^{(i)}) - f(x^{(j)}) \Vert_2\right\}$ is the hinge loss, which is used in SVM.
+The negative branch $\max\left\{0, m - \Vert f(x^{(i)}) - f(x^{(j)}) \Vert_2\right\}^2$ is a squared hinge loss, as used in SVMs.
 \end{remark}

 \begin{remark}
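As a cross-check of the compact form above, a minimal NumPy sketch of the pairwise loss (the function name and margin default are illustrative, not from the notes):

    import numpy as np

    def contrastive_loss(f_xi, f_xj, y, m=1.0):
        # f_xi, f_xj: embeddings f(x^(i)) and f(x^(j)); y = 1 for a positive pair, 0 for a negative one.
        d = np.linalg.norm(f_xi - f_xj)  # Euclidean distance between the embeddings
        # Positive pairs are pulled together; negative pairs are pushed to at least margin m apart.
        return y * d**2 + (1 - y) * max(0.0, m - d)**2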
@@ -202,7 +202,7 @@
 \item[Haar-like features] \marginnote{Haar-like features}
 For face detection, a $24 \times 24$ patch of the image is considered (for now) and the weak classifiers define rectangular filters composed of 2 to 4 subsections applied at fixed positions of the patch.

-Given a patch $x$, a weak learned $\texttt{WL}_j$ classifies it as:
+Given a patch $x$, a weak learner $\texttt{WL}_j$ classifies it as:
 \[
 \texttt{WL}_j(x) = \begin{cases}
 1 & \text{if $s_j f_j \geq s_j \rho_j$} \\
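For intuition, a minimal sketch of this decision stump (names are illustrative; it assumes $f_j$ returns the Haar feature response on the patch, and that the weak learner outputs 0 when the test fails):

    def weak_learner(x, f_j, s_j, rho_j):
        # s_j in {+1, -1} selects the direction of the inequality,
        # rho_j is the learned threshold on the feature response f_j(x).
        return 1 if s_j * f_j(x) >= s_j * rho_j else 0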
@@ -594,7 +594,7 @@
 Consider $k$ different anchors so that the RPN outputs $k$ objectness scores (overall shape of $2k \times H_L \times W_L$) and $k$ corrections (overall shape of $4k \times H_L \times W_L$) at each pixel.

 \begin{remark}
-Virtually, this can be seen as putting together the outputs of $k$ different $1$-anchor RPN (with different anchors).
+Conceptually, this can be seen as putting together the outputs of $k$ different $1$-anchor RPNs (with different anchors).
 \end{remark}
 \end{description}

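To make the output shapes concrete, a small NumPy sketch (the anchor count, feature-map size, and channel layout are assumptions for illustration):

    import numpy as np

    k, H_L, W_L = 9, 38, 50                # e.g. 9 anchors on a 38x50 feature map (made-up sizes)
    scores = np.zeros((2 * k, H_L, W_L))   # 2 objectness logits per anchor at each pixel
    deltas = np.zeros((4 * k, H_L, W_L))   # 4 box corrections per anchor at each pixel
    # The remark's view: k independent 1-anchor RPN outputs stacked along the channel axis
    # (assuming per-anchor channels are grouped together).
    scores_per_anchor = scores.reshape(k, 2, H_L, W_L)
    deltas_per_anchor = deltas.reshape(k, 4, H_L, W_L)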
@@ -134,7 +134,7 @@
 \begin{description}
 \item[Multi-head self-attention (\texttt{MHSA})] \marginnote{Multi-head self-attention}
 Given an input $\matr{Y} \in \mathbb{R}^{M \times d_Y}$, a \texttt{MHSA} block passes it in parallel through $h$ different self-attention blocks to obtain the activations $\matr{A}^{(1)}, \dots, \matr{A}^{(h)}$. The output $\matr{A}$ of the block is obtained as a linear projection of the column-wise concatenation of the activations $\matr{A}^{(i)}$:
-\[ \mathbb{R}^{M \times d_Y} \ni \matr{A} = \left[ A^{(1)} \vert \dots \vert A^{(h)} \right] \matr{W}_O \]
+\[ \mathbb{R}^{M \times d_Y} \ni \matr{A} = \left[ \matr{A}^{(1)} \vert \dots \vert \matr{A}^{(h)} \right] \matr{W}_O \]
 where $\matr{W}_O \in \mathbb{R}^{hd_V \times d_Y}$ is the projection matrix.

 \begin{figure}[H]
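A minimal NumPy sketch of the concatenation-and-projection step (names are illustrative):

    import numpy as np

    def mhsa_combine(A_heads, W_O):
        # A_heads: list of h head outputs, each of shape (M, d_V).
        # W_O: projection matrix of shape (h * d_V, d_Y).
        # Column-wise concatenation gives shape (M, h * d_V); projecting yields A of shape (M, d_Y).
        return np.concatenate(A_heads, axis=1) @ W_O

    # e.g. h = 4 heads, M = 10 tokens, d_V = 16, d_Y = 64:
    A = mhsa_combine([np.random.randn(10, 16) for _ in range(4)], np.random.randn(64, 64))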