Add DL CNN

2024-03-28 09:30:39 +01:00
parent 86d18864e8
commit 692acb2888
3 changed files with 104 additions and 1 deletion

View File

@@ -9,5 +9,6 @@
\makenotesfront
\input{./sections/_expressivity.tex}
\input{./sections/_training.tex}
\input{./sections/_convolutional_nn.tex}
\end{document}

View File

@@ -0,0 +1,88 @@
\chapter{Convolutional neural networks}
\begin{description}
\item[Convolutional neuron] \marginnote{Convolutional neuron}
Neuron that is influenced by only a subset of the neurons in the previous layer.
\item[Receptive field] \marginnote{Receptive field}
Region of the input image that influences a neuron.
\item[Convolutional layer] \marginnote{Convolutional layer}
Layer composed of convolutional neurons.
Neurons in the same convolutional layer share the same weights and work as a convolutional filter.
\begin{remark}
The weights of the filters are learned.
\end{remark}
A convolutional layer has the following parameters:
\begin{descriptionlist}
\item[Kernel size] \marginnote{Kernel size}
Dimension (i.e. width and height) of the filter.
\item[Stride] \marginnote{Stride}
Offset between consecutive applications of the filter (i.e. a stride $> 1$ reduces the size of the output image).
\item[Padding] \marginnote{Padding}
Artificial enlargement of the image along its borders (usually with zeros).
In practice, there are two modes of padding:
\begin{descriptionlist}
\item[Valid] No padding applied.
\item[Same] Apply the minimum padding needed so that, with stride $1$, the output has the same size as the input.
\end{descriptionlist}
\item[Depth] \marginnote{Depth}
Number of different kernels to apply (i.e. it determines the number of channels of the output image).
\end{descriptionlist}
The dimension along each axis of the output image is given by (a worked example follows this description list):
\[ \left\lfloor \frac{W + P - K}{S} \right\rfloor + 1 \]
where:
\begin{itemize}
\item $W$ is the size of the image (width or height).
\item $P$ is the total padding added along that axis (both sides combined).
\item $K$ is the kernel size.
\item $S$ is the stride.
\end{itemize}
\begin{remark}
If not specified otherwise, a kernel spans all the channels of the input image (with a different set of weights for each channel) and the per-channel results are summed into a single output channel.
\end{remark}
\end{description}
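As a worked example (the values are chosen arbitrarily for illustration), consider a $32 \times 32$ input image convolved with a $5 \times 5$ kernel, stride $1$ and no padding (valid mode):
\[ \frac{32 + 0 - 5}{1} + 1 = 28 \]
so the output is a $28 \times 28$ image. With same padding, the minimum total padding is $P = K - 1 = 4$ and the output size is $\frac{32 + 4 - 5}{1} + 1 = 32$, equal to the input.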
\begin{description}
\item[Pooling] \marginnote{Pooling}
Layer that applies a fixed (non-learned) function as a filter (a small worked example follows this list).
\begin{descriptionlist}
\item[Max-pooling] \marginnote{Max-pooling}
Filter that computes the maximum of the pixels within the kernel.
\item[Mean-pooling] \marginnote{Mean-pooling}
Filter that computes the average of the pixels within the kernel.
\end{descriptionlist}
\end{description}
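As a small worked example (arbitrary values), $2 \times 2$ max-pooling with stride $2$ takes the maximum of each non-overlapping $2 \times 2$ block of a $4 \times 4$ image:
\[
\begin{pmatrix}
1 & 3 & 2 & 0 \\
5 & 2 & 1 & 4 \\
0 & 1 & 3 & 3 \\
2 & 6 & 1 & 2
\end{pmatrix}
\;\longrightarrow\;
\begin{pmatrix}
5 & 4 \\
6 & 3
\end{pmatrix}
\]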
\section{Parameters}
The number of parameters of a convolutional layer is given by (a worked example is at the end of this section):
\[ (K_\text{w} \cdot K_\text{h}) \cdot D_\text{in} \cdot D_\text{out} + D_\text{out} \]
where:
\begin{itemize}
\item $K_\text{w}$ is the width of the kernel.
\item $K_\text{h}$ is the height of the kernel.
\item $D_\text{in}$ is the input depth.
\item $D_\text{out}$ is the output depth.
\end{itemize}
Therefore, since each kernel weight is used once per output position, the number of FLOPs per forward pass is of the order of:
\[ (K_\text{w} \cdot K_\text{h}) \cdot D_\text{in} \cdot D_\text{out} \cdot (O_\text{w} \cdot O_\text{h}) \]
where:
\begin{itemize}
\item $O_\text{w}$ is the width of the output image.
\item $O_\text{h}$ is the height of the output image.
\end{itemize}
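As a worked example (the values are chosen arbitrarily), consider a convolutional layer with a $3 \times 3$ kernel, $D_\text{in} = 64$, $D_\text{out} = 128$ and a $28 \times 28$ output image:
\[ (3 \cdot 3) \cdot 64 \cdot 128 + 128 = 73\,856 \text{ parameters} \]
\[ (3 \cdot 3) \cdot 64 \cdot 128 \cdot (28 \cdot 28) \approx 5.8 \cdot 10^{7} \text{ FLOPs} \]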

View File

@@ -56,3 +56,17 @@ Composition of perceptrons.
Still, deep neural networks allow the use of fewer neural units.
\end{remark}
\end{description}
\subsection{Parameters}
The number of parameters of a dense layer is given by:
\[ S_\text{in} \cdot S_\text{out} + S_\text{out} \]
where:
\begin{itemize}
\item $S_\text{in}$ is the dimension of the input of the layer.
\item $S_\text{out}$ is the dimension of the output of the layer.
\end{itemize}
Therefore, the number of FLOPs is of the order of (a worked example follows):
\[ S_\text{in} \cdot S_\text{out} \]
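As a worked example (arbitrary values), a dense layer mapping an input of dimension $S_\text{in} = 784$ to an output of dimension $S_\text{out} = 128$ has:
\[ 784 \cdot 128 + 128 = 100\,480 \text{ parameters} \]
and requires of the order of $784 \cdot 128 \approx 10^{5}$ FLOPs.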