Add DL CNN
@@ -9,5 +9,6 @@
\makenotesfront
\input{./sections/_expressivity.tex}
\input{./sections/_training.tex}
\input{./sections/_convolutional_nn.tex}

\end{document}
88 src/deep-learning/sections/_convolutional_nn.tex Normal file
@@ -0,0 +1,88 @@
\chapter{Convolutional neural networks}


\begin{description}
\item[Convolutional neuron] \marginnote{Convolutional neuron}
Neuron influenced by only a subset of the neurons in the previous layer.

\item[Receptive field] \marginnote{Receptive field}
Region of the input image that influences a neuron.

\item[Convolutional layer] \marginnote{Convolutional layer}
Layer composed of convolutional neurons.
Neurons in the same convolutional layer share the same weights and work as a convolutional filter.

\begin{remark}
The weights of the filters are learned.
\end{remark}

A convolutional layer has the following parameters:
\begin{descriptionlist}
\item[Kernel size] \marginnote{Kernel size}
Dimension (i.e. width and height) of the filter.

\item[Stride] \marginnote{Stride}
Offset between consecutive applications of the filter (i.e. a stride $> 1$ reduces the size of the output image).

\item[Padding] \marginnote{Padding}
Artificial enlargement of the image (usually with zeros) so that the filter can also be applied at its borders.

In practice, there are two modes of padding:
\begin{descriptionlist}
\item[Valid] No padding applied.
\item[Same] Apply the minimum padding such that, with stride $1$, the output image has the same size as the input.
\end{descriptionlist}

\item[Depth] \marginnote{Depth}
Number of different kernels to apply (i.e. the number of channels of the output image).
\end{descriptionlist}

The dimension along each axis of the output image is given by:
\[ \left\lfloor \frac{W + P - K}{S} \right\rfloor + 1 \]
where:
\begin{itemize}
\item $W$ is the size of the image (width or height).
\item $P$ is the total amount of padding (summed over both sides).
\item $K$ is the kernel size.
\item $S$ is the stride.
\end{itemize}
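
As a quick sanity check (sizes chosen purely for illustration), a $32 \times 32$ image convolved with a $5 \times 5$ kernel at stride $1$ and no padding (i.e. valid mode) yields an output of size:
\[ \left\lfloor \frac{32 + 0 - 5}{1} \right\rfloor + 1 = 28 \]
along each axis, while same mode adds $P = K - 1 = 4$ (i.e. $2$ pixels per side) so that the output remains $32 \times 32$.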

\begin{remark}
If not specified, a kernel is applied to all the channels of the input image in parallel (with a different set of weights for each channel), and the per-channel results are summed into a single output value.
\end{remark}
\end{description}


\begin{description}
\item[Pooling] \marginnote{Pooling}
Layer that applies a fixed function (with no learnable weights) as a filter.

\begin{descriptionlist}
\item[Max-pooling] \marginnote{Max-pooling}
Filter that computes the maximum of the pixels within the kernel.

\item[Mean-pooling] \marginnote{Mean-pooling}
Filter that computes the average of the pixels within the kernel.
\end{descriptionlist}
\end{description}
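
As an illustration (values chosen arbitrarily), a $2 \times 2$ pooling applied to the patch
\[ \begin{pmatrix} 1 & 3 \\ 2 & 4 \end{pmatrix} \]
outputs $\max\{1, 3, 2, 4\} = 4$ with max-pooling and $\frac{1 + 3 + 2 + 4}{4} = 2.5$ with mean-pooling.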


\section{Parameters}

The number of parameters of a convolutional layer (weights, plus one bias per output channel) is given by:
\[ (K_\text{w} \cdot K_\text{h}) \cdot D_\text{in} \cdot D_\text{out} + D_\text{out} \]
where:
\begin{itemize}
\item $K_\text{w}$ is the width of the kernel.
\item $K_\text{h}$ is the height of the kernel.
\item $D_\text{in}$ is the input depth.
\item $D_\text{out}$ is the output depth.
\end{itemize}
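
For instance (sizes chosen purely for illustration), a $3 \times 3$ convolution mapping $D_\text{in} = 64$ input channels to $D_\text{out} = 128$ output channels has:
\[ (3 \cdot 3) \cdot 64 \cdot 128 + 128 = 73\,856 \text{ parameters} \]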

Therefore, the number of FLOPs is of the order of:
\[ (K_\text{w} \cdot K_\text{h}) \cdot D_\text{in} \cdot D_\text{out} \cdot (O_\text{w} \cdot O_\text{h}) \]
where:
\begin{itemize}
\item $O_\text{w}$ is the width of the output image.
\item $O_\text{h}$ is the height of the output image.
\end{itemize}
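
Continuing the illustrative example above, if that layer produces a $56 \times 56$ output image, a forward pass costs of the order of:
\[ (3 \cdot 3) \cdot 64 \cdot 128 \cdot (56 \cdot 56) \approx 2.3 \cdot 10^8 \text{ FLOPs} \]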
@@ -55,4 +55,18 @@ Composition of perceptrons.
\begin{remark}
Still, deep neural networks make it possible to use fewer neural units.
\end{remark}
\end{description}
\end{description}


\subsection{Parameters}

The number of parameters of a fully connected layer (weights, plus one bias per output unit) is given by:
\[ S_\text{in} \cdot S_\text{out} + S_\text{out} \]
where:
\begin{itemize}
\item $S_\text{in}$ is the dimension of the input of the layer.
\item $S_\text{out}$ is the dimension of the output of the layer.
\end{itemize}
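
For instance (sizes chosen purely for illustration), a fully connected layer with $S_\text{in} = 784$ inputs and $S_\text{out} = 100$ outputs has:
\[ 784 \cdot 100 + 100 = 78\,500 \text{ parameters} \]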

Therefore, the number of FLOPs is of the order of:
\[ S_\text{in} \cdot S_\text{out} \]
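
Continuing the illustrative example above, the same layer costs of the order of $784 \cdot 100 \approx 7.8 \cdot 10^4$ FLOPs per forward pass. Note that, unlike the convolutional case, this cost does not scale with any spatial output size.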