mirror of https://github.com/NotXia/unibo-ai-notes.git
synced 2025-12-14 18:51:52 +01:00
Update example environment style
@@ -21,6 +21,10 @@
\usepackage{eurosym}
\usepackage{bussproofs} % Deductive tree
\usepackage{varwidth}
\usepackage[most]{tcolorbox}
\usepackage{tikz}
\tcbuselibrary{breakable}
\usetikzlibrary{decorations.pathmorphing,calc}

\geometry{ margin=3cm, lmargin=1.5cm, rmargin=4.5cm, marginparwidth=3cm }
\hypersetup{ colorlinks, citecolor=black, filecolor=black, linkcolor=black, urlcolor=black, linktoc=all }
@@ -48,6 +52,7 @@
\lstset{style=mystyle}
\lstset{language=Python}

\NewDocumentEnvironment{descriptionlist}{}{%
\begin{description}[labelindent=1em]
}{
@@ -57,15 +62,38 @@
\renewcommand*{\marginfont}{\color{gray}\footnotesize}
\renewcommand*\chapterpagestyle{scrheadings} % Header in chapter pages

\theoremstyle{definition}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{corollary}{Corollary}[theorem]
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem*{example}{Example}
\newtheorem*{privateexample}{Example}
\theoremstyle{definition}
\newtheorem*{definition}{Def}
\newtheorem*{remark}{Remark}

\newtcolorbox{marginbar}[3]{ % #1: color | #2: (number of lines - 1) | #3: line thickness
enhanced, blank, breakable,
overlay = {
\foreach \t in {0,...,#2}{
\draw[decorate, #3, #1]
([xshift=-3-\t mm]frame.north west)
--
([xshift=-3-\t mm]frame.south west);
},
}
}

\newenvironment{example}{%
\begin{marginbar}{lightgray}{0}{thick}
\begin{privateexample}
}{%
\end{privateexample}
\end{marginbar}
}
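% Usage sketch (illustrative, not part of the diff): with the definitions above,
% the `example` environment wraps its body in a `privateexample` theorem and draws
% a lightgray marginbar rule next to it, e.g.
% \begin{example}
%   The function $f(x) = x^2$ is convex on $\mathbb{R}$.
% \end{example}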

\newcommand{\ubar}[1]{\text{\b{$#1$}}}
\renewcommand{\vec}[1]{{\bm{\mathbf{#1}}}}
\newcommand{\nullvec}[0]{\bar{\vec{0}}}
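% Illustrative note (assumption, not in the diff): $\vec{x}$ typesets a bold vector,
% $\ubar{x}$ underlines its argument, and $\nullvec$ denotes the null vector $\bar{\vec{0}}$.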

@@ -18,7 +18,14 @@
\DeclareAcronym{cs}{short=CS, long=conditioned stimulus}
\DeclareAcronym{cr}{short=CR, long=conditioned response}

\newtheorem*{casestudy}{Case study}
\newtheorem*{privatecasestudy}{Case study}
\newenvironment{casestudy}{%
\begin{marginbar}{olive}{0}{thick}
\begin{privatecasestudy}
}{%
\end{privatecasestudy}
\end{marginbar}
}
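% Usage sketch (illustrative, not part of the diff): `casestudy` behaves like the
% `example` environment above, but draws an olive marginbar, e.g.
% \begin{casestudy}
%   Pavlov's dogs: after conditioning, the bell (\ac{cs}) alone elicits salivation (\ac{cr}).
% \end{casestudy}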

\begin{document}

@@ -108,7 +108,7 @@ Generally, a neuron does the following:
\item[Cell body/soma] Metabolic center of the cell.
\end{description}

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/neuron_eukaryotic.png}
\caption{Neuron as a eukaryotic cell}
@@ -151,7 +151,7 @@ There are three types of synapses:
\item[Axoaxonic] \marginnote{Axoaxonic}
Synapses that a neuron makes onto the synapses of another neuron.
In this case, the transmitting neuron can be seen as a signal modulator of the receiving neuron.
\begin{figure}[h]
\begin{figure}[H]
\begin{subfigure}{.3\textwidth}
\centering
\includegraphics[width=\linewidth]{./img/axosomatic.png}
@@ -215,7 +215,7 @@ In a neuron, there are four regions that handle signals:
\item[Electrical synapses] The \ac{ap} is directly transmitted to the next neurons.
\end{description}

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{./img/neuron_transmission.png}
\caption{Transmitting regions of different types of neurons}
@@ -286,7 +286,7 @@ In a neuron, there are four regions that handle signals:
\end{remark}
\end{enumerate}

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{./img/neuron_transmission2.png}
\caption{

@@ -7,7 +7,14 @@
\def\lastupdate{{PLACEHOLDER-LAST-UPDATE}}
\def\giturl{{PLACEHOLDER-GIT-URL}}

\newtheorem*{casestudy}{Case study}
\newtheorem*{privatecasestudy}{Case study}
\newenvironment{casestudy}{%
\begin{marginbar}{olive}{0}{thick}
\begin{privatecasestudy}
}{%
\end{privatecasestudy}
\end{marginbar}
}

\begin{document}

@@ -12,7 +12,7 @@ It considers the player as the entity that maximizes (\textsc{Max}) its utility
the opponent as the entity that (optimally) minimizes (\textsc{Min}) the utility of the player.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/_minmax.pdf}
\caption{Example of game tree with propagated scores}
@@ -121,7 +121,7 @@ In the average case of a random distribution, the reduction is of order $O(b^{3d
\end{lstlisting}
\end{algorithm}

\begin{figure}[h]
\begin{figure}[H]
\begin{subfigure}{.3\textwidth}
\centering
\includegraphics[width=\linewidth]{img/alphabeta_algo_example1.png}

@@ -113,7 +113,7 @@ Intelligence is defined as the ability to perceive or infer information and to r
\marginnote{Perceptron}
A neuron (\textbf{perceptron}) computes a weighted sum of its inputs and
passes the result to an activation function to produce the output.
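% Illustrative formula (assumption, not in the diff): the computation described above,
% with inputs $x_i$, weights $w_i$, bias $b$, and activation function $\sigma$, is
% \[ y = \sigma\Big( \textstyle\sum_{i=1}^{n} w_i x_i + b \Big) \]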
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.40\textwidth]{img/neuron.png}
\caption{Representation of an artificial neuron}
@@ -128,21 +128,21 @@ The expressivity of a neural network increases when more neurons are used:
\begin{descriptionlist}
\item[Single perceptron]
Able to compute a linear separation.
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.25\textwidth]{img/1perceptron.png}
\caption{Separation performed by one perceptron}
\end{figure}
\item[Three-layer network]
Able to separate a convex region ($n_\text{edges} \leq n_\text{hidden neurons}$)
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.90\textwidth]{img/3layer.png}
\caption{Separation performed by a three-layer network}
\end{figure}
\item[Four-layer network]
Able to separate regions of arbitrary shape.
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.40\textwidth]{img/4layer.png}
\caption{Separation performed by a four-layer network}

@@ -15,7 +15,7 @@
Problem: find a Hamiltonian tour of minimum cost in an undirected graph.

A possible neighborhood of a state applies a $k$-exchange, which guarantees that the result is still a Hamiltonian tour.
\begin{figure}[ht]
\begin{figure}[H]
\begin{subfigure}{.5\textwidth}
\centering
\includegraphics[width=.70\linewidth]{img/tsp_2-exchange.png}
@@ -78,7 +78,7 @@ Can be seen as a search process over graphs:
\item[Neighborhood graph] The search space topology.
\item[Search graph] The explored space.
\end{descriptionlist}
\begin{figure}[ht]
\begin{figure}[H]
\begin{subfigure}{.5\textwidth}
\centering
\includegraphics[width=.55\linewidth]{img/_local_search_neigh_graph.pdf}
@@ -197,7 +197,7 @@ Population based meta heuristics are built on the following concepts:
\item[Natural selection] Fit organisms have many offspring while others become extinct.
\end{descriptionlist}

\begin{table}[ht]
\begin{table}[H]
\centering
\begin{tabular}{c | c}
\textbf{Biology} & \textbf{Artificial intelligence} \\
@@ -224,7 +224,7 @@ The following terminology will be used:
\item[Alleles] Domain of values of a gene.
\end{descriptionlist}

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/_genetic_terminology.pdf}
\caption{}
@@ -270,7 +270,7 @@ Genetic operators are:
\end{descriptionlist}
\end{example}

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.4\textwidth]{img/_genetic_cycle.pdf}
\caption{Evolutionary cycle}

@@ -423,7 +423,7 @@ At each step, one of the following refinement operations can be applied until th
\item Add a causal link to the set of causal links.
\end{itemize}

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.45\textwidth]{img/_nonlinear_plan_example.pdf}
\caption{Example of search tree in non-linear planning}

@@ -30,7 +30,7 @@
A leaf can be a state to expand, a solution or a dead-end.
\Cref{alg:search_tree_search} describes a generic tree search algorithm.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.25\textwidth]{img/_search_tree.pdf}
\caption{Search tree}
@@ -122,7 +122,7 @@ Always expands the least deep node. The fringe is implemented as a queue (FIFO).

The exponential space complexity makes BFS impractical for large problems.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.30\textwidth]{img/_bfs.pdf}
\caption{BFS visit order}
@@ -147,7 +147,7 @@ Same as BFS, but always expands the node with the lowest cumulative cost.
\end{tabular}
\end{center}

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.50\textwidth]{img/_ucs.pdf}
\caption{Uniform-cost search visit order. $(n)$ is the cumulative cost}
@@ -175,7 +175,7 @@ Always expands the deepest node. The fringe is implemented as a stack (LIFO).
\end{tabular}
\end{center}

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.30\textwidth]{img/_dfs.pdf}
\caption{DFS visit order}
@@ -261,7 +261,7 @@ The fringe is ordered according to the estimated scores.
\end{center}
% The complexity can be reduced depending on the heuristic.

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.65\textwidth]{img/_greedy_best_first_example.pdf}
\caption{Hill climbing visit order}
@@ -337,7 +337,7 @@ The fringe is ordered according to the estimated scores.

In general, it is better to use heuristics with large values (i.e. heuristics that don't underestimate too much).

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.65\textwidth]{img/_a_start_example.pdf}
\caption{A$^*$ visit order}

@@ -160,7 +160,7 @@ A property of objects.
\marginnote{Semantic networks}
Graphical representation of objects and categories connected through labeled links.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.4\textwidth]{img/semantic_network.png}
\caption{Example of semantic network}

@@ -55,7 +55,7 @@
\texttt{http://www.example.org/index.html} has a \texttt{creator} with staff id \texttt{85740}.
\end{example}

\item[XML]
\item[XML] \phantom{}
\begin{example} \phantom{}
\begin{lstlisting}[mathescape=true, language=xml]
<rdf:RDF

@@ -142,7 +142,7 @@
\end{itemize}
In other words, influence can flow from $X$ to $Y$ passing through $Z$.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.65\textwidth]{img/_active_trail.pdf}
\caption{Example of active and non-active two-edge trails}
@@ -214,7 +214,7 @@

\item[Local semantics]
Each node is conditionally independent of its non-descendants given its parents.
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.35\textwidth]{img/_local_independence.pdf}
\caption{Local independence}
@@ -228,7 +228,7 @@
\item[Markov blanket]
Each node is conditionally independent of all the other nodes
if its Markov blanket (parents, children, children's parents) is in the evidence.
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.35\textwidth]{img/_markov_blanket.pdf}
\caption{Markov blanket}
@@ -263,7 +263,7 @@ By construction, this algorithm guarantees the global semantics.
Note that $P \perp G$.
Let the order be fixed as follows: $P$, $G$, $H$.

\begin{figure}[h]
\begin{figure}[H]
\begin{subfigure}{.3\textwidth}
\centering
\includegraphics[width=0.15\linewidth]{img/_monty_hall1.pdf}
@@ -423,7 +423,7 @@ the number of variables in a conditional probability table.
Noisy-OR distributions model a network of non-interacting causes with a common effect.
A node $X$ has $k$ parents $U_1, \dots, U_k$ and possibly a leak node $U_L$ to capture unmodeled concepts.
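% Illustrative formula (assumption, not in the diff): in the standard noisy-OR parameterization,
% if $q_i$ is the probability that the effect is absent when only cause $U_i$ is active
% (and $q_L$ the analogous leak term), then
% \[ P(X = \text{false} \mid u_1, \dots, u_k) = q_L \prod_{i \,:\, u_i = \text{true}} q_i \]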

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.3\textwidth]{img/_noisy_or_example.pdf}
\caption{Example of noisy-OR network}
@@ -536,7 +536,7 @@ Possible approaches are:
\item[Dynamic Bayesian network] \marginnote{Dynamic Bayesian network}
Useful to model the evolution through time.
A template variable $X_i$ is instantiated as $X_i^{(t)}$ at each time step.
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.3\textwidth]{img/_dynamic_bn_example.pdf}
\caption{Example of dynamic Bayesian network}

@@ -41,7 +41,7 @@ Method that carries out summations right-to-left and stores intermediate results

\begin{description}
\item[Pointwise product of factors] $f(X, Y) \times g(Y, Z) = p(X, Y, Z)$
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/_pointwise_factors.pdf}
\caption{Example of pointwise product}

@@ -25,7 +25,7 @@
The pinhole camera is a good approximation of the geometry of the image formation mechanism of modern imaging devices.
\end{remark}

\begin{figure}[h]
\begin{figure}[H]
\begin{subfigure}{.4\textwidth}
\centering
\includegraphics[width=0.8\linewidth]{./img/pinhole.png}
@@ -205,7 +205,7 @@ Geometric model of a pinhole camera.\\
to find the object corresponding to $p_L$ in another image,
it is sufficient to search along the horizontal axis of $p_L$ looking for the same colors or patterns.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{./img/stereo_matching.png}
\caption{Example of stereo matching}
@@ -259,7 +259,7 @@ then its length $l$ in the image plane is:
In all the other cases (i.e. when the line is not parallel to the image plane),
the ratios of lengths and the parallelism of lines are not preserved.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.25\textwidth]{./img/_perspective_projection_ratio.pdf}
\caption{Example of not preserved ratios. It holds that $\frac{\overline{AB}}{\overline{BC}} \neq \frac{\overline{ab}}{\overline{bc}}$.}
@@ -269,7 +269,7 @@ the ratios of lengths and the parallelism of lines are not preserved.
\item[Vanishing point] \marginnote{Vanishing point}
Intersection point of lines that are parallel in the scene but not in the image plane.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.6\textwidth]{./img/_vanishing_point.pdf}
\caption{Example of vanishing point}
@@ -402,7 +402,7 @@ the ratios of lengths and the parallelism of lines are not preserved.

The image plane of a camera converts the received irradiance into electrical signals.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.6\textwidth]{./img/_digitalization.pdf}
\caption{Image digitalization steps}

@@ -62,7 +62,7 @@ where $\tilde{I}(p)$ is the real information.

Alternatively, it can be seen as the amount of overlap between $f(\tau)$ and $g(t - \tau)$.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.4\textwidth]{./img/continuous_convolution_example.png}
\caption{Example of convolution}

@@ -30,7 +30,7 @@
\end{example}

\item[Data exploration] \marginnote{Data exploration}
\begin{figure}[ht]
\begin{figure}[H]
\begin{subfigure}{.5\textwidth}
\centering
\includegraphics[width=\linewidth]{img/_iris_boxplot_general.pdf}
@@ -137,7 +137,7 @@ As $N$ is at the denominator, this means that for large values of $N$, the uncer
Note that cross-validation is done on the training set, so a final test set can still be used to
evaluate the resulting model.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.6\textwidth]{img/cross_validation.png}
\caption{Cross-validation example}
@@ -287,7 +287,7 @@ a macro (unweighted) average or a class-weighted average.
When the area between the two curves is large and the curve is above the random classifier,
the model can be considered a good classifier.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/lift_chart.png}
\caption{Example of lift chart}
@@ -301,7 +301,7 @@ a macro (unweighted) average or a class-weighted average.
A straight line is used to represent a random classifier.
A threshold can be considered good if it is high on the y-axis and low on the x-axis.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.35\textwidth]{img/roc_curve.png}
\caption{Example of ROC curves}
@@ -408,7 +408,7 @@ Possible solutions are:
\item Classes distribution.
\end{itemize}

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/_iris_decision_tree_example.pdf}
\caption{Example of decision tree}
@@ -458,7 +458,7 @@ Possible solutions are:
Skipped.
\end{descriptionlist}

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.35\textwidth]{img/impurity_comparison.png}
\caption{Comparison of impurity measures}
@@ -633,7 +633,7 @@ This has complexity $O(h)$, with $h$ the height of the tree.
\item[Perceptron] \marginnote{Perceptron}
A single artificial neuron that takes $n$ inputs $x_1, \dots, x_n$ and a bias $b$,
and computes a linear combination of them with weights $w_1, \dots, w_n, w_b$.
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.25\textwidth]{img/_perceptron.pdf}
\caption{Example of perceptron}
@@ -686,7 +686,7 @@ In practice, a maximum number of iterations is set.
In general, a subset of points (support vectors) \marginnote{Support vectors}
in the training set is sufficient to define the hulls.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.4\textwidth]{img/svm.png}
\caption{Maximum margin hyperplane of linearly separable data}
@@ -724,7 +724,7 @@ For non-linearly separable data, the boundary can be found using a non-linear ma
to map the data into a new space (feature space) where a linear separation is possible.
Then, the data and the boundary are mapped back into the original space.

\begin{figure}[h]
\begin{figure}[H]
\begin{subfigure}{0.49\textwidth}
\centering
\includegraphics[width=\linewidth]{img/svm_kernel_example1.png}
@@ -840,7 +840,7 @@ Train a set of base classifiers and make predictions by majority vote.
If all the classifiers have the same but independent error rate,
the overall error of the ensemble model is lower (derived from a binomial distribution).

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.6\textwidth]{img/ensemble_error.png}
\caption{Relationship between the error of base classifiers and ensemble models}

@@ -13,7 +13,7 @@
0 indicates no difference while the upper bound varies.
\end{description}

\begin{table}[ht]
\begin{table}[H]
\centering
\renewcommand{\arraystretch}{2}
\begin{tabular}{c | c | c}
@@ -64,7 +64,7 @@ Given two $D$-dimensional data entries $p$ and $q$, possible distance metrics ar
The Mahalanobis distance of $p$ and $q$ increases when the segment connecting them
points towards a direction of lesser variation of the data.
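% Illustrative formula (assumption, not in the diff): with $\matr{\Sigma}$ the covariance matrix of the data,
% \[ d_M(p, q) = \sqrt{(p - q)^\top \matr{\Sigma}^{-1} (p - q)} \]
% so displacements along high-variance directions are down-weighted.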

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.35\textwidth]{img/mahalanobis.png}
\caption{The Mahalanobis distance between $(A, B)$ is greater than $(A, C)$, while the Euclidean distance is the same.}

@@ -3,7 +3,7 @@
\begin{description}
\item[\Acl{crisp}] \marginnote{\acs{crisp}}
Standardized process for data mining.
\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.45\textwidth]{img/crisp.png}
\caption{\ac{crisp} workflow}

@@ -25,7 +25,7 @@
Less expensive.
\end{descriptionlist}

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/_storage.pdf}
\caption{Data storage technologies}
@@ -155,7 +155,7 @@
\item[Speed layer]
Receives the data and prepares real-time views. The views are also stored in the serving layer.
\end{description}
\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/lambda_lake.png}
\caption{Lambda lake architecture}
@@ -165,7 +165,7 @@
\marginnote{Kappa lake}
The data are stored in a long-term store.
Computations only happen in the speed layer (avoids lambda lake redundancy between batch layer and speed layer).
\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/kappa_lake.png}
\caption{Kappa lake architecture}
@@ -181,7 +181,7 @@ Framework that adds features on top of an existing data lake.
\item Unified batch and streaming
\item Schema enforcement
\end{itemize}
\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.7\textwidth]{img/delta_lake.png}
\caption{Delta lake architecture}

@@ -34,7 +34,7 @@
Navigation path created by the operations that a user applied.
\end{description}

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.35\textwidth]{img/_olap_cube.pdf}
\caption{\ac{olap} data cube}
@@ -280,13 +280,13 @@ The architecture of a data warehouse should meet the following requirements:
\end{descriptionlist}
\end{description}

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{img/dfm.png}
\caption{Example of \ac{dfm}}
\end{figure}

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/dfm_events.png}
\caption{Example of primary and secondary events}
@@ -318,7 +318,7 @@ Aggregation operators can be classified as:
\begin{description}
\item[Additivity] \marginnote{Additive measure}
A measure is additive along a dimension if an aggregation operator can be applied.
\begin{table}[ht]
\begin{table}[H]
\centering
\begin{tabular}{l | c | c}
& \textbf{Temporal hierarchies} & \textbf{Non-temporal hierarchies} \\
@@ -340,7 +340,7 @@ There are two main strategies:
\begin{descriptionlist}
\item[Star schema] \marginnote{Star schema}
A fact table that contains all the measures is linked to dimensional tables.
\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=\textwidth]{img/logical_star_schema.png}
\caption{Example of star schema}

@@ -87,7 +87,7 @@ Different levels of insight can be extracted by:

\item[Data mining] \marginnote{Data mining}
Discovery process for unstructured decisions.
\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{img/data_mining_process.png}
\caption{Data mining process}

@@ -16,7 +16,7 @@

\item[Inherent error] \marginnote{Inherent error}
Caused by the finite representation of the data (floating-point).
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.6\textwidth]{img/_inherent_error.pdf}
\caption{Inherent error visualization}
@@ -97,7 +97,7 @@ Given a floating-point system $\mathcal{F}(\beta, t, L, U)$, the total amount of
%
Representable numbers are more sparse towards the exponent upper bound and more dense towards the lower bound.
It must be noted that there is an underflow area around 0.
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{img/floatingpoint_range.png}
\caption{Floating-point numbers in $\mathcal{F}(2, 3, -1, 2)$}
@@ -132,13 +132,13 @@ Depending on the approximation approach, machine precision can be computed as:
Therefore, rounding results in more accurate representations.

$\varepsilon_{\text{mach}}$ is the smallest distance among the representable numbers (\Cref{fig:finnum_eps}).
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.2\textwidth]{img/machine_eps.png}
\caption{Visualization of $\varepsilon_{\text{mach}}$ in $\mathcal{F}(2, 3, -1, 2)$}
\label{fig:finnum_eps}
\end{figure}\\
%
\end{figure}

Alternatively, $\varepsilon_{\text{mach}}$ can be defined as the smallest representable number such that:
\begin{equation*}
\texttt{fl}(1 + \varepsilon_{\text{mach}}) > 1.

@@ -157,7 +157,7 @@ A generic gradient-like method can then be defined as:
\item[Flat regions and local optima] \marginnote{Flat regions and local optima}
Flat regions slow down the learning speed,
while a local optimum causes the method to converge to a poor solution.
\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.9\textwidth]{img/_descent_local_flat.pdf}
\caption{Flat regions and local minima}
@@ -194,7 +194,7 @@ A generic gradient-like method can then be defined as:
A valley in the objective function causes a gradient method to bounce between the sides
to a point where no significant progress can be made.

\begin{figure}[ht]
\begin{figure}[H]
\begin{subfigure}{.5\textwidth}
\centering
\includegraphics[width=.30\linewidth]{img/cliff.png}
@@ -217,7 +217,7 @@ A generic gradient-like method can then be defined as:
Informally, a set is convex if, for any two points of the set,
the points lying on the segment connecting them are also part of the set.

\begin{figure}[ht]
\begin{figure}[H]
\begin{subfigure}{.5\textwidth}
\centering
\includegraphics[width=.25\linewidth]{img/convex_set.png}
@@ -239,7 +239,7 @@ A generic gradient-like method can then be defined as:
\]

In other words, the segment connecting two points of the function lies above the graph.
\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.55\textwidth]{img/convex_function.png}
\caption{Convex function}

@@ -232,7 +232,7 @@ Common norms are:

The vector $\vec{w} \in U^\perp$ s.t. $\Vert \vec{w} \Vert = 1$ is the \textbf{normal vector} of $U$. \marginnote{Normal vector}
%
\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.4\textwidth]{img/_orthogonal_complement.pdf}
\caption{Orthogonal complement of a subspace $U \subseteq \mathbb{R}^3$}

@@ -160,7 +160,7 @@ The parameters are determined as the most likely to predict the correct label gi
which corresponds to the least squares problem.
\end{description}

\begin{figure}[ht]
\begin{figure}[H]
\begin{subfigure}{.45\textwidth}
\centering
\includegraphics[width=.75\linewidth]{img/gaussian_mle_good.png}

@@ -146,7 +146,7 @@ Therefore, the compression factor is given by: \marginnote{Compression factor}
c_k = 1 - \frac{k(1 + m + n)}{mn}
\]
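% Worked example (illustrative, not in the diff): for a hypothetical $512 \times 512$
% image ($m = n = 512$) and $k = 50$ singular values,
% $c_{50} = 1 - \frac{50 (1 + 512 + 512)}{512 \cdot 512} \approx 0.80$,
% i.e. roughly an 80% reduction in stored values.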

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.60\textwidth]{img/_rank_k_approx.pdf}
\caption{Approximation of an image}
@@ -197,7 +197,7 @@ We can formulate this as a linear system:
that can be solved as a linear least squares problem:
\[ \min_{\vec{c} \in \mathbb{R}^n} \Vert \vec{y} - \matr{A}\vec{c} \Vert_2^2 \]
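% Illustrative note (assumption, not in the diff): when $\matr{A}$ has full column rank,
% this least squares problem is solved by the normal equations
% \[ \matr{A}^\top \matr{A} \vec{c} = \matr{A}^\top \vec{y} \]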

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.40\textwidth]{img/linear_regression.png}
\caption{Interpolation using a polynomial of degree 1}

@@ -504,7 +504,7 @@ Moreover, we have that:
$\bm{\mu} = \nullvec$ and $\matr{\Sigma} = \matr{I}$ (multivariate).
\end{description}

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.40\textwidth]{img/normal_distribution.png}
\caption{Normal distributions and standard normal distribution}

@@ -197,7 +197,7 @@ Each $\vec{f}_i$ takes as input the output of the previous layer $\vec{x}_{i-1}$
where $\sigma_i$ is an activation function\footnote{\url{https://en.wikipedia.org/wiki/Activation_function}} (a function to add nonlinearity),
while $\matr{A}_{i-1}$ (linear mapping) and $\vec{b}_{i-1}$ (biases) are the parameters of $\vec{f}_i$.

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.7\textwidth]{img/_forward_pass.pdf}
\caption{Forward pass}
@@ -231,7 +231,7 @@ This can be done by using the chain rule to compute the partial derivatives of $
\end{split}
\]

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.7\textwidth]{img/_backward_pass.pdf}
\caption{Backward pass}