mirror of https://github.com/NotXia/unibo-ai-notes.git
synced 2025-12-14 18:51:52 +01:00
Update example environment style
@@ -21,6 +21,10 @@
\usepackage{eurosym}
\usepackage{bussproofs} % Deductive tree
\usepackage{varwidth}
\usepackage[most]{tcolorbox}
\usepackage{tikz}
\tcbuselibrary{breakable}
\usetikzlibrary{decorations.pathmorphing,calc}

\geometry{ margin=3cm, lmargin=1.5cm, rmargin=4.5cm, marginparwidth=3cm }
\hypersetup{ colorlinks, citecolor=black, filecolor=black, linkcolor=black, urlcolor=black, linktoc=all }
@@ -48,6 +52,7 @@
\lstset{style=mystyle}
\lstset{language=Python}

\NewDocumentEnvironment{descriptionlist}{}{%
\begin{description}[labelindent=1em]
}{
@@ -57,15 +62,38 @@
\renewcommand*{\marginfont}{\color{gray}\footnotesize}
\renewcommand*\chapterpagestyle{scrheadings} % Header in chapter pages

\theoremstyle{definition}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{corollary}{Corollary}[theorem]
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem*{example}{Example}
\newtheorem*{privateexample}{Example}
\theoremstyle{definition}
\newtheorem*{definition}{Def}
\newtheorem*{remark}{Remark}

\newtcolorbox{marginbar}[3]{ % #1: color | #2: (number of lines - 1) | #3: line thickness
enhanced, blank, breakable,
overlay = {
\foreach \t in {0,...,#2}{
\draw[decorate, #3, #1]
([xshift=-3-\t mm]frame.north west)
--
([xshift=-3-\t mm]frame.south west);
},
}
}

\newenvironment{example}{%
\begin{marginbar}{lightgray}{0}{thick}
\begin{privateexample}
}{%
\end{privateexample}
\end{marginbar}
}
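% Usage sketch (illustrative, not part of the diff): with the definitions above,
% the `example` environment wraps its body in a `privateexample` theorem and draws
% a lightgray marginbar rule next to it, e.g.
% \begin{example}
%   The function $f(x) = x^2$ is convex on $\mathbb{R}$.
% \end{example}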

\newcommand{\ubar}[1]{\text{\b{$#1$}}}
\renewcommand{\vec}[1]{{\bm{\mathbf{#1}}}}
\newcommand{\nullvec}[0]{\bar{\vec{0}}}
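% Illustrative note (assumption, not in the diff): $\vec{x}$ typesets a bold vector,
% $\ubar{x}$ underlines its argument, and $\nullvec$ denotes the null vector $\bar{\vec{0}}$.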

@@ -18,7 +18,14 @@
\DeclareAcronym{cs}{short=CS, long=conditioned stimulus}
\DeclareAcronym{cr}{short=CR, long=conditioned response}

\newtheorem*{casestudy}{Case study}
\newtheorem*{privatecasestudy}{Case study}
\newenvironment{casestudy}{%
\begin{marginbar}{olive}{0}{thick}
\begin{privatecasestudy}
}{%
\end{privatecasestudy}
\end{marginbar}
}
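% Usage sketch (illustrative, not part of the diff): `casestudy` behaves like the
% `example` environment above, but draws an olive marginbar, e.g.
% \begin{casestudy}
%   Pavlov's dogs: after conditioning, the bell (\ac{cs}) alone elicits salivation (\ac{cr}).
% \end{casestudy}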

\begin{document}

@@ -108,7 +108,7 @@ Generally, a neuron does the following:
\item[Cell body/soma] Metabolic center of the cell.
\end{description}

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/neuron_eukaryotic.png}
\caption{Neuron as a eukaryotic cell}
@@ -151,7 +151,7 @@ There are three types of synapses:
\item[Axoaxonic] \marginnote{Axoaxonic}
Synapses that a neuron makes onto the synapses of another neuron.
In this case, the transmitting neuron can be seen as a signal modulator of the receiving neuron.
\begin{figure}[h]
\begin{figure}[H]
\begin{subfigure}{.3\textwidth}
\centering
\includegraphics[width=\linewidth]{./img/axosomatic.png}
@@ -215,7 +215,7 @@ In a neuron, there are four regions that handle signals:
\item[Electrical synapses] The \ac{ap} is directly transmitted to the next neurons.
\end{description}

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{./img/neuron_transmission.png}
\caption{Transmitting regions of different types of neurons}
@@ -286,7 +286,7 @@ In a neuron, there are four regions that handle signals:
\end{remark}
\end{enumerate}

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{./img/neuron_transmission2.png}
\caption{

@@ -7,7 +7,14 @@
\def\lastupdate{{PLACEHOLDER-LAST-UPDATE}}
\def\giturl{{PLACEHOLDER-GIT-URL}}

\newtheorem*{casestudy}{Case study}
\newtheorem*{privatecasestudy}{Case study}
\newenvironment{casestudy}{%
\begin{marginbar}{olive}{0}{thick}
\begin{privatecasestudy}
}{%
\end{privatecasestudy}
\end{marginbar}
}

\begin{document}

@@ -12,7 +12,7 @@ It considers the player as the entity that maximizes (\textsc{Max}) its utility
the opponent as the entity that (optimally) minimizes (\textsc{Min}) the utility of the player.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/_minmax.pdf}
\caption{Example of game tree with propagated scores}
@@ -121,7 +121,7 @@ In the average case of a random distribution, the reduction is of order $O(b^{3d
\end{lstlisting}
\end{algorithm}

\begin{figure}[h]
\begin{figure}[H]
\begin{subfigure}{.3\textwidth}
\centering
\includegraphics[width=\linewidth]{img/alphabeta_algo_example1.png}

@@ -113,7 +113,7 @@ Intelligence is defined as the ability to perceive or infer information and to r
\marginnote{Perceptron}
A neuron (\textbf{perceptron}) computes a weighted sum of its inputs and
passes the result to an activation function to produce the output.
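% Illustrative formula (assumption, not in the diff): the computation described above,
% with inputs $x_i$, weights $w_i$, bias $b$, and activation function $\sigma$, is
% \[ y = \sigma\Big( \textstyle\sum_{i=1}^{n} w_i x_i + b \Big) \]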
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.40\textwidth]{img/neuron.png}
\caption{Representation of an artificial neuron}
@@ -128,21 +128,21 @@ The expressivity of a neural network increases when more neurons are used:
\begin{descriptionlist}
\item[Single perceptron]
Able to compute a linear separation.
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.25\textwidth]{img/1perceptron.png}
\caption{Separation performed by one perceptron}
\end{figure}
\item[Three-layer network]
Able to separate a convex region ($n_\text{edges} \leq n_\text{hidden neurons}$)
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.90\textwidth]{img/3layer.png}
\caption{Separation performed by a three-layer network}
\end{figure}
\item[Four-layer network]
Able to separate regions of arbitrary shape.
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.40\textwidth]{img/4layer.png}
\caption{Separation performed by a four-layer network}

@@ -15,7 +15,7 @@
Problem: find a Hamiltonian tour of minimum cost in an undirected graph.

A possible neighborhood of a state applies a $k$-exchange, which guarantees that the result is still a Hamiltonian tour.
\begin{figure}[ht]
\begin{figure}[H]
\begin{subfigure}{.5\textwidth}
\centering
\includegraphics[width=.70\linewidth]{img/tsp_2-exchange.png}
@@ -78,7 +78,7 @@ Can be seen as a search process over graphs:
\item[Neighborhood graph] The search space topology.
\item[Search graph] The explored space.
\end{descriptionlist}
\begin{figure}[ht]
\begin{figure}[H]
\begin{subfigure}{.5\textwidth}
\centering
\includegraphics[width=.55\linewidth]{img/_local_search_neigh_graph.pdf}
@@ -197,7 +197,7 @@ Population based meta heuristics are built on the following concepts:
\item[Natural selection] Fit organisms have many offspring while others become extinct.
\end{descriptionlist}

\begin{table}[ht]
\begin{table}[H]
\centering
\begin{tabular}{c | c}
\textbf{Biology} & \textbf{Artificial intelligence} \\
@@ -224,7 +224,7 @@ The following terminology will be used:
\item[Alleles] Domain of values of a gene.
\end{descriptionlist}

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/_genetic_terminology.pdf}
\caption{}
@@ -270,7 +270,7 @@ Genetic operators are:
\end{descriptionlist}
\end{example}

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.4\textwidth]{img/_genetic_cycle.pdf}
\caption{Evolutionary cycle}

@@ -423,7 +423,7 @@ At each step, one of the following refinement operations can be applied until th
\item Add a causal link to the set of causal links.
\end{itemize}

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.45\textwidth]{img/_nonlinear_plan_example.pdf}
\caption{Example of search tree in non-linear planning}

@@ -30,7 +30,7 @@
A leaf can be a state to expand, a solution or a dead-end.
\Cref{alg:search_tree_search} describes a generic tree search algorithm.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.25\textwidth]{img/_search_tree.pdf}
\caption{Search tree}
@@ -122,7 +122,7 @@ Always expands the least deep node. The fringe is implemented as a queue (FIFO).

The exponential space complexity makes BFS impractical for large problems.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.30\textwidth]{img/_bfs.pdf}
\caption{BFS visit order}
@@ -147,7 +147,7 @@ Same as BFS, but always expands the node with the lowest cumulative cost.
\end{tabular}
\end{center}

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.50\textwidth]{img/_ucs.pdf}
\caption{Uniform-cost search visit order. $(n)$ is the cumulative cost}
@@ -175,7 +175,7 @@ Always expands the deepest node. The fringe is implemented as a stack (LIFO).
\end{tabular}
\end{center}

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.30\textwidth]{img/_dfs.pdf}
\caption{DFS visit order}
@@ -261,7 +261,7 @@ The fringe is ordered according to the estimated scores.
\end{center}
% The complexity can be reduced depending on the heuristic.

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.65\textwidth]{img/_greedy_best_first_example.pdf}
\caption{Hill climbing visit order}
@@ -337,7 +337,7 @@ The fringe is ordered according to the estimated scores.

In general, it is better to use heuristics with large values (i.e. heuristics that don't underestimate too much).

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.65\textwidth]{img/_a_start_example.pdf}
\caption{A$^*$ visit order}

@@ -160,7 +160,7 @@ A property of objects.
\marginnote{Semantic networks}
Graphical representation of objects and categories connected through labeled links.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.4\textwidth]{img/semantic_network.png}
\caption{Example of semantic network}

@@ -55,7 +55,7 @@
\texttt{http://www.example.org/index.html} has a \texttt{creator} with staff id \texttt{85740}.
\end{example}

\item[XML]
\item[XML] \phantom{}
\begin{example} \phantom{}
\begin{lstlisting}[mathescape=true, language=xml]
<rdf:RDF

@@ -142,7 +142,7 @@
\end{itemize}
In other words, influence can flow from $X$ to $Y$ passing through $Z$.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.65\textwidth]{img/_active_trail.pdf}
\caption{Example of active and non-active two-edge trails}
@@ -214,7 +214,7 @@

\item[Local semantics]
Each node is conditionally independent of its non-descendants given its parents.
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.35\textwidth]{img/_local_independence.pdf}
\caption{Local independence}
@@ -228,7 +228,7 @@
\item[Markov blanket]
Each node is conditionally independent of all the other nodes
if its Markov blanket (parents, children, children's parents) is in the evidence.
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.35\textwidth]{img/_markov_blanket.pdf}
\caption{Markov blanket}
@@ -263,7 +263,7 @@ By construction, this algorithm guarantees the global semantics.
Note that $P \perp G$.
Let the order be fixed as follows: $P$, $G$, $H$.

\begin{figure}[h]
\begin{figure}[H]
\begin{subfigure}{.3\textwidth}
\centering
\includegraphics[width=0.15\linewidth]{img/_monty_hall1.pdf}
@@ -423,7 +423,7 @@ the number of variables in a conditional probability table.
Noisy-OR distributions model a network of non-interacting causes with a common effect.
A node $X$ has $k$ parents $U_1, \dots, U_k$ and possibly a leak node $U_L$ to capture unmodeled concepts.
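% Illustrative formula (assumption, not in the diff): in the standard noisy-OR parameterization,
% if $q_i$ is the probability that the effect is absent when only cause $U_i$ is active
% (and $q_L$ the analogous leak term), then
% \[ P(X = \text{false} \mid u_1, \dots, u_k) = q_L \prod_{i \,:\, u_i = \text{true}} q_i \]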

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.3\textwidth]{img/_noisy_or_example.pdf}
\caption{Example of noisy-OR network}
@@ -536,7 +536,7 @@ Possible approaches are:
\item[Dynamic Bayesian network] \marginnote{Dynamic Bayesian network}
Useful to model the evolution through time.
A template variable $X_i$ is instantiated as $X_i^{(t)}$ at each time step.
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.3\textwidth]{img/_dynamic_bn_example.pdf}
\caption{Example of dynamic Bayesian network}

@@ -41,7 +41,7 @@ Method that carries out summations right-to-left and stores intermediate results

\begin{description}
\item[Pointwise product of factors] $f(X, Y) \times g(Y, Z) = p(X, Y, Z)$
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/_pointwise_factors.pdf}
\caption{Example of pointwise product}

@@ -25,7 +25,7 @@
The pinhole camera is a good approximation of the geometry of the image formation mechanism of modern imaging devices.
\end{remark}

\begin{figure}[h]
\begin{figure}[H]
\begin{subfigure}{.4\textwidth}
\centering
\includegraphics[width=0.8\linewidth]{./img/pinhole.png}
@@ -205,7 +205,7 @@ Geometric model of a pinhole camera.\\
to find the object corresponding to $p_L$ in another image,
it is sufficient to search along the horizontal axis of $p_L$ looking for the same colors or patterns.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{./img/stereo_matching.png}
\caption{Example of stereo matching}
@@ -259,7 +259,7 @@ then its length $l$ in the image plane is:
In all the other cases (i.e. when the line is not parallel to the image plane),
the ratios of lengths and the parallelism of lines are not preserved.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.25\textwidth]{./img/_perspective_projection_ratio.pdf}
\caption{Example of not preserved ratios. It holds that $\frac{\overline{AB}}{\overline{BC}} \neq \frac{\overline{ab}}{\overline{bc}}$.}
@@ -269,7 +269,7 @@ the ratios of lengths and the parallelism of lines are not preserved.
\item[Vanishing point] \marginnote{Vanishing point}
Intersection point of lines that are parallel in the scene but not in the image plane.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.6\textwidth]{./img/_vanishing_point.pdf}
\caption{Example of vanishing point}
@@ -402,7 +402,7 @@ the ratios of lengths and the parallelism of lines are not preserved.

The image plane of a camera converts the received irradiance into electrical signals.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.6\textwidth]{./img/_digitalization.pdf}
\caption{Image digitalization steps}

@@ -62,7 +62,7 @@ where $\tilde{I}(p)$ is the real information.

Alternatively, it can be seen as the amount of overlap between $f(\tau)$ and $g(t - \tau)$.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.4\textwidth]{./img/continuous_convolution_example.png}
\caption{Example of convolution}

@@ -30,7 +30,7 @@
\end{example}

\item[Data exploration] \marginnote{Data exploration}
\begin{figure}[ht]
\begin{figure}[H]
\begin{subfigure}{.5\textwidth}
\centering
\includegraphics[width=\linewidth]{img/_iris_boxplot_general.pdf}
@@ -137,7 +137,7 @@ As $N$ is at the denominator, this means that for large values of $N$, the uncer
Note that cross-validation is done on the training set, so a final test set can still be used to
evaluate the resulting model.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.6\textwidth]{img/cross_validation.png}
\caption{Cross-validation example}
@@ -287,7 +287,7 @@ a macro (unweighted) average or a class-weighted average.
When the area between the two curves is large and the curve is above the random classifier,
the model can be considered a good classifier.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/lift_chart.png}
\caption{Example of lift chart}
@@ -301,7 +301,7 @@ a macro (unweighted) average or a class-weighted average.
A straight line is used to represent a random classifier.
A threshold can be considered good if it is high on the y-axis and low on the x-axis.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.35\textwidth]{img/roc_curve.png}
\caption{Example of ROC curves}
@@ -408,7 +408,7 @@ Possible solutions are:
\item Classes distribution.
\end{itemize}

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/_iris_decision_tree_example.pdf}
\caption{Example of decision tree}
@@ -458,7 +458,7 @@ Possible solutions are:
Skipped.
\end{descriptionlist}

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.35\textwidth]{img/impurity_comparison.png}
\caption{Comparison of impurity measures}
@@ -633,7 +633,7 @@ This has complexity $O(h)$, with $h$ the height of the tree.
\item[Perceptron] \marginnote{Perceptron}
A single artificial neuron that takes $n$ inputs $x_1, \dots, x_n$ and a bias $b$,
and computes a linear combination of them with weights $w_1, \dots, w_n, w_b$.
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.25\textwidth]{img/_perceptron.pdf}
\caption{Example of perceptron}
@@ -686,7 +686,7 @@ In practice, a maximum number of iterations is set.
In general, a subset of points (support vectors) \marginnote{Support vectors}
in the training set is sufficient to define the hulls.

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.4\textwidth]{img/svm.png}
\caption{Maximum margin hyperplane of linearly separable data}
@@ -724,7 +724,7 @@ For non-linearly separable data, the boundary can be found using a non-linear ma
to map the data into a new space (feature space) where a linear separation is possible.
Then, the data and the boundary are mapped back into the original space.

\begin{figure}[h]
\begin{figure}[H]
\begin{subfigure}{0.49\textwidth}
\centering
\includegraphics[width=\linewidth]{img/svm_kernel_example1.png}
@@ -840,7 +840,7 @@ Train a set of base classifiers and make predictions by majority vote.
If all the classifiers have the same but independent error rate,
the overall error of the ensemble model is lower (derived from a binomial distribution).

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.6\textwidth]{img/ensemble_error.png}
\caption{Relationship between the error of base classifiers and ensemble models}

@@ -13,7 +13,7 @@
0 indicates no difference while the upper bound varies.
\end{description}

\begin{table}[ht]
\begin{table}[H]
\centering
\renewcommand{\arraystretch}{2}
\begin{tabular}{c | c | c}
@@ -64,7 +64,7 @@ Given two $D$-dimensional data entries $p$ and $q$, possible distance metrics ar
The Mahalanobis distance of $p$ and $q$ increases when the segment connecting them
points towards a direction of lesser variation of the data.
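% Illustrative formula (assumption, not in the diff): with $\matr{\Sigma}$ the covariance matrix of the data,
% \[ d_M(p, q) = \sqrt{(p - q)^\top \matr{\Sigma}^{-1} (p - q)} \]
% so displacements along high-variance directions are down-weighted.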

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.35\textwidth]{img/mahalanobis.png}
\caption{The Mahalanobis distance between $(A, B)$ is greater than $(A, C)$, while the Euclidean distance is the same.}

@@ -3,7 +3,7 @@
\begin{description}
\item[\Acl{crisp}] \marginnote{\acs{crisp}}
Standardized process for data mining.
\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.45\textwidth]{img/crisp.png}
\caption{\ac{crisp} workflow}

@@ -25,7 +25,7 @@
Less expensive.
\end{descriptionlist}

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/_storage.pdf}
\caption{Data storage technologies}
@@ -155,7 +155,7 @@
\item[Speed layer]
Receives the data and prepares real-time views. The views are also stored in the serving layer.
\end{description}
\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/lambda_lake.png}
\caption{Lambda lake architecture}
@@ -165,7 +165,7 @@
\marginnote{Kappa lake}
The data are stored in a long-term store.
Computations only happen in the speed layer (avoids lambda lake redundancy between batch layer and speed layer).
\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/kappa_lake.png}
\caption{Kappa lake architecture}
@@ -181,7 +181,7 @@ Framework that adds features on top of an existing data lake.
\item Unified batch and streaming
\item Schema enforcement
\end{itemize}
\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.7\textwidth]{img/delta_lake.png}
\caption{Delta lake architecture}

@@ -34,7 +34,7 @@
Navigation path created by the operations that a user applied.
\end{description}

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.35\textwidth]{img/_olap_cube.pdf}
\caption{\ac{olap} data cube}
@@ -280,13 +280,13 @@ The architecture of a data warehouse should meet the following requirements:
\end{descriptionlist}
\end{description}

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{img/dfm.png}
\caption{Example of \ac{dfm}}
\end{figure}

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{img/dfm_events.png}
\caption{Example of primary and secondary events}
@@ -318,7 +318,7 @@ Aggregation operators can be classified as:
\begin{description}
\item[Additivity] \marginnote{Additive measure}
A measure is additive along a dimension if an aggregation operator can be applied.
\begin{table}[ht]
\begin{table}[H]
\centering
\begin{tabular}{l | c | c}
& \textbf{Temporal hierarchies} & \textbf{Non-temporal hierarchies} \\
@@ -340,7 +340,7 @@ There are two main strategies:
\begin{descriptionlist}
\item[Star schema] \marginnote{Star schema}
A fact table that contains all the measures is linked to dimensional tables.
\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=\textwidth]{img/logical_star_schema.png}
\caption{Example of star schema}

@@ -87,7 +87,7 @@ Different levels of insight can be extracted by:

\item[Data mining] \marginnote{Data mining}
Discovery process for unstructured decisions.
\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{img/data_mining_process.png}
\caption{Data mining process}

@@ -16,7 +16,7 @@

\item[Inherent error] \marginnote{Inherent error}
Caused by the finite representation of the data (floating-point).
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.6\textwidth]{img/_inherent_error.pdf}
\caption{Inherent error visualization}
@@ -97,7 +97,7 @@ Given a floating-point system $\mathcal{F}(\beta, t, L, U)$, the total amount of
%
Representable numbers are more sparse towards the exponent upper bound and more dense towards the lower bound.
It must be noted that there is an underflow area around 0.
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{img/floatingpoint_range.png}
\caption{Floating-point numbers in $\mathcal{F}(2, 3, -1, 2)$}
@@ -132,13 +132,13 @@ Depending on the approximation approach, machine precision can be computed as:
Therefore, rounding results in more accurate representations.

$\varepsilon_{\text{mach}}$ is the smallest distance among the representable numbers (\Cref{fig:finnum_eps}).
\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.2\textwidth]{img/machine_eps.png}
\caption{Visualization of $\varepsilon_{\text{mach}}$ in $\mathcal{F}(2, 3, -1, 2)$}
\label{fig:finnum_eps}
\end{figure}\\
%
\end{figure}

Alternatively, $\varepsilon_{\text{mach}}$ can be defined as the smallest representable number such that:
\begin{equation*}
\texttt{fl}(1 + \varepsilon_{\text{mach}}) > 1.

@@ -157,7 +157,7 @@ A generic gradient-like method can then be defined as:
\item[Flat regions and local optima] \marginnote{Flat regions and local optima}
Flat regions slow down the learning speed,
while a local optimum causes the method to converge to a poor solution.
\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.9\textwidth]{img/_descent_local_flat.pdf}
\caption{Flat regions and local minima}
@@ -194,7 +194,7 @@ A generic gradient-like method can then be defined as:
A valley in the objective function causes a gradient method to bounce between the sides
to a point where no significant progress can be made.

\begin{figure}[ht]
\begin{figure}[H]
\begin{subfigure}{.5\textwidth}
\centering
\includegraphics[width=.30\linewidth]{img/cliff.png}
@@ -217,7 +217,7 @@ A generic gradient-like method can then be defined as:
Informally, a set is convex if, for any two points of the set,
the points lying on the segment connecting them are also part of the set.

\begin{figure}[ht]
\begin{figure}[H]
\begin{subfigure}{.5\textwidth}
\centering
\includegraphics[width=.25\linewidth]{img/convex_set.png}
@@ -239,7 +239,7 @@ A generic gradient-like method can then be defined as:
\]

In other words, the segment connecting two points of the function lies above the graph.
\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.55\textwidth]{img/convex_function.png}
\caption{Convex function}

@@ -232,7 +232,7 @@ Common norms are:

The vector $\vec{w} \in U^\perp$ s.t. $\Vert \vec{w} \Vert = 1$ is the \textbf{normal vector} of $U$. \marginnote{Normal vector}
%
\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.4\textwidth]{img/_orthogonal_complement.pdf}
\caption{Orthogonal complement of a subspace $U \subseteq \mathbb{R}^3$}

@@ -160,7 +160,7 @@ The parameters are determined as the most likely to predict the correct label gi
which corresponds to the least squares problem.
\end{description}

\begin{figure}[ht]
\begin{figure}[H]
\begin{subfigure}{.45\textwidth}
\centering
\includegraphics[width=.75\linewidth]{img/gaussian_mle_good.png}

@@ -146,7 +146,7 @@ Therefore, the compression factor is given by: \marginnote{Compression factor}
c_k = 1 - \frac{k(1 + m + n)}{mn}
\]
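% Worked example (illustrative, not in the diff): for a hypothetical $512 \times 512$
% image ($m = n = 512$) and $k = 50$ singular values,
% $c_{50} = 1 - \frac{50 (1 + 512 + 512)}{512 \cdot 512} \approx 0.80$,
% i.e. roughly an 80% reduction in stored values.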

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.60\textwidth]{img/_rank_k_approx.pdf}
\caption{Approximation of an image}
@@ -197,7 +197,7 @@ We can formulate this as a linear system:
that can be solved as a linear least squares problem:
\[ \min_{\vec{c} \in \mathbb{R}^n} \Vert \vec{y} - \matr{A}\vec{c} \Vert_2^2 \]
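% Illustrative note (assumption, not in the diff): when $\matr{A}$ has full column rank,
% this least squares problem is solved by the normal equations
% \[ \matr{A}^\top \matr{A} \vec{c} = \matr{A}^\top \vec{y} \]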

\begin{figure}[h]
\begin{figure}[H]
\centering
\includegraphics[width=0.40\textwidth]{img/linear_regression.png}
\caption{Interpolation using a polynomial of degree 1}

@@ -504,7 +504,7 @@ Moreover, we have that:
$\bm{\mu} = \nullvec$ and $\matr{\Sigma} = \matr{I}$ (multivariate).
\end{description}

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.40\textwidth]{img/normal_distribution.png}
\caption{Normal distributions and standard normal distribution}

@@ -197,7 +197,7 @@ Each $\vec{f}_i$ takes as input the output of the previous layer $\vec{x}_{i-1}$
where $\sigma_i$ is an activation function\footnote{\url{https://en.wikipedia.org/wiki/Activation_function}} (a function to add nonlinearity),
while $\matr{A}_{i-1}$ (linear mapping) and $\vec{b}_{i-1}$ (biases) are the parameters of $\vec{f}_i$.

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.7\textwidth]{img/_forward_pass.pdf}
\caption{Forward pass}
@@ -231,7 +231,7 @@ This can be done by using the chain rule to compute the partial derivatives of $
\end{split}
\]

\begin{figure}[ht]
\begin{figure}[H]
\centering
\includegraphics[width=0.7\textwidth]{img/_backward_pass.pdf}
\caption{Backward pass}