Mirror of https://github.com/NotXia/unibo-ai-notes.git, synced 2025-12-18 04:11:47 +01:00
Update example environment style <noupdate>
@@ -30,7 +30,7 @@
 \end{example}

 \item[Data exploration] \marginnote{Data exploration}
-\begin{figure}[ht]
+\begin{figure}[H]
 \begin{subfigure}{.5\textwidth}
 \centering
 \includegraphics[width=\linewidth]{img/_iris_boxplot_general.pdf}
@@ -137,7 +137,7 @@ As $N$ is at the denominator, this means that for large values of $N$, the uncer
 Note that cross-validation is done on the training set, so a final test set can still be used to
 evaluate the resulting model.

-\begin{figure}[h]
+\begin{figure}[H]
 \centering
 \includegraphics[width=0.6\textwidth]{img/cross_validation.png}
 \caption{Cross-validation example}
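
As a quick reference for the cross-validation context in this hunk: in $k$-fold cross-validation the training set is split into $k$ folds, each fold is held out once while the model is fit on the remaining $k-1$ folds, and the per-fold errors are averaged. A minimal sketch of that estimate, where $k$, $e_i$ and $\bar{e}$ are my notation rather than symbols from the notes:

\[
    \bar{e} = \frac{1}{k} \sum_{i=1}^{k} e_i ,
    \qquad e_i = \text{error measured on held-out fold } i
\]

Since every fold comes from the training set, the separate test set stays untouched for the final evaluation.
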
@@ -287,7 +287,7 @@ a macro (unweighted) average or a class-weighted average.
 When the area between the two curves is large and the curve is above the random classifier,
 the model can be considered a good classifier.

-\begin{figure}[h]
+\begin{figure}[H]
 \centering
 \includegraphics[width=0.5\textwidth]{img/lift_chart.png}
 \caption{Example of lift chart}
@@ -301,7 +301,7 @@ a macro (unweighted) average or a class-weighted average.
 A straight line is used to represent a random classifier.
 A threshold can be considered good if it is high on the y-axis and low on the x-axis.

-\begin{figure}[h]
+\begin{figure}[H]
 \centering
 \includegraphics[width=0.35\textwidth]{img/roc_curve.png}
 \caption{Example of ROC curves}
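
As a quick reference for the axes mentioned in this hunk: a ROC curve conventionally plots the true positive rate (y-axis) against the false positive rate (x-axis) as the decision threshold varies; the confusion-matrix symbols below are standard notation, not text from the notes.

\[
    \mathrm{TPR} = \frac{TP}{TP + FN}
    \qquad
    \mathrm{FPR} = \frac{FP}{FP + TN}
\]

A good threshold is therefore one with high $\mathrm{TPR}$ and low $\mathrm{FPR}$, and the random classifier corresponds to the diagonal $\mathrm{TPR} = \mathrm{FPR}$.
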
@@ -408,7 +408,7 @@ Possible solutions are:
 \item Classes distribution.
 \end{itemize}

-\begin{figure}[h]
+\begin{figure}[H]
 \centering
 \includegraphics[width=0.5\textwidth]{img/_iris_decision_tree_example.pdf}
 \caption{Example of decision tree}
@@ -458,7 +458,7 @@ Possible solutions are:
 Skipped.
 \end{descriptionlist}

-\begin{figure}[h]
+\begin{figure}[H]
 \centering
 \includegraphics[width=0.35\textwidth]{img/impurity_comparison.png}
 \caption{Comparison of impurity measures}
@@ -633,7 +633,7 @@ This has complexity $O(h)$, with $h$ the height of the tree.
 \item[Perceptron] \marginnote{Perceptron}
 A single artificial neuron that takes $n$ inputs $x_1, \dots, x_n$ and a bias $b$,
 and computes a linear combination of them with weights $w_1, \dots, w_n, w_b$.
-\begin{figure}[h]
+\begin{figure}[H]
 \centering
 \includegraphics[width=0.25\textwidth]{img/_perceptron.pdf}
 \caption{Example of perceptron}
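
A minimal sketch of the computation described in this hunk, assuming the usual thresholded output; the activation $\mathrm{sign}(\cdot)$ and the symbol $\hat{y}$ are my additions, while the weights and inputs follow the notation in the context lines.

\[
    \hat{y} = \mathrm{sign}\!\left( w_b\, b + \sum_{i=1}^{n} w_i\, x_i \right)
\]
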
@@ -686,7 +686,7 @@ In practice, a maximum number of iterations is set.
 In general, a subset of points (support vectors) \marginnote{Support vectors}
 in the training set is sufficient to define the hulls.

-\begin{figure}[h]
+\begin{figure}[H]
 \centering
 \includegraphics[width=0.4\textwidth]{img/svm.png}
 \caption{Maximum margin hyperplane of linearly separable data}
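
For the caption above, a compact reminder of the standard maximum-margin formulation; the symbols $\mathbf{w}$ and $b$ and the margin expression are conventional SVM notation, assumed here rather than quoted from the notes.

\[
    \mathbf{w} \cdot \mathbf{x} + b = 0 ,
    \qquad \text{margin} = \frac{2}{\lVert \mathbf{w} \rVert}
\]

Only the support vectors, the training points lying on the margin, determine $\mathbf{w}$ and $b$, which is why a subset of the training set is enough to define the boundary.
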
@@ -724,7 +724,7 @@ For non-linearly separable data, the boundary can be found using a non-linear ma
 to map the data into a new space (feature space) where a linear separation is possible.
 Then, the data and the boundary is mapped back into the original space.

-\begin{figure}[h]
+\begin{figure}[H]
 \begin{subfigure}{0.49\textwidth}
 \centering
 \includegraphics[width=\linewidth]{img/svm_kernel_example1.png}
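
A compact statement of the mapping described in these context lines, assuming the standard kernel formulation; $\varphi$, $\mathcal{F}$ and $K$ are my notation.

\[
    \varphi : \mathcal{X} \to \mathcal{F} ,
    \qquad
    K(\mathbf{x}_i, \mathbf{x}_j) = \varphi(\mathbf{x}_i) \cdot \varphi(\mathbf{x}_j)
\]

The kernel $K$ lets the linear boundary in the feature space be evaluated without ever computing $\varphi$ explicitly.
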
@@ -840,7 +840,7 @@ Train a set of base classifiers and make predictions by majority vote.
 If all the classifiers have the same but independent error rate,
 the overall error of the ensemble model is lower (derived from a binomial distribution).

-\begin{figure}[h]
+\begin{figure}[H]
 \centering
 \includegraphics[width=0.6\textwidth]{img/ensemble_error.png}
 \caption{Relationship between the error of base classifiers and ensemble models}
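
The binomial derivation mentioned in this hunk, sketched with my own symbols: $k$ independent base classifiers (take $k$ odd), each with error rate $\varepsilon$, combined by majority vote. The ensemble errs only when more than half of the base classifiers err:

\[
    e_{\text{ensemble}} = \sum_{i = \lceil k/2 \rceil}^{k} \binom{k}{i}\, \varepsilon^{i}\, (1 - \varepsilon)^{k - i}
\]

For $\varepsilon < 0.5$ this sum is smaller than $\varepsilon$, which is presumably the relationship plotted in the referenced figure.
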
@@ -13,7 +13,7 @@
 0 indicates no difference while the upper bound varies.
 \end{description}

-\begin{table}[ht]
+\begin{table}[H]
 \centering
 \renewcommand{\arraystretch}{2}
 \begin{tabular}{c | c | c}
@@ -64,7 +64,7 @@ Given two $D$-dimensional data entries $p$ and $q$, possible distance metrics ar
 The Mahalanobis distance of $p$ and $q$ increases when the segment connecting them
 points towards a direction of greater variation of the data.

-\begin{figure}[h]
+\begin{figure}[H]
 \centering
 \includegraphics[width=0.35\textwidth]{img/mahalanobis.png}
 \caption{The Mahalanobis distance between $(A, B)$ is greater than $(A, C)$, while the Euclidean distance is the same.}
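
For reference, the standard definition behind the discussion above; $\Sigma$ denotes the covariance matrix of the data and, like the rest of the notation, is my assumption rather than text from the notes.

\[
    d_M(p, q) = \sqrt{(p - q)^{\top}\, \Sigma^{-1}\, (p - q)}
\]

The covariance term is what makes the distance direction-dependent, unlike the Euclidean distance.
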
@@ -3,7 +3,7 @@
 \begin{description}
 \item[\Acl{crisp}] \marginnote{\acs{crisp}}
 Standardized process for data mining.
-\begin{figure}[ht]
+\begin{figure}[H]
 \centering
 \includegraphics[width=0.45\textwidth]{img/crisp.png}
 \caption{\ac{crisp} workflow}
@@ -25,7 +25,7 @@
 Less expensive.
 \end{descriptionlist}

-\begin{figure}[ht]
+\begin{figure}[H]
 \centering
 \includegraphics[width=0.5\textwidth]{img/_storage.pdf}
 \caption{Data storage technologies}
@@ -155,7 +155,7 @@
 \item[Speed layer]
 Receives the data and prepares real-time views. The views are also stored in the serving layer.
 \end{description}
-\begin{figure}[ht]
+\begin{figure}[H]
 \centering
 \includegraphics[width=0.5\textwidth]{img/lambda_lake.png}
 \caption{Lambda lake architecture}
@@ -165,7 +165,7 @@
 \marginnote{Kappa lake}
 The data are stored in a long-term store.
 Computations only happen in the speed layer (avoids lambda lake redundancy between batch layer and speed layer).
-\begin{figure}[ht]
+\begin{figure}[H]
 \centering
 \includegraphics[width=0.5\textwidth]{img/kappa_lake.png}
 \caption{Kappa lake architecture}
@@ -181,7 +181,7 @@ Framework that adds features on top of an existing data lake.
 \item Unified batch and streaming
 \item Schema enforcement
 \end{itemize}
-\begin{figure}[ht]
+\begin{figure}[H]
 \centering
 \includegraphics[width=0.7\textwidth]{img/delta_lake.png}
 \caption{Delta lake architecture}
@@ -34,7 +34,7 @@
 Navigation path created by the operations that a user applied.
 \end{description}

-\begin{figure}[ht]
+\begin{figure}[H]
 \centering
 \includegraphics[width=0.35\textwidth]{img/_olap_cube.pdf}
 \caption{\ac{olap} data cube}
@@ -280,13 +280,13 @@ The architecture of a data warehouse should meet the following requirements:
 \end{descriptionlist}
 \end{description}

-\begin{figure}[ht]
+\begin{figure}[H]
 \centering
 \includegraphics[width=0.8\textwidth]{img/dfm.png}
 \caption{Example of \ac{dfm}}
 \end{figure}

-\begin{figure}[ht]
+\begin{figure}[H]
 \centering
 \includegraphics[width=0.5\textwidth]{img/dfm_events.png}
 \caption{Example of primary and secondary events}
@@ -318,7 +318,7 @@ Aggregation operators can be classified as:
 \begin{description}
 \item[Additivity] \marginnote{Additive measure}
 A measure is additive along a dimension if an aggregation operator can be applied.
-\begin{table}[ht]
+\begin{table}[H]
 \centering
 \begin{tabular}{l | c | c}
 & \textbf{Temporal hierarchies} & \textbf{Non-temporal hierarchies} \\
@@ -340,7 +340,7 @@ There are two main strategies:
 \begin{descriptionlist}
 \item[Star schema] \marginnote{Star schema}
 A fact table that contains all the measures is linked to dimensional tables.
-\begin{figure}[ht]
+\begin{figure}[H]
 \centering
 \includegraphics[width=\textwidth]{img/logical_star_schema.png}
 \caption{Example of star schema}
@@ -87,7 +87,7 @@ Different levels of insight can be extracted by:

 \item[Data mining] \marginnote{Data mining}
 Discovery process for unstructured decisions.
-\begin{figure}[ht]
+\begin{figure}[H]
 \centering
 \includegraphics[width=0.8\textwidth]{img/data_mining_process.png}
 \caption{Data mining process}