diff --git a/src/ainotes.cls b/src/ainotes.cls
index 7763c7f..166a2dd 100644
--- a/src/ainotes.cls
+++ b/src/ainotes.cls
@@ -21,6 +21,10 @@
 \usepackage{eurosym}
 \usepackage{bussproofs} % Deductive tree
 \usepackage{varwidth}
+\usepackage[most]{tcolorbox}
+\usepackage{tikz}
+\tcbuselibrary{breakable}
+\usetikzlibrary{decorations.pathmorphing,calc}
 \geometry{ margin=3cm, lmargin=1.5cm, rmargin=4.5cm, marginparwidth=3cm }
 \hypersetup{ colorlinks, citecolor=black, filecolor=black, linkcolor=black, urlcolor=black, linktoc=all }
@@ -48,6 +52,7 @@
 \lstset{style=mystyle}
 \lstset{language=Python}
+
 \NewDocumentEnvironment{descriptionlist}{}{%
     \begin{description}[labelindent=1em]
 }{
@@ -57,15 +62,38 @@
 \renewcommand*{\marginfont}{\color{gray}\footnotesize}
 \renewcommand*\chapterpagestyle{scrheadings} % Header in chapter pages
+
 \theoremstyle{definition}
 \newtheorem{theorem}{Theorem}[section]
 \newtheorem{corollary}{Corollary}[theorem]
 \newtheorem{lemma}[theorem]{Lemma}
-\newtheorem*{example}{Example}
+\newtheorem*{privateexample}{Example}
 \theoremstyle{definition}
 \newtheorem*{definition}{Def}
 \newtheorem*{remark}{Remark}
+\newtcolorbox{marginbar}[3]{ % #1: color | #2: (number of lines - 1) | #3: line thickness
+    enhanced, blank, breakable,
+    overlay = {
+        \foreach \t in {0,...,#2}{
+            \draw[decorate, #3, #1]
+                ([xshift=-3-\t mm]frame.north west)
+                --
+                ([xshift=-3-\t mm]frame.south west);
+        },
+    }
+}
+
+\newenvironment{example}{%
+    \begin{marginbar}{lightgray}{0}{thick}
+    \begin{privateexample}
+}{%
+    \end{privateexample}
+    \end{marginbar}
+}
+
+
 \newcommand{\ubar}[1]{\text{\b{$#1$}}}
 \renewcommand{\vec}[1]{{\bm{\mathbf{#1}}}}
 \newcommand{\nullvec}[0]{\bar{\vec{0}}}
diff --git a/src/year1/cognition-and-neuroscience/module1/cn1.tex b/src/year1/cognition-and-neuroscience/module1/cn1.tex
index ba922dd..a39c692 100644
--- a/src/year1/cognition-and-neuroscience/module1/cn1.tex
+++ b/src/year1/cognition-and-neuroscience/module1/cn1.tex
@@ -18,7 +18,14 @@
 \DeclareAcronym{cs}{short=CS, long=conditioned stimulus}
 \DeclareAcronym{cr}{short=CR, long=conditioned response}
-\newtheorem*{casestudy}{Case study}
+\newtheorem*{privatecasestudy}{Case study}
+\newenvironment{casestudy}{%
+    \begin{marginbar}{olive}{0}{thick}
+    \begin{privatecasestudy}
+}{%
+    \end{privatecasestudy}
+    \end{marginbar}
+}
 \begin{document}
diff --git a/src/year1/cognition-and-neuroscience/module1/sections/_nervous_system.tex b/src/year1/cognition-and-neuroscience/module1/sections/_nervous_system.tex
index d996611..d30d2e0 100644
--- a/src/year1/cognition-and-neuroscience/module1/sections/_nervous_system.tex
+++ b/src/year1/cognition-and-neuroscience/module1/sections/_nervous_system.tex
@@ -108,7 +108,7 @@ Generally, a neuron does the following:
     \item[Cell body/soma] Metabolic center of the cell.
 \end{description}
-    \begin{figure}[h]
+    \begin{figure}[H]
         \centering
         \includegraphics[width=0.5\textwidth]{img/neuron_eukaryotic.png}
         \caption{Neuron as an eukaryotic cell}
@@ -151,7 +151,7 @@ There are three types of synapses:
     \item[Axoaxonic] \marginnote{Axoaxonic}
         Synapses that a neuron makes onto the synapses of another neuron.
         In this case, the transmitting neuron can be seen as a signal modulator of the receiving neuron.
-    \begin{figure}[h]
+    \begin{figure}[H]
         \begin{subfigure}{.3\textwidth}
             \centering
             \includegraphics[width=\linewidth]{./img/axosomatic.png}
@@ -215,7 +215,7 @@ In a neuron, there are four regions that handle signals:
     \item[Electrical synapses] The \ac{ap} is directly transmitted to the next neurons.
 \end{description}
-    \begin{figure}[h]
+    \begin{figure}[H]
         \centering
         \includegraphics[width=0.8\textwidth]{./img/neuron_transmission.png}
         \caption{Transmitting regions of different types of neurons}
@@ -286,7 +286,7 @@ In a neuron, there are four regions that handle signals:
         \end{remark}
 \end{enumerate}
-    \begin{figure}[h]
+    \begin{figure}[H]
         \centering
         \includegraphics[width=0.8\textwidth]{./img/neuron_transmission2.png}
         \caption{
diff --git a/src/year1/cognition-and-neuroscience/module2/cn2.tex b/src/year1/cognition-and-neuroscience/module2/cn2.tex
index 875361a..ed93baf 100644
--- a/src/year1/cognition-and-neuroscience/module2/cn2.tex
+++ b/src/year1/cognition-and-neuroscience/module2/cn2.tex
@@ -7,7 +7,14 @@
 \def\lastupdate{{PLACEHOLDER-LAST-UPDATE}}
 \def\giturl{{PLACEHOLDER-GIT-URL}}
-\newtheorem*{casestudy}{Case study}
+\newtheorem*{privatecasestudy}{Case study}
+\newenvironment{casestudy}{%
+    \begin{marginbar}{olive}{0}{thick}
+    \begin{privatecasestudy}
+}{%
+    \end{privatecasestudy}
+    \end{marginbar}
+}
 \begin{document}
diff --git a/src/year1/fundamentals-of-ai-and-kr/module1/sections/_games.tex b/src/year1/fundamentals-of-ai-and-kr/module1/sections/_games.tex
index 161d297..2c49a91 100644
--- a/src/year1/fundamentals-of-ai-and-kr/module1/sections/_games.tex
+++ b/src/year1/fundamentals-of-ai-and-kr/module1/sections/_games.tex
@@ -12,7 +12,7 @@ It considers the player as the entity that maximizes (\textsc{Max}) its utility
 the opponent as the entity that (optimally) minimizes (\textsc{Min}) the utility of the player.
-\begin{figure}[h]
+\begin{figure}[H]
     \centering
     \includegraphics[width=0.5\textwidth]{img/_minmax.pdf}
     \caption{Example of game tree with propagated scores}
@@ -121,7 +121,7 @@ In the average case of a random distribution, the reduction is of order $O(b^{3d
     \end{lstlisting}
 \end{algorithm}
-\begin{figure}[h]
+\begin{figure}[H]
     \begin{subfigure}{.3\textwidth}
         \centering
         \includegraphics[width=\linewidth]{img/alphabeta_algo_example1.png}
diff --git a/src/year1/fundamentals-of-ai-and-kr/module1/sections/_intro.tex b/src/year1/fundamentals-of-ai-and-kr/module1/sections/_intro.tex
index d48028b..a31c893 100644
--- a/src/year1/fundamentals-of-ai-and-kr/module1/sections/_intro.tex
+++ b/src/year1/fundamentals-of-ai-and-kr/module1/sections/_intro.tex
@@ -113,7 +113,7 @@ Intelligence is defined as the ability to perceive or infer information and to r
 \marginnote{Perceptron}
 A neuron (\textbf{perceptron}) computes a weighted sum of its inputs and passes the result to an activation function to produce the output.
-\begin{figure}[h]
+\begin{figure}[H]
     \centering
     \includegraphics[width=0.40\textwidth]{img/neuron.png}
     \caption{Representation of an artificial neuron}
@@ -128,21 +128,21 @@ The expressivity of a neural network increases when more neurons are used:
 \begin{descriptionlist}
     \item[Single perceptron] Able to compute a linear separation.
-    \begin{figure}[h]
+    \begin{figure}[H]
         \centering
         \includegraphics[width=0.25\textwidth]{img/1perceptron.png}
         \caption{Separation performed by one perceptron}
     \end{figure}
     \item[Three-layer network] Able to separate a convex region ($n_\text{edges} \leq n_\text{hidden neurons}$)
-    \begin{figure}[h]
+    \begin{figure}[H]
         \centering
         \includegraphics[width=0.90\textwidth]{img/3layer.png}
         \caption{Separation performed by a three-layer network}
     \end{figure}
     \item[Four-layer network] Able to separate regions of arbitrary shape.
-    \begin{figure}[h]
+    \begin{figure}[H]
         \centering
         \includegraphics[width=0.40\textwidth]{img/4layer.png}
         \caption{Separation performed by a four-layer network}
diff --git a/src/year1/fundamentals-of-ai-and-kr/module1/sections/_local_search.tex b/src/year1/fundamentals-of-ai-and-kr/module1/sections/_local_search.tex
index d9ea07f..9e8f128 100644
--- a/src/year1/fundamentals-of-ai-and-kr/module1/sections/_local_search.tex
+++ b/src/year1/fundamentals-of-ai-and-kr/module1/sections/_local_search.tex
@@ -15,7 +15,7 @@ Problem: find a Hamiltonian tour of minimum cost in an undirected graph.
     A possible neighborhood of a state applies the $k$-exchange that guarantees to maintain a Hamiltonian tour.
-    \begin{figure}[ht]
+    \begin{figure}[H]
         \begin{subfigure}{.5\textwidth}
             \centering
             \includegraphics[width=.70\linewidth]{img/tsp_2-exchange.png}
@@ -78,7 +78,7 @@ Can be seen as a search process over graphs:
     \item[Neighborhood graph] The search space topology.
     \item[Search graph] The explored space.
 \end{descriptionlist}
-\begin{figure}[ht]
+\begin{figure}[H]
     \begin{subfigure}{.5\textwidth}
         \centering
         \includegraphics[width=.55\linewidth]{img/_local_search_neigh_graph.pdf}
@@ -197,7 +197,7 @@ Population based meta heuristics are built on the following concepts:
     \item[Natural selection] Fit organisms have many offspring while others become extinct.
 \end{descriptionlist}
-\begin{table}[ht]
+\begin{table}[H]
     \centering
     \begin{tabular}{c | c}
         \textbf{Biology} & \textbf{Artificial intelligence} \\
@@ -224,7 +224,7 @@ The following terminology will be used:
     \item[Alleles] Domain of values of a gene.
 \end{descriptionlist}
-\begin{figure}[ht]
+\begin{figure}[H]
     \centering
     \includegraphics[width=0.5\textwidth]{img/_genetic_terminology.pdf}
     \caption{}
@@ -270,7 +270,7 @@ Genetic operators are:
     \end{descriptionlist}
 \end{example}
-\begin{figure}[ht]
+\begin{figure}[H]
     \centering
     \includegraphics[width=0.4\textwidth]{img/_genetic_cycle.pdf}
     \caption{Evolutionary cycle}
diff --git a/src/year1/fundamentals-of-ai-and-kr/module1/sections/_planning.tex b/src/year1/fundamentals-of-ai-and-kr/module1/sections/_planning.tex
index 58b018a..5758f04 100644
--- a/src/year1/fundamentals-of-ai-and-kr/module1/sections/_planning.tex
+++ b/src/year1/fundamentals-of-ai-and-kr/module1/sections/_planning.tex
@@ -423,7 +423,7 @@ At each step, one of the following refinement operations can be applied until th
     \item Add a causal link to the set of causal links.
 \end{itemize}
-\begin{figure}[h]
+\begin{figure}[H]
     \centering
     \includegraphics[width=0.45\textwidth]{img/_nonlinear_plan_example.pdf}
     \caption{Example of search tree in non-linear planning}
diff --git a/src/year1/fundamentals-of-ai-and-kr/module1/sections/_search.tex b/src/year1/fundamentals-of-ai-and-kr/module1/sections/_search.tex
index df8409d..269181d 100644
--- a/src/year1/fundamentals-of-ai-and-kr/module1/sections/_search.tex
+++ b/src/year1/fundamentals-of-ai-and-kr/module1/sections/_search.tex
@@ -30,7 +30,7 @@ A leaf can be a state to expand, a solution or a dead-end.
     \Cref{alg:search_tree_search} describes a generic tree search algorithm.
-    \begin{figure}[h]
+    \begin{figure}[H]
         \centering
         \includegraphics[width=0.25\textwidth]{img/_search_tree.pdf}
         \caption{Search tree}
@@ -122,7 +122,7 @@ Always expands the least deep node. The fringe is implemented as a queue (FIFO).
 The exponential space complexity makes BFS impractical for large problems.
-\begin{figure}[h]
+\begin{figure}[H]
    \centering
    \includegraphics[width=0.30\textwidth]{img/_bfs.pdf}
    \caption{BFS visit order}
@@ -147,7 +147,7 @@ Same as BFS, but always expands the node with the lowest cumulative cost.
    \end{tabular}
 \end{center}
-\begin{figure}[h]
+\begin{figure}[H]
    \centering
    \includegraphics[width=0.50\textwidth]{img/_ucs.pdf}
    \caption{Uniform-cost search visit order. $(n)$ is the cumulative cost}
@@ -175,7 +175,7 @@ Always expands the deepest node. The fringe is implemented as a stack (LIFO).
    \end{tabular}
 \end{center}
-\begin{figure}[h]
+\begin{figure}[H]
    \centering
    \includegraphics[width=0.30\textwidth]{img/_dfs.pdf}
    \caption{DFS visit order}
@@ -261,7 +261,7 @@ The fringe is ordered according to the estimated scores.
    \end{center}
    % The complexity can be reduced depending on the heuristic.
-    \begin{figure}[ht]
+    \begin{figure}[H]
        \centering
        \includegraphics[width=0.65\textwidth]{img/_greedy_best_first_example.pdf}
        \caption{Hill climbing visit order}
@@ -337,7 +337,7 @@ The fringe is ordered according to the estimated scores.
    In general, it is better to use heuristics with large values (i.e. heuristics that don't underestimate too much).
-    \begin{figure}[ht]
+    \begin{figure}[H]
        \centering
        \includegraphics[width=0.65\textwidth]{img/_a_start_example.pdf}
        \caption{A$^*$ visit order}
diff --git a/src/year1/fundamentals-of-ai-and-kr/module2/sections/_ontologies.tex b/src/year1/fundamentals-of-ai-and-kr/module2/sections/_ontologies.tex
index f909a22..fbe91e6 100644
--- a/src/year1/fundamentals-of-ai-and-kr/module2/sections/_ontologies.tex
+++ b/src/year1/fundamentals-of-ai-and-kr/module2/sections/_ontologies.tex
@@ -160,7 +160,7 @@ A property of objects.
 \marginnote{Semantic networks}
 Graphical representation of objects and categories connected through labeled links.
-\begin{figure}[h]
+\begin{figure}[H]
    \centering
    \includegraphics[width=0.4\textwidth]{img/semantic_network.png}
    \caption{Example of semantic network}
diff --git a/src/year1/fundamentals-of-ai-and-kr/module2/sections/_semantic_web.tex b/src/year1/fundamentals-of-ai-and-kr/module2/sections/_semantic_web.tex
index 800e3a5..fb51274 100644
--- a/src/year1/fundamentals-of-ai-and-kr/module2/sections/_semantic_web.tex
+++ b/src/year1/fundamentals-of-ai-and-kr/module2/sections/_semantic_web.tex
@@ -55,7 +55,7 @@
    \texttt{http://www.example.org/index.html} has a \texttt{creator} with staff id \texttt{85740}.
    \end{example}
-    \item[XML]
+    \item[XML] \phantom{}
    \begin{example}
        \phantom{}
        \begin{lstlisting}[mathescape=true, language=xml]
 1.
diff --git a/src/year1/statistical-and-mathematical-methods-for-ai/sections/_gradient_methods.tex b/src/year1/statistical-and-mathematical-methods-for-ai/sections/_gradient_methods.tex
index 9ce0e39..31583f3 100644
--- a/src/year1/statistical-and-mathematical-methods-for-ai/sections/_gradient_methods.tex
+++ b/src/year1/statistical-and-mathematical-methods-for-ai/sections/_gradient_methods.tex
@@ -157,7 +157,7 @@ A generic gradient-like method can then be defined as:
    \item[Flat regions and local optima] \marginnote{Flat regions and local optima}
        Flat regions slow down the learning speed, while a local optima causes the method to converge at a poor solution.
-    \begin{figure}[ht]
+    \begin{figure}[H]
        \centering
        \includegraphics[width=0.9\textwidth]{img/_descent_local_flat.pdf}
        \caption{Flat regions and local minima}
@@ -194,7 +194,7 @@ A generic gradient-like method can then be defined as:
        A valley in the objective function causes a gradient method to bounce between the sides to a point where no significant progress can be made.
-    \begin{figure}[ht]
+    \begin{figure}[H]
        \begin{subfigure}{.5\textwidth}
            \centering
            \includegraphics[width=.30\linewidth]{img/cliff.png}
@@ -217,7 +217,7 @@ A generic gradient-like method can then be defined as:
        Informally, a set is convex if, for any two points of the set, the points laying on the segment connecting them are also part of the set.
-    \begin{figure}[ht]
+    \begin{figure}[H]
        \begin{subfigure}{.5\textwidth}
            \centering
            \includegraphics[width=.25\linewidth]{img/convex_set.png}
@@ -239,7 +239,7 @@ A generic gradient-like method can then be defined as:
    \]
    In other words, the segment connecting two points of the function lays above the graph.
-    \begin{figure}[ht]
+    \begin{figure}[H]
        \centering
        \includegraphics[width=0.55\textwidth]{img/convex_function.png}
        \caption{Convex function}
diff --git a/src/year1/statistical-and-mathematical-methods-for-ai/sections/_linear_algebra.tex b/src/year1/statistical-and-mathematical-methods-for-ai/sections/_linear_algebra.tex
index b4b0442..72e6bef 100644
--- a/src/year1/statistical-and-mathematical-methods-for-ai/sections/_linear_algebra.tex
+++ b/src/year1/statistical-and-mathematical-methods-for-ai/sections/_linear_algebra.tex
@@ -232,7 +232,7 @@ Common norms are:
    The vector $\vec{w} \in U^\perp$ s.t. $\Vert \vec{w} \Vert = 1$ is the \textbf{normal vector} of $U$. \marginnote{Normal vector}
    %
-    \begin{figure}[ht]
+    \begin{figure}[H]
        \centering
        \includegraphics[width=0.4\textwidth]{img/_orthogonal_complement.pdf}
        \caption{Orthogonal complement of a subspace $U \subseteq \mathbb{R}^3$}
diff --git a/src/year1/statistical-and-mathematical-methods-for-ai/sections/_machine_learning.tex b/src/year1/statistical-and-mathematical-methods-for-ai/sections/_machine_learning.tex
index c67e561..27cf356 100644
--- a/src/year1/statistical-and-mathematical-methods-for-ai/sections/_machine_learning.tex
+++ b/src/year1/statistical-and-mathematical-methods-for-ai/sections/_machine_learning.tex
@@ -160,7 +160,7 @@ The parameters are determined as the most likely to predict the correct label gi
        which corresponds to the least squares problem.
    \end{description}
-    \begin{figure}[ht]
+    \begin{figure}[H]
        \begin{subfigure}{.45\textwidth}
            \centering
            \includegraphics[width=.75\linewidth]{img/gaussian_mle_good.png}
diff --git a/src/year1/statistical-and-mathematical-methods-for-ai/sections/_matrix_decomp.tex b/src/year1/statistical-and-mathematical-methods-for-ai/sections/_matrix_decomp.tex
index 93cda94..ef2cb08 100644
--- a/src/year1/statistical-and-mathematical-methods-for-ai/sections/_matrix_decomp.tex
+++ b/src/year1/statistical-and-mathematical-methods-for-ai/sections/_matrix_decomp.tex
@@ -146,7 +146,7 @@ Therefore, the compression factor is given by: \marginnote{Compression factor}
    c_k = 1 - \frac{k(1 + m + n)}{mn}
 \]
-\begin{figure}[h]
+\begin{figure}[H]
    \centering
    \includegraphics[width=0.60\textwidth]{img/_rank_k_approx.pdf}
    \caption{Approximation of an image}
@@ -197,7 +197,7 @@ We can formulate this as a linear system:
 that can be solved as a linear least squares problem:
 \[ \min_{\vec{c} \in \mathbb{R}^n} \Vert \vec{y} - \matr{A}\vec{c} \Vert_2^2 \]
-\begin{figure}[h]
+\begin{figure}[H]
    \centering
    \includegraphics[width=0.40\textwidth]{img/linear_regression.png}
    \caption{Interpolation using a polynomial of degree 1}
diff --git a/src/year1/statistical-and-mathematical-methods-for-ai/sections/_probability.tex b/src/year1/statistical-and-mathematical-methods-for-ai/sections/_probability.tex
index a000c38..4512326 100644
--- a/src/year1/statistical-and-mathematical-methods-for-ai/sections/_probability.tex
+++ b/src/year1/statistical-and-mathematical-methods-for-ai/sections/_probability.tex
@@ -504,7 +504,7 @@ Moreover, we have that:
        $\bm{\mu} = \nullvec$ and $\matr{\Sigma} = \matr{I}$ (multivariate).
    \end{description}
-    \begin{figure}[ht]
+    \begin{figure}[H]
        \centering
        \includegraphics[width=0.40\textwidth]{img/normal_distribution.png}
        \caption{Normal distributions and standard normal distribution}
diff --git a/src/year1/statistical-and-mathematical-methods-for-ai/sections/_vector_calculus.tex b/src/year1/statistical-and-mathematical-methods-for-ai/sections/_vector_calculus.tex
index a1eb5df..588a1a9 100644
--- a/src/year1/statistical-and-mathematical-methods-for-ai/sections/_vector_calculus.tex
+++ b/src/year1/statistical-and-mathematical-methods-for-ai/sections/_vector_calculus.tex
@@ -197,7 +197,7 @@ Each $\vec{f}_i$ takes as input the output of the previous layer $\vec{x}_{i-1}$
 where $\sigma_i$ is an activation function\footnote{\url{https://en.wikipedia.org/wiki/Activation_function}} (a function to add nonlinearity),
 while $\matr{A}_{i-1}$ (linear mapping) and $\vec{b}_{i-1}$ (biases) are the parameters of $\vec{f}_i$.
-\begin{figure}[ht]
+\begin{figure}[H]
    \centering
    \includegraphics[width=0.7\textwidth]{img/_forward_pass.pdf}
    \caption{Forward pass}
@@ -231,7 +231,7 @@ This can be done by using the chain rule to compute the partial derivatives of $
    \end{split}
 \]
-\begin{figure}[ht]
+\begin{figure}[H]
    \centering
    \includegraphics[width=0.7\textwidth]{img/_backward_pass.pdf}
    \caption{Backward pass}
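
For reference, a minimal usage sketch of the environments this patch introduces. The body text below is invented for illustration; only the environment names and the marginbar arguments come from the diff itself.

% Illustration only (not part of the patch): how the new environments are meant to be used.
\begin{example}
    Example text, typeset by the starred theorem `privateexample' and flanked by a
    single thick lightgray bar drawn in the left margin by the `marginbar' box.
\end{example}

\begin{casestudy}
    Case-study text, marked with an olive margin bar
    (the environment is defined locally in cn1.tex and cn2.tex).
\end{casestudy}

% The underlying box can also be used directly;
% arguments: #1 bar color, #2 number of bars minus one, #3 line style.
\begin{marginbar}{gray}{1}{thin}
    Content flanked by two thin gray bars; the box is breakable across pages.
\end{marginbar}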
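
One assumption worth double-checking: the [H] placement specifier that replaces [h]/[ht] throughout is provided by the float package, which this diff does not add to ainotes.cls. If the class does not already load it (not visible in these hunks), a one-line preamble addition along these lines would be needed:

% Sketch; only needed if ainotes.cls does not already load it.
\usepackage{float} % provides the [H] "place exactly here" float specifier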