diff --git a/src/fundamentals-of-ai-and-kr/module3/img/_active_trail.drawio b/src/fundamentals-of-ai-and-kr/module3/img/_active_trail.drawio
new file mode 100644
index 0000000..b48e729
--- /dev/null
+++ b/src/fundamentals-of-ai-and-kr/module3/img/_active_trail.drawio
@@ -0,0 +1,172 @@
+ [drawio XML omitted]
diff --git a/src/fundamentals-of-ai-and-kr/module3/img/_active_trail.pdf b/src/fundamentals-of-ai-and-kr/module3/img/_active_trail.pdf
new file mode 100644
index 0000000..4c30d49
Binary files /dev/null and b/src/fundamentals-of-ai-and-kr/module3/img/_active_trail.pdf differ
diff --git a/src/fundamentals-of-ai-and-kr/module3/img/_d_sep_example.drawio b/src/fundamentals-of-ai-and-kr/module3/img/_d_sep_example.drawio
new file mode 100644
index 0000000..ed68c44
--- /dev/null
+++ b/src/fundamentals-of-ai-and-kr/module3/img/_d_sep_example.drawio
@@ -0,0 +1,123 @@
+ [drawio XML omitted]
diff --git a/src/fundamentals-of-ai-and-kr/module3/img/_d_sep_example.pdf b/src/fundamentals-of-ai-and-kr/module3/img/_d_sep_example.pdf
new file mode 100644
index 0000000..5a13308
Binary files /dev/null and b/src/fundamentals-of-ai-and-kr/module3/img/_d_sep_example.pdf differ
diff --git a/src/fundamentals-of-ai-and-kr/module3/img/_local_independence.pdf b/src/fundamentals-of-ai-and-kr/module3/img/_local_independence.pdf
new file mode 100644
index 0000000..bdcf355
Binary files /dev/null and b/src/fundamentals-of-ai-and-kr/module3/img/_local_independence.pdf differ
diff --git a/src/fundamentals-of-ai-and-kr/module3/img/_markov_blanket.pdf b/src/fundamentals-of-ai-and-kr/module3/img/_markov_blanket.pdf
new file mode 100644
index 0000000..838cf4c
Binary files /dev/null and b/src/fundamentals-of-ai-and-kr/module3/img/_markov_blanket.pdf differ
diff --git a/src/fundamentals-of-ai-and-kr/module3/img/_monty_hall1.pdf b/src/fundamentals-of-ai-and-kr/module3/img/_monty_hall1.pdf
new file mode 100644
index 0000000..8d01eff
Binary files /dev/null and b/src/fundamentals-of-ai-and-kr/module3/img/_monty_hall1.pdf differ
diff --git a/src/fundamentals-of-ai-and-kr/module3/img/_monty_hall2.pdf b/src/fundamentals-of-ai-and-kr/module3/img/_monty_hall2.pdf
new file mode 100644
index 0000000..a57b38d
Binary files /dev/null and b/src/fundamentals-of-ai-and-kr/module3/img/_monty_hall2.pdf differ
diff --git a/src/fundamentals-of-ai-and-kr/module3/img/_monty_hall3.pdf b/src/fundamentals-of-ai-and-kr/module3/img/_monty_hall3.pdf
new file mode 100644
index 0000000..07c86d6
Binary files /dev/null and b/src/fundamentals-of-ai-and-kr/module3/img/_monty_hall3.pdf differ
diff --git a/src/fundamentals-of-ai-and-kr/module3/img/_v_structure.pdf b/src/fundamentals-of-ai-and-kr/module3/img/_v_structure.pdf
new file mode 100644
index 0000000..596b750
Binary files /dev/null and b/src/fundamentals-of-ai-and-kr/module3/img/_v_structure.pdf differ
diff --git a/src/fundamentals-of-ai-and-kr/module3/main.tex b/src/fundamentals-of-ai-and-kr/module3/main.tex
index 538fa9b..680d7e6 100644
--- a/src/fundamentals-of-ai-and-kr/module3/main.tex
+++ b/src/fundamentals-of-ai-and-kr/module3/main.tex
@@ -9,5 +9,6 @@
     \input{sections/_intro.tex}
     \input{sections/_probability.tex}
+    \input{sections/_bayesian_net.tex}
 \end{document}
\ No newline at end of file
diff --git a/src/fundamentals-of-ai-and-kr/module3/sections/_bayesian_net.tex b/src/fundamentals-of-ai-and-kr/module3/sections/_bayesian_net.tex
new file mode 100644
index 0000000..891b316
--- /dev/null
+++ b/src/fundamentals-of-ai-and-kr/module3/sections/_bayesian_net.tex
@@ -0,0 +1,282 @@
+\chapter{Bayesian networks}
+
+
+\section{Bayes' rule}
+
+\begin{description}
+    \item[Bayes' rule] \marginnote{Bayes' rule}
+        \[ \prob{a \,\vert\, b} = \frac{\prob{b \,\vert\, a} \prob{a}}{\prob{b}} \]
+
+    \item[Bayes' rule and conditional independence]
+        Given the random variables $\texttt{Cause}$ and\\
+        $\texttt{Effect}_1, \dots, \texttt{Effect}_n$, with the $\texttt{Effect}_i$ conditionally independent of each other given $\texttt{Cause}$,
+        we can compute $\textbf{P}(\texttt{Cause}, \texttt{Effect}_1, \dots, \texttt{Effect}_n)$ as follows:
+        \[
+            \textbf{P}(\texttt{Cause}, \texttt{Effect}_1, \dots, \texttt{Effect}_n) =
+            \left(\prod_i \textbf{P}(\texttt{Effect}_i \,\vert\, \texttt{Cause})\right) \textbf{P}(\texttt{Cause})
+        \]
+        The number of parameters is linear in the number of effects.
+
+        \begin{example}
+            Knowing that $\textbf{P} \models (\texttt{Catch} \perp \texttt{Toothache} \vert \texttt{Cavity})$:
+            \[
+                \begin{split}
+                    \textbf{P}&(\texttt{Cavity} \,\vert\, \texttt{toothache} \land \texttt{catch}) \\
+                    &= \alpha\textbf{P}(\texttt{toothache} \land \texttt{catch} \,\vert\, \texttt{Cavity})\textbf{P}(\texttt{Cavity}) \\
+                    &= \alpha\textbf{P}(\texttt{toothache} \,\vert\, \texttt{Cavity})
+                       \textbf{P}(\texttt{catch} \,\vert\, \texttt{Cavity})\textbf{P}(\texttt{Cavity}) \\
+                \end{split}
+            \]
+        \end{example}
+\end{description}
+
+
+\section{Bayesian network reasoning}
+
+\begin{description}
+    \item[Bayesian network] \marginnote{Bayesian network}
+        A graph that encodes conditional independence assertions and compactly specifies the full joint distribution.
+        \begin{itemize}
+            \item Directed acyclic graph.
+            \item Nodes represent variables.
+            \item Each node has a conditional distribution given its parents
+                \[ \textbf{P}(X_i \,\vert\, \texttt{parents}(X_i)) \]
+                In other words, if there is an edge from $A$ to $B$, then $A$ (cause) influences $B$ (effect).
+        \end{itemize}
+
+        \begin{description}
+            \item[Conditional probability table (CPT)] \marginnote{Conditional probability table (CPT)}
+                In the case of boolean variables, the conditional distribution of a node can be represented
+                as a table with one row for each combination of values of the parents.
+
+                \begin{example}
+                    Given the boolean variables $A$, $B$ and $C$, with $C$ depending on $A$ and $B$, we have that:\\
+                    \begin{minipage}{.48\linewidth}
+                        \centering
+                        \includegraphics[width=0.35\linewidth]{img/_cpt_graph.pdf}
+                    \end{minipage}
+                    \begin{minipage}{.48\linewidth}
+                        \centering
+                        \begin{tabular}{c|c|c|c}
+                            A & B & $\prob{c \vert A, B}$ & $\prob{\lnot c \vert A, B}$ \\
+                            \hline
+                            a & b & $\alpha$ & $1-\alpha$ \\
+                            $\lnot$a & b & $\beta$ & $1-\beta$ \\
+                            a & $\lnot$b & $\gamma$ & $1-\gamma$ \\
+                            $\lnot$a & $\lnot$b & $\delta$ & $1-\delta$ \\
+                        \end{tabular}
+                    \end{minipage}
+                \end{example}
+        \end{description}
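+        As a concrete illustration, the CPT of the example above can be stored
+        and queried as follows (a minimal Python sketch; the numbers standing
+        in for $\alpha, \beta, \gamma, \delta$ are arbitrary placeholders):
+        \begin{verbatim}
+# CPT P(C | A, B); the probabilities are illustrative placeholders.
+cpt_c = {
+    (True, True):   0.9,  # alpha = P(c | a, b)
+    (False, True):  0.6,  # beta  = P(c | not a, b)
+    (True, False):  0.4,  # gamma = P(c | a, not b)
+    (False, False): 0.1,  # delta = P(c | not a, not b)
+}
+
+def p_c(c, a, b):
+    """P(C = c | A = a, B = b); the two columns of a row sum to 1."""
+    p_true = cpt_c[(a, b)]
+    return p_true if c else 1.0 - p_true
+
+print(p_c(False, True, False))  # P(not c | a, not b) = 1 - gamma = 0.6
+        \end{verbatim}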
+    \item[Reasoning patterns] \marginnote{Reasoning patterns}
+        Given a Bayesian network, the following reasoning patterns can be used:
+        \begin{descriptionlist}
+            \item[Causal] \marginnote{Causal reasoning}
+                To make a prediction. From the cause, derive the effect.
+                \begin{example}
+                    Knowing $\texttt{Intelligence}$, it is possible to make a prediction of $\texttt{Letter}$.
+                    \begin{center}
+                        \includegraphics[width=0.5\linewidth]{img/_causal_example.pdf}
+                    \end{center}
+                \end{example}
+
+            \item[Evidential] \marginnote{Evidential reasoning}
+                To find an explanation. From the effect, derive the cause.
+                \begin{example}
+                    Knowing $\texttt{Grade}$, it is possible to explain it by estimating\\$\texttt{Intelligence}$.
+                    \begin{center}
+                        \includegraphics[width=0.7\linewidth]{img/_evidential_example.pdf}
+                    \end{center}
+                \end{example}
+
+            \item[Explain away] \marginnote{Explain away reasoning}
+                Reasoning between the causes of a common observed effect:
+                evidence about one cause changes the belief in the other causes.
+                \begin{example}
+                    Knowing $\texttt{Difficulty}$ and $\texttt{Grade}$,
+                    it is possible to estimate \\$\texttt{Intelligence}$.
+
+                    Note that if $\texttt{Grade}$ were not known,
+                    $\texttt{Difficulty}$ and $\texttt{Intelligence}$ would be independent.
+                    \begin{center}
+                        \includegraphics[width=0.75\linewidth]{img/_explainaway_example.pdf}
+                    \end{center}
+                \end{example}
+        \end{descriptionlist}
+
+    \item[Independence] \marginnote{Bayesian network independence}
+        Intuitively, an effect is independent of a cause
+        if there is another cause in the middle whose value is already known.
+        \begin{example}
+            \phantom{}
+
+            \begin{minipage}{.3\linewidth}
+                \centering
+                \includegraphics[width=0.85\linewidth]{img/_independence_example.pdf}
+            \end{minipage}
+            \begin{minipage}{.6\linewidth}
+                \[ \textbf{P} \models (\texttt{L} \perp \texttt{D}, \texttt{I}, \texttt{S} \,\vert\, \texttt{G}) \]
+                \[ \textbf{P} \models (\texttt{S} \perp \texttt{L} \,\vert\, \texttt{G}) \]
+                \[ \textbf{P} \models (\texttt{S} \perp \texttt{D}) \text{ but }
+                   \textbf{P} \models (\texttt{S} \,\cancel{\perp}\, \texttt{D} \,\vert\, \texttt{G}) \text{ (explain away)} \]
+            \end{minipage}
+        \end{example}
+
+    \item[V-structure] \marginnote{V-structure}
+        Effect with two causes.
+        If neither the effect nor any of its descendants is in the evidence, the causes are independent.
+
+        \begin{figure}[H]
+            \centering
+            \includegraphics[width=0.2\textwidth]{img/_v_structure.pdf}
+            \caption{V-structure}
+        \end{figure}
+
+    \item[Active two-edge trail] \marginnote{Active two-edge trail}
+        The trail $X \leftrightharpoons Z \leftrightharpoons Y$ is active if either:
+        \begin{itemize}
+            \item $X$, $Z$, $Y$ is a v-structure $X \rightarrow Z \leftarrow Y$
+                and $Z$ or one of its descendants is in the evidence.
+            \item $X$, $Z$, $Y$ is not a v-structure and $Z$ is not in the evidence.
+        \end{itemize}
+        In other words, influence can flow from $X$ to $Y$ passing through $Z$.
+
+        \begin{figure}[h]
+            \centering
+            \includegraphics[width=0.65\textwidth]{img/_active_trail.pdf}
+            \caption{Example of active and non-active two-edge trails}
+        \end{figure}
+
+    \item[Active trail] \marginnote{Active trail}
+        A trail $X_1 \leftrightharpoons \dots \leftrightharpoons X_n$ is active iff
+        each two-edge trail $X_{i-1} \leftrightharpoons X_i \leftrightharpoons X_{i+1}$ along the trail is active.
+
+    \item[D-separation] \marginnote{D-separation}
+        Two sets of nodes $\vec{X}$ and $\vec{Y}$ are d-separated given the evidence $\vec{Z}$ if
+        there is no active trail between any $X \in \vec{X}$ and $Y \in \vec{Y}$.
+
+        \begin{theorem}
+            Two d-separated nodes are conditionally independent given the evidence.
+            In other words, two nodes are independent given $\vec{Z}$ if there is no active trail between them.
+        \end{theorem}
+
+    \item[Independence algorithm] \phantom{}
+        \begin{description}
+            \item[Blocked node]
+                A node is blocked if it stops the flow of influence along a trail.
+                This happens if either of the following conditions is met:
+                \begin{itemize}
+                    \item The node is in the middle of a v-structure and is unmarked
+                        (i.e., neither the node nor any of its descendants is in the evidence).
+                    \item The node is not in the middle of a v-structure and it is in the evidence.
+                \end{itemize}
+        \end{description}
+        To determine if $X \perp Y$ given the evidence $\vec{Z}$:
+        \begin{enumerate}
+            \item Traverse the graph bottom-up, marking all nodes that are in $\vec{Z}$ or
+                that have a marked child (i.e., the nodes of $\vec{Z}$ and their ancestors).
+            \item Search for a trail from $X$ to $Y$ that does not pass through a blocked node.
+            \item If $Y$ is not reachable from $X$, then $X$ and $Y$ are independent.
+                Otherwise $X$ and $Y$ are dependent.
+        \end{enumerate}
+        A runnable sketch of this procedure is given after the example below.
+
+        \begin{example}
+            To determine if $J \perp D$:
+            \begin{center}
+                \includegraphics[width=0.5\textwidth]{img/_d_sep_example.pdf}
+            \end{center}
+            As a path has been found, $J \,\cancel{\perp}\, D$.
+        \end{example}
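+        A possible implementation of this procedure is sketched below
+        (a minimal Python sketch, not part of the original notes; the
+        dictionary-of-parents representation is an assumption of this example):
+        \begin{verbatim}
+from collections import deque
+
+def d_separated(parents, x, y, z):
+    """True iff x and y are d-separated given the evidence set z.
+
+    parents: dict mapping every node to the list of its parents.
+    """
+    children = {n: set() for n in parents}
+    for n, ps in parents.items():
+        for p in ps:
+            children[p].add(n)
+
+    # Step 1: bottom-up marking of the evidence and its ancestors.
+    marked, stack = set(), list(z)
+    while stack:
+        n = stack.pop()
+        if n not in marked:
+            marked.add(n)
+            stack.extend(parents[n])
+
+    # Step 2: search for an active trail from x to y; a state is
+    # (node, direction): 'up' = entered from a child, 'down' = from a parent.
+    visited, queue = set(), deque([(x, 'up')])
+    while queue:
+        n, d = queue.popleft()
+        if (n, d) in visited:
+            continue
+        visited.add((n, d))
+        if n == y and n not in z:
+            return False  # active trail found: not d-separated
+        if d == 'up' and n not in z:
+            queue.extend((p, 'up') for p in parents[n])
+            queue.extend((c, 'down') for c in children[n])
+        elif d == 'down':
+            if n not in z:   # chain/fork node not blocked by the evidence
+                queue.extend((c, 'down') for c in children[n])
+            if n in marked:  # v-structure made active by the marking
+                queue.extend((p, 'up') for p in parents[n])
+    return True
+
+# Student network: D -> G <- I, I -> S, G -> L.
+net = {'D': [], 'I': [], 'G': ['D', 'I'], 'S': ['I'], 'L': ['G']}
+print(d_separated(net, 'S', 'D', set()))   # True:  S independent of D
+print(d_separated(net, 'S', 'D', {'G'}))   # False: explain away through G
+        \end{verbatim}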
+    \item[Global semantics] \marginnote{Global semantics}
+        Given a Bayesian network, the full joint distribution can be defined as
+        the product of the local conditional distributions:
+        \[ \prob{x_1, \dots, x_n} = \prod_{i=1}^{n} \prob{x_i \,\vert\, \texttt{parents}(X_i)} \]
+
+        \begin{example}
+            Given the following Bayesian network:
+
+            \begin{minipage}{.3\linewidth}
+                \centering
+                \includegraphics[width=0.7\linewidth]{img/_global_semantics_example.pdf}
+            \end{minipage}
+            \begin{minipage}{.6\linewidth}
+                \[
+                    \begin{split}
+                        &\prob{j \land m \land a \land \lnot b \land \lnot e} \\
+                        &= \prob{\lnot b} \prob{\lnot e} \prob{a \,\vert\, \lnot b, \lnot e}
+                           \prob{j \,\vert\, a} \prob{m \,\vert\, a}
+                    \end{split}
+                \]
+            \end{minipage}
+        \end{example}
+
+    \item[Local semantics]
+        Each node is conditionally independent of its non-descendants given its parents.
+        \begin{figure}[h]
+            \centering
+            \includegraphics[width=0.35\textwidth]{img/_local_independence.pdf}
+            \caption{Local independence}
+        \end{figure}
+
+        \begin{theorem}
+            Local semantics $\iff$ Global semantics
+        \end{theorem}
+
+    \item[Markov blanket]
+        Each node is conditionally independent of all other nodes
+        given its Markov blanket (its parents, children, and children's parents).
+        \begin{figure}[h]
+            \centering
+            \includegraphics[width=0.35\textwidth]{img/_markov_blanket.pdf}
+            \caption{Markov blanket}
+        \end{figure}
+\end{description}
+
+
+
+\section{Building Bayesian networks}
+
+The following algorithm can be used to construct a Bayesian network over $n$ random variables:
+\begin{enumerate}
+    \item Choose an ordering of the variables $X_1, \dots, X_n$.
+    \item For $i=1, \dots, n$:
+        \begin{itemize}
+            \item Add $X_i$ to the network.
+            \item Select as parents of $X_i$ a minimal subset of $X_1, \dots, X_{i-1}$ such that:
+                \[ \textbf{P}(X_i \,\vert\, \texttt{parents}(X_i)) =
+                   \textbf{P}(X_i \,\vert\, X_1, \dots, X_{i-1}) \]
+        \end{itemize}
+\end{enumerate}
+By construction, this algorithm guarantees the global semantics.
+
+\begin{example}[Monty Hall]
+    The variables are:
+    \begin{itemize}
+        \item $G$: the choice of the guest.
+        \item $H$: the choice of the host.
+        \item $P$: the position of the prize.
+    \end{itemize}
+    Note that $P \perp G$.
+    Let the order be fixed as follows: $P$, $G$, $H$.
+
+    \begin{figure}[h]
+        \begin{subfigure}{.3\textwidth}
+            \centering
+            \includegraphics[width=0.2\linewidth]{img/_monty_hall1.pdf}
+            \caption{First iteration}
+        \end{subfigure}
+        \begin{subfigure}{.3\textwidth}
+            \centering
+            \includegraphics[width=0.6\linewidth]{img/_monty_hall2.pdf}
+            \caption{Second iteration (note that $P \perp G$)}
+        \end{subfigure}
+        \begin{subfigure}{.3\textwidth}
+            \centering
+            \includegraphics[width=0.6\linewidth]{img/_monty_hall3.pdf}
+            \caption{Third iteration}
+        \end{subfigure}
+    \end{figure}
+\end{example}
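+To make the construction concrete, the CPTs can be filled in under the
+standard Monty Hall assumptions (uniform prize and guess, and a host who
+never opens the guest's door nor the prize's door; these assumptions are
+ours, the notes above only fix the structure). The sketch below, in Python,
+also illustrates the global semantics: the joint is the product of the CPTs,
+and the well-known posterior is recovered by enumeration.
+\begin{verbatim}
+DOORS = (1, 2, 3)
+
+def p_host(h, g, p):
+    """CPT P(H = h | G = g, P = p): the host opens a door that is
+    neither the guest's nor the prize's, uniformly at random."""
+    allowed = [d for d in DOORS if d != g and d != p]
+    return 1 / len(allowed) if h in allowed else 0.0
+
+def joint(p, g, h):
+    """Global semantics: P(P) * P(G) * P(H | G, P)."""
+    return (1 / 3) * (1 / 3) * p_host(h, g, p)
+
+# Posterior P(P | G = 1, H = 3) by enumeration and normalisation.
+weights = {p: joint(p, g=1, h=3) for p in DOORS}
+total = sum(weights.values())
+print({p: w / total for p, w in weights.items()})
+# {1: 0.333..., 2: 0.666..., 3: 0.0} -> switching doors wins 2/3 of the time
+\end{verbatim}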
\ No newline at end of file
diff --git a/src/fundamentals-of-ai-and-kr/module3/sections/_probability.tex b/src/fundamentals-of-ai-and-kr/module3/sections/_probability.tex
index 7ba962e..0e39af0 100644
--- a/src/fundamentals-of-ai-and-kr/module3/sections/_probability.tex
+++ b/src/fundamentals-of-ai-and-kr/module3/sections/_probability.tex
@@ -234,142 +234,3 @@ can be computed as the sum of the atomic events where $\phi$ is true:
-\section{Bayesian networks}
-
-\begin{description}
-    \item[Bayes' rule] \marginnote{Bayes' rule}
-        \[ \prob{a \,\vert\, b} = \frac{\prob{b \,\vert\, a} \prob{a}}{\prob{b}} \]
-
-    \item[Bayes' rule and conditional independence]
-        Given the random variables $\texttt{Cause}$ and\\
-        $\texttt{Effect}_1, \dots, \texttt{Effect}_n$, with $\texttt{Effect}_i$ independent from each other,
-        we can compute $\textbf{P}(\texttt{Cause}, \texttt{Effect}_1, \dots, \texttt{Effect}_n)$ as follows:
-        \[
-            \textbf{P}(\texttt{Cause}, \texttt{Effect}_1, \dots, \texttt{Effect}_n) =
-            \left(\prod_i \textbf{P}(\texttt{Effect}_i \,\vert\, \texttt{Cause})\right) \textbf{P}(\texttt{Cause})
-        \]
-        The number of parameters is linear.
-
-        \begin{example}
-            Knowing that $\textbf{P} \models (\texttt{Catch} \perp \texttt{Toothache} \vert \texttt{Cavity})$:
-            \[
-                \begin{split}
-                    \textbf{P}&(\texttt{Cavity} \,\vert\, \texttt{toothache} \land \texttt{catch}) \\
-                    &= \alpha\textbf{P}(\texttt{toothache} \land \texttt{catch} \,\vert\, \texttt{Cavity})\textbf{P}(\texttt{Cavity}) \\
-                    &= \alpha\textbf{P}(\texttt{toothache} \,\vert\, \texttt{Cavity})
-                       \textbf{P}(\texttt{catch} \,\vert\, \texttt{Cavity})\textbf{P}(\texttt{Cavity}) \\
-                \end{split}
-            \]
-        \end{example}
-
-    \item[Bayesian network] \marginnote{Bayesian network}
-        Graph for conditional independence assertions and a compact specification of full joint distributions.
-        \begin{itemize}
-            \item Directed acyclic graph.
-            \item Nodes represent variables.
-            \item The conditional distribution of a node is given by its parents
-                \[ \textbf{P}(X_i \,\vert\, \texttt{parents}(X_i)) \]
-                In other words, if there is an edge from $A$ to $B$, then $A$ (cause) influences $B$ (effect).
-        \end{itemize}
-
-        \begin{description}
-            \item[Conditional probability table (CPT)] \marginnote{Conditional probability table (CPT)}
-                In the case of boolean variables, the conditional distribution of a node can be represented using
-                a table by considering all the combinations of the parents.
-
-                \begin{example}
-                    Given the boolean variables $A$, $B$ and $C$, with $C$ depending on $A$ and $B$, we have that:\\
-                    \begin{minipage}{.48\linewidth}
-                        \centering
-                        \includegraphics[width=0.35\linewidth]{img/_cpt_graph.pdf}
-                    \end{minipage}
-                    \begin{minipage}{.48\linewidth}
-                        \centering
-                        \begin{tabular}{c|c|c|c}
-                            A & B & $\prob{c \vert A, B}$ & $\prob{\lnot c \vert A, B}$ \\
-                            \hline
-                            a & b & $\alpha$ & $1-\alpha$ \\
-                            $\lnot$a & b & $\beta$ & $1-\beta$ \\
-                            a & $\lnot$b & $\gamma$ & $1-\gamma$ \\
-                            $\lnot$a & $\lnot$b & $\delta$ & $1-\delta$ \\
-                        \end{tabular}
-                    \end{minipage}
-                \end{example}
-        \end{description}
-
-    \item[Reasoning patterns] \marginnote{Reasoning patterns}
-        Given a Bayesian network, the following reasoning patterns can be used:
-        \begin{descriptionlist}
-            \item[Causal] \marginnote{Causal reasoning}
-                To make a prediction. From the cause, derive the effect.
-                \begin{example}
-                    Knowing $\texttt{Intelligence}$, it is possible to make a prediction of $\texttt{Letter}$.
-                    \begin{center}
-                        \includegraphics[width=0.5\linewidth]{img/_causal_example.pdf}
-                    \end{center}
-                \end{example}
-
-            \item[Evidential] \marginnote{Evidential reasoning}
-                To find an explanation. From the effect, derive the cause.
-                \begin{example}
-                    Knowing $\texttt{Grade}$, it is possible to explain it by estimating\\$\texttt{Intelligence}$.
-                    \begin{center}
-                        \includegraphics[width=0.65\linewidth]{img/_evidential_example.pdf}
-                    \end{center}
-                \end{example}
-
-            \item[Explain away] \marginnote{Explain away reasoning}
-                Observation obtained "passing through" other observations.
-                \begin{example}
-                    Knowing $\texttt{Difficulty}$ and $\texttt{Grade}$,
-                    it is possible to estimate \\$\texttt{Intelligence}$.
-
-                    Note that if $\texttt{Grade}$ was not known,
-                    $\texttt{Difficulty}$ and $\texttt{Intelligence}$ would be independent.
-                    \begin{center}
-                        \includegraphics[width=0.70\linewidth]{img/_explainaway_example.pdf}
-                    \end{center}
-                \end{example}
-        \end{descriptionlist}
-
-    \item[Global semantics] \marginnote{Global semantics}
-        Given a Bayesian network, the full joint distribution can be defined as
-        the product of the local conditional distributions:
-        \[ \prob{x_1, \dots, x_n} = \prod_{i=1}^{n} \prob{x_i \,\vert\, \texttt{parents}(X_i)} \]
-
-        \begin{example}
-            Given the following Bayesian network:
-
-            \begin{minipage}{.3\linewidth}
-                \centering
-                \includegraphics[width=0.7\linewidth]{img/_global_semantics_example.pdf}
-            \end{minipage}
-            \begin{minipage}{.6\linewidth}
-                \[
-                    \begin{split}
-                        &\prob{j \land m \land a \land \lnot b \land \lnot e} \\
-                        &= \prob{\lnot b} \prob{\lnot e} \prob{a \,\vert\, \lnot b, \lnot e}
-                           \prob{j \,\vert\, a} \prob{m \,\vert\, a}
-                    \end{split}
-                \]
-            \end{minipage}
-        \end{example}
-
-    \item[Independence] \marginnote{Bayesian network independence}
-        Intuitively, an effect is independent from a cause,
-        if there is another cause in the middle whose value is already known.
-        \begin{example}
-            \phantom{}
-
-            \begin{minipage}{.3\linewidth}
-                \centering
-                \includegraphics[width=0.75\linewidth]{img/_independence_example.pdf}
-            \end{minipage}
-            \begin{minipage}{.6\linewidth}
-                \[ \textbf{P} \models (\texttt{L} \perp \texttt{D}, \texttt{I}, \texttt{S} \,\vert\, \texttt{G}) \]
-                \[ \textbf{P} \models (\texttt{S} \perp \texttt{L} \,\vert\, \texttt{G}) \]
-                \[ \textbf{P} \models (\texttt{S} \perp \texttt{D}) \text{ but }
-                   \textbf{P} \models (\texttt{S} \,\cancel{\perp}\, \texttt{D} \,\vert\, \texttt{G}) \text{ (explain away)} \]
-            \end{minipage}
-        \end{example}
-\end{description}
\ No newline at end of file