diff --git a/src/ainotes.cls b/src/ainotes.cls index d350d25..5d97ee4 100644 --- a/src/ainotes.cls +++ b/src/ainotes.cls @@ -6,7 +6,7 @@ \usepackage{geometry} \usepackage{graphicx, xcolor} -\usepackage{amsmath, amsfonts, amssymb, amsthm, mathtools, bm, upgreek} +\usepackage{amsmath, amsfonts, amssymb, amsthm, mathtools, bm, upgreek, cancel} \usepackage{hyperref} \usepackage[nameinlink]{cleveref} \usepackage[all]{hypcap} % Links hyperref to object top and not caption diff --git a/src/fundamentals-of-ai-and-kr/module3/img/_causal_example.pdf b/src/fundamentals-of-ai-and-kr/module3/img/_causal_example.pdf new file mode 100644 index 0000000..135e68d Binary files /dev/null and b/src/fundamentals-of-ai-and-kr/module3/img/_causal_example.pdf differ diff --git a/src/fundamentals-of-ai-and-kr/module3/img/_cpt_graph.pdf b/src/fundamentals-of-ai-and-kr/module3/img/_cpt_graph.pdf new file mode 100644 index 0000000..c964452 Binary files /dev/null and b/src/fundamentals-of-ai-and-kr/module3/img/_cpt_graph.pdf differ diff --git a/src/fundamentals-of-ai-and-kr/module3/img/_evidential_example.pdf b/src/fundamentals-of-ai-and-kr/module3/img/_evidential_example.pdf new file mode 100644 index 0000000..87f4bb2 Binary files /dev/null and b/src/fundamentals-of-ai-and-kr/module3/img/_evidential_example.pdf differ diff --git a/src/fundamentals-of-ai-and-kr/module3/img/_explainaway_example.pdf b/src/fundamentals-of-ai-and-kr/module3/img/_explainaway_example.pdf new file mode 100644 index 0000000..4d5e0e3 Binary files /dev/null and b/src/fundamentals-of-ai-and-kr/module3/img/_explainaway_example.pdf differ diff --git a/src/fundamentals-of-ai-and-kr/module3/img/_global_semantics_example.pdf b/src/fundamentals-of-ai-and-kr/module3/img/_global_semantics_example.pdf new file mode 100644 index 0000000..97ccf0a Binary files /dev/null and b/src/fundamentals-of-ai-and-kr/module3/img/_global_semantics_example.pdf differ diff --git a/src/fundamentals-of-ai-and-kr/module3/img/_independence_example.pdf b/src/fundamentals-of-ai-and-kr/module3/img/_independence_example.pdf new file mode 100644 index 0000000..a83357f Binary files /dev/null and b/src/fundamentals-of-ai-and-kr/module3/img/_independence_example.pdf differ diff --git a/src/fundamentals-of-ai-and-kr/module3/sections/_probability.tex b/src/fundamentals-of-ai-and-kr/module3/sections/_probability.tex index 735307d..7ba962e 100644 --- a/src/fundamentals-of-ai-and-kr/module3/sections/_probability.tex +++ b/src/fundamentals-of-ai-and-kr/module3/sections/_probability.tex @@ -49,11 +49,14 @@ $\textbf{P}(\texttt{Weather}, \texttt{Cavity}) = $ \begin{center} \small - \begin{tabular}{c | cccc} - & \texttt{Weather=sunny} & \texttt{Weather=rain} & \texttt{Weather=cloudy} & \texttt{Weather=snow} \\ + \begin{tabular}{|c | c|c|c|c|} + \cline{2-5} + \multicolumn{1}{c|}{} & \texttt{Weather=sunny} & \texttt{Weather=rain} & \texttt{Weather=cloudy} & \texttt{Weather=snow} \\ \hline \texttt{Cavity=true} & 0.144 & 0.02 & 0.016 & 0.02 \\ - \texttt{Cavity=false} & 0.576 & 0.08 & 0.064 & 0.08 + \hline + \texttt{Cavity=false} & 0.576 & 0.08 & 0.064 & 0.08 \\ + \hline \end{tabular} \end{center} \end{example} @@ -125,6 +128,7 @@ can be computed as the sum of the atomic events where $\phi$ is true: \multicolumn{1}{c|}{} & \texttt{catch} & $\lnot$\texttt{catch} & \texttt{catch} & $\lnot$\texttt{catch} \\ \hline \texttt{cavity} & 0.108 & 0.012 & 0.072 & 0.008 \\ + \hline $\lnot$\texttt{cavity} & 0.016 & 0.064 & 0.144 & 0.576 \\ \hline \end{tabular} @@ -147,11 +151,14 @@ can be computed as the sum of 
the atomic events where $\phi$ is true:
 	Given the joint distribution:
 	\begin{center}
 		\small
-		\begin{tabular}{c | cccc}
-			 & \texttt{Weather=sunny} & \texttt{Weather=rain} & \texttt{Weather=cloudy} & \texttt{Weather=snow} \\
+		\begin{tabular}{|c | c|c|c|c|}
+			\cline{2-5}
+			\multicolumn{1}{c|}{} & \texttt{Weather=sunny} & \texttt{Weather=rain} & \texttt{Weather=cloudy} & \texttt{Weather=snow} \\
 			\hline
 			\texttt{Cavity=true} & 0.144 & 0.02 & 0.016 & 0.02 \\
-			\texttt{Cavity=false} & 0.576 & 0.08 & 0.064 & 0.08
+			\hline
+			\texttt{Cavity=false} & 0.576 & 0.08 & 0.064 & 0.08 \\
+			\hline
 		\end{tabular}
 	\end{center}
 	We have that $\prob{\texttt{Weather}=\texttt{sunny}} = 0.144 + 0.576$
@@ -176,6 +183,7 @@ can be computed as the sum of the atomic events where $\phi$ is true:
 		\multicolumn{1}{c|}{} & \texttt{catch} & $\lnot$\texttt{catch} & \texttt{catch} & $\lnot$\texttt{catch} \\
 		\hline
 		\texttt{cavity} & 0.108 & 0.012 & 0.072 & 0.008 \\
+		\hline
 		$\lnot$\texttt{cavity} & 0.016 & 0.064 & 0.144 & 0.576 \\
 		\hline
 	\end{tabular}
@@ -183,10 +191,10 @@ can be computed as the sum of the atomic events where $\phi$ is true:
 	We have that:
 	\[
-		\textbf{P}(\texttt{cavity} \vert \texttt{toothache}) =
-		\langle
-			\frac{\prob{\texttt{cavity}, \texttt{toothache}, \texttt{catch}}}{\prob{\texttt{toothache}}},
-			\frac{\prob{\texttt{cavity}, \texttt{toothache}, \lnot\texttt{catch}}}{\prob{\texttt{toothache}}}
-		\rangle
+		\textbf{P}(\texttt{Cavity} \vert \texttt{toothache}) =
+		\langle
+			\frac{\prob{\texttt{cavity}, \texttt{toothache}, \texttt{catch}} + \prob{\texttt{cavity}, \texttt{toothache}, \lnot\texttt{catch}}}{\prob{\texttt{toothache}}},
+			\frac{\prob{\lnot\texttt{cavity}, \texttt{toothache}, \texttt{catch}} + \prob{\lnot\texttt{cavity}, \texttt{toothache}, \lnot\texttt{catch}}}{\prob{\texttt{toothache}}}
+		\rangle
 	\]
 \end{example}
@@ -195,9 +203,173 @@ can be computed as the sum of the atomic events where $\phi$ is true:
 	Given a set of query variables $\bm{Y}$, the evidence variables $\vec{e}$ and the other hidden variables $\bm{H}$,
 	the probability of the query can be computed as:
 	\[
-		\textbf{P}(\bm{Y} \vert \bm{E}=\vec{e}) = \alpha \textbf{P}(\bm{Y} \vert \bm{E}=\vec{e})
-		= \alpha \sum_{\vec{h}} \textbf{P}(\bm{Y} \vert \bm{E}=\vec{e}, \bm{H}=\vec{h})
+		\textbf{P}(\bm{Y} \vert \bm{E}=\vec{e}) = \alpha \textbf{P}(\bm{Y}, \bm{E}=\vec{e})
+		= \alpha \sum_{\vec{h}} \textbf{P}(\bm{Y}, \bm{E}=\vec{e}, \bm{H}=\vec{h})
 	\]
 	The problem of this approach is that it has exponential time and space complexity
-	which makes it not applicable in practice.
+	and is therefore inapplicable in practice.
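+
+	\begin{example}
+		As a worked instance of the formula above (the numbers are taken from the full joint
+		distribution of \texttt{Toothache}, \texttt{Catch} and \texttt{Cavity} given earlier),
+		take query $\texttt{Cavity}$, evidence $\texttt{toothache}$ and hidden variable $\texttt{Catch}$:
+		\[
+			\begin{split}
+				\textbf{P}&(\texttt{Cavity} \,\vert\, \texttt{toothache}) \\
+				&= \alpha \langle 0.108 + 0.012, 0.016 + 0.064 \rangle
+				= \alpha \langle 0.12, 0.08 \rangle
+				= \langle 0.6, 0.4 \rangle
+			\end{split}
+		\]
+		where $\alpha = \frac{1}{0.12 + 0.08} = 5$.
+	\end{example}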
+
+	To reduce the number of independent values needed to represent the distribution,
+	conditional independence can be exploited.
+	\begin{example}
+		Knowing that $\textbf{P} \models (\texttt{Catch} \perp \texttt{Toothache} \vert \texttt{Cavity})$,
+		we can compute the distribution $\textbf{P}(\texttt{Toothache}, \texttt{Catch}, \texttt{Cavity})$ as follows:
+		\[
+			\begin{split}
+				\textbf{P}&(\texttt{Toothache}, \texttt{Catch}, \texttt{Cavity}) \\
+				&= \textbf{P}(\texttt{Toothache} \,\vert\, \texttt{Catch}, \texttt{Cavity})
+				\textbf{P}(\texttt{Catch} \,\vert\, \texttt{Cavity}) \textbf{P}(\texttt{Cavity}) \\
+				&= \textbf{P}(\texttt{Toothache} \,\vert\, \texttt{Cavity})
+				\textbf{P}(\texttt{Catch} \,\vert\, \texttt{Cavity}) \textbf{P}(\texttt{Cavity})
+			\end{split}
+		\]
+		$\textbf{P}(\texttt{Toothache}, \texttt{Catch}, \texttt{Cavity})$ has $7$ independent values,
+		a number that grows exponentially with the number of variables
+		($2 \cdot 2 \cdot 2 = 8$ entries, but one of them can be omitted as the probabilities must sum up to $1$).
+
+		$\textbf{P}(\texttt{Toothache} \,\vert\, \texttt{Cavity}) \textbf{P}(\texttt{Catch} \,\vert\, \texttt{Cavity}) \textbf{P}(\texttt{Cavity})$
+		has $5$ independent values, a number that grows linearly with the number of variables
+		($4 + 4 + 2 = 10$ entries in total: one value of $\textbf{P}(\texttt{Cavity})$ can be omitted and,
+		in each conditional table, one value can be omitted for each value of $\texttt{Cavity}$,
+		leaving $2$ independent values per conditional distribution instead of $4$).
+	\end{example}
 \end{description}
+
+
+
+\section{Bayesian networks}
+
+\begin{description}
+	\item[Bayes' rule] \marginnote{Bayes' rule}
+	\[ \prob{a \,\vert\, b} = \frac{\prob{b \,\vert\, a} \prob{a}}{\prob{b}} \]
+
+	\item[Bayes' rule and conditional independence]
+	Given the random variables $\texttt{Cause}$ and\\
+	$\texttt{Effect}_1, \dots, \texttt{Effect}_n$, with the $\texttt{Effect}_i$ conditionally independent
+	of each other given $\texttt{Cause}$,
+	we can compute $\textbf{P}(\texttt{Cause}, \texttt{Effect}_1, \dots, \texttt{Effect}_n)$ as follows:
+	\[
+		\textbf{P}(\texttt{Cause}, \texttt{Effect}_1, \dots, \texttt{Effect}_n) =
+		\left(\prod_i \textbf{P}(\texttt{Effect}_i \,\vert\, \texttt{Cause})\right) \textbf{P}(\texttt{Cause})
+	\]
+	The number of parameters is linear in the number of effects.
+
+	\begin{example}
+		Knowing that $\textbf{P} \models (\texttt{Catch} \perp \texttt{Toothache} \vert \texttt{Cavity})$:
+		\[
+			\begin{split}
+				\textbf{P}&(\texttt{Cavity} \,\vert\, \texttt{toothache} \land \texttt{catch}) \\
+				&= \alpha\textbf{P}(\texttt{toothache} \land \texttt{catch} \,\vert\, \texttt{Cavity})\textbf{P}(\texttt{Cavity}) \\
+				&= \alpha\textbf{P}(\texttt{toothache} \,\vert\, \texttt{Cavity})
+				\textbf{P}(\texttt{catch} \,\vert\, \texttt{Cavity})\textbf{P}(\texttt{Cavity})
+			\end{split}
+		\]
+	\end{example}
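+
+	Concretely, generalizing the counting of the previous example: for $n$ boolean effects,
+	the factored form requires $2n + 1$ independent values
+	($2$ per conditional distribution plus $1$ for $\textbf{P}(\texttt{Cause})$),
+	instead of the $2^{n+1} - 1$ independent values of the full joint distribution
+	(for $n = 2$: $5$ versus $7$, as computed earlier).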
+	\item[Bayesian network] \marginnote{Bayesian network}
+	Graph that encodes conditional independence assertions and gives a compact specification
+	of the full joint distribution.
+	\begin{itemize}
+		\item Directed acyclic graph.
+		\item Nodes represent random variables.
+		\item Each node has a conditional distribution given its parents
+		\[ \textbf{P}(X_i \,\vert\, \texttt{parents}(X_i)) \]
+		In other words, if there is an edge from $A$ to $B$, then $A$ (cause) influences $B$ (effect).
+	\end{itemize}

+	\begin{description}
+		\item[Conditional probability table (CPT)] \marginnote{Conditional probability table (CPT)}
+		In the case of boolean variables, the conditional distribution of a node can be represented
+		as a table with one row for each combination of values of its parents
+		(a node with $k$ boolean parents therefore has a CPT with $2^k$ rows).
+
+		\begin{example}
+			Given the boolean variables $A$, $B$ and $C$, with $C$ depending on $A$ and $B$, we have that:\\
+			\begin{minipage}{.48\linewidth}
+				\centering
+				\includegraphics[width=0.35\linewidth]{img/_cpt_graph.pdf}
+			\end{minipage}
+			\begin{minipage}{.48\linewidth}
+				\centering
+				\begin{tabular}{c|c|c|c}
+					A & B & $\prob{c \vert A, B}$ & $\prob{\lnot c \vert A, B}$ \\
+					\hline
+					a & b & $\alpha$ & $1-\alpha$ \\
+					$\lnot$a & b & $\beta$ & $1-\beta$ \\
+					a & $\lnot$b & $\gamma$ & $1-\gamma$ \\
+					$\lnot$a & $\lnot$b & $\delta$ & $1-\delta$ \\
+				\end{tabular}
+			\end{minipage}
+		\end{example}
+	\end{description}
+
+	\item[Reasoning patterns] \marginnote{Reasoning patterns}
+	Given a Bayesian network, the following reasoning patterns can be used:
+	\begin{descriptionlist}
+		\item[Causal] \marginnote{Causal reasoning}
+		To make a prediction. From the cause, derive the effect.
+		\begin{example}
+			Knowing $\texttt{Intelligence}$, it is possible to predict $\texttt{Letter}$.
+			\begin{center}
+				\includegraphics[width=0.5\linewidth]{img/_causal_example.pdf}
+			\end{center}
+		\end{example}
+
+		\item[Evidential] \marginnote{Evidential reasoning}
+		To find an explanation. From the effect, derive the cause.
+		\begin{example}
+			Knowing $\texttt{Grade}$, it is possible to explain it by estimating\\$\texttt{Intelligence}$.
+			\begin{center}
+				\includegraphics[width=0.65\linewidth]{img/_evidential_example.pdf}
+			\end{center}
+		\end{example}
+
+		\item[Explain away] \marginnote{Explain away reasoning}
+		Reasoning between the causes of a common observed effect:
+		evidence about one cause changes the belief in the other.
+		\begin{example}
+			Knowing $\texttt{Difficulty}$ and $\texttt{Grade}$,
+			it is possible to estimate \\$\texttt{Intelligence}$.
+
+			Note that if $\texttt{Grade}$ were not known,
+			$\texttt{Difficulty}$ and $\texttt{Intelligence}$ would be independent.
+			\begin{center}
+				\includegraphics[width=0.70\linewidth]{img/_explainaway_example.pdf}
+			\end{center}
+		\end{example}
+	\end{descriptionlist}
+
+	\item[Global semantics] \marginnote{Global semantics}
+	Given a Bayesian network, the full joint distribution can be defined as
+	the product of the local conditional distributions:
+	\[ \prob{x_1, \dots, x_n} = \prod_{i=1}^{n} \prob{x_i \,\vert\, \texttt{parents}(X_i)} \]
+
+	\begin{example}
+		Given the following Bayesian network:
+
+		\begin{minipage}{.3\linewidth}
+			\centering
+			\includegraphics[width=0.7\linewidth]{img/_global_semantics_example.pdf}
+		\end{minipage}
+		\begin{minipage}{.6\linewidth}
+			\[
+				\begin{split}
+					&\prob{j \land m \land a \land \lnot b \land \lnot e} \\
+					&= \prob{\lnot b} \prob{\lnot e} \prob{a \,\vert\, \lnot b, \lnot e}
+					\prob{j \,\vert\, a} \prob{m \,\vert\, a}
+				\end{split}
+			\]
+		\end{minipage}
+	\end{example}
+
+	\item[Independence] \marginnote{Bayesian network independence}
+	Intuitively, an effect is independent of a cause
+	if there is another variable in between whose value is already known.
+	More generally, each node is conditionally independent of its non-descendants given its parents.
+	\begin{example}
+		\phantom{}
+
+		\begin{minipage}{.3\linewidth}
+			\centering
+			\includegraphics[width=0.75\linewidth]{img/_independence_example.pdf}
+		\end{minipage}
+		\begin{minipage}{.6\linewidth}
+			\[ \textbf{P} \models (\texttt{L} \perp \texttt{D}, \texttt{I}, \texttt{S} \,\vert\, \texttt{G}) \]
+			\[ \textbf{P} \models (\texttt{S} \perp \texttt{L} \,\vert\, \texttt{G}) \]
+			\[ \textbf{P} \models (\texttt{S} \perp \texttt{D}) \text{ but }
+				\textbf{P} \models (\texttt{S} \,\cancel{\perp}\, \texttt{D} \,\vert\, \texttt{G}) \text{ (explain away)} \]
+		\end{minipage}
+	\end{example}
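+
+	\begin{example}
+		These assertions can be read off the factorization given by the global semantics.
+		Assuming the edges in the figure above are $\texttt{D} \rightarrow \texttt{G}$,
+		$\texttt{I} \rightarrow \texttt{G}$, $\texttt{I} \rightarrow \texttt{S}$ and
+		$\texttt{G} \rightarrow \texttt{L}$ (an assumption consistent with the stated independencies):
+		\[ \prob{d, i, g, s, l} = \prob{d} \prob{i} \prob{g \,\vert\, d, i} \prob{s \,\vert\, i} \prob{l \,\vert\, g} \]
+		$\texttt{L}$ appears only in the factor $\prob{l \,\vert\, g}$, i.e., it interacts with the
+		rest of the network only through $\texttt{G}$, which is why
+		$\textbf{P} \models (\texttt{L} \perp \texttt{D}, \texttt{I}, \texttt{S} \,\vert\, \texttt{G})$.
+	\end{example}
+\end{description}
\ No newline at end of file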