Add FAIKR3 Bayesian networks

2023-10-27 13:40:10 +02:00
parent 3eedb2eaa8
commit b2a6979ab4
8 changed files with 184 additions and 12 deletions


@@ -6,7 +6,7 @@
\usepackage{geometry}
\usepackage{graphicx, xcolor}
\usepackage{amsmath, amsfonts, amssymb, amsthm, mathtools, bm, upgreek, cancel}
\usepackage{hyperref}
\usepackage[nameinlink]{cleveref}
\usepackage[all]{hypcap} % Links hyperref to object top and not caption


@@ -49,11 +49,14 @@
$\textbf{P}(\texttt{Weather}, \texttt{Cavity}) = $
\begin{center}
\small
\begin{tabular}{|c | c|c|c|c|}
\cline{2-5}
\multicolumn{1}{c|}{} & \texttt{Weather=sunny} & \texttt{Weather=rain} & \texttt{Weather=cloudy} & \texttt{Weather=snow} \\
\hline
\texttt{Cavity=true} & 0.144 & 0.02 & 0.016 & 0.02 \\
\hline
\texttt{Cavity=false} & 0.576 & 0.08 & 0.064 & 0.08 \\
\hline
\end{tabular}
\end{center}
\end{example}
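As a quick consistency check, the eight entries of the joint distribution sum to one:
\[ 0.144 + 0.02 + 0.016 + 0.02 + 0.576 + 0.08 + 0.064 + 0.08 = 1 \]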
@@ -125,6 +128,7 @@ can be computed as the sum of the atomic events where $\phi$ is true:
\multicolumn{1}{c|}{} & \texttt{catch} & $\lnot$\texttt{catch} & \texttt{catch} & $\lnot$\texttt{catch} \\
\hline
\texttt{cavity} & 0.108 & 0.012 & 0.072 & 0.008 \\
\hline
$\lnot$\texttt{cavity} & 0.016 & 0.064 & 0.144 & 0.576 \\
\hline
\end{tabular}
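For instance, $\prob{\texttt{cavity} \lor \texttt{toothache}}$ is obtained by summing the six atomic events in which the proposition holds:
\[ \prob{\texttt{cavity} \lor \texttt{toothache}} = 0.108 + 0.012 + 0.072 + 0.008 + 0.016 + 0.064 = 0.28 \]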
@@ -147,11 +151,14 @@ can be computed as the sum of the atomic events where $\phi$ is true:
Given the joint distribution:
\begin{center}
\small
\begin{tabular}{|c | c|c|c|c|}
\cline{2-5}
\multicolumn{1}{c|}{} & \texttt{Weather=sunny} & \texttt{Weather=rain} & \texttt{Weather=cloudy} & \texttt{Weather=snow} \\
\hline
\texttt{Cavity=true} & 0.144 & 0.02 & 0.016 & 0.02 \\
\hline
\texttt{Cavity=false} & 0.576 & 0.08 & 0.064 & 0.08 \\
\hline
\end{tabular}
\end{center}
We have that $\prob{\texttt{Weather}=\texttt{sunny}} = 0.144 + 0.576 = 0.72$
@@ -176,6 +183,7 @@ can be computed as the sum of the atomic events where $\phi$ is true:
\multicolumn{1}{c|}{} & \texttt{catch} & $\lnot$\texttt{catch} & \texttt{catch} & $\lnot$\texttt{catch} \\
\hline
\texttt{cavity} & 0.108 & 0.012 & 0.072 & 0.008 \\
\hline
$\lnot$\texttt{cavity} & 0.016 & 0.064 & 0.144 & 0.576 \\
\hline
\end{tabular}
@@ -183,10 +191,10 @@ can be computed as the sum of the atomic events where $\phi$ is true:
We have that:
\[
\textbf{P}(\texttt{Cavity} \vert \texttt{toothache}) =
\langle
\frac{\prob{\texttt{cavity}, \texttt{toothache}, \texttt{catch}} + \prob{\texttt{cavity}, \texttt{toothache}, \lnot\texttt{catch}}}{\prob{\texttt{toothache}}},
\frac{\prob{\lnot\texttt{cavity}, \texttt{toothache}, \texttt{catch}} + \prob{\lnot\texttt{cavity}, \texttt{toothache}, \lnot\texttt{catch}}}{\prob{\texttt{toothache}}}
\rangle
\]
\end{example}
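Plugging in the numbers of the table, $\prob{\texttt{toothache}} = 0.108 + 0.012 + 0.016 + 0.064 = 0.2$, and therefore:
\[
\textbf{P}(\texttt{Cavity} \vert \texttt{toothache}) =
\langle \frac{0.108 + 0.012}{0.2}, \frac{0.016 + 0.064}{0.2} \rangle =
\langle 0.6, 0.4 \rangle
\]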
@@ -195,9 +203,173 @@ can be computed as the sum of the atomic events where $\phi$ is true:
Given a set of query variables $\bm{Y}$, the observed evidence $\bm{E} = \vec{e}$ and the remaining hidden variables $\bm{H}$,
the probability of the query can be computed as:
\[
\textbf{P}(\bm{Y} \vert \bm{E}=\vec{e}) = \alpha \textbf{P}(\bm{Y}, \bm{E}=\vec{e})
= \alpha \sum_{\vec{h}} \textbf{P}(\bm{Y}, \bm{E}=\vec{e}, \bm{H}=\vec{h})
\]
The problem with this approach is its exponential time and space complexity,
which makes it impractical; a brute-force sketch is given below.
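As a minimal illustrative sketch (not part of the original notes; the table layout and the helper name \texttt{enumerate\_query} are made up), enumeration can be run directly over the full joint table of the \texttt{Cavity}/\texttt{Toothache}/\texttt{Catch} example:
\begin{verbatim}
joint = {  # (cavity, toothache, catch) -> probability
    (True,  True,  True):  0.108, (True,  True,  False): 0.012,
    (True,  False, True):  0.072, (True,  False, False): 0.008,
    (False, True,  True):  0.016, (False, True,  False): 0.064,
    (False, False, True):  0.144, (False, False, False): 0.576,
}
VARS = ("Cavity", "Toothache", "Catch")

def enumerate_query(query_var, evidence):
    """P(query_var | evidence): sum out the hidden variables, then normalise."""
    dist = {True: 0.0, False: 0.0}
    for event, p in joint.items():
        assignment = dict(zip(VARS, event))
        if all(assignment[k] == v for k, v in evidence.items()):
            dist[assignment[query_var]] += p
    alpha = 1 / sum(dist.values())  # normalisation constant
    return {value: alpha * p for value, p in dist.items()}

print(enumerate_query("Cavity", {"Toothache": True}))
# {True: 0.6, False: 0.4} (up to floating-point rounding)
\end{verbatim}
The sketch touches all $2^n$ entries of the joint table, which is exactly the exponential cost discussed above.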
To reduce the number of values to represent, conditional independence can be exploited.
\begin{example}
Knowing that $\textbf{P} \models (\texttt{Catch} \perp \texttt{Toothache} \vert \texttt{Cavity})$,
we can compute the distribution $\textbf{P}(\texttt{Toothache}, \texttt{Catch}, \texttt{Cavity})$ as follows:
\[
\begin{split}
\textbf{P}&(\texttt{Toothache}, \texttt{Catch}, \texttt{Cavity}) = \\
&= \textbf{P}(\texttt{Toothache} \,\vert\, \texttt{Catch}, \texttt{Cavity})
\textbf{P}(\texttt{Catch} \,\vert\, \texttt{Cavity}) \textbf{P}(\texttt{Cavity}) \\
&= \textbf{P}(\texttt{Toothache} \,\vert\, \texttt{Cavity})
\textbf{P}(\texttt{Catch} \,\vert\, \texttt{Cavity}) \textbf{P}(\texttt{Cavity})
\end{split}
\]
$\textbf{P}(\texttt{Toothache}, \texttt{Catch}, \texttt{Cavity})$ has 7 independent values
($2 \cdot 2 \cdot 2 = 8$ entries, one of which can be omitted as the probabilities sum up to 1),
a number that grows exponentially with the number of variables.
$\textbf{P}(\texttt{Toothache} \,\vert\, \texttt{Cavity}) \textbf{P}(\texttt{Catch} \,\vert\, \texttt{Cavity}) \textbf{P}(\texttt{Cavity})$
has 5 independent values, a number that grows linearly:
each conditional table has 4 entries but only 2 independent values (one per value of $\texttt{Cavity}$,
as the complements follow from the rows summing to 1),
and $\textbf{P}(\texttt{Cavity})$ contributes 1 more, giving $2 + 2 + 1 = 5$.
\end{example}
\end{description}
\section{Bayesian networks}
\begin{description}
\item[Bayes' rule] \marginnote{Bayes' rule}
\[ \prob{a \,\vert\, b} = \frac{\prob{b \,\vert\, a} \prob{a}}{\prob{b}} \]
\item[Bayes' rule and conditional independence]
Given the random variables $\texttt{Cause}$ and\\
$\texttt{Effect}_1, \dots, \texttt{Effect}_n$, with the $\texttt{Effect}_i$ conditionally independent of each other given $\texttt{Cause}$,
we can compute $\textbf{P}(\texttt{Cause}, \texttt{Effect}_1, \dots, \texttt{Effect}_n)$ as follows:
\[
\textbf{P}(\texttt{Cause}, \texttt{Effect}_1, \dots, \texttt{Effect}_n) =
\left(\prod_i \textbf{P}(\texttt{Effect}_i \,\vert\, \texttt{Cause})\right) \textbf{P}(\texttt{Cause})
\]
The number of parameters is linear in the number of effects: with $n$ boolean effects, $1 + 2n$ independent values suffice instead of the $2^{n+1} - 1$ of the full joint distribution.
\begin{example}
Knowing that $\textbf{P} \models (\texttt{Catch} \perp \texttt{Toothache} \vert \texttt{Cavity})$:
\[
\begin{split}
\textbf{P}&(\texttt{Cavity} \,\vert\, \texttt{toothache} \land \texttt{catch}) \\
&= \alpha\textbf{P}(\texttt{toothache} \land \texttt{catch} \,\vert\, \texttt{Cavity})\textbf{P}(\texttt{Cavity}) \\
&= \alpha\textbf{P}(\texttt{toothache} \,\vert\, \texttt{Cavity})
\textbf{P}(\texttt{catch} \,\vert\, \texttt{Cavity})\textbf{P}(\texttt{Cavity}) \\
\end{split}
\]
\end{example}
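Using the values of the full joint table above, the factors are $\prob{\texttt{cavity}} = 0.2$, $\prob{\texttt{toothache} \,\vert\, \texttt{cavity}} = 0.6$, $\prob{\texttt{catch} \,\vert\, \texttt{cavity}} = 0.9$, $\prob{\texttt{toothache} \,\vert\, \lnot\texttt{cavity}} = 0.1$ and $\prob{\texttt{catch} \,\vert\, \lnot\texttt{cavity}} = 0.2$, so:
\[
\begin{split}
\textbf{P}(\texttt{Cavity} \,\vert\, \texttt{toothache} \land \texttt{catch})
&= \alpha \langle 0.6 \cdot 0.9 \cdot 0.2,\; 0.1 \cdot 0.2 \cdot 0.8 \rangle \\
&= \alpha \langle 0.108,\; 0.016 \rangle \approx \langle 0.871,\; 0.129 \rangle
\end{split}
\]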
\item[Bayesian network] \marginnote{Bayesian network}
A graph that encodes conditional independence assertions and provides a compact specification of the full joint distribution.
\begin{itemize}
\item Directed acyclic graph.
\item Nodes represent variables.
\item The conditional distribution of a node is given by its parents
\[ \textbf{P}(X_i \,\vert\, \texttt{parents}(X_i)) \]
In other words, if there is an edge from $A$ to $B$, then $A$ (cause) influences $B$ (effect).
\end{itemize}
\begin{description}
\item[Conditional probability table (CPT)] \marginnote{Conditional probability table (CPT)}
In the case of boolean variables, the conditional distribution of a node can be represented using
a table by considering all the combinations of the parents.
\begin{example}
Given the boolean variables $A$, $B$ and $C$, with $C$ depending on $A$ and $B$, we have that:\\
\begin{minipage}{.48\linewidth}
\centering
\includegraphics[width=0.35\linewidth]{img/_cpt_graph.pdf}
\end{minipage}
\begin{minipage}{.48\linewidth}
\centering
\begin{tabular}{c|c|c|c}
A & B & $\prob{c \vert A, B}$ & $\prob{\lnot c \vert A, B}$ \\
\hline
a & b & $\alpha$ & $1-\alpha$ \\
$\lnot$a & b & $\beta$ & $1-\beta$ \\
a & $\lnot$b & $\gamma$ & $1-\gamma$ \\
$\lnot$a & $\lnot$b & $\delta$ & $1-\delta$ \\
\end{tabular}
\end{minipage}
\end{example}
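As a small assumed sketch (the numeric values are placeholders standing in for $\alpha, \beta, \gamma, \delta$), a CPT can be stored as a map from parent assignments to the probability of a single value, since the complement follows from each row summing to 1:
\begin{verbatim}
cpt_C = {  # (a, b) -> P(c | A=a, B=b); placeholder values
    (True,  True):  0.9,   # alpha
    (False, True):  0.5,   # beta
    (True,  False): 0.4,   # gamma
    (False, False): 0.1,   # delta
}

def p_C(c, a, b):
    """P(C=c | A=a, B=b): one stored value per parent combination."""
    p = cpt_C[(a, b)]
    return p if c else 1.0 - p
\end{verbatim}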
\end{description}
\item[Reasoning patterns] \marginnote{Reasoning patterns}
Given a Bayesian network, the following reasoning patterns can be used:
\begin{descriptionlist}
\item[Causal] \marginnote{Causal reasoning}
To make a prediction. From the cause, derive the effect.
\begin{example}
Knowing $\texttt{Intelligence}$, it is possible to make a prediction of $\texttt{Letter}$.
\begin{center}
\includegraphics[width=0.5\linewidth]{img/_causal_example.pdf}
\end{center}
\end{example}
\item[Evidential] \marginnote{Evidential reasoning}
To find an explanation. From the effect, derive the cause.
\begin{example}
Knowing $\texttt{Grade}$, it is possible to explain it by estimating\\$\texttt{Intelligence}$.
\begin{center}
\includegraphics[width=0.65\linewidth]{img/_evidential_example.pdf}
\end{center}
\end{example}
\item[Explain away] \marginnote{Explain away reasoning}
Reasoning among the causes of a common observed effect: evidence for one cause changes the belief in the other causes, ``explaining them away''.
\begin{example}
Knowing $\texttt{Difficulty}$ and $\texttt{Grade}$,
it is possible to estimate \\$\texttt{Intelligence}$.
Note that if $\texttt{Grade}$ were not known,
$\texttt{Difficulty}$ and $\texttt{Intelligence}$ would be independent.
\begin{center}
\includegraphics[width=0.70\linewidth]{img/_explainaway_example.pdf}
\end{center}
\end{example}
\end{descriptionlist}
\item[Global semantics] \marginnote{Global semantics}
Given a Bayesian network, the full joint distribution can be defined as
the product of the local conditional distributions:
\[ \prob{x_1, \dots, x_n} = \prod_{i=1}^{n} \prob{x_i \,\vert\, \texttt{parents}(X_i)} \]
\begin{example}
Given the following Bayesian network:
\begin{minipage}{.3\linewidth}
\centering
\includegraphics[width=0.7\linewidth]{img/_global_semantics_example.pdf}
\end{minipage}
\begin{minipage}{.6\linewidth}
\[
\begin{split}
&\prob{j \land m \land a \land \lnot b \land \lnot e} \\
&= \prob{\lnot b} \prob{\lnot e} \prob{a \,\vert\, \lnot b, \lnot e}
\prob{j \,\vert\, a} \prob{m \,\vert\, a}
\end{split}
\]
\end{minipage}
\end{example}
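Assuming the usual CPT values of this classic burglary network (an assumption here, as the figure's numbers are not reproduced in these notes): $\prob{\lnot b} = 0.999$, $\prob{\lnot e} = 0.998$, $\prob{a \,\vert\, \lnot b, \lnot e} = 0.001$, $\prob{j \,\vert\, a} = 0.9$ and $\prob{m \,\vert\, a} = 0.7$, the product evaluates to:
\[ \prob{j \land m \land a \land \lnot b \land \lnot e} = 0.999 \cdot 0.998 \cdot 0.001 \cdot 0.9 \cdot 0.7 \approx 0.00063 \]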
\item[Independence] \marginnote{Bayesian network independence}
Intuitively, an effect is independent of a distant cause once an intermediate cause between them is known.
More generally, a node is conditionally independent of its non-descendants given its parents.
\begin{example}
\phantom{}
\begin{minipage}{.3\linewidth}
\centering
\includegraphics[width=0.75\linewidth]{img/_independence_example.pdf}
\end{minipage}
\begin{minipage}{.6\linewidth}
\[ \textbf{P} \models (\texttt{L} \perp \texttt{D}, \texttt{I}, \texttt{S} \,\vert\, \texttt{G}) \]
\[ \textbf{P} \models (\texttt{S} \perp \texttt{L} \,\vert\, \texttt{G}) \]
\[ \textbf{P} \models (\texttt{S} \perp \texttt{D}) \text{ but }
\textbf{P} \models (\texttt{S} \,\cancel{\perp}\, \texttt{D} \,\vert\, \texttt{G}) \text{ (explain away)} \]
\end{minipage}
\end{example}
\end{description}