mirror of https://github.com/NotXia/unibo-ai-notes.git

Add FAIKR3 Bayesian networks
@@ -6,7 +6,7 @@
 \usepackage{geometry}
 \usepackage{graphicx, xcolor}
-\usepackage{amsmath, amsfonts, amssymb, amsthm, mathtools, bm, upgreek}
+\usepackage{amsmath, amsfonts, amssymb, amsthm, mathtools, bm, upgreek, cancel}
 \usepackage{hyperref}
 \usepackage[nameinlink]{cleveref}
 \usepackage[all]{hypcap} % Links hyperref to object top and not caption
BIN  src/fundamentals-of-ai-and-kr/module3/img/_causal_example.pdf (new file; binary file not shown)
BIN  src/fundamentals-of-ai-and-kr/module3/img/_cpt_graph.pdf (new file; binary file not shown)
BIN  4 more new binary files not shown.
@@ -49,11 +49,14 @@
 $\textbf{P}(\texttt{Weather}, \texttt{Cavity}) = $
 \begin{center}
 \small
-\begin{tabular}{c | cccc}
-    & \texttt{Weather=sunny} & \texttt{Weather=rain} & \texttt{Weather=cloudy} & \texttt{Weather=snow} \\
+\begin{tabular}{|c | c|c|c|c|}
+    \cline{2-5}
+    \multicolumn{1}{c|}{} & \texttt{Weather=sunny} & \texttt{Weather=rain} & \texttt{Weather=cloudy} & \texttt{Weather=snow} \\
     \hline
     \texttt{Cavity=true} & 0.144 & 0.02 & 0.016 & 0.02 \\
-    \texttt{Cavity=false} & 0.576 & 0.08 & 0.064 & 0.08
+    \hline
+    \texttt{Cavity=false} & 0.576 & 0.08 & 0.064 & 0.08 \\
+    \hline
 \end{tabular}
 \end{center}
 \end{example}
@@ -125,6 +128,7 @@ can be computed as the sum of the atomic events where $\phi$ is true:
     \multicolumn{1}{c|}{} & \texttt{catch} & $\lnot$\texttt{catch} & \texttt{catch} & $\lnot$\texttt{catch} \\
     \hline
     \texttt{cavity} & 0.108 & 0.012 & 0.072 & 0.008 \\
+    \hline
     $\lnot$\texttt{cavity} & 0.016 & 0.064 & 0.144 & 0.576 \\
     \hline
 \end{tabular}
@@ -147,11 +151,14 @@ can be computed as the sum of the atomic events where $\phi$ is true:
 Given the joint distribution:
 \begin{center}
 \small
-\begin{tabular}{c | cccc}
-    & \texttt{Weather=sunny} & \texttt{Weather=rain} & \texttt{Weather=cloudy} & \texttt{Weather=snow} \\
+\begin{tabular}{|c | c|c|c|c|}
+    \cline{2-5}
+    \multicolumn{1}{c|}{} & \texttt{Weather=sunny} & \texttt{Weather=rain} & \texttt{Weather=cloudy} & \texttt{Weather=snow} \\
     \hline
     \texttt{Cavity=true} & 0.144 & 0.02 & 0.016 & 0.02 \\
-    \texttt{Cavity=false} & 0.576 & 0.08 & 0.064 & 0.08
+    \hline
+    \texttt{Cavity=false} & 0.576 & 0.08 & 0.064 & 0.08 \\
+    \hline
 \end{tabular}
 \end{center}
 We have that $\prob{\texttt{Weather}=\texttt{sunny}} = 0.144 + 0.576$
@@ -176,6 +183,7 @@ can be computed as the sum of the atomic events where $\phi$ is true:
     \multicolumn{1}{c|}{} & \texttt{catch} & $\lnot$\texttt{catch} & \texttt{catch} & $\lnot$\texttt{catch} \\
     \hline
     \texttt{cavity} & 0.108 & 0.012 & 0.072 & 0.008 \\
+    \hline
     $\lnot$\texttt{cavity} & 0.016 & 0.064 & 0.144 & 0.576 \\
     \hline
 \end{tabular}
@@ -183,10 +191,10 @@ can be computed as the sum of the atomic events where $\phi$ is true:
 
 We have that:
 \[
-\textbf{P}(\texttt{cavity} \vert \texttt{toothache}) =
+\textbf{P}(\texttt{Cavity} \vert \texttt{toothache}) =
 \langle
 \frac{\prob{\texttt{cavity}, \texttt{toothache}, \texttt{catch}}}{\prob{\texttt{toothache}}},
-\frac{\prob{\texttt{cavity}, \texttt{toothache}, \lnot\texttt{catch}}}{\prob{\texttt{toothache}}}
+\frac{\prob{\lnot\texttt{cavity}, \texttt{toothache}, \lnot\texttt{catch}}}{\prob{\texttt{toothache}}}
 \rangle
 \]
 \end{example}
@@ -195,9 +203,173 @@ can be computed as the sum of the atomic events where $\phi$ is true:
 Given a set of query variables $\bm{Y}$, the evidence variables $\vec{e}$ and the other hidden variables $\bm{H}$,
 the probability of the query can be computed as:
 \[
-\textbf{P}(\bm{Y} \vert \bm{E}=\vec{e}) = \alpha \textbf{P}(\bm{Y} \vert \bm{E}=\vec{e})
-= \alpha \sum_{\vec{h}} \textbf{P}(\bm{Y} \vert \bm{E}=\vec{e}, \bm{H}=\vec{h})
+\textbf{P}(\bm{Y} \vert \bm{E}=\vec{e}) = \alpha \textbf{P}(\bm{Y}, \bm{E}=\vec{e})
+= \alpha \sum_{\vec{h}} \textbf{P}(\bm{Y}, \bm{E}=\vec{e}, \bm{H}=\vec{h})
 \]
 The problem of this approach is that it has exponential time and space complexity
-which makes it not applicable in practice.
+that makes it not applicable in practice.
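A worked instance of this enumeration (a sketch added for concreteness; the numbers come from the full joint table of the earlier \texttt{toothache}/\texttt{catch}/\texttt{cavity} example):
\begin{example}
    % Added sketch: entries 0.108, 0.012, 0.016, 0.064 are the toothache
    % columns of the full joint table shown earlier.
    Summing out the hidden variable $\texttt{Catch}$:
    \[
        \textbf{P}(\texttt{Cavity} \,\vert\, \texttt{toothache})
        = \alpha \langle 0.108 + 0.012,\; 0.016 + 0.064 \rangle
        = \alpha \langle 0.12,\; 0.08 \rangle
        = \langle 0.6,\; 0.4 \rangle
    \]
\end{example}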

To reduce the size of the representation, conditional independence can be exploited.
\begin{example}
    Knowing that $\textbf{P} \models (\texttt{Catch} \perp \texttt{Toothache} \vert \texttt{Cavity})$,
    we can compute the distribution $\textbf{P}(\texttt{Toothache}, \texttt{Catch}, \texttt{Cavity})$ as follows:
    \[
    \begin{split}
        \textbf{P}&(\texttt{Toothache}, \texttt{Catch}, \texttt{Cavity}) \\
        &= \textbf{P}(\texttt{Toothache} \,\vert\, \texttt{Catch}, \texttt{Cavity})
            \textbf{P}(\texttt{Catch} \,\vert\, \texttt{Cavity}) \textbf{P}(\texttt{Cavity}) \\
        &= \textbf{P}(\texttt{Toothache} \,\vert\, \texttt{Cavity})
            \textbf{P}(\texttt{Catch} \,\vert\, \texttt{Cavity}) \textbf{P}(\texttt{Cavity})
    \end{split}
    \]
    $\textbf{P}(\texttt{Toothache}, \texttt{Catch}, \texttt{Cavity})$ has $7$ independent values,
    a number that grows exponentially with the number of variables
    ($2 \cdot 2 \cdot 2 = 8$ entries, but one of them can be omitted as the probabilities always sum up to $1$).

    $\textbf{P}(\texttt{Toothache} \,\vert\, \texttt{Cavity}) \textbf{P}(\texttt{Catch} \,\vert\, \texttt{Cavity}) \textbf{P}(\texttt{Cavity})$
    has $5$ independent values, a number that grows linearly
    ($4 + 4 + 2 = 10$ entries, but one value of $\textbf{P}(\texttt{Cavity})$ can be omitted.
    The conditional probabilities require two tables, one per conditional distribution, each with $4$ entries;
    for each value of $\texttt{Cavity}$ one entry can be omitted,
    therefore requiring $2$ independent values per conditional probability instead of $4$).
\end{example}
\end{description}



\section{Bayesian networks}

\begin{description}
    \item[Bayes' rule] \marginnote{Bayes' rule}
        \[ \prob{a \,\vert\, b} = \frac{\prob{b \,\vert\, a} \prob{a}}{\prob{b}} \]

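        A quick worked instance (the classic meningitis/stiff-neck textbook example; the figures are the usual illustrative ones, assumed here rather than taken from these notes):
        \begin{example}
            % Assumed illustrative figures (standard textbook example), not from these notes:
            % P(s|m) = 0.7, P(m) = 1/50000, P(s) = 0.01,
            % where m = meningitis and s = stiff neck.
            \[
                \prob{m \,\vert\, s} = \frac{\prob{s \,\vert\, m} \prob{m}}{\prob{s}}
                = \frac{0.7 \times 1/50000}{0.01} = 0.0014
            \]
        \end{example}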
    \item[Bayes' rule and conditional independence]
        Given the random variables $\texttt{Cause}$ and\\
        $\texttt{Effect}_1, \dots, \texttt{Effect}_n$, with the $\texttt{Effect}_i$ conditionally independent of each other given $\texttt{Cause}$,
        we can compute $\textbf{P}(\texttt{Cause}, \texttt{Effect}_1, \dots, \texttt{Effect}_n)$ as follows:
        \[
            \textbf{P}(\texttt{Cause}, \texttt{Effect}_1, \dots, \texttt{Effect}_n) =
            \left(\prod_i \textbf{P}(\texttt{Effect}_i \,\vert\, \texttt{Cause})\right) \textbf{P}(\texttt{Cause})
        \]
        The number of parameters grows linearly with the number of effects.

        \begin{example}
            Knowing that $\textbf{P} \models (\texttt{Catch} \perp \texttt{Toothache} \vert \texttt{Cavity})$:
            \[
            \begin{split}
                \textbf{P}&(\texttt{Cavity} \,\vert\, \texttt{toothache} \land \texttt{catch}) \\
                &= \alpha\textbf{P}(\texttt{toothache} \land \texttt{catch} \,\vert\, \texttt{Cavity})\textbf{P}(\texttt{Cavity}) \\
                &= \alpha\textbf{P}(\texttt{toothache} \,\vert\, \texttt{Cavity})
                    \textbf{P}(\texttt{catch} \,\vert\, \texttt{Cavity})\textbf{P}(\texttt{Cavity})
            \end{split}
            \]
        \end{example}

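        The result can also be checked numerically (a sketch added for concreteness, using the full joint table shown earlier):
        \begin{example}
            % Added sketch: 0.108 and 0.016 are the (toothache, catch) entries
            % of the full joint table shown earlier.
            \[
                \prob{\texttt{cavity} \,\vert\, \texttt{toothache} \land \texttt{catch}}
                = \frac{0.108}{0.108 + 0.016} \approx 0.871
            \]
        \end{example}
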
    \item[Bayesian network] \marginnote{Bayesian network}
        A directed graph that encodes conditional independence assertions and provides a compact specification of the full joint distribution.
        \begin{itemize}
            \item Directed acyclic graph.
            \item Nodes represent variables.
            \item The conditional distribution of a node is given by its parents:
                \[ \textbf{P}(X_i \,\vert\, \texttt{parents}(X_i)) \]
                In other words, if there is an edge from $A$ to $B$, then $A$ (cause) influences $B$ (effect).
        \end{itemize}

        \begin{description}
            \item[Conditional probability table (CPT)] \marginnote{Conditional probability table (CPT)}
                In the case of boolean variables, the conditional distribution of a node can be represented using
                a table with one row for each combination of the values of its parents.

                \begin{example}
                    Given the boolean variables $A$, $B$ and $C$, with $C$ depending on $A$ and $B$, we have that:\\
                    \begin{minipage}{.48\linewidth}
                        \centering
                        \includegraphics[width=0.35\linewidth]{img/_cpt_graph.pdf}
                    \end{minipage}
                    \begin{minipage}{.48\linewidth}
                        \centering
                        \begin{tabular}{c|c|c|c}
                            A & B & $\prob{c \vert A, B}$ & $\prob{\lnot c \vert A, B}$ \\
                            \hline
                            a & b & $\alpha$ & $1-\alpha$ \\
                            $\lnot$a & b & $\beta$ & $1-\beta$ \\
                            a & $\lnot$b & $\gamma$ & $1-\gamma$ \\
                            $\lnot$a & $\lnot$b & $\delta$ & $1-\delta$ \\
                        \end{tabular}
                    \end{minipage}
                \end{example}
        \end{description}

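        In general, the CPT of a boolean node with $k$ boolean parents has $2^k$ rows,
        each contributing one independent value (the last column is determined by complementation).
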
    \item[Reasoning patterns] \marginnote{Reasoning patterns}
        Given a Bayesian network, the following reasoning patterns can be used:
        \begin{descriptionlist}
            \item[Causal] \marginnote{Causal reasoning}
                To make a prediction: from the cause, derive the effect.
                \begin{example}
                    Knowing $\texttt{Intelligence}$, it is possible to make a prediction of $\texttt{Letter}$.
                    \begin{center}
                        \includegraphics[width=0.5\linewidth]{img/_causal_example.pdf}
                    \end{center}
                \end{example}

            \item[Evidential] \marginnote{Evidential reasoning}
                To find an explanation: from the effect, derive the cause.
                \begin{example}
                    Knowing $\texttt{Grade}$, it is possible to explain it by estimating\\$\texttt{Intelligence}$.
                    \begin{center}
                        \includegraphics[width=0.65\linewidth]{img/_evidential_example.pdf}
                    \end{center}
                \end{example}

            \item[Explain away] \marginnote{Explain away reasoning}
                Intercausal reasoning: once a common effect is observed, evidence about one cause changes the belief in the other causes of that effect.
                \begin{example}
                    Knowing $\texttt{Difficulty}$ and $\texttt{Grade}$,
                    it is possible to estimate \\$\texttt{Intelligence}$.

                    Note that if $\texttt{Grade}$ was not known,
                    $\texttt{Difficulty}$ and $\texttt{Intelligence}$ would be independent.
                    \begin{center}
                        \includegraphics[width=0.70\linewidth]{img/_explainaway_example.pdf}
                    \end{center}
                \end{example}
        \end{descriptionlist}

    \item[Global semantics] \marginnote{Global semantics}
        Given a Bayesian network, the full joint distribution can be defined as
        the product of the local conditional distributions:
        \[ \prob{x_1, \dots, x_n} = \prod_{i=1}^{n} \prob{x_i \,\vert\, \texttt{parents}(X_i)} \]

        \begin{example}
            Given the following Bayesian network:

            \begin{minipage}{.3\linewidth}
                \centering
                \includegraphics[width=0.7\linewidth]{img/_global_semantics_example.pdf}
            \end{minipage}
            \begin{minipage}{.6\linewidth}
                \[
                \begin{split}
                    &\prob{j \land m \land a \land \lnot b \land \lnot e} \\
                    &= \prob{\lnot b} \prob{\lnot e} \prob{a \,\vert\, \lnot b, \lnot e}
                        \prob{j \,\vert\, a} \prob{m \,\vert\, a}
                \end{split}
                \]
            \end{minipage}
        \end{example}

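        Plugging in numbers (a sketch added for concreteness; the CPT values below are the usual textbook figures for this burglary network, assumed here since the notes do not list them):
        \begin{example}
            % Assumed CPT values (standard textbook figures, not from these notes):
            % P(b) = 0.001, P(e) = 0.002, P(a | ¬b, ¬e) = 0.001,
            % P(j | a) = 0.90, P(m | a) = 0.70.
            \[
                \prob{j \land m \land a \land \lnot b \land \lnot e}
                = 0.999 \cdot 0.998 \cdot 0.001 \cdot 0.90 \cdot 0.70
                \approx 6.28 \cdot 10^{-4}
            \]
        \end{example}
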
    \item[Independence] \marginnote{Bayesian network independence}
        Intuitively, an effect is independent of a cause
        if there is another variable on the path between them whose value is already known.
        \begin{example}
            \phantom{}

            \begin{minipage}{.3\linewidth}
                \centering
                \includegraphics[width=0.75\linewidth]{img/_independence_example.pdf}
            \end{minipage}
            \begin{minipage}{.6\linewidth}
                \[ \textbf{P} \models (\texttt{L} \perp \texttt{D}, \texttt{I}, \texttt{S} \,\vert\, \texttt{G}) \]
                \[ \textbf{P} \models (\texttt{S} \perp \texttt{L} \,\vert\, \texttt{G}) \]
                \[ \textbf{P} \models (\texttt{S} \perp \texttt{D}) \text{ but }
                    \textbf{P} \models (\texttt{S} \,\cancel{\perp}\, \texttt{D} \,\vert\, \texttt{G}) \text{ (explain away)} \]
            \end{minipage}
        \end{example}
\end{description}