From 6e133a9f791ca559583cc30742d2854b4d51be60 Mon Sep 17 00:00:00 2001
From: NotXia <35894453+NotXia@users.noreply.github.com>
Date: Thu, 19 Oct 2023 20:45:49 +0200
Subject: [PATCH] Add FAIKR3 joint distribution inference

---
 .../module3/main.tex                  |   1 +
 .../module3/sections/_intro.tex       |  85 --------
 .../module3/sections/_probability.tex | 203 ++++++++++++++++++
 3 files changed, 204 insertions(+), 85 deletions(-)
 create mode 100644 src/fundamentals-of-ai-and-kr/module3/sections/_probability.tex

diff --git a/src/fundamentals-of-ai-and-kr/module3/main.tex b/src/fundamentals-of-ai-and-kr/module3/main.tex
index 9e83fb6..538fa9b 100644
--- a/src/fundamentals-of-ai-and-kr/module3/main.tex
+++ b/src/fundamentals-of-ai-and-kr/module3/main.tex
@@ -8,5 +8,6 @@
     \makenotesfront
 
     \input{sections/_intro.tex}
+    \input{sections/_probability.tex}
 
 \end{document}
\ No newline at end of file
diff --git a/src/fundamentals-of-ai-and-kr/module3/sections/_intro.tex b/src/fundamentals-of-ai-and-kr/module3/sections/_intro.tex
index 0dce1eb..de3f24c 100644
--- a/src/fundamentals-of-ai-and-kr/module3/sections/_intro.tex
+++ b/src/fundamentals-of-ai-and-kr/module3/sections/_intro.tex
@@ -45,89 +45,4 @@
     Defined as:
     \[ \text{Decision theory} = \text{Utility theory} + \text{Probability theory} \]
     where the utility theory depends on one's preferences.
-\end{description}
-
-
-\subsection{Probability}
-
-\begin{description}
-    \item[Sample space] \marginnote{Sample space}
-    Set $\Omega$ of all possible worlds.
-    \begin{descriptionlist}
-        \item[Event] \marginnote{Event}
-        Subset $A \subseteq \Omega$.
-        \item[Sample point/Possible world/Atomic event] \marginnote{Sample point}
-        Element $\omega \in \Omega$.
-    \end{descriptionlist}
-
-    \item[Probability space] \marginnote{Probability space}
-    A probability space/model is a function $\prob{\cdot}: \Omega \rightarrow [0, 1]$ assigned to a sample space such that:
-    \begin{itemize}
-        \item $0 \leq \prob{\omega} \leq 1$
-        \item $\sum_{\omega \in \Omega} \prob{\omega} = 1$
-        \item $\prob{A} = \sum_{\omega \in A} \prob{\omega}$
-    \end{itemize}
-
-    \item[Random variable] \marginnote{Random variable}
-    A function from an event to some range (e.g. reals, booleans, \dots).
-
-    \item[Probability distribution] \marginnote{Probability distribution}
-    For any random variable $X$:
-    \[ \prob{X = x_i} = \sum_{\omega \text{ st } X(\omega)=x_i} \prob{\omega} \]
-
-    \item[Proposition] \marginnote{Proposition}
-    Event where a random variable has a certain value.
-    \[ a = \{ \omega \,\vert\, A(\omega) = \texttt{true} \} \]
-    \[ \lnot a = \{ \omega \,\vert\, A(\omega) = \texttt{false} \} \]
-    \[ (\texttt{Weather} = \texttt{rain}) = \{ \omega \,\vert\, B(\omega) = \texttt{rain} \} \]
-
-    \item[Prior probability] \marginnote{Prior probability}
-    Prior/unconditional probability of a proposition based on known evidence.
-
-    \item[Probability distribution (all)] \marginnote{Probability distribution (all)}
-    Gives all the probabilities of a random variable.
-    \[ \textbf{P}(A) = \langle \prob{A=a_1}, \dots, \prob{A=a_n} \rangle \]
-
-    \item[Joint probability distribution] \marginnote{Joint probability distribution}
-    The joint probability distribution of a set of random variables gives
-    the probability of all the different combinations of their atomic events.
-
-    Note: Every question on a domain can, in theory, be answered using the joint distribution.
-    In practice, it is hard to apply.
-
-    \begin{example}
-        $\textbf{P}(\texttt{Weather}, \texttt{Cavity}) = $
-        \begin{center}
-            \small
-            \begin{tabular}{c | cccc}
-                & \texttt{Weather=sunny} & \texttt{Weather=rain} & \texttt{Weather=cloudy} & \texttt{Weather=snow} \\
-                \hline
-                \texttt{Cavity=true} & 0.144 & 0.02 & 0.016 & 0.02 \\
-                \texttt{Cavity=false} & 0.576 & 0.08 & 0.064 & 0.08
-            \end{tabular}
-        \end{center}
-    \end{example}
-
-    \item[Probability density function] \marginnote{Probability density function}
-    The probability density function (PDF) of a random variable $X$ is a function $p: \mathbb{R} \rightarrow \mathbb{R}$
-    such that:
-    \[ \int_{\mathcal{T}_X} p(x) \,dx = 1 \]
-    \begin{descriptionlist}
-        \item[Uniform distribution] \marginnote{Uniform distribution}
-        \[
-            p(x) = \text{Unif}[a, b](x) =
-            \begin{cases}
-                \frac{1}{b-a} & a \leq x \leq b \\
-                0 & \text{otherwise}
-            \end{cases}
-        \]
-        \item[Gaussian (normal) distribution] \marginnote{Gaussian (normal) distribution}
-        \[ \mathcal{N}(\mu, \sigma^2) = \frac{1}{\sigma\sqrt{2\pi}}e^{\frac{-(x-\mu)^2}{2\sigma^2}} \]
-
-        $\mathcal{N}(0, 1)$ is the standard gaussian.
-    \end{descriptionlist}
-
-    \item[Conditional probability] \marginnote{Conditional probability}
-    Probability of a prior knowledge with new evidence:
-    \[ \prob{a \vert b} = \frac{\prob{a \land b}}{\prob{b}} \]
 \end{description}
\ No newline at end of file
diff --git a/src/fundamentals-of-ai-and-kr/module3/sections/_probability.tex b/src/fundamentals-of-ai-and-kr/module3/sections/_probability.tex
new file mode 100644
index 0000000..735307d
--- /dev/null
+++ b/src/fundamentals-of-ai-and-kr/module3/sections/_probability.tex
@@ -0,0 +1,203 @@
+\chapter{Probability}
+
+\begin{description}
+    \item[Sample space] \marginnote{Sample space}
+    Set $\Omega$ of all possible worlds.
+    \begin{descriptionlist}
+        \item[Event] \marginnote{Event}
+        Subset $A \subseteq \Omega$.
+        \item[Sample point/Possible world/Atomic event] \marginnote{Sample point}
+        Element $\omega \in \Omega$.
+    \end{descriptionlist}
+
+    \item[Probability space] \marginnote{Probability space}
+    A probability space/model is a function $\prob{\cdot}: \Omega \rightarrow [0, 1]$ assigned to a sample space such that:
+    \begin{itemize}
+        \item $0 \leq \prob{\omega} \leq 1$
+        \item $\sum_{\omega \in \Omega} \prob{\omega} = 1$
+        \item $\prob{A} = \sum_{\omega \in A} \prob{\omega}$
+    \end{itemize}
+
+    \item[Random variable] \marginnote{Random variable}
+    A function from sample points to some range (e.g. reals, booleans, \dots).
+
+    \item[Probability distribution] \marginnote{Probability distribution}
+    For any random variable $X$:
+    \[ \prob{X = x_i} = \sum_{\omega \text{ s.t. } X(\omega)=x_i} \prob{\omega} \]
+
+    \item[Proposition] \marginnote{Proposition}
+    Event where a random variable has a certain value.
+    \[ a = \{ \omega \,\vert\, A(\omega) = \texttt{true} \} \]
+    \[ \lnot a = \{ \omega \,\vert\, A(\omega) = \texttt{false} \} \]
+    \[ (\texttt{Weather} = \texttt{rain}) = \{ \omega \,\vert\, \texttt{Weather}(\omega) = \texttt{rain} \} \]
+
+    \item[Prior probability] \marginnote{Prior probability}
+    Prior/unconditional probability of a proposition in the absence of any new evidence.
+
+    \item[Probability distribution (all)] \marginnote{Probability distribution (all)}
+    Gives the probabilities of all the possible values of a random variable.
+    \[ \textbf{P}(A) = \langle \prob{A=a_1}, \dots, \prob{A=a_n} \rangle \]
+
+    \item[Joint probability distribution] \marginnote{Joint probability distribution}
+    The joint probability distribution of a set of random variables gives
+    the probability of every possible combination of their values.
+
+    Note: Every question on a domain can, in theory, be answered using the joint distribution.
+    In practice, it is hard to apply, since the table grows exponentially with the number of variables.
+
+    \begin{example}
+        $\textbf{P}(\texttt{Weather}, \texttt{Cavity}) = $
+        \begin{center}
+            \small
+            \begin{tabular}{c | cccc}
+                & \texttt{Weather=sunny} & \texttt{Weather=rain} & \texttt{Weather=cloudy} & \texttt{Weather=snow} \\
+                \hline
+                \texttt{Cavity=true} & 0.144 & 0.02 & 0.016 & 0.02 \\
+                \texttt{Cavity=false} & 0.576 & 0.08 & 0.064 & 0.08
+            \end{tabular}
+        \end{center}
+    \end{example}
+
+    \item[Probability density function] \marginnote{Probability density function}
+    The probability density function (PDF) of a continuous random variable $X$ is a function $p: \mathbb{R} \rightarrow \mathbb{R}$
+    such that $p(x) \geq 0$ and:
+    \[ \int_{\mathcal{T}_X} p(x) \,dx = 1 \]
+    \begin{descriptionlist}
+        \item[Uniform distribution] \marginnote{Uniform distribution}
+        \[
+            p(x) = \text{Unif}[a, b](x) =
+            \begin{cases}
+                \frac{1}{b-a} & a \leq x \leq b \\
+                0 & \text{otherwise}
+            \end{cases}
+        \]
+        \item[Gaussian (normal) distribution] \marginnote{Gaussian (normal) distribution}
+        \[ p(x) = \mathcal{N}(\mu, \sigma^2)(x) = \frac{1}{\sigma\sqrt{2\pi}}e^{-\frac{(x-\mu)^2}{2\sigma^2}} \]
+
+        $\mathcal{N}(0, 1)$ is the standard Gaussian.
+    \end{descriptionlist}
+
+    \item[Conditional probability] \marginnote{Conditional probability}
+    Probability of a proposition given some observed evidence:
+    \[ \prob{a \vert b} = \frac{\prob{a \land b}}{\prob{b}} \]
+    The product rule gives an alternative formulation:
+    \[ \prob{a \land b} = \prob{a \vert b} \prob{b} = \prob{b \vert a} \prob{a} \]
+
+    \begin{description}
+        \item[Chain rule] \marginnote{Chain rule}
+        Successive application of the product rule:
+        \[
+            \begin{split}
+                \textbf{P}(X_1, \dots, X_n) &= \textbf{P}(X_1, \dots, X_{n-1}) \textbf{P}(X_n \vert X_1, \dots, X_{n-1}) \\
+                &= \textbf{P}(X_1, \dots, X_{n-2}) \textbf{P}(X_{n-1} \vert X_1, \dots, X_{n-2}) \textbf{P}(X_n \vert X_1, \dots, X_{n-1}) \\
+                &= \prod_{i=1}^{n} \textbf{P}(X_i \vert X_1, \dots, X_{i-1})
+            \end{split}
+        \]
+    \end{description}
+
+    \item[Independence] \marginnote{Independence}
+    Two random variables $A$ and $B$ are independent ($A \perp B$) iff:
+    \[
+        \textbf{P}(A \vert B) = \textbf{P}(A) \,\text{ or }\,
+        \textbf{P}(B \vert A) = \textbf{P}(B) \,\text{ or }\,
+        \textbf{P}(A, B) = \textbf{P}(A)\textbf{P}(B)
+    \]
+
+    \item[Conditional independence] \marginnote{Conditional independence}
+    Two random variables $A$ and $B$ are conditionally independent given a third random variable $C$ iff:
+    \[ \textbf{P}(A \,\vert\, C, B) = \textbf{P}(A \,\vert\, C) \]
+    or, equivalently, $\textbf{P}(A, B \,\vert\, C) = \textbf{P}(A \,\vert\, C) \textbf{P}(B \,\vert\, C)$.
+\end{description}
+
+
+
+\section{Inference with full joint distributions}
+Given a joint distribution, the probability of any proposition $\phi$
+can be computed as the sum of the probabilities of the atomic events in which $\phi$ holds:
+\[ \prob{\phi} = \sum_{\omega:\, \omega \models \phi} \prob{\omega} \]
+
+\begin{example}
+    Given the following joint distribution:
+    \begin{center}
+        \begin{tabular}{|c|c|c|c|c|}
+            \cline{2-5}
+            \multicolumn{1}{c|}{} & \multicolumn{2}{c|}{\texttt{toothache}} & \multicolumn{2}{c|}{$\lnot$\texttt{toothache}} \\
+            \cline{2-5}
+            \multicolumn{1}{c|}{} & \texttt{catch} & $\lnot$\texttt{catch} & \texttt{catch} & $\lnot$\texttt{catch} \\
+            \hline
+            \texttt{cavity} & 0.108 & 0.012 & 0.072 & 0.008 \\
+            $\lnot$\texttt{cavity} & 0.016 & 0.064 & 0.144 & 0.576 \\
+            \hline
+        \end{tabular}
+    \end{center}
+
+    We have that:
+    \begin{itemize}
+        \item $\prob{\texttt{toothache}} = 0.108 + 0.012 + 0.016 + 0.064 = 0.2$
+        \item $\prob{\texttt{cavity} \vee \texttt{toothache}} = 0.108 + 0.012 + 0.072 + 0.008 + 0.016 + 0.064 = 0.28$
+        \item $\prob{\lnot\texttt{cavity} \,\vert\, \texttt{toothache}} = \frac{\prob{\lnot\texttt{cavity} \land \texttt{toothache}}}{\prob{\texttt{toothache}}} =
+              \frac{0.016 + 0.064}{0.2} = 0.4$
+    \end{itemize}
+\end{example}
+
+\begin{description}
+    \item[Marginalization] \marginnote{Marginalization}
+    The probability that a random variable assumes a specific value is given by
+    the sum of all the joint probabilities in which that random variable assumes the given value.
+    In general, for variables $\bm{Y}$ and $\bm{Z}$:
+    \[ \textbf{P}(\bm{Y}) = \sum_{\vec{z}} \textbf{P}(\bm{Y}, \bm{Z}=\vec{z}) \]
+    \begin{example}
+        Given the joint distribution:
+        \begin{center}
+            \small
+            \begin{tabular}{c | cccc}
+                & \texttt{Weather=sunny} & \texttt{Weather=rain} & \texttt{Weather=cloudy} & \texttt{Weather=snow} \\
+                \hline
+                \texttt{Cavity=true} & 0.144 & 0.02 & 0.016 & 0.02 \\
+                \texttt{Cavity=false} & 0.576 & 0.08 & 0.064 & 0.08
+            \end{tabular}
+        \end{center}
+        We have that $\prob{\texttt{Weather}=\texttt{sunny}} = 0.144 + 0.576 = 0.72$.
+    \end{example}
+
+    \item[Conditioning] \marginnote{Conditioning}
+    Turning a joint distribution into a conditional one by keeping only the entries
+    consistent with the condition (reduction) and renormalizing them so that they sum to 1.
+
+    \item[Normalization] \marginnote{Normalization}
+    A conditional probability distribution $\textbf{P}(A \vert B)$
+    can be formulated as:
+    \[ \textbf{P}(A \vert B) = \alpha\textbf{P}(A, B) \]
+    where $\alpha$ is a normalization constant.
+    In fact, once the evidence $B$ is fixed, the denominator $\prob{B}$ is the same for every entry of the distribution,
+    so it suffices to rescale the entries of $\textbf{P}(A, B)$ so that they sum to 1.
+
+    \begin{example}
+        Given the joint distribution:
+        \begin{center}
+            \begin{tabular}{|c|c|c|c|c|}
+                \cline{2-5}
+                \multicolumn{1}{c|}{} & \multicolumn{2}{c|}{\texttt{toothache}} & \multicolumn{2}{c|}{$\lnot$\texttt{toothache}} \\
+                \cline{2-5}
+                \multicolumn{1}{c|}{} & \texttt{catch} & $\lnot$\texttt{catch} & \texttt{catch} & $\lnot$\texttt{catch} \\
+                \hline
+                \texttt{cavity} & 0.108 & 0.012 & 0.072 & 0.008 \\
+                $\lnot$\texttt{cavity} & 0.016 & 0.064 & 0.144 & 0.576 \\
+                \hline
+            \end{tabular}
+        \end{center}
+
+        We have that:
+        \[
+            \textbf{P}(\texttt{Cavity} \vert \texttt{toothache}) =
+            \left\langle
+                \frac{\prob{\texttt{cavity} \land \texttt{toothache}}}{\prob{\texttt{toothache}}},
+                \frac{\prob{\lnot\texttt{cavity} \land \texttt{toothache}}}{\prob{\texttt{toothache}}}
+            \right\rangle
+            = \alpha \langle \prob{\texttt{cavity} \land \texttt{toothache}}, \prob{\lnot\texttt{cavity} \land \texttt{toothache}} \rangle
+        \]
+    \end{example}
+
+    \item[Probability query] \marginnote{Probability query}
+    Given a set of query variables $\bm{Y}$, the evidence variables $\bm{E}$ with observed values $\vec{e}$,
+    and the remaining hidden variables $\bm{H}$,
+    the probability of the query can be computed as:
+    \[
+        \textbf{P}(\bm{Y} \vert \bm{E}=\vec{e}) = \alpha \textbf{P}(\bm{Y}, \bm{E}=\vec{e})
+        = \alpha \sum_{\vec{h}} \textbf{P}(\bm{Y}, \bm{E}=\vec{e}, \bm{H}=\vec{h})
+    \]
+    The problem with this approach is its exponential time and space complexity in the number of variables,
+    which makes it inapplicable in practice (a worked instance is shown after this list).
+\end{description}
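+
+As a worked instance of the probability query formula, consider the dentist joint distribution above;
+taking \texttt{Cavity} as query variable, \texttt{Toothache} as evidence and \texttt{Catch} as hidden
+variable is only an illustrative choice:
+\begin{example}
+    \[
+        \begin{split}
+            \textbf{P}(\texttt{Cavity} \vert \texttt{toothache})
+            &= \alpha \sum_{c \,\in\, \{\texttt{catch},\, \lnot\texttt{catch}\}} \textbf{P}(\texttt{Cavity}, \texttt{toothache}, c) \\
+            &= \alpha \left( \langle 0.108, 0.016 \rangle + \langle 0.012, 0.064 \rangle \right)
+            = \alpha \langle 0.12, 0.08 \rangle = \langle 0.6, 0.4 \rangle
+        \end{split}
+    \]
+    where $\alpha = \frac{1}{0.12 + 0.08} = 5$ makes the resulting distribution sum to 1.
+\end{example}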
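+
+Independence assertions are what make this blow-up avoidable: they allow the joint distribution to be
+factored into smaller tables. In the $\textbf{P}(\texttt{Weather}, \texttt{Cavity})$ example above,
+\texttt{Weather} and \texttt{Cavity} are in fact independent:
+\begin{example}
+    Marginalizing gives $\textbf{P}(\texttt{Weather}) = \langle 0.72, 0.1, 0.08, 0.1 \rangle$ and
+    $\textbf{P}(\texttt{Cavity}) = \langle 0.2, 0.8 \rangle$, and every entry of the table satisfies
+    $\prob{w \land c} = \prob{w}\prob{c}$ (e.g. $0.144 = 0.72 \cdot 0.2$). Hence:
+    \[ \textbf{P}(\texttt{Weather}, \texttt{Cavity}) = \textbf{P}(\texttt{Weather}) \textbf{P}(\texttt{Cavity}) \]
+    and the 8-entry joint table can be stored as a 4-entry table plus a 2-entry table.
+\end{example}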
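+
+Conditional independence plays the same role when variables interact only through a common cause.
+A small sketch in the dentist domain, assuming that \texttt{Toothache} and \texttt{Catch} are
+conditionally independent given \texttt{Cavity} (an assumption that the table above happens to satisfy,
+e.g. $\prob{\texttt{toothache} \land \texttt{catch} \,\vert\, \texttt{cavity}} = 0.54 = 0.6 \cdot 0.9$):
+\begin{example}
+    By the chain rule:
+    \[
+        \begin{split}
+            \textbf{P}(\texttt{Toothache}, \texttt{Catch}, \texttt{Cavity})
+            &= \textbf{P}(\texttt{Cavity})\, \textbf{P}(\texttt{Catch} \vert \texttt{Cavity})\, \textbf{P}(\texttt{Toothache} \vert \texttt{Cavity}, \texttt{Catch}) \\
+            &= \textbf{P}(\texttt{Cavity})\, \textbf{P}(\texttt{Catch} \vert \texttt{Cavity})\, \textbf{P}(\texttt{Toothache} \vert \texttt{Cavity})
+        \end{split}
+    \]
+    so the full joint table never has to be built explicitly.
+\end{example}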