Moved FAIKR3 in year1

This commit is contained in:
2023-12-08 19:45:01 +01:00
parent ac34ec0076
commit e66b724d8c
38 changed files with 1 additions and 1 deletions

View File

@ -0,0 +1,19 @@
{
"name": "Fundamentals of Artificial Intelligence and Knowledge Representation",
"year": 1,
"semester": 1,
"pdfs": [
{
"name": "FAIKR module 1",
"path": "module1/faikr1.pdf"
},
{
"name": "FAIKR module 2",
"path": "module2/faikr2.pdf"
},
{
"name": "FAIKR module 3",
"path": "module3/faikr3.pdf"
}
]
}

View File

@ -0,0 +1 @@
../../../ainotes.cls

View File

@ -0,0 +1,18 @@
\documentclass[11pt]{ainotes}
\title{Fundamentals of Artificial Intelligence and Knowledge Representation\\(Module 3)}
\date{2023 -- 2024}
\def\lastupdate{{PLACEHOLDER-LAST-UPDATE}}
\begin{document}
\makenotesfront
\input{sections/_intro.tex}
\input{sections/_probability.tex}
\input{sections/_bayesian_net.tex}
\input{sections/_exact_inference.tex}
\input{sections/_approx_inference.tex}
\eoc
\end{document}

View File

@ -0,0 +1,172 @@
<mxfile host="app.diagrams.net" modified="2023-11-03T14:02:14.670Z" agent="Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0" etag="qJOM_4TLvx20vM_2exk3" version="22.0.0" type="device">
<diagram name="Pagina-1" id="VxJ7IpKM2QnTvTh_py_2">
<mxGraphModel dx="1050" dy="606" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
<root>
<mxCell id="0" />
<mxCell id="1" parent="0" />
<mxCell id="BRH9kT0y2_Ae_BUota2q-1" value="Y" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;horizontal=1;fontFamily=Verdana;" vertex="1" parent="1">
<mxGeometry x="700" y="340" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-2" value="X" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;fontFamily=Verdana;" vertex="1" parent="1">
<mxGeometry x="580" y="340" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-3" value="Z" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;fontFamily=Verdana;" vertex="1" parent="1">
<mxGeometry x="640" y="410" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-5" value="" style="endArrow=classic;html=1;rounded=0;entryX=1;entryY=0;entryDx=0;entryDy=0;exitX=0;exitY=1;exitDx=0;exitDy=0;fontSize=24;fontFamily=Verdana;strokeWidth=2;strokeColor=#e01b24;" edge="1" parent="1" source="BRH9kT0y2_Ae_BUota2q-1" target="BRH9kT0y2_Ae_BUota2q-3">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="730" y="440" as="sourcePoint" />
<mxPoint x="740" y="400" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-6" value="" style="endArrow=classic;html=1;rounded=0;entryX=0;entryY=0;entryDx=0;entryDy=0;exitX=1;exitY=1;exitDx=0;exitDy=0;fontSize=24;fontFamily=Verdana;strokeWidth=2;strokeColor=#e01b24;" edge="1" parent="1" source="BRH9kT0y2_Ae_BUota2q-2" target="BRH9kT0y2_Ae_BUota2q-3">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="740" y="355" as="sourcePoint" />
<mxPoint x="708" y="412" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-7" value="X" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;fontFamily=Verdana;" vertex="1" parent="1">
<mxGeometry x="120" y="40" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-8" value="Y" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;horizontal=1;fontFamily=Verdana;" vertex="1" parent="1">
<mxGeometry x="120" y="140" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-9" value="" style="endArrow=classic;html=1;rounded=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;exitX=0.5;exitY=1;exitDx=0;exitDy=0;fontSize=24;fontFamily=Verdana;strokeColor=#26a269;strokeWidth=2;" edge="1" parent="1" source="BRH9kT0y2_Ae_BUota2q-7" target="BRH9kT0y2_Ae_BUota2q-8">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="211" y="541" as="sourcePoint" />
<mxPoint x="249" y="569" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-10" value="X" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;fontFamily=Verdana;" vertex="1" parent="1">
<mxGeometry x="240" y="40" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-11" value="Z" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;horizontal=1;fontFamily=Verdana;" vertex="1" parent="1">
<mxGeometry x="240" y="140" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-12" value="" style="endArrow=classic;html=1;rounded=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;exitX=0.5;exitY=1;exitDx=0;exitDy=0;fontSize=24;fontFamily=Verdana;strokeColor=#26a269;strokeWidth=2;" edge="1" parent="1" source="BRH9kT0y2_Ae_BUota2q-10" target="BRH9kT0y2_Ae_BUota2q-11">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="331" y="541" as="sourcePoint" />
<mxPoint x="369" y="569" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-13" value="Y" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;fontFamily=Verdana;" vertex="1" parent="1">
<mxGeometry x="240" y="240" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-15" value="" style="endArrow=classic;html=1;rounded=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;exitX=0.5;exitY=1;exitDx=0;exitDy=0;fontSize=24;fontFamily=Verdana;strokeColor=#26a269;strokeWidth=2;" edge="1" parent="1" source="BRH9kT0y2_Ae_BUota2q-11" target="BRH9kT0y2_Ae_BUota2q-13">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="280" y="110" as="sourcePoint" />
<mxPoint x="280" y="150" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-16" value="X" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;fontFamily=Verdana;" vertex="1" parent="1">
<mxGeometry x="360" y="40" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-17" value="Z" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;horizontal=1;fontFamily=Verdana;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="1">
<mxGeometry x="360" y="140" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-18" value="" style="endArrow=classic;html=1;rounded=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;exitX=0.5;exitY=1;exitDx=0;exitDy=0;fontSize=24;fontFamily=Verdana;strokeColor=#e01b24;strokeWidth=2;" edge="1" parent="1" source="BRH9kT0y2_Ae_BUota2q-16" target="BRH9kT0y2_Ae_BUota2q-17">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="451" y="541" as="sourcePoint" />
<mxPoint x="489" y="569" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-19" value="Y" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;fontFamily=Verdana;" vertex="1" parent="1">
<mxGeometry x="360" y="240" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-20" value="" style="endArrow=classic;html=1;rounded=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;exitX=0.5;exitY=1;exitDx=0;exitDy=0;fontSize=24;fontFamily=Verdana;strokeColor=#e01b24;strokeWidth=2;" edge="1" parent="1" source="BRH9kT0y2_Ae_BUota2q-17" target="BRH9kT0y2_Ae_BUota2q-19">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="400" y="110" as="sourcePoint" />
<mxPoint x="400" y="150" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-25" value="Y" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;horizontal=1;fontFamily=Verdana;" vertex="1" parent="1">
<mxGeometry x="240" y="410" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-26" value="X" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;fontFamily=Verdana;" vertex="1" parent="1">
<mxGeometry x="120" y="410" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-27" value="Z" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;fontFamily=Verdana;" vertex="1" parent="1">
<mxGeometry x="180" y="340" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-28" value="" style="endArrow=none;html=1;rounded=0;entryX=1;entryY=1;entryDx=0;entryDy=0;fontSize=24;fontFamily=Verdana;endFill=0;startArrow=classic;startFill=1;strokeWidth=2;exitX=0;exitY=0;exitDx=0;exitDy=0;strokeColor=#26a269;" edge="1" parent="1" source="BRH9kT0y2_Ae_BUota2q-25" target="BRH9kT0y2_Ae_BUota2q-27">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="220" y="445" as="sourcePoint" />
<mxPoint x="280" y="330" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-29" value="" style="endArrow=none;html=1;rounded=0;entryX=0;entryY=1;entryDx=0;entryDy=0;exitX=1;exitY=0;exitDx=0;exitDy=0;fontSize=24;fontFamily=Verdana;endFill=0;startArrow=classic;startFill=1;strokeWidth=2;strokeColor=#26a269;" edge="1" parent="1" source="BRH9kT0y2_Ae_BUota2q-26" target="BRH9kT0y2_Ae_BUota2q-27">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="280" y="285" as="sourcePoint" />
<mxPoint x="248" y="342" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-35" value="Y" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;horizontal=1;fontFamily=Verdana;" vertex="1" parent="1">
<mxGeometry x="450" y="410" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-36" value="X" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;fontFamily=Verdana;" vertex="1" parent="1">
<mxGeometry x="330" y="410" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-37" value="Z" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;fontFamily=Verdana;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="1">
<mxGeometry x="390" y="340" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-38" value="" style="endArrow=none;html=1;rounded=0;entryX=1;entryY=1;entryDx=0;entryDy=0;fontSize=24;fontFamily=Verdana;endFill=0;startArrow=classic;startFill=1;strokeWidth=2;exitX=0;exitY=0;exitDx=0;exitDy=0;strokeColor=#e01b24;" edge="1" parent="1" source="BRH9kT0y2_Ae_BUota2q-35" target="BRH9kT0y2_Ae_BUota2q-37">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="420" y="435" as="sourcePoint" />
<mxPoint x="490" y="330" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-39" value="" style="endArrow=none;html=1;rounded=0;entryX=0;entryY=1;entryDx=0;entryDy=0;exitX=1;exitY=0;exitDx=0;exitDy=0;fontSize=24;fontFamily=Verdana;endFill=0;startArrow=classic;startFill=1;strokeWidth=2;strokeColor=#e01b24;" edge="1" parent="1" source="BRH9kT0y2_Ae_BUota2q-36" target="BRH9kT0y2_Ae_BUota2q-37">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="490" y="285" as="sourcePoint" />
<mxPoint x="458" y="342" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-40" value="Y" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;horizontal=1;fontFamily=Verdana;" vertex="1" parent="1">
<mxGeometry x="910" y="340" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-41" value="X" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;fontFamily=Verdana;" vertex="1" parent="1">
<mxGeometry x="790" y="340" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-42" value="Z" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;fontFamily=Verdana;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="1">
<mxGeometry x="850" y="410" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-43" value="" style="endArrow=classic;html=1;rounded=0;entryX=1;entryY=0;entryDx=0;entryDy=0;exitX=0;exitY=1;exitDx=0;exitDy=0;fontSize=24;fontFamily=Verdana;strokeWidth=2;strokeColor=#26a269;" edge="1" parent="1" source="BRH9kT0y2_Ae_BUota2q-40" target="BRH9kT0y2_Ae_BUota2q-42">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="940" y="440" as="sourcePoint" />
<mxPoint x="950" y="400" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-44" value="" style="endArrow=classic;html=1;rounded=0;entryX=0;entryY=0;entryDx=0;entryDy=0;exitX=1;exitY=1;exitDx=0;exitDy=0;fontSize=24;fontFamily=Verdana;strokeWidth=2;strokeColor=#26a269;" edge="1" parent="1" source="BRH9kT0y2_Ae_BUota2q-41" target="BRH9kT0y2_Ae_BUota2q-42">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="950" y="355" as="sourcePoint" />
<mxPoint x="918" y="412" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-45" value="Z" style="ellipse;whiteSpace=wrap;html=1;aspect=fixed;fontSize=24;horizontal=1;fontFamily=Verdana;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="1">
<mxGeometry x="560" y="70" width="60" height="60" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-46" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Evidence&lt;/font&gt;" style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=24;fontFamily=Times New Roman;" vertex="1" parent="1">
<mxGeometry x="630" y="85" width="250" height="30" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-47" value="" style="endArrow=classic;html=1;rounded=0;exitX=0.5;exitY=1;exitDx=0;exitDy=0;fontSize=24;fontFamily=Verdana;strokeColor=#e01b24;strokeWidth=2;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="560" y="209" as="sourcePoint" />
<mxPoint x="620" y="209" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-48" value="" style="endArrow=classic;html=1;rounded=0;fontSize=24;fontFamily=Verdana;strokeColor=#26a269;strokeWidth=2;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="560" y="160" as="sourcePoint" />
<mxPoint x="620" y="160" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-51" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Active trail&lt;br style=&quot;font-size: 24px;&quot;&gt;&lt;/font&gt;" style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=24;fontFamily=Times New Roman;" vertex="1" parent="1">
<mxGeometry x="630" y="140" width="250" height="40" as="geometry" />
</mxCell>
<mxCell id="BRH9kT0y2_Ae_BUota2q-52" value="Non-&lt;font style=&quot;font-size: 24px;&quot;&gt;active trail&lt;br style=&quot;font-size: 24px;&quot;&gt;&lt;/font&gt;" style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=24;fontFamily=Times New Roman;" vertex="1" parent="1">
<mxGeometry x="630" y="190" width="250" height="40" as="geometry" />
</mxCell>
</root>
</mxGraphModel>
</diagram>
</mxfile>

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,196 @@
\chapter{Approximate inference}
\begin{description}
\item[Stochastic simulation] \marginnote{Stochastic simulation}
Class of methods that draw $N$ samples from the distribution
and estimate an approximate posterior $\hat{\mathcal{P}}$.
\begin{description}
\item[$\delta$-stochastic absolute approximation]
Given $\delta \in ]0, 0.5[$ and $\varepsilon \in ]0, 0.5[$, a $\delta$-stochastic absolute approximation has error:
\[ \left\vert \prob{X | \matr{E}} - \hat{\mathcal{P}}(X | \matr{E}) \right\vert \leq \varepsilon \]
Moreover, the method might fail (with greater error) with probability $\delta$.
\item[$\delta$-stochastic relative approximation]
Given $\delta \in ]0, 0.5[$ and $\varepsilon \in ]0, 0.5[$, a $\delta$-stochastic relative approximation has error:
\[ \frac{\left\vert \prob{X | \matr{E}} - \hat{\mathcal{P}}(X | \matr{E}) \right\vert}{\prob{X | \matr{E}}} \leq \varepsilon \]
Moreover, the method might fail (with greater error) with probability $\delta$.
\end{description}
\begin{theorem}
Approximate inference is NP-hard for any $\delta, \varepsilon < 0.5$.
\end{theorem}
\item[Consistency] \marginnote{Consistency}
A sampling method is consistent if:
\[ \lim_{N \rightarrow \infty} \hat{\mathcal{P}}(x) = \prob{x} \]
\end{description}
\section{Sampling from an empty network}
\marginnote{Sampling from an empty network}
Sample each variable in topological order (i.e. from parents to children).
The probability $\mathcal{S}$ of sampling a specific event $x_1, \dots, x_n$ is given by the
probability of the single events knowing their parents:
\[ \mathcal{S}(x_1, \dots, x_n) = \prod_{i=1}^n \prob{x_i | \texttt{parents}(X_i)} = \prob{x_1, \dots, x_n} \]
\begin{theorem}
Sampling from an empty network is consistent.
\begin{proof}
Let $N$ be the number of samples and
$\mathcal{N}(x_1, \dots, x_n)$ the number of times the event $x_1, \dots, x_n$ has been sampled.
\[
\begin{split}
\lim_{N \rightarrow \infty} \hat{\mathcal{P}}(x_1, \dots, x_n) &=
\lim_{N \rightarrow \infty} \frac{\mathcal{N}(x_1, \dots, x_n)}{N} \\
&= \mathcal{S}(x_1, \dots, x_n) =
\prob{x_1, \dots, x_n}
\end{split}
\]
\end{proof}
\end{theorem}
\begin{example}
Given the following Bayesian network:
\begin{center}
\includegraphics[width=0.5\textwidth]{img/_approx_infer_example.pdf}
\end{center}
A possible sampling order is \texttt{Cloudy}, \texttt{Sprinkler}, \texttt{Rain}, \texttt{WetGrass}.
Assuming that a random generator gives the sequence of probabilities $(0.4, 0.8, 0.1, 0.5)$,
the sample will be:
\[ \langle \prob{C}, \prob{S | C}, \prob{R | C}, \prob{W | S, R} \rangle \]
\[ \langle C=\texttt{false}, \prob{S | C=\texttt{false}}, \prob{R | C=\texttt{false}}, \prob{W | S, R} \rangle \]
\[ \langle C=\texttt{false}, S=\texttt{false}, R=\texttt{true}, \prob{W | S=\texttt{false}, R=\texttt{true}} \rangle \]
\[ \langle C=\texttt{false}, S=\texttt{false}, R=\texttt{true}, W=\texttt{true} \rangle \]
Note that the adopted convention is the following:
if $r$ is the value given by a random generator and $\prob{X} = p$, then $X = \texttt{true}$ if $r \leq p$.
\end{example}
\section{Rejection sampling}
\marginnote{Rejection sampling}
Given a known evidence $\matr{E}$, rejection sampling works as sampling from an empty network
but removes any sample that does not agree with the evidence.
Obviously if $\prob{\matr{E}}$ is low, the majority of the samples will be discarded and
more iterations are required to reach the desired number of samples.
\begin{theorem}
Rejection sampling is consistent.
\begin{proof}
Let $\mathcal{N}(\matr{X})$ be the number of times the event $\matr{X}$ has been sampled.
\[
\begin{split}
\hat{\mathcal{P}}(\matr{X} | \matr{E}) &=
\frac{\mathcal{N}(\matr{X}, \matr{E})}{\mathcal{N}(\matr{E})} \\
&\approx \frac{\prob{\matr{X}, \matr{E}}}{\prob{\matr{E}}} =
\prob{\matr{X} | \matr{E}}
\end{split}
\]
The approximation derives from the consistency of sampling from an empty network.
\end{proof}
\end{theorem}
\section{Likelihood weighting}
\marginnote{Likelihood weighting}
Given a known evidence $\matr{E}$, likelihood weighting samples non-evidence variables and
weights each sample by the likelihood of the evidence.
The probability $\mathcal{S}$ of sampling a specific event $\matr{Z}$ and evidence $\matr{E}$ is given by the
probability of the single events in $\matr{Z}$ knowing their parents:
\[ \mathcal{S}(\matr{Z}, \matr{E}) = \prod_{z_i \in \matr{Z}} \prob{z_i | \texttt{parents}(z_i)} \]
The weight of a sample $(\matr{Z}, \matr{E})$ is given by the
probability of the single events in $\matr{E}$ knowing their parents:
\[ \text{w}(\matr{Z}, \matr{E}) = \prod_{e_i \in \matr{E}} \prob{e_i | \texttt{parents}(e_i)} \]
\begin{theorem}
Likelihood weighting is consistent.
\begin{proof}
The weighted sampling probability is given by:
\[
\begin{split}
\mathcal{S}(\matr{Z}, \matr{E}) \cdot \text{w}(\matr{Z}, \matr{E})
&= \prod_{z_i \in \matr{Z}} \prob{z_i | \texttt{parents}(z_i)} \cdot \prod_{e_i \in E} \prob{e_i | \texttt{parents}(e_i)} \\
&= \prob{\matr{Z}, \matr{E}}
\end{split}
\]
This is a consequence of the global semantics of Bayesian networks.
\end{proof}
\end{theorem}
\begin{example}
Given the following Bayesian network:
\begin{center}
\includegraphics[width=0.5\textwidth]{img/_approx_infer_example.pdf}
\end{center}
Knowing that $S=\texttt{true}$ and $W=\texttt{false}$,
we sample in the order: \texttt{Cloudy}, \texttt{Rain}.
Assuming that a random generator gives the sequence of probabilities $(0.4, 0.1)$,
the sample will be:
\[ \langle \prob{C}, S=\texttt{true}, \prob{R | C}, W=\texttt{false} \rangle \]
\[ \langle C=\texttt{true}, S=\texttt{true}, \prob{R | C=\texttt{true}}, W=\texttt{false} \rangle \]
\[ \langle C=\texttt{true}, S=\texttt{true}, R=\texttt{true}, W=\texttt{false} \rangle \]
The weight associated to the sample is given by the probability of the evidence:
\[
\begin{split}
\text{w} &= \prob{S=\texttt{true} | C=\texttt{true}} \cdot \prob{W=\texttt{false} | S=\texttt{true}, R=\texttt{true}} \\
&= 0.1 \cdot (1 - 0.99) = 0.001
\end{split}
\]
\end{example}
\section{Markov chain Monte Carlo}
\marginnote{Markov chain Monte Carlo}
Sampling on a Markov chain where states contain an assignment to all variables.
Adjacent states of the Markov chain differ by only one variable.
Therefore, the probability of an edge connecting two states is given by the probability of the updated variable given its Markov blanket:
\[
\prob{x_i | \texttt{markov\_blanket}(X_i)} \propto
\prob{x_i | \texttt{parents}(X_i)} \cdot \prod_{Z_j \in \texttt{children}(X_i)} \prob{z_j | \texttt{parents}(Z_j)}
\]
\begin{theorem}
Markov chain Monte Carlo is consistent.
Note: nevertheless, it is difficult to tell if convergence has been achieved.
\begin{proof}
Consequence of the fact that a long-run on a Markov chain converges to the posterior probability of the states.
\end{proof}
\end{theorem}
\begin{description}
\item[Compiled network]
A naive implementation of Markov chain Monte Carlo requires to repeatedly compute the probabilities with the Markov blanket.
A solution is to compile the network into a model-specific inference code.
\end{description}
\begin{example}
Given the evidence $S=\texttt{true}$ and $W=\texttt{true}$,
the structure of the Markov chain can be defined as follows:
\begin{center}
\includegraphics[width=0.5\textwidth]{img/_markov_chain_sampling.pdf}
\end{center}
\end{example}

View File

@ -0,0 +1,557 @@
\chapter{Bayesian networks}
\section{Bayes' rule}
\begin{description}
\item[Bayes' rule] \marginnote{Bayes' rule}
\[ \prob{a \,\vert\, b} = \frac{\prob{b \,\vert\, a} \prob{a}}{\prob{b}} \]
\item[Bayes' rule and conditional independence]
Given the random variables $\texttt{Cause}$ and\\
$\texttt{Effect}_1, \dots, \texttt{Effect}_n$, with the $\texttt{Effect}_i$ conditionally independent from each other given $\texttt{Cause}$,
we can compute $\textbf{P}(\texttt{Cause}, \texttt{Effect}_1, \dots, \texttt{Effect}_n)$ as follows:
\[
\textbf{P}(\texttt{Cause}, \texttt{Effect}_1, \dots, \texttt{Effect}_n) =
\left(\prod_i \textbf{P}(\texttt{Effect}_i \,\vert\, \texttt{Cause})\right) \textbf{P}(\texttt{Cause})
\]
The number of parameters is linear.
\begin{example}
Knowing that $\textbf{P} \models (\texttt{Catch} \perp \texttt{Toothache} \vert \texttt{Cavity})$:
\[
\begin{split}
\textbf{P}&(\texttt{Cavity} \,\vert\, \texttt{toothache} \land \texttt{catch}) \\
&= \alpha\textbf{P}(\texttt{toothache} \land \texttt{catch} \,\vert\, \texttt{Cavity})\textbf{P}(\texttt{Cavity}) \\
&= \alpha\textbf{P}(\texttt{toothache} \,\vert\, \texttt{Cavity})
\textbf{P}(\texttt{catch} \,\vert\, \texttt{Cavity})\textbf{P}(\texttt{Cavity}) \\
\end{split}
\]
\end{example}
\end{description}
\section{Bayesian network reasoning}
\begin{description}
\item[Bayesian network] \marginnote{Bayesian network}
Graph for conditional independence assertions and a compact specification of full joint distributions.
\begin{itemize}
\item Directed acyclic graph.
\item Nodes represent variables.
\item The conditional distribution of a node is given by its parents
\[ \textbf{P}(X_i \,\vert\, \texttt{parents}(X_i)) \]
In other words, if there is an edge from $A$ to $B$, then $A$ (cause) influences $B$ (effect).
\end{itemize}
\begin{description}
\item[Conditional probability table (CPT)] \marginnote{Conditional probability table (CPT)}
In the case of boolean variables, the conditional distribution of a node can be represented using
a table by considering all the combinations of the parents.
\begin{example}
Given the boolean variables $A$, $B$ and $C$, with $C$ depending on $A$ and $B$, we have that:\\
\begin{minipage}{.48\linewidth}
\centering
\includegraphics[width=0.35\linewidth]{img/_cpt_graph.pdf}
\end{minipage}
\begin{minipage}{.48\linewidth}
\centering
\begin{tabular}{c|c|c|c}
A & B & $\prob{c \vert A, B}$ & $\prob{\lnot c \vert A, B}$ \\
\hline
a & b & $\alpha$ & $1-\alpha$ \\
$\lnot$a & b & $\beta$ & $1-\beta$ \\
a & $\lnot$b & $\gamma$ & $1-\gamma$ \\
$\lnot$a & $\lnot$b & $\delta$ & $1-\delta$ \\
\end{tabular}
\end{minipage}
\end{example}
\end{description}
\item[Reasoning patterns] \marginnote{Reasoning patterns}
Given a Bayesian network, the following reasoning patterns can be used:
\begin{descriptionlist}
\item[Causal] \marginnote{Causal reasoning}
To make a prediction. From the cause, derive the effect.
\begin{example}
Knowing $\texttt{Intelligence}$, it is possible to make a prediction of $\texttt{Letter}$.
\begin{center}
\includegraphics[width=0.5\linewidth]{img/_causal_example.pdf}
\end{center}
\end{example}
\item[Evidential] \marginnote{Evidential reasoning}
To find an explanation. From the effect, derive the cause.
\begin{example}
Knowing $\texttt{Grade}$, it is possible to explain it by estimating\\$\texttt{Intelligence}$.
\begin{center}
\includegraphics[width=0.7\linewidth]{img/_evidential_example.pdf}
\end{center}
\end{example}
\item[Explain away] \marginnote{Explain away reasoning}
Observation obtained ``passing through'' other observations.
\begin{example}
Knowing $\texttt{Difficulty}$ and $\texttt{Grade}$,
it is possible to estimate \\$\texttt{Intelligence}$.
Note that if $\texttt{Grade}$ was not known,
$\texttt{Difficulty}$ and $\texttt{Intelligence}$ would have been independent.
\begin{center}
\includegraphics[width=0.75\linewidth]{img/_explainaway_example.pdf}
\end{center}
\end{example}
\end{descriptionlist}
\item[Independence] \marginnote{Bayesian network independence}
Intuitively, an effect is independent from a cause,
if there is another cause in the middle whose value is already known.
\begin{example}
\phantom{}
\begin{minipage}{.3\linewidth}
\centering
\includegraphics[width=0.85\linewidth]{img/_independence_example.pdf}
\end{minipage}
\begin{minipage}{.6\linewidth}
\[ \textbf{P} \models (\texttt{L} \perp \texttt{D}, \texttt{I}, \texttt{S} \,\vert\, \texttt{G}) \]
\[ \textbf{P} \models (\texttt{S} \perp \texttt{L} \,\vert\, \texttt{G}) \]
\[ \textbf{P} \models (\texttt{S} \perp \texttt{D}) \text{ but }
\textbf{P} \models (\texttt{S} \,\cancel{\perp}\, \texttt{D} \,\vert\, \texttt{G}) \text{ (explain away)} \]
\end{minipage}
\end{example}
\item[V-structure] \marginnote{V-structure}
Effect with two causes.
If the effect is not in the evidence, the causes are independent.
\begin{figure}[H]
\centering
\includegraphics[width=0.2\textwidth]{img/_v_structure.pdf}
\caption{V-structure}
\end{figure}
\item[Active two-edge trail] \marginnote{Active two-edge trail}
The trail $X \leftrightharpoons Z \leftrightharpoons Y$ is active if either:
\begin{itemize}
\item $X$, $Z$, $Y$ is a v-structure $X \rightarrow Z \leftarrow Y$
and $Z$ or one of its children is in the evidence.
\item $Z$ is not in the evidence.
\end{itemize}
In other words, influence can flow from $X$ to $Y$ passing by $Z$.
\begin{figure}[h]
\centering
\includegraphics[width=0.65\textwidth]{img/_active_trail.pdf}
\caption{Example of active and non-active two-edge trails}
\end{figure}
\item[Active trail] \marginnote{Active trail}
A trail $X_1 \leftrightharpoons \dots \leftrightharpoons X_n$ is active iff
each two-edge trail $X_{i-1} \leftrightharpoons X_i \leftrightharpoons X_{i+1}$ along the trail is active.
\item[D-separation] \marginnote{D-separation}
Two sets of nodes $\vec{X}$ and $\vec{Y}$ are d-separated given the evidence $\vec{Z}$ if
there is no active trail between any $X \in \vec{X}$ and $Y \in \vec{Y}$.
\begin{theorem}
Two d-separated nodes are independent.
In other words, two nodes are independent if there are no active trails between them.
\end{theorem}
\item[Independence algorithm] \phantom{}
\begin{description}
\item[Blocked node]
A node is blocked if it blocks the flow.
This happens if one and only one of the following conditions is met:
\begin{itemize}
\item The node is in the middle of an unmarked v-structure.
\item The node is in the evidence.
\end{itemize}
\end{description}
To determine if $X \perp Y$ given the evidence $\vec{Z}$:
\begin{enumerate}
\item Traverse the graph bottom-up marking all nodes in $\vec{Z}$ or
having a child in $\vec{Z}$.
\item Find a path from $X$ to $Y$ that does not pass through a blocked node.
\item If $Y$ is not reachable from $X$, then $X$ and $Y$ are independent.
Otherwise $X$ and $Y$ are dependent.
\end{enumerate}
\begin{example}
To determine if $J \perp D$:
\begin{center}
\includegraphics[width=0.5\textwidth]{img/_d_sep_example.pdf}
\end{center}
As a path has been found, $J \,\cancel{\perp}\, D$.
\end{example}
\item[Global semantics] \marginnote{Global semantics}
Given a Bayesian network, the full joint distribution can be defined as
the product of the local conditional distributions:
\[ \prob{x_1, \dots, x_n} = \prod_{i=1}^{n} \prob{x_i \,\vert\, \texttt{parents}(X_i)} \]
\begin{example}
Given the following Bayesian network:
\begin{minipage}{.3\linewidth}
\centering
\includegraphics[width=0.7\linewidth]{img/_global_semantics_example.pdf}
\end{minipage}
\begin{minipage}{.6\linewidth}
\[
\begin{split}
&\prob{j \land m \land a \land \lnot b \land \lnot e} \\
&= \prob{\lnot b} \prob{\lnot e} \prob{a \,\vert\, \lnot b, \lnot e}
\prob{j \,\vert\, a} \prob{m \,\vert\, a}
\end{split}
\]
\end{minipage}
\end{example}
\item[Local semantics]
Each node is conditionally independent of its non-descendants given its parents.
\begin{figure}[h]
\centering
\includegraphics[width=0.35\textwidth]{img/_local_independence.pdf}
\caption{Local independence}
\end{figure}
\begin{theorem}
Local semantics $\iff$ Global semantics
\end{theorem}
\item[Markov blanket]
Each node is conditionally independent of all the other nodes
if its Markov blanket (parents, children, children's parents) is in the evidence.
\begin{figure}[h]
\centering
\includegraphics[width=0.35\textwidth]{img/_markov_blanket.pdf}
\caption{Markov blanket}
\end{figure}
\end{description}
\section{Building Bayesian networks}
\subsection{Algorithm}
The following algorithm can be used to construct a Bayesian network of $n$ random variables:
\begin{enumerate}
\item Choose an ordering of the variables $X_1, \dots, X_n$.
\item For $i=1, \dots, n$:
\begin{itemize}
\item Add $X_i$ to the network.
\item Select the parents of $X_i$ from $X_1, \dots, X_{i-1}$ such that:
\[ \textbf{P}(X_i \,\vert\, \texttt{parents}(X_i)) =
\textbf{P}(X_i \,\vert\, X_1, \dots, X_{i-1}) \]
\end{itemize}
\end{enumerate}
By construction, this algorithm guarantees the global semantics.
\begin{example}[Monty Hall]
The variables are:
\begin{itemize}
\item $G$: the choice of the guest.
\item $H$: the choice of the host.
\item $P$: the position of the prize.
\end{itemize}
Note that $P \perp G$.
Let the order be fixed as follows: $P$, $G$, $H$.
\begin{figure}[h]
\begin{subfigure}{.3\textwidth}
\centering
\includegraphics[width=0.15\linewidth]{img/_monty_hall1.pdf}
\caption{First interaction}
\end{subfigure}
\begin{subfigure}{.3\textwidth}
\centering
\includegraphics[width=0.45\linewidth]{img/_monty_hall2.pdf}
\caption{Second interaction (note that $P \perp G$)}
\end{subfigure}
\begin{subfigure}{.3\textwidth}
\centering
\includegraphics[width=0.45\linewidth]{img/_monty_hall3.pdf}
\caption{Third interaction}
\end{subfigure}
\end{figure}
\end{example}
The nodes of the resulting network can be classified as:
\begin{descriptionlist}
\item[Initial evidence] The initial observation.
\item[Testable variables] Variables that can be verified.
\item[Operable variables] Variables that can be changed by intervening on them.
	\item[Hidden variables] Variables that ``compress'' more variables to reduce the parameters.
\end{descriptionlist}
\begin{example} \phantom{}\\
\begin{minipage}{.4\linewidth}
\begin{description}
\item[Initial evidence] Red.
\item[Testable variables] Green.
\item[Operable variables] Orange.
\item[Hidden variables] Gray.
\end{description}
\end{minipage}
\begin{minipage}{.5\linewidth}
\begin{center}
\includegraphics[width=\linewidth]{img/_car_example.pdf}
\end{center}
\end{minipage}
\end{example}
\subsection{Structure learning}
\marginnote{Structure learning}
Learn the network from the available data.
\begin{description}
\item[Constraint-based]
Independence tests to identify the constraints of the edges.
\item[Score-based]
Define a score to evaluate the network.
\end{description}
\section{Causal networks}
When building a Bayesian network, a correct ordering of the nodes
that respects causality makes it possible to obtain more compact networks.
\begin{description}
\item[Structural equation] \marginnote{Structural equation}
Given a variable $X_i$ with values $x_i$, its structural equation is a function $f_i$
such that it represents all its possible values:
\[ x_i = f_i(\text{other variables}, U_i) \]
$U_i$ represents unmodeled variables or error terms.
\item[Causal network] \marginnote{Causal network}
Restricted class of Bayesian networks that only allows causally compatible ordering.
An edge exists between $X_j \rightarrow X_i$ iff $X_j$ is an argument of
the structural equation $f_i$ of $X_i$.
\begin{example} \phantom{}\\[0.5em]
\begin{minipage}{.3\linewidth}
\centering
\includegraphics[width=\linewidth]{img/_causal_network_example1.pdf}
\end{minipage}
\begin{minipage}{.6\linewidth}
The structural equations are:
\[
\begin{split}
\texttt{cloudy} &= f_C(U_C) \\
\texttt{sprinkler} &= f_S(\texttt{Cloudy}, U_S) \\
\texttt{rain} &= f_R(\texttt{Cloudy}, U_R) \\
\texttt{wet\_grass} &= f_W(\texttt{Sprinkler}, \texttt{Rain}, U_W) \\
\texttt{greener\_grass} &= f_G(\texttt{WetGrass}, U_G)
\end{split}
\]
\end{minipage}\\[0.5em]
If the sprinkler is disabled, the network becomes:\\[0.5em]
\begin{minipage}{.3\linewidth}
\centering
\includegraphics[width=\linewidth]{img/_causal_network_example2.pdf}
\end{minipage}
\begin{minipage}{.6\linewidth}
The structural equations become:
\[
\begin{split}
\texttt{cloudy} &= f_C(U_C) \\
\texttt{sprinkler} &= f_S(U_S) \\
\texttt{rain} &= f_R(\texttt{Cloudy}, U_R) \\
\texttt{wet\_grass} &= f_W(\texttt{Rain}, U_W) \\
\texttt{greener\_grass} &= f_G(\texttt{WetGrass}, U_G)
\end{split}
\]
\end{minipage}
\end{example}
\item[do-operator] \marginnote{do-operator}
The do-operator allows to represent manual interventions on the network.
The operation $\texttt{do}(X_i = x_i)$ makes the structural equation of $X_i$
constant (i.e. $f_i = x_i$, without arguments, so there won't be inward edges to $X_i$).
\begin{example} \phantom{}\\[0.5em]
\begin{minipage}{.3\linewidth}
\centering
\includegraphics[width=\linewidth]{img/_do_operator_example1.pdf}
\end{minipage}
\begin{minipage}{.65\linewidth}
By applying $\texttt{do}(\texttt{Sprinkler} = \texttt{true})$, the structural equations become:
\[
\begin{split}
\texttt{cloudy} &= f_C(U_C) \\
\texttt{sprinkler} &= \texttt{true} \\
\texttt{rain} &= f_R(\texttt{Cloudy}, U_R) \\
\texttt{wet\_grass} &= f_W(\texttt{Sprinkler}, \texttt{Rain}, U_W) \\
\texttt{greener\_grass} &= f_G(\texttt{WetGrass}, U_G)
\end{split}
\]
\end{minipage}\\[0.5em]
\begin{minipage}{.3\linewidth}
\centering
\includegraphics[width=\linewidth]{img/_do_operator_example2.pdf}
\end{minipage}
\begin{minipage}{.65\linewidth}
Note that Bayesian networks are not capable of modelling manual interventions.
In fact, intervening and observing a variable are different concepts:
\[ \prob{\texttt{WetGrass} \mid \texttt{do}(\texttt{Sprinkler} = \texttt{true})} \]
\[ \neq \]
\[ \prob{\texttt{WetGrass} \mid \texttt{Sprinkler} = \texttt{true}} \]
\end{minipage}
\end{example}
\end{description}
\section{Compact conditional distributions}
Use canonical distributions (standard patterns) to reduce
the number of parameters in a conditional probability table.
\subsection{Noisy-OR}
\marginnote{Noisy-OR}
Noisy-OR distributions model a network of non-interacting causes with a common effect.
A node $X$ has $k$ parents $U_1, \dots, U_k$ and possibly a leak node $U_L$ to capture unmodeled concepts.
\begin{figure}[h]
\centering
\includegraphics[width=0.3\textwidth]{img/_noisy_or_example.pdf}
\caption{Example of noisy-OR network}
\end{figure}
Each node $U_i$ has a failure (inhibition) probability $q_i$:
\[ q_i = \prob{\lnot x \mid u_i, \lnot u_j \text{ for } j \neq i} \]
The CPT can be built by computing the probabilities as:
\[ \prob{\lnot x \mid \texttt{Parents($X$)}} = \prod_{j:\, U_j = \texttt{true}} q_j \]
In other words:
\[ \prob{\lnot x \mid u_1, \dots, u_n} =
\prob{\lnot x \mid u_1} \cdot \prob{\lnot x \mid u_2} \cdots \prob{\lnot x \mid u_n} \]
Because only the failure probabilities are required, the number of parameters is linear in the number of parents.
\begin{example}
We have as causes \texttt{Cold}, \texttt{Flu} and \texttt{Malaria} and as effect \texttt{Fever}.
For simplicity there are no leak nodes.
The failure probabilities are:
\[
\begin{split}
q_\texttt{cold} &= \prob{\lnot \texttt{fever} \mid \texttt{cold}, \lnot\texttt{flu}, \lnot\texttt{malaria}} = 0.6 \\
q_\texttt{flu} &= \prob{\lnot \texttt{fever} \mid \lnot\texttt{cold}, \texttt{flu}, \lnot\texttt{malaria}} = 0.2 \\
q_\texttt{malaria} &= \prob{\lnot \texttt{fever} \mid \lnot\texttt{cold}, \lnot\texttt{flu}, \texttt{malaria}} = 0.1
\end{split}
\]
Known the failure probabilities, the entire CPT can be computed:
\begin{center}
\begin{tabular}{c|c|c|rc|c}
\hline
\texttt{Cold} & \texttt{Flu} & \texttt{Malaria} & \multicolumn{2}{c|}{$\prob{\lnot\texttt{fever}}$} & $1-\prob{\lnot\texttt{fever}}$ \\
\hline
            F & F & F &                                                        & 1.0   & 0.0 \\
F & F & T & $q_\texttt{malaria} =$ & 0.1 & 0.9 \\
F & T & F & $q_\texttt{flu} =$ & 0.2 & 0.8 \\
F & T & T & $q_\texttt{flu} \cdot q_\texttt{malaria} =$ & 0.02 & 0.98 \\
T & F & F & $q_\texttt{cold} =$ & 0.6 & 0.4 \\
T & F & T & $q_\texttt{cold} \cdot q_\texttt{malaria} =$ & 0.06 & 0.94 \\
T & T & F & $q_\texttt{cold} \cdot q_\texttt{flu} =$ & 0.12 & 0.88 \\
T & T & T & $q_\texttt{cold} \cdot q_\texttt{flu} \cdot q_\texttt{malaria} =$ & 0.012 & 0.988 \\
\hline
\end{tabular}
\end{center}
\end{example}
\subsection{Hybrid Bayesian networks}
\marginnote{Hybrid Bayesian networks}
Network with discrete and continuous random variables.
Continuous variables must be converted into a finite representation.
Possible approaches are:
\begin{description}
\item[Discretization] \marginnote{Discretization}
Values are divided into a fixed set of intervals.
This approach may introduce large errors and large CPTs.
\item[Finitely parametrized canonical families] \marginnote{Finitely parametrized canonical families}
There are two cases to handle using this approach:
\begin{descriptionlist}
\item[Continuous child]
Given the continuous variables $X$ and $C$ and a discrete (boolean, for simplicity) variable $D$,
we want to compute the distribution $\textbf{P}(X \mid C, D)$.
The discrete parent is handled by enumeration, by computing the probability over the domain of $D$.
For the continuous parent, an arbitrarily chosen distribution over the values of $X$ is used.
A common choice is the \textbf{linear Gaussian} \marginnote{Linear Gaussian}
whose mean is a linear combination of the values of the parents and the variance is fixed.
A network with all continuous linear Gaussian distributions has the property
of having a multivariate Gaussian distribution as joint distribution.
Moreover, if a continuous variable has some discrete parents, it defines a conditional Gaussian distribution
where, fixed the values of the discrete variables, the distribution over the continuous variable is a multivariate Gaussian.
\begin{example}
Let \texttt{Subsidy} and \texttt{Buys} be discrete variables and
\texttt{Harvest} and \texttt{Cost} be continuous variables.
\begin{center}
\includegraphics[width=0.3\textwidth]{img/_linear_gaussian_example.pdf}
\end{center}
To compute $\textbf{P}(\texttt{Cost} \mid \texttt{Harvest}, \texttt{Subsidy})$,
we split the probabilities over the values of the discrete variable \texttt{Subsidy}
and use a linear Gaussian for \texttt{Harvest}.
We therefore have that:
\[
\begin{split}
\prob{\texttt{C} = \texttt{c} \mid \texttt{Harvest} = \texttt{h}, \texttt{Subsidy} = \texttt{true}}
&= \mathcal{N}(a_t h + b_t, \sigma_t)(c) \\
\prob{\texttt{C} = \texttt{c} \mid \texttt{Harvest} = \texttt{h}, \texttt{Subsidy} = \texttt{false}}
&= \mathcal{N}(a_f h + b_f, \sigma_f)(c)
\end{split}
\]
                where $a_t$, $b_t$, $\sigma_t$, $a_f$, $b_f$ and $\sigma_f$ are parameters.
\end{example}
\item[Discrete child with continuous parents]
Given the continuous variable $C$ and a discrete variable $X$,
        the probability of $X$ given $C$ is obtained by using a threshold function.
For instance, probit or sigmoid distributions can be used.
\end{descriptionlist}
\end{description}
\subsection{Other methods}
\begin{description}
\item[Dynamic Bayesian network] \marginnote{Dynamic Bayesian network}
Useful to model the evolution through time.
A template variable $X_i$ is instantiated as $X_i^{(t)}$ at each time step.
\begin{figure}[h]
\centering
\includegraphics[width=0.3\textwidth]{img/_dynamic_bn_example.pdf}
\caption{Example of dynamic Bayesian network}
\end{figure}
\item[Density estimation] \marginnote{Density estimation}
Parameters of the conditional distribution can be learned using:
\begin{description}
\item[Bayesian learning] calculate the probability of each hypothesis.
\item[Approximations] using the maximum-a-posteriori and maximum-likelihood hypothesis.
\item[Expectation-maximization algorithm{\normalfont.}]
\end{description}
\item[Undirected graphical models] \marginnote{Undirected graphical models}
Markov networks are an alternative to probabilistic graphical models (as Bayesian networks).
Markov networks are undirected graphs with factors (instead of probabilities) and
are able to naturally capture independence relations.
\end{description}

View File

@ -0,0 +1,118 @@
\chapter{Exact inference}
\section{Inference by enumeration}
\marginnote{Inference by enumeration}
Method to sum out a joint probability without explicitly representing it
by using CPT entries.
Enumeration follows a depth-first exploration and has a space complexity of $O(n)$
and time complexity of $O(d^n)$.
It must be noted that some probabilities appear multiple times but
require to be recomputed because of the definition of the algorithm.
\begin{example}[Burglary]
Given the Bayesian network:
\begin{center}
\includegraphics[width=0.15\textwidth]{img/_burglary_net.pdf}
\end{center}
We want to compute $\textbf{P}(B \mid j, m)$:
\[
\begin{split}
\textbf{P}(B \mid j, m) &= \frac{\textbf{P}(B, j, m)}{\prob{j, m}} \\
&= \alpha \textbf{P}(B, j, m) \\
&= \alpha \sum_{e} \sum_{a} \textbf{P}(B, j, m, e, a) \\
&= \alpha \sum_{e} \sum_{a} \textbf{P}(B) \prob{e} \textbf{P}(a \mid B, e) \prob{j \mid a} \prob{m \mid a} \\
&= \alpha \textbf{P}(B) \sum_{e} \prob{e} \sum_{a} \textbf{P}(a \mid B, e) \prob{j \mid a} \prob{m \mid a} \\
\end{split}
\]
This can be represented using a tree:
\begin{center}
\includegraphics[width=0.75\textwidth]{img/_burglary_enumeration.pdf}
\end{center}
\end{example}
\section{Inference by variable elimination}
\marginnote{Inference by variable elimination}
Method that carries out summations right-to-left and stores intermediate results (called factors).
\begin{description}
\item[Pointwise product of factors] $f(X, Y) \times g(Y, Z) = p(X, Y, Z)$
\begin{figure}[h]
\centering
\includegraphics[width=0.5\textwidth]{img/_pointwise_factors.pdf}
\caption{Example of pointwise product}
\end{figure}
\item[Summing out]
To sum out a variable $X$ from a product of factors:
\begin{enumerate}
\item Move constant factors outside (i.e. factors that do not depend on $X$).
\item Compute the pointwise product of the remaining terms.
\end{enumerate}
\begin{example}
\[
\begin{split}
\sum_X f_1 \times \dots \times f_k &= f_1 \times \dots \times f_i \sum_X f_{i+1} \times \dots \times f_k \\
&= f_1 \times \dots \times f_i \times f_X
\end{split}
\]
\end{example}
\end{description}
\begin{example}[Burglary]
Given the Bayesian network:
\begin{center}
\includegraphics[width=0.15\textwidth]{img/_burglary_net.pdf}
\end{center}
We want to compute
$\textbf{P}(B \mid j, m) = \alpha \textbf{P}(B) \sum_{e} \prob{e} \sum_{a} \textbf{P}(a \mid B, e) \prob{j \mid a} \prob{m \mid a}$.
We first work on the summation on $A$.
We introduce as factors the entries of the CPT:
\[ \textbf{P}(B \mid j, m) = \alpha \textbf{P}(B) \sum_{e} \prob{e} \sum_{a} f_A(a, b, e) f_J(a) f_M(a) \]
Note that $j$ and $m$ are not parameters of the factors $f_J$ and $f_M$ because they are already given.
We then sum out on $A$:
\[ \textbf{P}(B \mid j, m) = \alpha \textbf{P}(B) \sum_{e} \prob{e} f_{AJM}(b, e) \]
Now, we repeat the same process and sum out $E$:
\[ \textbf{P}(B \mid j, m) = \alpha \textbf{P}(B) f_{EAJM}(b) \]
At last, we factor $\textbf{P}(B)$:
\[ \textbf{P}(B \mid j, m) = \alpha f_B(b) f_{EAJM}(b) \]
\end{example}
\subsection{Irrelevant variables}
\marginnote{Irrelevant variables}
A variable $X$ is irrelevant if summing over it results in a probability of $1$.
\begin{theorem}
Given a query $X$, the evidence $\matr{E}$ and a variable $Y$:
\[ Y \notin (\texttt{Ancestors($\{ X \}$)} \cup \texttt{Ancestors($\matr{E}$)}) \rightarrow Y \text{ is irrelevant} \]
\end{theorem}
\begin{theorem}
Given a query $X$, the evidence $\matr{E}$ and a variable $Y$:
\[ Y \text{ d-separated from } X \text{ by } \matr{E} \rightarrow Y \text{ is irrelevant} \]
\end{theorem}
\subsection{Complexity}
\begin{description}
\item[Singly connected networks]
Network where any two nodes are connected with at most one undirected path.
Time and space complexity is $O(d^k n)$.
\item[Multiply connected networks] The problem is NP-hard.
\end{description}
\section{Clustering algorithm}
\marginnote{Clustering algorithm}
Method that joins individual nodes to form clusters.
It allows estimating the posterior probabilities of $n$ variables with complexity $O(n)$.

View File

@ -0,0 +1,48 @@
\chapter{Introduction}
\section{Uncertainty}
\begin{description}
\item[Uncertainty] \marginnote{Uncertainty}
A task is uncertain if it has:
\begin{itemize}
\item Partial observations
\item Noisy or wrong information
\item Uncertain outcomes of the actions
\item Complex models
\end{itemize}
A purely logic approach leads to:
\begin{itemize}
\item Risks falsehood: unreasonable conclusion when applied in practice.
\item Weak decisions: too many conditions required to make a conclusion.
\end{itemize}
\end{description}
\subsection{Handling uncertainty}
\begin{descriptionlist}
\item[Default/non-monotonic logic] \marginnote{Default/non-monotonic logic}
Works on assumptions.
An assumption can be contradicted by the evidence.
\item[Rule-based systems with fudge factors] \marginnote{Rule-based systems with fudge factors}
Formulated as premise $\rightarrow_\text{prob.}$ effect.
Have the following issues:
\begin{itemize}
            \item Locality: how can the probability account for all the evidence?
\item Combination: chaining of unrelated concepts.
\end{itemize}
\item[Probability] \marginnote{Probability}
Assign a probability given the available known evidence.
Note: fuzzy logic handles the degree of truth and not the uncertainty.
\end{descriptionlist}
\begin{description}
\item[Decision theory] \marginnote{Decision theory}
Defined as:
\[ \text{Decision theory} = \text{Utility theory} + \text{Probability theory} \]
where the utility theory depends on one's preferences.
\end{description}

View File

@ -0,0 +1,236 @@
\chapter{Probability}
\begin{description}
\item[Sample space] \marginnote{Sample space}
Set $\Omega$ of all possible worlds.
\begin{descriptionlist}
\item[Event] \marginnote{Event}
Subset $A \subseteq \Omega$.
\item[Sample point/Possible world/Atomic event] \marginnote{Sample point}
Element $\omega \in \Omega$.
\end{descriptionlist}
\item[Probability space] \marginnote{Probability space}
A probability space/model is a function $\prob{\cdot}: \Omega \rightarrow [0, 1]$ assigned to a sample space such that:
\begin{itemize}
\item $0 \leq \prob{\omega} \leq 1$
\item $\sum_{\omega \in \Omega} \prob{\omega} = 1$
\item $\prob{A} = \sum_{\omega \in A} \prob{\omega}$
\end{itemize}
\item[Random variable] \marginnote{Random variable}
A function from an event to some range (e.g. reals, booleans, \dots).
\item[Probability distribution] \marginnote{Probability distribution}
For any random variable $X$:
\[ \prob{X = x_i} = \sum_{\omega \text{ s.t. } X(\omega)=x_i} \prob{\omega} \]
\item[Proposition] \marginnote{Proposition}
Event where a random variable has a certain value.
\[ a = \{ \omega \,\vert\, A(\omega) = \texttt{true} \} \]
\[ \lnot a = \{ \omega \,\vert\, A(\omega) = \texttt{false} \} \]
        \[ (\texttt{Weather} = \texttt{rain}) = \{ \omega \,\vert\, \texttt{Weather}(\omega) = \texttt{rain} \} \]
\item[Prior probability] \marginnote{Prior probability}
Prior/unconditional probability of a proposition based on known evidence.
\item[Probability distribution (all)] \marginnote{Probability distribution (all)}
Gives all the probabilities of a random variable.
\[ \textbf{P}(A) = \langle \prob{A=a_1}, \dots, \prob{A=a_n} \rangle \]
\item[Joint probability distribution] \marginnote{Joint probability distribution}
The joint probability distribution of a set of random variables gives
the probability of all the different combinations of their atomic events.
Note: Every question on a domain can, in theory, be answered using the joint distribution.
In practice, it is hard to apply.
\begin{example}
$\textbf{P}(\texttt{Weather}, \texttt{Cavity}) = $
\begin{center}
\small
\begin{tabular}{|c | c|c|c|c|}
\cline{2-5}
\multicolumn{1}{c|}{} & \texttt{Weather=sunny} & \texttt{Weather=rain} & \texttt{Weather=cloudy} & \texttt{Weather=snow} \\
\hline
\texttt{Cavity=true} & 0.144 & 0.02 & 0.016 & 0.02 \\
\hline
\texttt{Cavity=false} & 0.576 & 0.08 & 0.064 & 0.08 \\
\hline
\end{tabular}
\end{center}
\end{example}
\item[Probability density function] \marginnote{Probability density function}
The probability density function (PDF) of a random variable $X$ is a function $p: \mathbb{R} \rightarrow \mathbb{R}$
such that:
\[ \int_{\mathcal{T}_X} p(x) \,dx = 1 \]
\begin{descriptionlist}
\item[Uniform distribution] \marginnote{Uniform distribution}
\[
p(x) = \text{Unif}[a, b](x) =
\begin{cases}
\frac{1}{b-a} & a \leq x \leq b \\
0 & \text{otherwise}
\end{cases}
\]
\item[Gaussian (normal) distribution] \marginnote{Gaussian (normal) distribution}
\[ \mathcal{N}(\mu, \sigma^2) = \frac{1}{\sigma\sqrt{2\pi}}e^{\frac{-(x-\mu)^2}{2\sigma^2}} \]
$\mathcal{N}(0, 1)$ is the standard Gaussian.
\end{descriptionlist}
\item[Conditional probability] \marginnote{Conditional probability}
Probability of a prior knowledge with new evidence:
\[ \prob{a \vert b} = \frac{\prob{a \land b}}{\prob{b}} \]
The product rule gives an alternative formulation:
        \[ \prob{a \land b} = \prob{a \vert b}\prob{b} = \prob{b \vert a}\prob{a} \]
\begin{description}
\item[Chain rule] \marginnote{Chain rule}
Successive application of the product rule:
\[
\begin{split}
\textbf{P}(X_1, \dots, X_n) &= \textbf{P}(X_1, \dots, X_{n-1}) \textbf{P}(X_n \vert X_1, \dots, X_{n-1}) \\
&= \textbf{P}(X_1, \dots, X_{n-2}) \textbf{P}(X_{n-1} \vert X_1, \dots, X_{n-2}) \textbf{P}(X_n \vert X_1, \dots, X_{n-1}) \\
&= \prod_{i=1}^{n} \textbf{P}(X_i \vert X_1, \dots, X_{i-1})
\end{split}
\]
\end{description}
\item[Independence] \marginnote{Independence}
Two random variables $A$ and $B$ are independent ($A \perp B$) iff:
\[
\textbf{P}(A \vert B) = \textbf{P}(A) \,\text{ or }\,
\textbf{P}(B \vert A) = \textbf{P}(B) \,\text{ or }\,
\textbf{P}(A, B) = \textbf{P}(A)\textbf{P}(B)
\]
\item[Conditional independence] \marginnote{Conditional independence}
Two random variables $A$ and $B$ are conditionally independent iff:
\[ \textbf{P}(A \,\vert\, C, B) = \textbf{P}(A \,\vert\, C) \]
\end{description}
\section{Inference with full joint distributions}
Given a joint distribution, the probability of any proposition $\phi$
can be computed as the sum of the atomic events where $\phi$ is true:
\[ \prob{\phi} = \sum_{\omega:\, \omega \models \phi} \prob{\omega} \]
\begin{example}
Given the following joint distribution:
\begin{center}
\begin{tabular}{|c|c|c|c|c|}
\cline{2-5}
\multicolumn{1}{c|}{} & \multicolumn{2}{c|}{\texttt{toothache}} & \multicolumn{2}{c|}{$\lnot$\texttt{toothache}} \\
\cline{2-5}
\multicolumn{1}{c|}{} & \texttt{catch} & $\lnot$\texttt{catch} & \texttt{catch} & $\lnot$\texttt{catch} \\
\hline
\texttt{cavity} & 0.108 & 0.012 & 0.072 & 0.008 \\
\hline
$\lnot$\texttt{cavity} & 0.016 & 0.064 & 0.144 & 0.576 \\
\hline
\end{tabular}
\end{center}
We have that:
\begin{itemize}
\item $\prob{\texttt{toothache}} = 0.108 + 0.012 + 0.016 + 0.064 = 0.2$
\item $\prob{\texttt{cavity} \vee \texttt{toothache}} = 0.108 + 0.012 + 0.072 + 0.008 + 0.016 + 0.064 = 0.28$
\item $\prob{\lnot\texttt{cavity} \,\vert\, \texttt{toothache}} = \frac{\prob{\lnot\texttt{cavity} \land \texttt{toothache}}}{\prob{\texttt{toothache}}} =
\frac{0.016 + 0.064}{0.2} = 0.4$
\end{itemize}
\end{example}
\begin{description}
\item[Marginalization] \marginnote{Marginalization}
The probability that a random variable assumes a specific value is given by
        the sum of all the joint probabilities where that random variable assumes the given value.
\begin{example}
Given the joint distribution:
\begin{center}
\small
\begin{tabular}{|c | c|c|c|c|}
\cline{2-5}
\multicolumn{1}{c|}{} & \texttt{Weather=sunny} & \texttt{Weather=rain} & \texttt{Weather=cloudy} & \texttt{Weather=snow} \\
\hline
\texttt{Cavity=true} & 0.144 & 0.02 & 0.016 & 0.02 \\
\hline
\texttt{Cavity=false} & 0.576 & 0.08 & 0.064 & 0.08 \\
\hline
\end{tabular}
\end{center}
We have that $\prob{\texttt{Weather}=\texttt{sunny}} = 0.144 + 0.576$
\end{example}
\item[Conditioning] \marginnote{Conditioning}
Adding a condition to a probability (reduction and renormalization).
\item[Normalization] \marginnote{Normalization}
Given a conditional probability distribution $\textbf{P}(A \vert B)$,
it can be formulated as:
\[ \textbf{P}(A \vert B) = \alpha\textbf{P}(A, B) \]
where $\alpha$ is a normalization constant.
In fact, fixed the evidence $B$, the denominator to compute the conditional probability is the same for each probability.
\begin{example}
Given the joint distribution:
\begin{center}
\begin{tabular}{|c|c|c|c|c|}
\cline{2-5}
\multicolumn{1}{c|}{} & \multicolumn{2}{c|}{\texttt{toothache}} & \multicolumn{2}{c|}{$\lnot$\texttt{toothache}} \\
\cline{2-5}
\multicolumn{1}{c|}{} & \texttt{catch} & $\lnot$\texttt{catch} & \texttt{catch} & $\lnot$\texttt{catch} \\
\hline
\texttt{cavity} & 0.108 & 0.012 & 0.072 & 0.008 \\
\hline
$\lnot$\texttt{cavity} & 0.016 & 0.064 & 0.144 & 0.576 \\
\hline
\end{tabular}
\end{center}
We have that:
            \[
                \textbf{P}(\texttt{Cavity} \vert \texttt{toothache}) =
                \langle
                \frac{\prob{\texttt{cavity}, \texttt{toothache}}}{\prob{\texttt{toothache}}},
                \frac{\prob{\lnot\texttt{cavity}, \texttt{toothache}}}{\prob{\texttt{toothache}}}
                \rangle
            \]
\end{example}
\item[Probability query] \marginnote{Probability query}
Given a set of query variables $\bm{Y}$, the evidence variables $\vec{e}$ and the other hidden variables $\bm{H}$,
the probability of the query can be computed as:
\[
\textbf{P}(\bm{Y} \vert \bm{E}=\vec{e}) = \alpha \textbf{P}(\bm{Y}, \bm{E}=\vec{e})
= \alpha \sum_{\vec{h}} \textbf{P}(\bm{Y}, \bm{E}=\vec{e}, \bm{H}=\vec{h})
\]
The problem of this approach is that it has exponential time and space complexity
that makes it not applicable in practice.
To reduce the size of the variables, conditional independence can be exploited.
\begin{example}
Knowing that $\textbf{P} \models (\texttt{Catch} \perp \texttt{Toothache} \vert \texttt{Cavity})$,
we can compute the distribution $\textbf{P}(\texttt{Toothache}, \texttt{Catch}, \texttt{Cavity})$ as follows:
\[
\begin{split}
\textbf{P}&(\texttt{Toothache}, \texttt{Catch}, \texttt{Cavity}) = \\
&= \textbf{P}(\texttt{Toothache} \,\vert\, \texttt{Catch}, \texttt{Cavity})
\textbf{P}(\texttt{Catch} \,\vert\, \texttt{Cavity}) \textbf{P}(\texttt{Cavity}) \\
&= \textbf{P}(\texttt{Toothache} \,\vert\, \texttt{Cavity})
\textbf{P}(\texttt{Catch} \,\vert\, \texttt{Cavity}) \textbf{P}(\texttt{Cavity})
\end{split}
\]
            $\textbf{P}(\texttt{Toothache}, \texttt{Catch}, \texttt{Cavity})$ has 7 independent values, a number that grows exponentially
            ($2 \cdot 2 \cdot 2 = 8$ values, but one of them can be omitted as a probability distribution always sums up to 1).
$\textbf{P}(\texttt{Toothache} \,\vert\, \texttt{Cavity}) \textbf{P}(\texttt{Catch} \,\vert\, \texttt{Cavity}) \textbf{P}(\texttt{Cavity})$
            has 5 independent values, a number that grows linearly ($4 + 4 + 2 = 10$, but a value of $\textbf{P}(\texttt{Cavity})$ can be omitted.
The conditional probabilities require two tables (one for each prior) each with 2 values,
but for each table a value can be omitted, therefore requiring $2$ independent values per conditional probability instead of $4$).
\end{example}
\end{description}