From 76ce62296c3815c97526c792d5123cbdf02d92ea Mon Sep 17 00:00:00 2001
From: NotXia <35894453+NotXia@users.noreply.github.com>
Date: Mon, 10 Jun 2024 11:10:56 +0200
Subject: [PATCH] Fix Bellman equation

---
 .../cognition-and-neuroscience/module2/sections/_dopamine.tex | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/year1/cognition-and-neuroscience/module2/sections/_dopamine.tex b/src/year1/cognition-and-neuroscience/module2/sections/_dopamine.tex
index aca5394..75a6ee0 100644
--- a/src/year1/cognition-and-neuroscience/module2/sections/_dopamine.tex
+++ b/src/year1/cognition-and-neuroscience/module2/sections/_dopamine.tex
@@ -111,7 +111,7 @@
     \item[Bellman equation] \marginnote{Bellman equation}
         Given an action $a_t$ performed in the state $s_t$ following a policy $\pi$,
         the expected future reward is given by the following equation:
-        \[ Q_\pi(s_t, a_t) = r_t + \gamma \sum_{s_{t+1}} \prob{s_{t+1 | s_t, a_t}} Q_\pi(s_{t+1}, \pi(s_{t+1})) \]
+        \[ Q_\pi(s_t, a_t) = r_t + \gamma \sum_{s_{t+1}} \prob{s_{t+1} | s_t, a_t} Q_\pi(s_{t+1}, \pi(s_{t+1})) \]
         where $\gamma$ is a discount factor.
 \end{description}
 
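
Aside on the fix: with the closing brace moved, $\prob{s_{t+1} | s_t, a_t}$ reads as the transition probability of $s_{t+1}$ given $s_t$ and $a_t$, so the weighted sum is an expectation over next states. A minimal sketch of the same equation in standard expectation notation, assuming $P$ denotes the transition distribution (the notes' \prob macro is assumed to typeset a probability):

\[
    Q_\pi(s_t, a_t) = r_t + \gamma \, \mathbb{E}_{s_{t+1} \sim P(\cdot \mid s_t, a_t)} \bigl[ Q_\pi(s_{t+1}, \pi(s_{t+1})) \bigr]
\]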