diff --git a/src/cognition-and-neuroscience/module1/cn1.tex b/src/cognition-and-neuroscience/module1/cn1.tex
index 8616881..392a0ba 100644
--- a/src/cognition-and-neuroscience/module1/cn1.tex
+++ b/src/cognition-and-neuroscience/module1/cn1.tex
@@ -27,5 +27,7 @@
     \input{./sections/_introduction.tex}
     \input{./sections/_nervous_system.tex}
     \input{./sections/_rl.tex}
+    \input{./sections/_pavlovian_learning.tex}
+    \input{./sections/_instrumental_learning.tex}
 \end{document}
\ No newline at end of file
diff --git a/src/cognition-and-neuroscience/module1/img/dopamine_probability.png b/src/cognition-and-neuroscience/module1/img/dopamine_probability.png
index fe07379..fdb32ac 100644
Binary files a/src/cognition-and-neuroscience/module1/img/dopamine_probability.png and b/src/cognition-and-neuroscience/module1/img/dopamine_probability.png differ
diff --git a/src/cognition-and-neuroscience/module1/sections/_instrumental_learning.tex b/src/cognition-and-neuroscience/module1/sections/_instrumental_learning.tex
new file mode 100644
index 0000000..4cb7236
--- /dev/null
+++ b/src/cognition-and-neuroscience/module1/sections/_instrumental_learning.tex
@@ -0,0 +1,235 @@
+\chapter{Instrumental learning}
+
+
+Form of control learning that aims to learn action-outcome associations:
+\begin{itemize}
+    \item When a reinforcer is likely to occur.
+    \item Which actions lead to those reinforcers.
+\end{itemize}
+This allows the animal to act in anticipation of a reinforcer.
+
+Depending on the outcome, the effect varies:
+\begin{descriptionlist}
+    \item[Positive reinforcement] \marginnote{Positive reinforcement}
+    Delivering an appetitive outcome after an action increases the probability of emitting that action.
+
+    \item[Positive punishment] \marginnote{Positive punishment}
+    Delivering an aversive outcome after an action decreases the probability of emitting that action.
+
+    \item[Negative reinforcement] \marginnote{Negative reinforcement}
+    Omitting an aversive outcome after an action increases the probability of emitting that action.
+
+    \item[Negative punishment] \marginnote{Negative punishment}
+    Omitting an appetitive outcome after an action decreases the probability of emitting that action.
+\end{descriptionlist}
+
+\begin{table}[H]
+    \centering
+    \begin{tabular}{r|cc}
+        \toprule
+        & \textbf{Delivery} & \textbf{Omission} \\
+        \midrule
+        \textbf{Appetitive} & Positive reinforcement (\texttt{+prob}) & Negative punishment (\texttt{-prob}) \\
+        \textbf{Aversive} & Positive punishment (\texttt{-prob}) & Negative reinforcement (\texttt{+prob}) \\
+        \bottomrule
+    \end{tabular}
+    \caption{Summary of the possible effects}
+\end{table}
+
+
+
+\section{Types of schedule}
+
+There are two types of reinforcement schedule:
+\begin{descriptionlist}
+    \item[Continuous schedule] \marginnote{Continuous schedule}
+    The desired action is followed by the outcome every time.
+    \begin{remark}
+        More effective to teach a new association.
+    \end{remark}
+
+    \item[Partial schedule] \marginnote{Partial schedule}
+    The desired action is not always followed by the outcome.
+    \begin{remark}
+        Learning is slower but the response is more resistant to extinction.
+    \end{remark}
+
+    There are four types of partial schedules:
+    \begin{descriptionlist}
+        \item[Fixed-ratio]
+        Outcome available after a specific number of responses.
+
+        This results in a high and steady rate of response, with a brief pause after the outcome is delivered.
+
+
+        \item[Variable-ratio]
+        Outcome available after an unpredictable number of responses.
+
+        This results in a high and steady rate of response.
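+        \begin{remark}
+            Gambling devices such as slot machines are classically described as paying out on a variable-ratio schedule,
+            which is consistent with the high rate of responding and the strong resistance to extinction of this behavior.
+        \end{remark}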
+
+
+        \item[Fixed-interval]
+        Outcome available after a specific interval of time.
+
+        This results in a high rate of response near the end of the interval and a slowdown after the outcome is delivered.
+
+
+        \item[Variable-interval]
+        Outcome available after an unpredictable interval of time.
+
+        This results in a slow and steady rate of response.
+    \end{descriptionlist}
+\end{descriptionlist}
+
+\begin{minipage}{0.55\linewidth}
+    \begin{example}[Aplysia californica]
+        An \textit{Aplysia californica} will withdraw its gill upon stimulation of the siphon.
+        \begin{itemize}
+            \item Repeated mild stimulations will induce a habituation of the reflex.
+            \item Repeated intense stimulations will induce a sensitization of the reflex.
+        \end{itemize}
+    \end{example}
+\end{minipage}
+\begin{minipage}{0.4\linewidth}
+    \centering
+    \includegraphics[width=0.9\linewidth]{./img/gill_habituation.png}
+\end{minipage}
+
+
+
+\section{Dopamine}
+
+There is evidence that dopamine is involved in learning action-outcome associations.
+
+\begin{description}
+    \item[Striatal activity on unexpected events] \marginnote{Striatal activity on unexpected events}
+    When an unexpected event happens, there is a change in the activity of the striatum:
+    an increase in response when the feedback is positive and a decrease when it is negative.
+
+    \begin{@empty}
+        \small
+        \begin{example}[Microelectrodes in substantia nigra]
+            The activity of the substantia nigra of patients with Parkinson's disease is measured during a probabilistic instrumental learning task.
+            The task consists of repeatedly drawing a card from two decks, followed by positive or negative feedback depending on the deck.
+
+            \begin{figure}[H]
+                \centering
+                \begin{subfigure}{0.25\linewidth}
+                    \centering
+                    \includegraphics[width=\linewidth]{./img/instrumental_dopamine_sn1.png}
+                \end{subfigure}
+                \begin{subfigure}{0.55\linewidth}
+                    \centering
+                    \includegraphics[width=\linewidth]{./img/instrumental_dopamine_sn2.png}
+                \end{subfigure}
+            \end{figure}
+
+            The increase and decrease in striatal activity can be clearly seen when the feedback is unexpected.
+        \end{example}
+    \end{@empty}
+
+    \item[Dopamine effect on behavior] \marginnote{Dopamine effect on behavior}
+    The amount of dopamine changes the learning behavior:
+    \begin{itemize}
+        \item Low levels of dopamine impair learning from positive feedback.
+        This happens because positive prediction errors cannot be signaled.
+
+        \item High levels of dopamine impair learning from negative feedback.
+        This happens because negative prediction errors cannot be signaled.
+    \end{itemize}
+
+    \begin{@empty}
+        \small
+        \begin{example}[Probabilistic selection task]
+            This instrumental learning task has two phases:
+            \begin{descriptionlist}
+                \item[Learning]
+                There are three pairs of stimuli (symbols) and, at each trial, a pair is presented to the participant, who selects one.
+                In each pair, one symbol has a higher probability of providing positive feedback while the other is more likely to provide negative feedback.
+                Moreover, the probabilities differ among the three pairs.
+
+                \begin{center}
+                    \includegraphics[width=0.55\linewidth]{./img/instrumental_dopamine_selection1.png}
+                \end{center}
+
+                Participants are required to learn by trial and error which stimulus in each pair leads to a positive reward.
+                Note that learning could be accomplished by:
+                \begin{itemize}
+                    \item Recognizing the more rewarding stimulus.
+                    \item Recognizing the less rewarding stimulus.
+                    \item Both.
+                \end{itemize}
+
+                \item[Testing]
+                Aims to assess whether participants learned to select positive feedback or to avoid negative feedback.
+
+                The same task as above is repeated, but all combinations of the stimuli among the three pairs are possible.
+            \end{descriptionlist}
+
+            Three groups of participants are considered for this experiment:
+            \begin{enumerate}
+                \item Those who took cabergoline (a dopamine agonist).
+                \item Those who took haloperidol (a dopamine antagonist).
+                \item Those who took an inactive substance (placebo).
+            \end{enumerate}
+
+            \begin{center}
+                \includegraphics[width=0.55\linewidth]{./img/instrumental_dopamine_selection2.png}
+            \end{center}
+
+            Results show that:
+            \begin{enumerate}
+                \item Cabergoline inhibited positive feedback learning.
+                \item Haloperidol enhanced positive feedback learning.
+                \item The placebo group learned from positive and negative feedback equally.
+            \end{enumerate}
+            \begin{remark}
+                Although cabergoline is an agonist, at the doses used it is thought to act mainly on presynaptic autoreceptors,
+                reducing phasic dopamine release; this would explain why it impairs, rather than enhances, learning from positive feedback.
+            \end{remark}
+        \end{example}
+    \end{@empty}
+
+    \begin{@empty}
+        \small
+        \begin{example}
+            It has been observed that:
+            \begin{itemize}
+                \item Reward prediction errors are correlated with activity in the left posterior putamen and left ventral striatum.
+                \item Punishment prediction errors are correlated with activity in the right anterior insula.
+            \end{itemize}
+
+            \begin{center}
+                \includegraphics[width=0.5\linewidth]{./img/pe_location.png}
+            \end{center}
+        \end{example}
+    \end{@empty}
+
+    \item[Actor-critic model] \marginnote{Actor-critic model}
+    Model that links Pavlovian and instrumental learning.
+    It is composed of:
+    \begin{itemize}
+        \item The cortex, which is responsible for representing the current state.
+        \item The basal ganglia, which implement two computational modules:
+        \begin{descriptionlist}
+            \item[Critic] \marginnote{Critic}
+            Learns stimulus-outcome associations and is active in both Pavlovian and instrumental learning.
+            It might be implemented in the ventral striatum, the amygdala and the orbitofrontal cortex.
+
+            \item[Actor] \marginnote{Actor}
+            Learns stimulus-action associations and is only active during instrumental learning.
+            It might be implemented in the dorsal striatum.
+        \end{descriptionlist}
+    \end{itemize}
+\end{description}
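+
+\begin{@empty}
+    \small
+    \begin{remark}
+        As a minimal formal sketch (the notation here is illustrative, not from a specific source),
+        both modules can be driven by the same temporal difference error:
+        the critic maintains state values $V(s)$ and computes
+        \[ \delta_t = R_t + V(s_{t+1}) - V(s_t) \]
+        updating its own estimate as $V(s_t) \leftarrow V(s_t) + \alpha \delta_t$,
+        while the actor maintains action propensities $m(s, a)$ updated with the same error,
+        $m(s_t, a_t) \leftarrow m(s_t, a_t) + \alpha' \delta_t$,
+        so that actions followed by a positive prediction error become more likely to be emitted.
+        A single dopaminergic prediction error signal can thus train both the critic and the actor.
+    \end{remark}
+\end{@empty}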
+
+\begin{@empty}
+    \small
+    \begin{example}[Food and cocaine]
+        \phantom{}
+        \begin{itemize}
+            \item Food-induced dopamine response is modulated by the reward expectations that promote learning until the prediction matches the actual outcome.
+            \item Cocaine-induced dopamine response causes a continuous increase in the predicted reward that
+            will eventually surpass all other cues and bias decision-making towards cocaine.
+        \end{itemize}
+        \begin{center}
+            \includegraphics[width=0.7\linewidth]{./img/dopamine_food_cocaine.png}
+        \end{center}
+    \end{example}
+\end{@empty}
\ No newline at end of file
diff --git a/src/cognition-and-neuroscience/module1/sections/_pavlovian_learning.tex b/src/cognition-and-neuroscience/module1/sections/_pavlovian_learning.tex
new file mode 100644
index 0000000..448267d
--- /dev/null
+++ b/src/cognition-and-neuroscience/module1/sections/_pavlovian_learning.tex
@@ -0,0 +1,506 @@
+\chapter{Pavlovian learning}
+
+
+Form of prediction learning that aims to learn stimulus-outcome associations:
+\begin{itemize}
+    \item When a reinforcer is likely to occur.
+    \item Which stimuli tend to precede a reinforcer.
+\end{itemize}
+This allows the animal to emit a response in anticipation of a reinforcer.
+
+Pavlovian learning works as follows:\\
+\begin{minipage}{0.58\linewidth}
+    \begin{enumerate}[label=\alph*.]
+        \item A stimulus that has no meaning to the animal will result in \ac{nr}.
+        \item An \ac{us} (i.e. a reinforcer) generates an \ac{ur}.
+        \item Learning happens when a reinforcer is paired with a non-relevant stimulus.
+        \item The learned \ac{cs} generates a \ac{cr}.
+    \end{enumerate}
+\end{minipage}
+\begin{minipage}{0.4\linewidth}
+    \raggedleft
+    \includegraphics[width=0.9\linewidth]{./img/pavlovian_example.png}
+\end{minipage}\\
+
+An outcome can be:
+\begin{descriptionlist}
+    \item[Appetitive] Something considered positive.
+    \item[Aversive] Something considered negative.
+\end{descriptionlist}
+
+The learned \acl{cr} can be:
+\begin{descriptionlist}
+    \item[Behavioral] Associated with the startle response (i.e. the reflex in response to a sudden stimulus).
+    \item[Physiological] Associated with the autonomic system.
+    \item[Change in subjective response]
+\end{descriptionlist}
+
+\begin{remark}
+    Pavlovian learning has its foundations in behaviorism: the brain starts as a blank slate and only observable behaviors can be studied.
+\end{remark}
+
+
+
+\section{Types of reinforcement}
+
+There are two types of reinforcement:
+\begin{descriptionlist}
+    \item[Continuous reinforcement] \marginnote{Continuous reinforcement}
+    The \acl{cs} is paired with the \acl{us} every time it is presented.
+    \begin{remark}
+        More effective to teach a new association.
+    \end{remark}
+
+    \item[Partial reinforcement] \marginnote{Partial reinforcement}
+    The \acl{cs} is not always reinforced.
+    \begin{remark}
+        Learning is slower but the \acl{cr} is more resistant to extinction.
+    \end{remark}
+\end{descriptionlist}
+
+
+
+\section{Learning flexibility}
+
+\begin{description}
+    \item[Acquisition] \marginnote{Acquisition}
+    The probability of occurrence of a \acl{cr} increases if the \acl{cs} is presented with the \acl{us}.
+
+    \item[Extinction] \marginnote{Extinction}
+    The probability of occurrence of a \acl{cr} decreases if the \acl{cs} is presented alone.
+\end{description}
+
+\begin{remark}
+    Extinction does not imply forgetting.
+    After an association between \ac{cs} and \ac{us} is made,
+    extinction consists of creating a second association with inhibitory effects that overrides the existing association.
+
+    The extinct association can return in the future
+    (this is more evident when the context is the same as in the acquisition phase).
+\end{remark}
+
+\begin{figure}[H]
+    \centering
+    \includegraphics[width=0.95\linewidth]{./img/pavlovian_extinction.png}
+    \caption{Example of acquisition, extinction, and \ac{cr} return}
+\end{figure}
+
+\begin{description}
+    \item[Generalization] \marginnote{Generalization}
+    A new stimulus that is similar to a learned \acl{cs} can elicit a \acl{cr}.
+\end{description}
+
+\begin{example}[Aplysia californica] \phantom{}\\
+    \begin{minipage}{0.8\linewidth}
+        \begin{enumerate}
+            \item Before conditioning, a stimulus to the siphon of an \textit{Aplysia californica} results in a weak withdrawal of the gill.
+            \item During conditioning, a stimulus to the siphon is paired with a shock to the tail, which results in a large withdrawal of the gill.
+            \item After conditioning, a stimulus to the siphon alone results in a large withdrawal response.
+        \end{enumerate}
+    \end{minipage}
+    \begin{minipage}{0.18\linewidth}
+        \centering
+        \includegraphics[width=\linewidth]{./img/aplysia.png}
+    \end{minipage}
+
+    \begin{figure}[H]
+        \centering
+        \includegraphics[width=0.85\linewidth]{./img/gill_pavlovian.png}
+        \caption{Conditioning process}
+    \end{figure}
+
+    The learned response lasts for days.
+    It can be observed that, without training, the response disappears faster.
+
+    \begin{figure}[H]
+        \centering
+        \includegraphics[width=0.3\linewidth]{./img/gill_pavlovian_graph.png}
+        \caption{Withdrawal response decay}
+    \end{figure}
+\end{example}
+
+\begin{remark} \marginnote{Amygdala in Pavlovian learning}
+    In mammals, aversive Pavlovian conditioning involves the amygdala.
+    The \ac{cs} and \ac{us} are relayed from the thalamus and the cerebral cortex to the amygdala,
+    which in turn projects to regions controlling various responses:
+    \begin{descriptionlist}
+        \item[Central gray region (CG)] Controls the freezing behavior.
+        \item[Lateral hypothalamus (LH)] Controls autonomic responses.
+        \item[Paraventricular hypothalamus (PVN)] Controls stress hormones.
+    \end{descriptionlist}
+
+    \begin{figure}[H]
+        \centering
+        \includegraphics[width=0.9\linewidth]{./img/amygdala_pavlovian.png}
+        \caption{Neural circuits during aversive conditioning}
+    \end{figure}
+\end{remark}
+
+
+
+\section{Memory}
+\marginnote{Memory}
+
+Memory is vulnerable to alteration.
+Once a memory is reactivated, the subsequent reconsolidation phase might store a modified version of it.
+
+\begin{figure}[H]
+    \centering
+    \includegraphics[width=0.6\linewidth]{./img/memory.png}
+    \caption{Memory flow}
+\end{figure}
+
+\begin{remark}
+    This mechanism can be exploited against traumatic memories.
+\end{remark}
+
+\begin{remark}
+    The amygdala is responsible for storing conditioned responses while the hippocampus recognizes conditioned stimuli.
+
+    Patients with a damaged amygdala recognize the \ac{cs} but do not produce any \ac{cr}.
+    On the other hand, patients with a damaged hippocampus present a \ac{cr} without recognizing the \ac{cs}.
+\end{remark}
+
+\begin{example}[Reconsolidation disruption]
+    Propranolol is a drug that disrupts amygdala-specific memory reconsolidation (i.e. the physiological response).
+    A possible therapy to suppress a phobia is to trigger the fear memory and then administer propranolol to prevent its reconsolidation.
+\end{example}
+
+
+
+\section{Learning preconditions}
+
+\subsection{Contiguity}
+\marginnote{Contiguity}
+
+Temporal closeness between the \acl{cs} and the \acl{us}.
+
+\begin{remark}
+    The closer in time the stimuli are presented, the more likely the association will be created.
+\end{remark}
+
+Depending on when the \ac{cs} and \ac{us} are presented, conditioning can be:
+\begin{descriptionlist}
+    \item[Delay conditioning] \marginnote{Delay conditioning}
+    The \ac{cs} extends through the interstimulus interval (ISI) (i.e. the time between the start of the \ac{cs} and the start of the \ac{us}).
+
+    \item[Trace conditioning] \marginnote{Trace conditioning}
+    There is a delay (trace interval) between the end of the \ac{cs} and the start of the \ac{us}.
+
+    Learning requires more trials and might be impossible if the trace interval is too long, as the mental representation of the \ac{cs} decays.
+
+    \begin{figure}[H]
+        \centering
+        \includegraphics[width=0.45\linewidth]{./img/contiguity.png}
+    \end{figure}
+\end{descriptionlist}
+
+\begin{example}
+    Two groups of rats were exposed to a 6-second tone (\ac{cs}) followed by food delivery (\ac{us}) with a delay of:
+    \begin{itemize}
+        \item 6 seconds (red).
+        \item 18 seconds (purple).
+    \end{itemize}
+
+    \begin{figure}[H]
+        \centering
+        \includegraphics[width=0.55\linewidth]{./img/contiguity_rats.png}
+        \caption{Number of entries (i.e. the rat checks the food tray) per second}
+    \end{figure}
+\end{example}
+
+
+\subsection{Contingency}
+\marginnote{Contingency}
+
+Causal relationship between the \acl{cs} and the \acl{us}.
+
+\begin{remark}
+    Learning happens when:
+    \[ \prob{\text{\ac{us} with \ac{cs}}} > \prob{\text{\ac{us} with no \ac{cs}}} \]
+    i.e. when the contingency $\Delta P = \prob{\text{\ac{us} with \ac{cs}}} - \prob{\text{\ac{us} with no \ac{cs}}}$ is positive.
+    In other words, the \ac{cs} should provide information regarding the \ac{us}.
+\end{remark}
+
+\begin{figure}[H]
+    \centering
+    \includegraphics[width=0.6\linewidth]{./img/contingency.png}
+    \caption{Example of contingent and random group}
+\end{figure}
+
+\begin{example}
+    Two groups of rats are exposed to a shock paired with a bell ring.
+    Contiguity is the same but contingency differs.
+
+    Only the group for which the shock is more likely with the bell learns the association.
+
+    \begin{figure}[H]
+        \centering
+        \includegraphics[width=0.8\linewidth]{./img/contingency_rats.png}
+        \caption{Representation of the experiment}
+    \end{figure}
+\end{example}
+
+
+\subsection{Surprise}
+
+\begin{description}
+    \item[Prediction error] \marginnote{Prediction error}
+    Quantitative discrepancy between the expected and the experienced outcome.
+\end{description}
+
+\begin{remark}
+    Learning happens when the outcome is different from what was expected.
+\end{remark}
+
+\begin{figure}[H]
+    \centering
+    \includegraphics[width=0.4\linewidth]{./img/surprise.png}
+    \caption{Learning outcome due to surprise}
+\end{figure}
+
+\begin{example}[Blocking effect]
+    \phantom{} \label{ex:blocking} \\
+    \begin{minipage}{0.65\linewidth}
+        \begin{enumerate}
+            \item A rat is taught that a hissing sound (\ac{cs}) is paired with a sexually receptive mate (\ac{us}).
+            \item A light is added together with the hissing sound.
+            \item When only the light is presented, the rat does not produce a response.
+        \end{enumerate}
+
+        The light is not learned as a \ac{cs} as it does not provide any new information about the \ac{us}.
+    \end{minipage}
+    \begin{minipage}{0.35\linewidth}
+        \begin{figure}[H]
+            \centering
+            \includegraphics[width=\linewidth]{./img/surprise_rats.png}
+        \end{figure}
+    \end{minipage}
+\end{example}
+
+
+
+\section{Computational model}
+
+
+\subsection{Rescorla-Wagner model}
+\marginnote{Rescorla-Wagner model}
+
+Error-driven learning model where the change in expectancy is proportional to the difference between the predicted and the actual outcome:
+\[ \delta_{tr} = R_{tr} - V_{tr} \]
+where:
+\begin{itemize}
+    \item $\delta_{tr}$ is the prediction error.
+    \item $R_{tr} = \begin{cases}
+        1 & \text{if the \ac{us} is delivered at trial $tr$} \\
+        0 & \text{if the \ac{us} is omitted at trial $tr$}
+    \end{cases}$.
+    \item $V_{tr}$ is the association strength (i.e. expectancy of the \ac{us} or the expected value resulting from a given \ac{cs}) at trial $tr$.
+\end{itemize}
+
+Then, the expected value $V_{tr+1}$ is obtained as:
+\[ V_{tr+1} = V_{tr} + \alpha \delta_{tr} \]
+where $\alpha \in [0, 1]$ is the learning rate.
+
+\begin{remark}
+    A higher $\alpha$ is more suited for volatile environments, where contingencies change often,
+    while a lower $\alpha$ averages out noise in stable environments.
+\end{remark}
+
+\begin{remark}
+    The prediction error $\delta$ is:
+    \begin{itemize}
+        \item Positive during acquisition.
+        \item Negative during extinction.
+    \end{itemize}
+    Moreover, the error is larger at the start of acquisition/extinction.
+\end{remark}
+
+\begin{remark}
+    The Rescorla-Wagner model is able to capture the blocking effect (see \hyperref[ex:blocking]{Blocking example}) as
+    the animal computes a single prediction error obtained as the combination of multiple stimuli:
+    for a compound stimulus, the total expectancy is the sum of the individual association strengths,
+    $V_{tr} = \sum_i V_{i,tr}$, and each stimulus is updated with the same error $\delta_{tr} = R_{tr} - V_{tr}$.
+    Once an existing \ac{cs} fully predicts the \ac{us} ($V_{tr} \approx 1$), $\delta_{tr} \approx 0$ and a newly added stimulus gains no association strength.
+\end{remark}
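+
+\begin{@empty}
+    \small
+    \begin{example}[Worked Rescorla-Wagner updates]
+        As an illustration (the numbers are arbitrary), let $\alpha = 0.5$ and $V_1 = 0$,
+        and let the \ac{us} be delivered on the first three trials and omitted afterwards:
+        \begin{align*}
+            \delta_1 &= 1 - 0 = 1          & V_2 &= 0 + 0.5 \cdot 1 = 0.5 \\
+            \delta_2 &= 1 - 0.5 = 0.5      & V_3 &= 0.5 + 0.5 \cdot 0.5 = 0.75 \\
+            \delta_3 &= 1 - 0.75 = 0.25    & V_4 &= 0.75 + 0.5 \cdot 0.25 = 0.875 \\
+            \delta_4 &= 0 - 0.875 = -0.875 & V_5 &= 0.875 - 0.5 \cdot 0.875 = 0.4375
+        \end{align*}
+        Expectancy grows with diminishing steps during acquisition and drops once the \ac{us} is omitted,
+        producing the curves in the figure below.
+    \end{example}
+\end{@empty}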
+
+\begin{figure}[H]
+    \centering
+    \includegraphics[width=0.4\linewidth]{./img/rescorla_wagner_curve.png}
+    \caption{Acquisition and extinction in Pavlovian learning according to the Rescorla-Wagner model}
+\end{figure}
+
+\begin{remark}
+    The Rescorla-Wagner model is a trial-level model that only considers the change from trial to trial
+    without considering what happens within and between trials.
+\end{remark}
+
+
+\subsection{Temporal difference model}
+\marginnote{Temporal difference model}
+
+Real-time model based on time steps within a trial instead of monolithic trials.
+At each time $t$ of a trial during which a \ac{cs} is presented,
+the model computes a prediction of the total future reward that will be gained from time $t$ to the end of the trial.
+
+The prediction error is computed as follows\footnote{\url{https://pubmed.ncbi.nlm.nih.gov/9054347/}}:
+\begin{gather*}
+    \delta_t = R_t + V_{t+1} - V_t \\
+    V_t \leftarrow V_t + \alpha \delta_t
+\end{gather*}
+where the update of $V_t$ takes effect from the next trial onwards.
+
+\begin{itemize}
+    \item At the beginning of learning, the \ac{cs} is presented at time $t_\text{\ac{cs}}$
+    and $V_t = 0$ until the \ac{us} is delivered at time $t_\text{\ac{us}} > t_\text{\ac{cs}}$.
+    \item On the next trial, $V_{t_\text{\ac{us}}} - V_{t_\text{\ac{us}} - 1}$ now generates a positive prediction error that updates $V_{t_\text{\ac{us}} - 1}$.
+    \item On subsequent trials, $V_t$ is updated for each $t$ in between $t_\text{\ac{us}}$ back to $t_\text{\ac{cs}}$.
+\end{itemize}
+
+In other words, the value signal produced by the reward (\ac{us}) is transferred back to an event (\ac{cs}) that predicts the reward.
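+
+\begin{@empty}
+    \small
+    \begin{example}[Worked temporal difference updates]
+        As an illustration (the numbers are arbitrary, and no time steps are represented before the \ac{cs}),
+        let $\alpha = 1$ for simplicity, let the \ac{cs} occur at $t = 1$ and the reward $R = 1$ at $t = 3$, with all $V_t$ initially $0$:
+        \begin{itemize}
+            \item Trial 1: the only non-zero error is at the reward, $\delta_3 = 1 + V_4 - V_3 = 1$, so $V_3$ becomes $1$.
+            \item Trial 2: now $\delta_2 = 0 + V_3 - V_2 = 1$, so $V_2$ becomes $1$, and no error occurs at the reward ($\delta_3 = 1 + 0 - 1 = 0$).
+            \item Trial 3: $\delta_1 = 0 + V_2 - V_1 = 1$, so $V_1$ becomes $1$: the prediction error has moved back to the \ac{cs}.
+        \end{itemize}
+        If the reward is then omitted, $\delta_3 = 0 + V_4 - V_3 = -1$: a negative prediction error at the expected reward time,
+        mirroring the dopamine recordings discussed in the next section.
+    \end{example}
+\end{@empty}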
+
+\begin{example}[Second-order conditioning]
+    Pairing a new \ac{cs} with an existing \ac{cs}.
+
+    \begin{center}
+        \includegraphics[width=0.95\linewidth]{./img/second_order_conditioning.png}
+    \end{center}
+
+    \begin{remark}
+        The Rescorla-Wagner model is not capable of modeling second-order conditioning, while
+        the temporal difference model is.
+    \end{remark}
+\end{example}
+
+
+
+\section{Reward prediction error hypothesis of dopamine}
+
+There is strong evidence that the dopaminergic system is a major neural mechanism of reward and reinforcement.
+
+\begin{description}
+    \item[Response to unexpected rewards] \marginnote{Dopamine response to unexpected rewards}
+    Dopaminergic neurons exhibit a strong phasic response in the presence of an unexpected reward.
+
+    \begin{@empty}
+        \small
+        \begin{example}[Monkey that touches food]
+            Some food is put in a box with a hole to reach its content.
+            In the absence of any other stimuli predicting the reward,
+            a monkey presents a high dopaminergic response when it touches the food.
+            \begin{center}
+                \includegraphics[width=0.55\linewidth]{./img/dopamine_monkey1.png}
+            \end{center}
+        \end{example}
+    \end{@empty}
+
+    \item[Reward discrimination] \marginnote{Dopamine reward discrimination}
+    Dopamine neurons respond differently depending on the actual presence of a reward.
+
+    \begin{@empty}
+        \small
+        \begin{example}[Monkey that touches food]
+            The dopaminergic response of a monkey that touches an apple attached to a wire in a box is different
+            from the response to touching only the wire.
+            \begin{center}
+                \includegraphics[width=0.5\linewidth]{./img/dopamine_monkey2.png}
+            \end{center}
+        \end{example}
+    \end{@empty}
+
+    \item[Magnitude discrimination] \marginnote{Dopamine magnitude discrimination}
+    Dopamine neurons respond differently depending on the amount of reward received.
+
+    \begin{@empty}
+        \small
+        \begin{example}[Monkey that drinks]
+            By giving a monkey different amounts of fruit juice in a pseudorandom order,
+            its dopaminergic response is stronger for the highest volume and weaker for the lowest volume.
+            \begin{center}
+                \includegraphics[width=0.7\linewidth]{./img/dopamine_monkey3.png}
+            \end{center}
+        \end{example}
+    \end{@empty}
+
+    \begin{@empty}
+        \small
+        \begin{example}[Monkey with juice and images]
+            Using different \acp{cs}, it can be seen that the dopaminergic response differs based on the amount of reward.
+            \begin{center}
+                \includegraphics[width=0.55\linewidth]{./img/dopamine_expected.png}
+            \end{center}
+        \end{example}
+    \end{@empty}
+
+    \begin{@empty}
+        \small
+        \begin{example}[Monkey with juice and images]
+            After learning the association between a \ac{cs} and a \ac{us} (middle graph), a change in the amount of the reward changes the dopaminergic response.
+            \begin{center}
+                \includegraphics[width=0.6\linewidth]{./img/dopamine_expected2.png}
+            \end{center}
+
+            This behavior also involves the context (i.e. the \ac{cs} image that is shown).
+            \begin{center}
+                \includegraphics[width=0.6\linewidth]{./img/dopamine_expected3.png}
+            \end{center}
+        \end{example}
+    \end{@empty}
+\end{description}
+
+\begin{remark}
+    With the previous observations, it can be concluded that:
+    \begin{itemize}
+        \item Dopamine neurons increase their firing rate when the reward is unexpectedly delivered or better than expected.
+        \item Dopamine neurons decrease their firing rate when the reward is unexpectedly omitted or worse than expected.
+    \end{itemize}
+\end{remark}
+
+\begin{description}
+    \item[Transfer to \ac{cs}] \marginnote{Dopamine transfer to \ac{cs}}
+    \begin{itemize}
+        \item Before training, an unexpected reward (\ac{us}) causes the dopamine neurons to increase their firing (positive prediction error).
+        \item After training, dopamine neuron firing increases after the \ac{cs} but not following the reward (no prediction error).
+        \item After training, dopamine neuron firing increases after the \ac{cs} but decreases at the expected reward time if the reward is omitted (negative prediction error).
+    \end{itemize}
+    \begin{center}
+        \includegraphics[width=0.4\linewidth]{./img/dopamine_transfer_cs.png}
+    \end{center}
+
+    \item[Response to blocking] \marginnote{Dopamine response to blocking}
+    The dopaminergic response is consistent with the blocking effect.
+
+    \begin{@empty}
+        \small
+        \begin{example}[Monkey with food and images]
+            \phantom{}\\
+            \begin{minipage}{0.7\linewidth}
+                A monkey is taught to associate images with food.
+                A new \ac{cs} presented alongside an existing \ac{cs} will not be learned.
+            \end{minipage}
+            \begin{minipage}{0.28\linewidth}
+                \centering
+                \includegraphics[width=\linewidth]{./img/dopamine_blocking.png}
+            \end{minipage}
+        \end{example}
+    \end{@empty}
+
+    \item[Probability encoding] \marginnote{Dopamine probability encoding}
+    The phasic activation of dopamine neurons varies monotonically with the reward probability.
+    \begin{center}
+        \includegraphics[width=0.65\linewidth]{./img/dopamine_probability.png}
+    \end{center}
+
+    \item[Timing encoding] \marginnote{Dopamine timing encoding}
+    The dopamine response to unexpectedness also involves timing.
+    A dopaminergic response occurs when a reward is given earlier or later than expected.
+
+    \begin{@empty}
+        \small
+        \begin{example}
+            After learning that a reward occurs 1 second after the end of the \ac{cs},
+            dopamine neurons fire if the timing changes.
+            \begin{center}
+                \includegraphics[width=0.5\linewidth]{./img/dopamine_timing.png}
+            \end{center}
+        \end{example}
+    \end{@empty}
+\end{description}
+
+\begin{remark}
+    Dopamine is therefore a signal for the prediction error and not strictly for the reward.
+\end{remark}
\ No newline at end of file
diff --git a/src/cognition-and-neuroscience/module1/sections/_rl.tex b/src/cognition-and-neuroscience/module1/sections/_rl.tex
index 57b1b1b..6ba8145 100644
--- a/src/cognition-and-neuroscience/module1/sections/_rl.tex
+++ b/src/cognition-and-neuroscience/module1/sections/_rl.tex
@@ -145,472 +145,6 @@
-\section{Pavlovian learning}
-\marginnote{Pavlovian learning}
-
-Form of prediction learning that aims to learn stimulus-outcome associations:
-\begin{itemize}
-    \item When a reinforcer is likely to occur.
-    \item Which stimuli tend to precede a reinforcer.
-\end{itemize}
-This allows the animal to emit a response in anticipation of a reinforcer.
-
-Pavlovian learning works as follows:\\
-\begin{minipage}{0.58\linewidth}
-    \begin{enumerate}[label=\alph*.]
-        \item A stimulus that has no meaning to the animal will result in \ac{nr}.
-        \item An \ac{us} (i.e. a reinforcer) generates an \ac{ur}.
-        \item Learning happens when a reinforcer is paired with a non-relevant stimulus.
-        \item The learned \ac{cs} generates a \ac{cr}.
-    \end{enumerate}
-\end{minipage}
-\begin{minipage}{0.4\linewidth}
-    \raggedleft
-    \includegraphics[width=0.9\linewidth]{./img/pavlovian_example.png}
-\end{minipage}\\
-
-An outcome can be:
-\begin{descriptionlist}
-    \item[Appetitive] Something considered positive.
-    \item[Aversive] Something considered negative.
-\end{descriptionlist}
-
-The learned \acl{cr} can be:
-\begin{descriptionlist}
-    \item[Behavioral] Associated to the startle response (i.e. reflex in response to a sudden stimulus).
-    \item[Physiological] Associated to the autonomic system.
-    \item[Change in subjective response]
-\end{descriptionlist}
-
-\begin{remark}
-    Pavlovian learning has its foundations in behaviorism: the brain starts as a blank slate and only observable behaviors can be studied.
-\end{remark}
-
-
-\subsection{Types of reinforcement}
-
-There are two types of learning:
-\begin{descriptionlist}
-    \item[Continuous reinforcement] \marginnote{Continuous reinforcement}
-    The \acl{cs} is reinforced every time the \acl{us} occurs.
-    \begin{remark}
-        More effective to teach a new association.
-    \end{remark}
-
-    \item[Partial reinforcement] \marginnote{Partial reinforcement}
-    The \acl{cs} is not always reinforced.
-    \begin{remark}
-        Learning is slower but the \acl{cr} is more resistant to extinction.
-    \end{remark}
-\end{descriptionlist}
-
-
-\subsection{Learning flexibility}
-
-\begin{description}
-    \item[Acquisition] \marginnote{Acquisition}
-    The probability of occurrence of a \acl{cr} increases if the \acl{cs} is presented with the \acl{us}.
-
-    \item[Extinction] \marginnote{Extinction}
-    The probability of occurrence of a \acl{cr} decreases if the \acl{cs} is presented alone.
-\end{description}
-
-\begin{remark}
-    Extinction does not imply forgetting.
-    After an association between \ac{cs} and \ac{us} is made,
-    extinction consists of creating a second association with inhibitory effects that overrides the existing association.
-
-    The extinct association can return in the future
-    (this is more evident when the context is the same as the acquisition phase).
-\end{remark} - -\begin{figure}[H] - \centering - \includegraphics[width=0.95\linewidth]{./img/pavlovian_extinction.png} - \caption{Example of acquisition, extinction, and \ac{cr} return} -\end{figure} - -\begin{description} - \item[Generalization] \marginnote{Generalization} - A new stimulus that is similar to a learned \acl{cs} can elicit a \acl{cr}. -\end{description} - -\begin{example}[Aplysia Californica] \phantom{}\\ - \begin{minipage}{0.8\linewidth} - \begin{enumerate} - \item Before conditioning, a stimulus to the siphon of an aplysia californica results in a weak withdrawal of the gill. - \item During conditioning, a stimulus to the siphon is paired with a shock to the tail which results in a large withdrawal of the gill. - \item After conditioning, a stimulus to the siphon alone results in a large withdrawal response. - \end{enumerate} - \end{minipage} - \begin{minipage}{0.18\linewidth} - \centering - \includegraphics[width=\linewidth]{./img/aplysia.png} - \end{minipage} - - \begin{figure}[H] - \centering - \includegraphics[width=0.85\linewidth]{./img/gill_pavlovian.png} - \caption{Conditioning process} - \end{figure} - - The learned response lasts for days. - It can be observed that without training, the response disappears faster. - - \begin{figure}[H] - \centering - \includegraphics[width=0.35\linewidth]{./img/gill_pavlovian_graph.png} - \caption{Withdrawal response decay} - \end{figure} -\end{example} - -\begin{remark} \marginnote{Amygdala in Pavlovian learning} - In mammals, aversive Pavlovian conditioning involves the amygdala. - The \ac{cs} and \ac{us} are relayed from the thalamus and the cerebral cortex to the amygdala, - which in turn connects to various motor responses such as: - \begin{descriptionlist} - \item[Central gray region (CG)] Controls the freezing behavior. - \item[Lateral hypothalamus (LH)] Controls autonomic responses. - \item[Paraventricular hypothalamus (PVN)] Controls stress hormones. - \end{descriptionlist} - - \begin{figure}[H] - \centering - \includegraphics[width=0.9\linewidth]{./img/amygdala_pavlovian.png} - \caption{Neural circuits during aversive conditioning} - \end{figure} -\end{remark} - - - -\section{Instrumental learning} -\marginnote{Instrumental learning} - -Form of control learning that aims to learn action-outcome associations: -\begin{itemize} - \item When a reinforcer is likely to occur. - \item Which actions bring to those reinforcers. -\end{itemize} -This allows the animal to act in anticipation of a reinforcer. - -Depending on the outcome, the effect varies: -\begin{descriptionlist} - \item[Positive reinforcement] \marginnote{Positive reinforcement} - Delivering an appetitive outcome to an action increases the probability of emitting it. - - \item[Positive punishment] \marginnote{Positive punishment} - Delivering an aversive outcome to an action decreases the probability of emitting it. - - \item[Negative reinforcement] \marginnote{Negative reinforcement} - Omitting an aversive outcome to an action increases the probability of emitting it. - - \item[Negative punishment] \marginnote{Negative punishment} - Omitting an appetitive outcome to an action decreases the probability of emitting it. 
-\end{descriptionlist} - -\begin{table}[H] - \centering - \begin{tabular}{r|cc} - \toprule - & \textbf{Delivery} & \textbf{Omission} \\ - \midrule - \textbf{Appetitive} & Positive reinforcement (\texttt{+prob}) & Negative punishment (\texttt{-prob}) \\ - \textbf{Aversive} & Positive punishment (\texttt{-prob}) & Negative reinforcement (\texttt{+prob}) \\ - \bottomrule - \end{tabular} - \caption{Summary of the possible effects} -\end{table} - - -\subsection{Types of schedule} - -There are two types of learning: -\begin{descriptionlist} - \item[Continuous schedule] \marginnote{Continuous schedule} - The desired action is followed by the outcome every time. - \begin{remark} - More effective to teach a new association. - \end{remark} - - \item[Partial schedule] \marginnote{Partial schedule} - The desired action is not always followed by the outcome. - \begin{remark} - Learning is slower but the response is more resistant to extinction. - \end{remark} - - There are four types of partial schedules: - \begin{descriptionlist} - \item[Fixed-ratio] - Outcome available after a specific number of responses. - - This results in a high and steady rate of response, with a brief pause after the outcome is delivered. - - - \item[Variable-ratio] - Outcome available after an unpredictable number of responses. - - This results in a high and steady rate of response. - - - \item[Fixed-interval] - Outcome available after a specific interval of time. - - This results in a high rate of response near the end of the interval and a slowdown after the outcome is delivered. - - - \item[Variable-interval] - Outcome available after an unpredictable interval of time. - - This results in a slow and steady rate of response. - \end{descriptionlist} -\end{descriptionlist} - -\begin{example}[Aplysia Californica] - An Aplysia Californica will withdraw its gill upon stimulating the siphon. - \begin{itemize} - \item Repeated mild stimulations will induce a habituation of the reflex. - \item Repeated intense stimulations will induce a sensitization of the reflex. - \end{itemize} - - \begin{figure}[H] - \centering - \includegraphics[width=0.4\linewidth]{./img/gill_habituation.png} - \caption{Example of habituation} - \end{figure} -\end{example} - - - -\section{Memory} -\marginnote{Memory} - -Memory is vulnerable to alteration. -Once reactivated, the subsequent reconsolidation phase might store a modified version of the memory. - -\begin{figure}[H] - \centering - \includegraphics[width=0.7\linewidth]{./img/memory.png} - \caption{Memory flow} -\end{figure} - -\begin{remark} - This mechanism is useful against traumatic memories. -\end{remark} - -\begin{remark} - The amygdala is responsible for storing conditioned responses while the hippocampus recognizes conditioned stimuli. - - Patients with a damaged amygdala only recognize \ac{cs} but do not act with any \ac{cr}. - On the other hand, a damaged hippocampus results in patients that present a \ac{cr} without recognizing the \ac{cs}. -\end{remark} - -\begin{example}[Reconsolidation disruption] - Propranolol is a drug that disrupts amygdala-specific memory reconsolidation (i.e. the physiological response). - A possible therapy to suppress a phobia is to trigger the fear memory and then administer propranolol to prevent its reconsolidation. -\end{example} - - - -\section{Learning preconditions} - -\subsection{Contiguity} -\marginnote{Contiguity} - -Closeness between the \acl{cs} and the \acl{us}. 
- -\begin{remark} - The closer in time the stimuli are presented, the more likely the association will be created. -\end{remark} - -Depending on when the \ac{cs} and \ac{us} are presented, conditioning can be: -\begin{descriptionlist} - \item[Delay conditioning] \marginnote{Delay conditioning} - The \ac{cs} is extended through the interstimulus interval (ISI) (i.e. time between the start of the \ac{cs} and the \ac{us}). - - \item[Trace conditioning] \marginnote{Trace conditioning} - There is a delay (trace interval) between the \ac{cs} end and the \ac{us} start. - - Learning requires more trials and might be impossible if the trace interval is too long as the mental representation of the \ac{cs} decays. - - \begin{figure}[H] - \centering - \includegraphics[width=0.45\linewidth]{./img/contiguity.png} - \end{figure} -\end{descriptionlist} - -\begin{example} - Two groups of rats were exposed to a 6 seconds tone (\ac{cs}) followed by food delivery (\ac{us}) with a delay of: - \begin{itemize} - \item 6 seconds (red). - \item 18 seconds (purple). - \end{itemize} - - \begin{figure}[H] - \centering - \includegraphics[width=0.55\linewidth]{./img/contiguity_rats.png} - \caption{Number of entries (i.e. the rat checks the food tray) per second} - \end{figure} -\end{example} - - -\subsection{Contingency} -\marginnote{Contingency} - -Causal relationship between the \acl{cs} and the \acl{us}. - -\begin{remark} - Learning happens when: - \[ \prob{\text{\ac{us} with \ac{cs}}} > \prob{\text{\ac{us} with no \ac{cs}}} \] - In other words, the \ac{cs} should provide information regarding the \ac{us}. -\end{remark} - -\begin{figure}[H] - \centering - \includegraphics[width=0.6\linewidth]{./img/contingency.png} - \caption{Example of contingent and random group} -\end{figure} - -\begin{example} - Two groups of rats are exposed to a shock paired with a bell ring. - Contiguity is the same but contingency differs. - - Only the group where the shock is more likely with the bell learns the association. - - \begin{figure}[H] - \centering - \includegraphics[width=0.8\linewidth]{./img/contingency_rats.png} - \caption{Representation of the experiment} - \end{figure} -\end{example} - - -\subsection{Surprise} - -\begin{description} - \item[Prediction error] \marginnote{Prediction error} - Quantitative discrepancy between the expected and experienced outcome. -\end{description} - -\begin{remark} - Learning happens when the outcome is different from what was expected. -\end{remark} - -\begin{figure}[H] - \centering - \includegraphics[width=0.4\linewidth]{./img/surprise.png} - \caption{Learning outcome due to surprise} -\end{figure} - -\begin{example}[Blocking effect] - \phantom{} \label{ex:blocking} \\ - \begin{minipage}{0.65\linewidth} - \begin{enumerate} - \item A rat is taught that a hissing sound (\ac{cs}) is paired with a sexually receptive mate (\ac{us}). - \item A light is added together with the hissing sound. - \item When only the light is presented, the rat does not provide a response. - \end{enumerate} - - The light is not learned as a \ac{cs} as it does not provide any new information on the \ac{us}. 
- \end{minipage} - \begin{minipage}{0.35\linewidth} - \begin{figure}[H] - \centering - \includegraphics[width=\linewidth]{./img/surprise_rats.png} - \end{figure} - \end{minipage} -\end{example} - - - -\section{Computational model} - - -\subsection{Rescorla-Wagner model} -\marginnote{Rescorla-Wagner model} - -Error-driven learning model where the change expectancy is proportional to the difference between predicted and actual outcome: -\[ \delta_{tr} = R_{tr} - V_{tr} \] -where: -\begin{itemize} - \item $\delta_{tr}$ is the prediction error. - \item $R_{tr} = \begin{cases} - 1 & \text{if the \ac{us} is delivered at trial $tr$} \\ - 0 & \text{if the \ac{us} is omitted at trial $tr$} - \end{cases}$. - \item $V_{tr}$ is the association strength (i.e. expectancy of the \ac{us} or the expected value resulting from a given \ac{cs}) at trial $tr$. -\end{itemize} - -Then, the expected value $V_{tr+1}$ is obtained as: -\[ V_{tr+1} = V_{tr} + \alpha \delta_{tr} \] -where $\alpha \in [0, 1]$ is the learning rate. - -\begin{remark} - A lower $\alpha$ is more suited for volatile environments. -\end{remark} - -\begin{remark} - The prediction error $\delta$ is: - \begin{itemize} - \item Positive during acquisition. - \item Negative during extinction. - \end{itemize} - Moreover, the error is larger at the start of acquisition/extinction. -\end{remark} - -\begin{remark} - The Rescorla-Wagner model is able to capture the blocking effect (see \hyperref[ex:blocking]{Blocking example}) as - the animal computes a single prediction error obtained as the combination of multiple stimuli. -\end{remark} - -\begin{figure}[H] - \centering - \includegraphics[width=0.4\linewidth]{./img/rescorla_wagner_curve.png} - \caption{Acquisition and extinction in Pavlovian learning according to the Rescorla-Wagner model} -\end{figure} - -\begin{remark} - The Rescorla-Wagner model is a trial-level model that only considers the change from trial to trial - without considering what happens within and between trials. -\end{remark} - - -\subsection{Temporal difference model} -\marginnote{Temporal difference model} - -Real-time model based on time steps within a trial instead of monolithic trials. -At each time $t$ of a trial during which a \ac{cs} is presented, -the model computes a prediction of the total future reward that will be gained from time $t$ to the end of the trial. - -The prediction error is computed as follows\footnote{\url{https://pubmed.ncbi.nlm.nih.gov/9054347/}}: -\begin{gather*} - \delta_t = R_t + V_{t+1} - V_t \\ - V_{t+1} = V_t + \alpha \delta_t -\end{gather*} - -\begin{itemize} - \item At the beginning of learning, the \ac{cs} is presented at time $t_\text{\ac{cs}}$ - and $V_t = 0$ until the \ac{us} is delivered at time $t_\text{\ac{us}} > t_\text{\ac{cs}}$. - \item On the next trial, $V_{t_\text{\ac{us}}} - V_{t_\text{\ac{us}} - 1}$ now generates a positive prediction error that updates $V_{t_\text{\ac{us}} - 1}$. - \item On subsequent trials, $V_t$ is updated for each $t$ in between $t_\text{\ac{us}}$ back to $t_\text{\ac{cs}}$. -\end{itemize} - -In other words, the value signal produced by the reward (\ac{us}) is transferred back to an event (\ac{cs}) that predicts the reward. - -\begin{example}[Second-order conditioning] - Pairing a new \ac{cs} to an existing \ac{cs}. 
- - \begin{center} - \includegraphics[width=0.9\linewidth]{./img/second_order_conditioning.png} - \end{center} - - \begin{remark} - The Rescorla-Wagner model is not capable of modeling second-order conditioning while - the temporal difference model is. - \end{remark} -\end{example} - - - \section{Dopamine} \begin{description} @@ -661,288 +195,4 @@ In other words, the value signal produced by the reward (\ac{us}) is transferred \includegraphics[width=0.3\linewidth]{./img/dopaminergic_pathways.png} \caption{Dopaminergic pathways} \end{figure} -\end{description} - - -\subsection{Reward prediction error hypothesis of dopamine} - -There is strong evidence that the dopaminergic system is the major neural mechanism of reward and reinforcement. - -\begin{description} - \item[Response to unexpected rewards] \marginnote{Dopamine response to unexpected rewards} - Dopaminergic neurons exhibit a strong phasic response in the presence of an unexpected reward. - - \begin{@empty} - \small - \begin{example}[Monkey that touches food] - Some food is put in a box with a hole to reach its content. - In the absence of any other stimuli predicting the reward, - a monkey presents a high dopaminergic response when it touches the food. - \begin{center} - \includegraphics[width=0.55\linewidth]{./img/dopamine_monkey1.png} - \end{center} - \end{example} - \end{@empty} - - \item[Reward discrimination] \marginnote{Dopamine reward discrimination} - Dopamine neurons respond differently depending on the actual presence of a reward. - - \begin{@empty} - \small - \begin{example}[Monkey that touches food] - The dopaminergic response of a monkey that touches an apple attached to a wire in a box is different - from the response of only touching the wire. - \begin{center} - \includegraphics[width=0.5\linewidth]{./img/dopamine_monkey2.png} - \end{center} - \end{example} - \end{@empty} - - \item[Magnitude discrimination] \marginnote{Dopamine magnitude discrimination} - Dopamine neurons respond differently depending on the amount of reward received. - - \begin{@empty} - \small - \begin{example}[Monkey that drinks] - By giving a monkey different amounts of fruit juice in a pseudorandom order, - its dopaminergic response is stronger for the highest volume and weaker for the lowest volume. - \begin{center} - \includegraphics[width=0.7\linewidth]{./img/dopamine_monkey3.png} - \end{center} - \end{example} - \end{@empty} - - \begin{@empty} - \small - \begin{example}[Monkey with juice and images] - Using different \acp{cs}, it can be seen that the dopaminergic response differs based on the amount of reward. - \begin{center} - \includegraphics[width=0.5\linewidth]{./img/dopamine_expected.png} - \end{center} - \end{example} - \end{@empty} - - \begin{@empty} - \small - \begin{example}[Monkey with juice and images] - After learning the association between a \ac{cs} and \ac{us} (middle graph), a change in the amount of the reward changes the dopaminergic response. - \begin{center} - \includegraphics[width=0.6\linewidth]{./img/dopamine_expected2.png} - \end{center} - - This behavior also involves the context (i.e. the \ac{cs} image that is shown). - \begin{center} - \includegraphics[width=0.6\linewidth]{./img/dopamine_expected3.png} - \end{center} - \end{example} - \end{@empty} -\end{description} - -\begin{remark} - With the previous observations, it can be concluded that: - \begin{itemize} - \item Dopamine neurons increase their firing rate when the reward is unexpectedly delivered or better than expected. 
- \item Dopamine neurons decrease their firing rate when the reward is unexpectedly omitted or worse than expected. - \end{itemize} -\end{remark} - -\begin{description} - \item[Transfer to \ac{cs}] \marginnote{Dopamine transfer to \ac{cs}} - \phantom{} \\ - \begin{minipage}{0.65\linewidth} - \begin{itemize}[leftmargin=*] - \item Before training, an unexpected reward (\ac{us}) causes the dopamine neurons to increase firing (positive prediction error). - \item After training, dopamine neurons firing is increased after the \ac{cs} but not following the reward (no prediction error). - \item After training, dopamine neurons firing is increased after the \ac{cs} but is decreased if the reward is omitted (negative prediction error). - \end{itemize} - \end{minipage} - \begin{minipage}{0.35\linewidth} - \centering - \includegraphics[width=\linewidth]{./img/dopamine_transfer_cs.png} - \end{minipage} - - \item[Response to blocking] \marginnote{Dopamine response to blocking} - Dopaminergic response is in line with the blocking effect. - - \begin{@empty} - \small - \begin{example}[Monkey with food and images] - \phantom{}\\ - \begin{minipage}{0.7\linewidth} - A monkey is taught to associate images with food. - A new \ac{cs} alongside an existing \ac{cs} will not be learned. - \end{minipage} - \begin{minipage}{0.28\linewidth} - \centering - \includegraphics[width=\linewidth]{./img/dopamine_blocking.png} - \end{minipage} - \end{example} - \end{@empty} - - \item[Probability encoding] \marginnote{Dopamine probability encoding} - \phantom{} \\ - \begin{minipage}{0.45\linewidth} - The phasic activation of dopamine neurons varies monotonically with the reward probability - \end{minipage} - \begin{minipage}{0.5\linewidth} - \centering - \includegraphics[width=0.85\linewidth]{./img/dopamine_probability.png} - \end{minipage} - - \item[Timing encoding] \marginnote{Dopamine timing encoding} - Dopamine response to unexpectedness also involves timing. - A dopaminergic response occurs when a reward is given earlier or later than expected. - - \begin{@empty} - \small - \begin{example} - After learning that a reward occurs 1 second after the end of the \ac{cs}, - dopamine neurons fire if the timing changes. - \begin{center} - \includegraphics[width=0.5\linewidth]{./img/dopamine_timing.png} - \end{center} - \end{example} - \end{@empty} -\end{description} - -\begin{remark} - Dopamine is therefore a signal for the predicted error and not strictly for the reward. -\end{remark} - - -\subsection{Dopamine in instrumental learning} - -There is evidence that dopamine is involved in learning action-outcome associations (instrumental learning). - -\begin{description} - \item[Striatal activity on unexpected events] \marginnote{Striatal activity on unexpected events} - When an unexpected event happens, there is a change in the activity of the striatum. - There is an increase in response when the feedback is positive and a decrease when negative. - - \begin{@empty} - \small - \begin{example}[Microelectrodes in substantia nigra] - The activity of the substantia nigra of patients with Parkinson's disease is measured during a probabilistic instrumental learning task. - The task consists of repeatedly drawing a card from two decks, followed by positive or negative feedback depending on the deck. 
- - \begin{figure}[H] - \centering - \begin{subfigure}{0.25\linewidth} - \centering - \includegraphics[width=\linewidth]{./img/instrumental_dopamine_sn1.png} - \end{subfigure} - \begin{subfigure}{0.55\linewidth} - \centering - \includegraphics[width=\linewidth]{./img/instrumental_dopamine_sn2.png} - \end{subfigure} - \end{figure} - - The increase and decrease in striatal activity can be clearly seen when the feedback is unexpected. - \end{example} - \end{@empty} - - \item[Dopamine effect on behavior] \marginnote{Dopamine effect on behavior} - The amount of dopamine changes the learning behavior: - \begin{itemize} - \item Low levels of dopamine cause an impairment in learning from positive feedback. - This happens because positive prediction errors cannot occur. - - \item High levels of dopamine cause an impairment in learning from negative feedback. - This happens because negative prediction errors cannot occur. - \end{itemize} - - \begin{@empty} - \small - \begin{example}[Probabilistic selection task] - This instrumental learning task has two phases: - \begin{descriptionlist} - \item[Learning] - There are three pairs of stimuli (symbols) and, at each trial, a pair is presented to the participant who selects one. - For each pair, a symbol has a higher probability of providing positive feedback while the other is more likely to be negative. - Moreover, the probabilities are different among the three pairs. - - \begin{center} - \includegraphics[width=0.55\linewidth]{./img/instrumental_dopamine_selection1.png} - \end{center} - - Participants are required to learn by trial and error the stimulus in each pair that leads to a positive reward. - Note that learning could be accomplished by: - \begin{itemize} - \item Recognizing the more rewarding stimulus. - \item Recognizing the less rewarding stimulus. - \item Both. - \end{itemize} - - \item[Testing] - Aims to assess if participants learned to select positive feedback or avoid negative feedback. - - The same task as above is repeated but all combinations of the stimuli among the three pairs are possible. - \end{descriptionlist} - - Three groups of participants are considered for this experiment: - \begin{enumerate} - \item Those who took the cabergoline drug (dopamine antagonist). - \item Those who took the haloperidol drug (dopamine agonist). - \item Those who took a drug without effects (placebo). - \end{enumerate} - - \begin{center} - \includegraphics[width=0.55\linewidth]{./img/instrumental_dopamine_selection2.png} - \end{center} - - Results show that: - \begin{enumerate} - \item Cabergoline inhibited positive feedback learning. - \item Haloperidol enhanced positive feedback learning. - \item Placebo learned positive and negative feedback equally. - \end{enumerate} - \end{example} - \end{@empty} - - \begin{@empty} - \small - \begin{example} - It has been observed that: - \begin{itemize} - \item Reward prediction errors are correlated with activity in the left posterior putamen and left ventral striatum. - \item Punishment prediction errors are correlated with activity in the right anterior insula. - \end{itemize} - - \begin{center} - \includegraphics[width=0.5\linewidth]{./img/pe_location.png} - \end{center} - \end{example} - \end{@empty} - - \item[Actor-critic model] \marginnote{Actor-critic model} - Model to correlate Pavlovian and instrumental learning. - It is composed by: - \begin{itemize} - \item The cortex is responsible for representing the current state. 
- \item The basal ganglia implement two computational models: - \begin{descriptionlist} - \item[Critic] \marginnote{Critic} - Learns stimulus-outcome associations and is active in both Pavlovian and instrumental learning. - It might be implemented in the ventral striatum, the amygdala and the orbitofrontal cortex. - - \item[Actor] \marginnote{Actor} - Learns stimulus-action associations and is only active during instrumental learning. - It might be implemented in the dorsal striatum. - \end{descriptionlist} - \end{itemize} -\end{description} - -\begin{@empty} - \small - \begin{example}[Food and cocaine] - \phantom{} - \begin{itemize} - \item Food-induced dopamine response is modulated by the reward expectations that promote learning until the prediction matches the actual outcome. - \item Cocaine-induced dopamine response causes a continuous increase in the predicted reward that - will eventually surpass all other cues and bias decision-making towards cocaine. - \end{itemize} - \begin{center} - \includegraphics[width=0.7\linewidth]{./img/dopamine_food_cocaine.png} - \end{center} - \end{example} -\end{@empty} \ No newline at end of file +\end{description} \ No newline at end of file