diff --git a/src/cognition-and-neuroscience/cn.tex b/src/cognition-and-neuroscience/cn.tex index 35c4eee..8616881 100644 --- a/src/cognition-and-neuroscience/cn.tex +++ b/src/cognition-and-neuroscience/cn.tex @@ -10,6 +10,12 @@ \DeclareAcronym{ap}{short=AP, long=action potential, long-plural=s} \DeclareAcronym{cns}{short=CNS, long=central nervous system} \DeclareAcronym{pns}{short=PNS, long=peripheral nervous system} +\DeclareAcronym{rl}{short=RL, long=reinforcement learning} +\DeclareAcronym{nr}{short=NR, long=no response} +\DeclareAcronym{us}{short=US, long=unconditioned stimulus} +\DeclareAcronym{ur}{short=UR, long=unconditioned response} +\DeclareAcronym{cs}{short=CS, long=conditioned stimulus} +\DeclareAcronym{cr}{short=CR, long=conditioned response} \begin{document} @@ -20,5 +26,6 @@ \input{./sections/_introduction.tex} \input{./sections/_nervous_system.tex} + \input{./sections/_rl.tex} \end{document} \ No newline at end of file diff --git a/src/cognition-and-neuroscience/img/learning_systems.png b/src/cognition-and-neuroscience/img/learning_systems.png new file mode 100644 index 0000000..a4b1e46 Binary files /dev/null and b/src/cognition-and-neuroscience/img/learning_systems.png differ diff --git a/src/cognition-and-neuroscience/img/pavlovian_example.png b/src/cognition-and-neuroscience/img/pavlovian_example.png new file mode 100644 index 0000000..8b917aa Binary files /dev/null and b/src/cognition-and-neuroscience/img/pavlovian_example.png differ diff --git a/src/cognition-and-neuroscience/img/pavlovian_extinction.png b/src/cognition-and-neuroscience/img/pavlovian_extinction.png new file mode 100644 index 0000000..d888a3c Binary files /dev/null and b/src/cognition-and-neuroscience/img/pavlovian_extinction.png differ diff --git a/src/cognition-and-neuroscience/sections/_rl.tex b/src/cognition-and-neuroscience/sections/_rl.tex new file mode 100644 index 0000000..b55aae7 --- /dev/null +++ b/src/cognition-and-neuroscience/sections/_rl.tex @@ -0,0 +1,320 @@ 
+\chapter{Reinforcement learning} + + +\section{Definitions} + +\Acl{rl} (\acs{rl}) methods aim to maximize future reward by mapping the possible states of an environment into actions. + +\begin{description} + \item[Optimal decision making] \marginnote{Optimal decision making} + Aims to maximize rewards and minimize punishments. + + \begin{remark} + This is a difficult task as the outcome might be delayed or depend on a series of actions. + + \begin{descriptionlist} + \item[Credit assignment problem] + Determine how the various factors involved in making a decision contributed to the success or failure of it. + \end{descriptionlist} + \end{remark} +\end{description} + +\begin{remark} + Multiple competing sub-systems contribute to learning and controlling behavior in animals. + + \begin{example}[Freud's theory of the mind structure] + The mind is composed of three structures: + \begin{descriptionlist} + \item[Ego] + Mainly works at the conscious level. + Rational part of the mind that mediates id impulses and superego inhibitions. + + \item[Superego] + Mainly works at the preconscious level. + Includes one's ideals and morals. Strives for perfection. + + \item[Id] + Mainly works at the unconscious level. + Irrational part of the mind based on basic impulses that seek immediate gratification. + \end{descriptionlist} + \end{example} +\end{remark} + + +\subsection{Learning} + +\begin{description} + \item[Learning] \marginnote{Learning} + Lasting change in response or behavior originated from experience. + + \item[Non-associative learning] \marginnote{Non-associative learning} + Change in response or behavior caused by learning the properties of a single stimulus. + It can result in: + \begin{descriptionlist} + \item[Habituation] + A decrease in response to a stimulus that is presented repeatedly. + \begin{example} + The first explosion of a firework causes a strong response but the following ones do not cause much response. 
+ \end{example} + + \item[Sensitization] + An increase in response to a stimulus that is presented repeatedly. + \begin{example} + When the skin itches, one will start scratching it. + \end{example} + \end{descriptionlist} + + \item[Associative learning] \marginnote{Associative learning} + Change in response or behavior caused by learning an association of two or more stimuli/events. + + \begin{descriptionlist} + \item[\Acl{rl}] \marginnote{\Acl{rl}} + Learn an association between a neutral stimulus (something the body considers irrelevant) and + a reinforcer (something the body considers relevant). + + \begin{description} + \item[Primary reinforcer] \marginnote{Primary reinforcer} + Positive or negative stimulus that is biologically relevant and elicits a response. + \begin{example} + Food, pain, social interactions, \dots + \end{example} + + \item[Secondary reinforcer] \marginnote{Secondary reinforcer} + Positive or negative stimulus that became relevant following associative learning. + It elicits a response which usually enables a primary reinforcer. + \end{description} + \end{descriptionlist} +\end{description} + + +\subsection{Learning systems} + +\begin{description} + \item[Pavlovian/classical system] \marginnote{Pavlovian system} + Form of prediction learning. + Learns to predict biologically relevant stimuli to trigger an appropriate response (stimulus-outcome associations). + + \item[Instrumental system] \marginnote{Instrumental system} + Form of control learning to learn action-outcome associations. + It includes: + \begin{descriptionlist} + \item[Habitual system] \marginnote{Habitual system} + Learn to repeat previously successful actions. + \item[Goal-directed system] \marginnote{Goal-directed system} + Evaluate actions based on the prior knowledge of their consequences. + \end{descriptionlist} +\end{description} + +\begin{remark} + Pavlovian and instrumental systems are not independent. 
+ By predicting which situations are positive, one can act to reach them through one's actions. + + \begin{figure}[H] + \centering + \includegraphics[width=0.35\linewidth]{./img/learning_systems.png} + \caption{Learning systems relationship} + \end{figure} +\end{remark} + + + +\section{Learning at the neuronal level} + +\begin{description} + \item[Plasticity] + Learning and experience change the connections of a neural system. + + \item[Short-term change] + Functional physiological change that modifies the effectiveness of existing synaptic connections (i.e.\ amount of neurotransmitters). + Lasts from seconds up to hours. + + \item[Long-term change] + Structural change that leads to anatomical alterations such as pruning or growth of synapses. + Lasts days and can cause further short-term changes. +\end{description} + +\begin{remark} + Neuronal changes follow a ``use it or lose it'' policy. + Only useful changes will last. +\end{remark} + +\begin{example}[Phantom limb pain] + In amputees, the area of the brain responsible for the missing part of the body is overrun by the neighboring section. + In the case of an arm, the area responsible for the face might ``conquer'' what once was the area of the arm. +\end{example} + + + +\section{Pavlovian learning} +\marginnote{Pavlovian learning} + +Form of prediction learning that aims to learn stimulus-outcome associations: +\begin{itemize} + \item When a reinforcer is likely to occur. + \item Which stimuli tend to precede a reinforcer. +\end{itemize} +This allows the animal to emit a response in anticipation of a reinforcer. + +Pavlovian learning works as follows:\\ +\begin{minipage}{0.58\linewidth} + \begin{enumerate}[label=\alph*.] + \item A stimulus that has no meaning to the animal will result in \ac{nr}. + \item An \ac{us} (i.e.\ a reinforcer) generates an \ac{ur}. + \item Learning happens when a reinforcer is paired with a non-relevant stimulus. + \item The learned \ac{cs} generates a \ac{cr}. 
+ \end{enumerate} +\end{minipage} +\begin{minipage}{0.4\linewidth} + \raggedleft + \includegraphics[width=0.9\linewidth]{./img/pavlovian_example.png} +\end{minipage}\\ + +An outcome can be: +\begin{descriptionlist} + \item[Appetitive] Something considered positive. + \item[Aversive] Something considered negative. +\end{descriptionlist} + +The learned \acl{cr} can be: +\begin{descriptionlist} + \item[Behavioral] Associated with the startle response (i.e.\ a reflex in response to a sudden stimulus). + \item[Physiological] Associated with the autonomic system. + \item[Change in subjective response] +\end{descriptionlist} + +\begin{remark} + Pavlovian learning has its foundations in behaviorism: the brain starts as a blank slate and only observable behaviors can be studied. +\end{remark} + + +\subsection{Types of reinforcement} + +There are two types of reinforcement: +\begin{descriptionlist} + \item[Continuous reinforcement] \marginnote{Continuous reinforcement} + The \acl{cs} is reinforced (i.e.\ followed by the \acl{us}) every time it is presented. + \begin{remark} + More effective to teach a new association. + \end{remark} + + \item[Partial reinforcement] \marginnote{Partial reinforcement} + The \acl{cs} is not always reinforced. + \begin{remark} + Learning is slower but the \acl{cr} is more resistant to extinction. + \end{remark} +\end{descriptionlist} + + +\subsection{Learning flexibility} + +\begin{description} + \item[Acquisition] \marginnote{Acquisition} + The probability of occurrence of a \acl{cr} increases if the \acl{cs} is presented with the \acl{us}. + + \item[Extinction] \marginnote{Extinction} + The probability of occurrence of a \acl{cr} decreases if the \acl{cs} is presented alone. +\end{description} + +\begin{remark} + Extinction does not imply forgetting. + After an association between \ac{cs} and \ac{us} is made, + extinction consists of creating a second association with inhibitory effects that overrides the existing association. 
+ + The extinguished association can return in the future + (this is more evident when the context is the same as in the acquisition phase). +\end{remark} + +\begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{./img/pavlovian_extinction.png} + \caption{Example of acquisition, extinction, and \ac{cr} return} +\end{figure} + +\begin{description} + \item[Generalization] \marginnote{Generalization} + A new stimulus that is similar to a learned \acl{cs} can elicit a \acl{cr}. +\end{description} + + + +\section{Instrumental learning} +\marginnote{Instrumental learning} + +Form of control learning that aims to learn action-outcome associations: +\begin{itemize} + \item When a reinforcer is likely to occur. + \item Which actions lead to those reinforcers. +\end{itemize} +This allows the animal to act in anticipation of a reinforcer. + +Depending on the outcome, the effect varies: +\begin{descriptionlist} + \item[Positive reinforcement] \marginnote{Positive reinforcement} + Delivering an appetitive outcome after an action increases the probability of emitting it. + + \item[Positive punishment] \marginnote{Positive punishment} + Delivering an aversive outcome after an action decreases the probability of emitting it. + + \item[Negative reinforcement] \marginnote{Negative reinforcement} + Omitting an aversive outcome after an action increases the probability of emitting it. + + \item[Negative punishment] \marginnote{Negative punishment} + Omitting an appetitive outcome after an action decreases the probability of emitting it. 
+\end{descriptionlist} + +\begin{table}[H] + \centering + \begin{tabular}{r|cc} + \toprule + & \textbf{Delivery} & \textbf{Omission} \\ + \midrule + \textbf{Appetitive} & Positive reinforcement (\texttt{+prob}) & Negative punishment (\texttt{-prob}) \\ + \textbf{Aversive} & Positive punishment (\texttt{-prob}) & Negative reinforcement (\texttt{+prob}) \\ + \bottomrule + \end{tabular} + \caption{Summary of the possible effects} +\end{table} + + +\subsection{Types of schedule} + +There are two types of schedule: +\begin{descriptionlist} + \item[Continuous schedule] \marginnote{Continuous schedule} + The desired action is followed by the outcome every time. + \begin{remark} + More effective to teach a new association. + \end{remark} + + \item[Partial schedule] \marginnote{Partial schedule} + The desired action is not always followed by the outcome. + \begin{remark} + Learning is slower but the response is more resistant to extinction. + \end{remark} + + There are four types of partial schedules: + \begin{descriptionlist} + \item[Fixed-ratio] + Outcome available after a specific number of responses. + + This results in a high and steady rate of response, with a brief pause after the outcome is delivered. + + + \item[Variable-ratio] + Outcome available after an unpredictable number of responses. + + This results in a high and steady rate of response. + + + \item[Fixed-interval] + Outcome available after a specific interval of time. + + This results in a high rate of response near the end of the interval and a slowdown after the outcome is delivered. + + + \item[Variable-interval] + Outcome available after an unpredictable interval of time. + + This results in a slow and steady rate of response. + \end{descriptionlist} +\end{descriptionlist} \ No newline at end of file