diff --git a/src/year2/natural-language-processing/img/_instruction_tuning.pdf b/src/year2/natural-language-processing/img/_instruction_tuning.pdf
new file mode 100644
index 0000000..a970ccb
Binary files /dev/null and b/src/year2/natural-language-processing/img/_instruction_tuning.pdf differ
diff --git a/src/year2/natural-language-processing/img/_tuning_comparison1.pdf b/src/year2/natural-language-processing/img/_tuning_comparison1.pdf
new file mode 100644
index 0000000..a2b1d5c
Binary files /dev/null and b/src/year2/natural-language-processing/img/_tuning_comparison1.pdf differ
diff --git a/src/year2/natural-language-processing/img/_tuning_comparison2.pdf b/src/year2/natural-language-processing/img/_tuning_comparison2.pdf
new file mode 100644
index 0000000..c0b9701
Binary files /dev/null and b/src/year2/natural-language-processing/img/_tuning_comparison2.pdf differ
diff --git a/src/year2/natural-language-processing/img/induction_head.png b/src/year2/natural-language-processing/img/induction_head.png
new file mode 100644
index 0000000..71a0142
Binary files /dev/null and b/src/year2/natural-language-processing/img/induction_head.png differ
diff --git a/src/year2/natural-language-processing/img/prefix_tuning.png b/src/year2/natural-language-processing/img/prefix_tuning.png
new file mode 100644
index 0000000..8174649
Binary files /dev/null and b/src/year2/natural-language-processing/img/prefix_tuning.png differ
diff --git a/src/year2/natural-language-processing/img/rlhf.png b/src/year2/natural-language-processing/img/rlhf.png
new file mode 100644
index 0000000..d8e6359
Binary files /dev/null and b/src/year2/natural-language-processing/img/rlhf.png differ
diff --git a/src/year2/natural-language-processing/nlp.tex b/src/year2/natural-language-processing/nlp.tex
index 45bba82..ad82d12 100644
--- a/src/year2/natural-language-processing/nlp.tex
+++ b/src/year2/natural-language-processing/nlp.tex
@@ -16,5 +16,6 @@
     \include{./sections/_attention.tex}
     \include{./sections/_llm.tex}
     \include{./sections/_model_efficiency.tex}
+    \include{./sections/_llm_usage.tex}
 \end{document}
\ No newline at end of file
diff --git a/src/year2/natural-language-processing/sections/_llm_usage.tex b/src/year2/natural-language-processing/sections/_llm_usage.tex
new file mode 100644
index 0000000..4e3bb5b
--- /dev/null
+++ b/src/year2/natural-language-processing/sections/_llm_usage.tex
@@ -0,0 +1,77 @@
+\chapter{Language model alignment and applications}
+
+
+\section{Model alignment}
+
+\begin{remark}
+    Off-the-shelf pre-trained models tend to be good only at word completion: they are typically unable to follow instructions and might generate harmful content.
+\end{remark}
+
+
+\subsection{Instruction tuning}
+
+\begin{description}
+    \item[Instruction tuning] \marginnote{Instruction tuning}
+    Fine-tune a model on a dataset containing various tasks expressed in natural language in the form $(\text{description}, \text{examples}, \text{solution})$, each possibly formatted using multiple templates.
+
+    \begin{figure}[H]
+        \centering
+        \includegraphics[width=0.7\linewidth]{./img/_instruction_tuning.pdf}
+        \caption{Example of templates for entailment detection}
+    \end{figure}
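+
+    \begin{example}
+        Purely as an illustration of the format above (the template wording is not prescribed), an instruction-tuning instance for entailment detection might look like:
+        \begin{center}
+            \texttt{Premise: [P] Hypothesis: [H] Does the premise entail the hypothesis? [Z]}
+        \end{center}
+        where \texttt{[P]} and \texttt{[H]} are placeholders for the input pair and \texttt{[Z]} is the placeholder for the solution (e.g., \texttt{yes} or \texttt{no}).
+    \end{example}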
+
+    \begin{remark}
+        If performed correctly, instruction tuning also enables the model to solve tasks that were not present in the tuning dataset.
+    \end{remark}
+
+    \begin{figure}[H]
+        \centering
+        \begin{subfigure}[c]{0.34\linewidth}
+            \centering
+            \includegraphics[width=\linewidth]{./img/_tuning_comparison1.pdf}
+        \end{subfigure}
+        \hfill
+        \begin{subfigure}[c]{0.6\linewidth}
+            \centering
+            \includegraphics[width=\linewidth]{./img/_tuning_comparison2.pdf}
+        \end{subfigure}
+        \caption{Comparison of tuning approaches}
+    \end{figure}
+\end{description}
+
+
+\subsection{Preference alignment}
+
+\begin{description}
+    \item[Preference alignment] \marginnote{Preference alignment}
+    Align the output of a model with human values.
+
+    \item[Reinforcement learning with human feedback (RLHF)] \marginnote{Reinforcement learning with human feedback (RLHF)}
+    Align a language model using a policy-gradient reinforcement learning algorithm. The problem can be formulated as follows:
+    \begin{itemize}
+        \item The policy to learn represents the aligned model (i.e., a $\texttt{prompt} \mapsto \texttt{answer}$ model),
+        \item Prompts are the states,
+        \item Answers are the actions.
+    \end{itemize}
+    RLHF works as follows:
+    \begin{enumerate}
+        \item Start from a pre-trained language model that already works well.
+
+        \item Train a reward model $r_\theta$, which maps text sequences to rewards, on a human-annotated dataset. Its architecture is usually based on transformers.
+
+        \item Fine-tune the language model (i.e., train the policy) using an RL algorithm (e.g., PPO) and the learned reward model.
+
+        Given a prompt $x$ and an answer $y$, the reward $r$ used for the RL update is computed as:
+        \[ r = r_\theta(y \mid x) - \lambda_\text{KL} D_\text{KL}(\pi_{\text{PPO}}(y \mid x) \Vert \pi_{\text{base}}(y \mid x)) \]
+        where:
+        \begin{itemize}
+            \item $r_\theta(y \mid x)$ is the reward provided by the reward model.
+            \item $- \lambda_\text{KL} D_\text{KL}(\pi_{\text{PPO}}(y \mid x) \Vert \pi_{\text{base}}(y \mid x))$ is a penalty based on the Kullback-Leibler divergence that prevents the aligned model $\pi_\text{PPO}$ from drifting too far from the original model $\pi_\text{base}$ (i.e., it prevents the loss of language capabilities).
+        \end{itemize}
+        A small worked example of this computation is given right after these steps.
+    \end{enumerate}
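+
+    \begin{example}
+        As a purely illustrative computation (the numbers are made up), suppose that for a given pair $(x, y)$ the reward model assigns $r_\theta(y \mid x) = 2.0$, the KL divergence between the tuned and base policies is $D_\text{KL} = 0.5$, and $\lambda_\text{KL} = 0.2$. The reward used in the PPO update is then:
+        \[ r = 2.0 - 0.2 \cdot 0.5 = 1.9 \]
+        The further the tuned policy drifts from the base model, the larger the penalty and the lower the reward.
+    \end{example}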
+
+    \begin{figure}[H]
+        \centering
+        \includegraphics[width=0.6\linewidth]{./img/rlhf.png}
+    \end{figure}
+\end{description}
\ No newline at end of file
diff --git a/src/year2/natural-language-processing/sections/_model_efficiency.tex b/src/year2/natural-language-processing/sections/_model_efficiency.tex
index fed668b..12caed9 100644
--- a/src/year2/natural-language-processing/sections/_model_efficiency.tex
+++ b/src/year2/natural-language-processing/sections/_model_efficiency.tex
@@ -81,5 +81,72 @@
         \]
         In other words, each token in $V_\text{dom}$ is encoded as the average of embeddings of the tokens that compose it in the starting embedding model (if the token appear in both vocabularies, the embedding is the same).
     \end{description}
+\end{description}
+
+
+\section{In-context learning}
+
+\begin{description}
+    \item[Prompting] \marginnote{Prompting}
+    Pass a prompt to the language model to condition generation.
+
+    More formally, a prompt is defined by means of a prompting function $f_\text{prompt}(\cdot)$ that formats an input text $x$. $f_\text{prompt}$ typically has a slot for the input and a slot for the answer (e.g., the class in the case of classification). The prompt is then fed to the language model, which searches for the highest-scoring word $\hat{z}$ to fill the answer slot:
+    \[ \hat{z} = \arg\max_z \prob{ f_\text{fill}(f_\text{prompt}(x), z); \theta } \]
+    where $f_\text{fill}(f_\text{prompt}(x), z)$ inserts $z$ into the prompt. In other words, we are looking for the word that makes the model least perplexed.
+
+    \begin{example}
+        A prompt for sentiment analysis of movie reviews might be:
+        \begin{center}
+            \texttt{[X] Overall, it was a [Z] movie.}
+        \end{center}
+        where \texttt{[X]} is the placeholder for the review and \texttt{[Z]} is the placeholder for the class.
+    \end{example}
+
+    \begin{remark}
+        The prompt does not necessarily need to be text (i.e., discrete/hard prompts). Continuous/soft prompts (i.e., embeddings) can also be used to condition generation.
+    \end{remark}
+
+
+    \item[Zero-shot learning] \marginnote{Zero-shot learning}
+    Solve a task by providing a language model with a description of the problem in natural language.
+
+    \item[One-shot learning] \marginnote{One-shot learning}
+    Solve a task by providing a language model with a description of the problem in natural language and a single demonstration (i.e., an example).
+
+    \item[Few-shot learning] \marginnote{Few-shot learning}
+    Solve a task by providing a language model with a description of the problem in natural language and a few demonstrations.
+
+    \begin{remark}
+        Empirical results show that only a few demonstrations are needed; adding too many might even reduce performance.
+    \end{remark}
+\end{description}
+
+\begin{remark}
+    Some studies suggest that in-context learning can be explained by causal attention having an effect similar to gradient updates (i.e., the left part of the prompt influences the right part).
+
+    Another possible explanation is based on induction heads, attention heads that specialize in predicting repeated sequences (i.e., in-context learning is seen as the ability to imitate past data). Ablation studies show that identifying and removing induction heads drastically reduces a model's in-context learning performance.
+
+    \begin{figure}[H]
+        \centering
+        \includegraphics[width=0.9\linewidth]{./img/induction_head.png}
+        \caption{Example of an induction head}
+    \end{figure}
+\end{remark}
+
+\begin{description}
+    \item[Prefix-tuning] \marginnote{Prefix-tuning}
+    Soft prompting technique that learns task-specific prefix embeddings to prepend to the prompt while keeping the rest of the model frozen.
+
+    \begin{figure}[H]
+        \centering
+        \includegraphics[width=0.65\linewidth]{./img/prefix_tuning.png}
+    \end{figure}
+
+    \item[Chain-of-thought prompting] \marginnote{Chain-of-thought prompting}
+    Provide examples of reasoning in the prompt so that the model produces its output step by step.
+
+    \begin{remark}
+        Empirical results show that the most effective chain-of-thought trigger is to simply add \texttt{think step by step} to the prompt.
+    \end{remark}
 \end{description}
\ No newline at end of file