diff --git a/src/year2/natural-language-processing/sections/_llm.tex b/src/year2/natural-language-processing/sections/_llm.tex
index ee8619a..5d3ff39 100644
--- a/src/year2/natural-language-processing/sections/_llm.tex
+++ b/src/year2/natural-language-processing/sections/_llm.tex
@@ -100,7 +100,7 @@
     \item Higher temperatures (i.e., $\tau > 1$) allow for considering low-probability words.
     \item Lower temperatures (i.e., $\tau \in (0, 1]$) focus on high-probability words.
 
         \begin{remark}
-            A temperature of $\tau = 0$ corresponds to greedy decoding.
+            For $\tau \rightarrow 0$, generation becomes closer to greedy decoding.
         \end{remark}
 \end{itemize}
@@ -175,7 +175,7 @@
             Add a new trainable head on top of the model.
 
         \item[Parameter-efficient fine-tuning (PEFT)] \marginnote{Parameter-efficient fine-tuning (PEFT)}
-            Continue training a selected subset of parameters (e.g., LoRA \Cref{sec:lora}).
+            Continue training a selected subset of parameters (e.g., LoRA in \Cref{sec:lora}).
     \end{description}
 
     \item[Supervised fine-tuning] \marginnote{Supervised fine-tuning}
diff --git a/src/year2/natural-language-processing/sections/_llm_usage.tex b/src/year2/natural-language-processing/sections/_llm_usage.tex
index e500433..6f83e5b 100644
--- a/src/year2/natural-language-processing/sections/_llm_usage.tex
+++ b/src/year2/natural-language-processing/sections/_llm_usage.tex
@@ -12,7 +12,7 @@
 
 \begin{description}
     \item[Instruction tuning] \marginnote{Instruction tuning}
-        Fine-tune a model on a dataset containing various tasks expressed in natural language in the form $(\text{description}, \text{examples}, \text{solution})$, all possibly formatted using multiple templates.
+        Fine-tune a model on a dataset containing various tasks expressed in natural language in the form $(\text{description}, \text{examples}, \text{solution})$, all usually formatted using multiple templates.
 
         \begin{figure}[H]
             \centering
@@ -61,7 +61,7 @@
 
     \item Fine-tune the language model (i.e., train the policy) using an RL algorithm (e.g., PPO) and the learned reward model.
 
-        Given a prompt $x$ and an answer $y$, the reward $r$ used for the RL update is computed as:
+        Given a prompt $x$ and an answer $y$, the reward $r$ used for RL update is computed as:
         \[ r = r_\theta(y \mid x) - \lambda_\text{KL} D_\text{KL}(\pi_{\text{PPO}}(y \mid x) \Vert \pi_{\text{base}}(y \mid x)) \]
         where:
         \begin{itemize}
diff --git a/src/year2/natural-language-processing/sections/_mlm.tex b/src/year2/natural-language-processing/sections/_mlm.tex
deleted file mode 100644
index e69de29..0000000
diff --git a/src/year2/natural-language-processing/sections/_rag.tex b/src/year2/natural-language-processing/sections/_rag.tex
index 07a7a6d..dcd37b2 100644
--- a/src/year2/natural-language-processing/sections/_rag.tex
+++ b/src/year2/natural-language-processing/sections/_rag.tex
@@ -159,7 +159,7 @@
 
         \begin{description}
             \item[Mean average precision] \marginnote{Mean average precision}
                 Average AP over different queries $Q$:
-                \[ \texttt{MAP}_t = \frac{1}{|Q|} \sum_{q \in Q} \texttt{AP}_t(q) \]
+                \[ \texttt{mAP}_t = \frac{1}{|Q|} \sum_{q \in Q} \texttt{AP}_t(q) \]
         \end{description}
 \end{description}
@@ -267,10 +267,10 @@
 
     \item[Mean reciprocal rank] \marginnote{Mean reciprocal rank}
         Given a system that provides a ranked list of answers to a question $q_i$, the reciprocal rank for $q_i$ is:
-        \[ \frac{1}{\texttt{rank}_i} \]
+        \[ \texttt{RR} = \frac{1}{\texttt{rank}_i} \]
         where $\texttt{rank}_i$ is the index of the first correct answer in the provided ranked list.
 
         Mean reciprocal rank is computed over a set of queries $Q$:
-        \[ \texttt{MRR} = \frac{1}{|Q|} \sum_{i=1}^{|Q|} \frac{1}{\texttt{rank}_i} \]
+        \[ \texttt{mRR} = \frac{1}{|Q|} \sum_{i=1}^{|Q|} \frac{1}{\texttt{rank}_i} \]
 
 \end{description}
@@ -312,7 +312,7 @@
 \end{description}
 
 \begin{remark}
-    The current trend is to evaluate RAG performances with another LLM.
+    The current trend is to evaluate RAG performance with another LLM.
 \end{remark}
 
 \begin{remark}
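
Note on the temperature hunk in _llm.tex: the reworded remark says generation approaches greedy decoding as $\tau \rightarrow 0$. A minimal Python sketch of temperature scaling, illustrative only and not part of the patch (the logit values are made up), shows why:

import numpy as np

def temperature_softmax(logits, tau):
    """Scale next-token logits by 1/tau and normalize to a probability distribution."""
    z = np.asarray(logits, dtype=float) / tau
    z -= z.max()              # subtract the max for numerical stability
    p = np.exp(z)
    return p / p.sum()

logits = np.array([2.0, 1.0, 0.5, -1.0])   # made-up next-token scores
for tau in (2.0, 1.0, 0.1):
    print(tau, temperature_softmax(logits, tau).round(3))
# As tau -> 0 the mass concentrates on argmax(logits), so sampling behaves
# like greedy decoding; tau > 1 flattens the distribution instead.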
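
Note on the reward hunk in _llm_usage.tex: the displayed reward combines the reward-model score with a KL penalty toward the base policy. A sketch under the common assumption that the KL term is estimated from per-token log-probabilities of the sampled answer; every name and number below is hypothetical:

import numpy as np

def penalized_reward(reward_model_score, logp_ppo, logp_base, lambda_kl=0.1):
    """KL-penalized reward: r = r_theta(y|x) - lambda_KL * KL(pi_PPO || pi_base).

    The KL term is approximated by summing log pi_PPO(y_t) - log pi_base(y_t)
    over the generated tokens (a standard single-sample estimate).
    """
    kl_estimate = float(np.sum(np.asarray(logp_ppo) - np.asarray(logp_base)))
    return reward_model_score - lambda_kl * kl_estimate

# Hypothetical per-token log-probabilities of the answer y under both policies.
logp_ppo  = [-0.9, -1.2, -0.4]
logp_base = [-1.1, -1.0, -0.7]
print(penalized_reward(reward_model_score=2.3, logp_ppo=logp_ppo, logp_base=logp_base))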
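
Note on the _rag.tex hunks: they only adjust the notation of the retrieval metrics. As a small sanity check, the $\texttt{mAP}_t$ and reciprocal-rank formulas can be computed from binary relevance lists as sketched below; one common convention for $\texttt{AP}_t$ is assumed, and the example rankings are invented:

def average_precision(relevances, t):
    """AP@t: mean of precision@k over the ranks k <= t where a relevant item appears."""
    hits, precisions = 0, []
    for k, rel in enumerate(relevances[:t], start=1):
        if rel:
            hits += 1
            precisions.append(hits / k)
    return sum(precisions) / len(precisions) if precisions else 0.0

def mean_average_precision(runs, t):
    """mAP@t: average of AP@t over the queries."""
    return sum(average_precision(r, t) for r in runs) / len(runs)

def mean_reciprocal_rank(runs):
    """Mean reciprocal rank: average of 1/rank_i of the first relevant item (0 if none)."""
    rrs = []
    for r in runs:
        rank = next((k for k, rel in enumerate(r, start=1) if rel), None)
        rrs.append(1.0 / rank if rank else 0.0)
    return sum(rrs) / len(rrs)

runs = [[1, 0, 1, 0], [0, 0, 1, 1]]   # invented relevance lists for two queries
print(mean_average_precision(runs, t=4), mean_reciprocal_rank(runs))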