Fix typos <noupdate>
@@ -100,7 +100,7 @@
\item Higher temperatures (i.e., $\tau > 1$) allow for considering low-probability words.
\item Lower temperatures (i.e., $\tau \in (0, 1]$) focus on high-probability words.
\begin{remark}
A temperature of $\tau = 0$ corresponds to greedy decoding.
For $\tau \rightarrow 0$, generation becomes closer to greedy decoding.
\end{remark}
\end{itemize}
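As a concrete illustration of the temperature parameter, below is a minimal sampling sketch in Python/NumPy; the function name, the example logits, and the explicit handling of $\tau = 0$ as greedy decoding are assumptions made for the example, not something stated in the notes.

    import numpy as np

    def sample_token(logits, temperature=1.0, rng=None):
        """Sample a token index from logits rescaled by the temperature."""
        rng = rng or np.random.default_rng()
        if temperature == 0.0:
            return int(np.argmax(logits))          # degenerate case: greedy decoding
        scaled = np.asarray(logits, dtype=float) / temperature
        scaled -= scaled.max()                     # for numerical stability
        probs = np.exp(scaled) / np.exp(scaled).sum()
        return int(rng.choice(len(probs), p=probs))

    logits = [2.0, 1.0, 0.1]
    print(sample_token(logits, temperature=0.2))   # almost always the highest-probability token
    print(sample_token(logits, temperature=2.0))   # low-probability tokens are sampled more often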
@@ -175,7 +175,7 @@
Add a new trainable head on top of the model.

\item[Parameter-efficient fine-tuning (PEFT)] \marginnote{Parameter-efficient fine-tuning (PEFT)}
Continue training a selected subset of parameters (e.g., LoRA \Cref{sec:lora}).
Continue training a selected subset of parameters (e.g., LoRA in \Cref{sec:lora}).
\end{description}

\item[Supervised fine-tuning] \marginnote{Supervised fine-tuning}
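To make the PEFT idea concrete, here is a rough NumPy sketch of a LoRA-style adapter on a single linear layer: the pretrained weight is kept frozen and only a low-rank correction is trained. The names, shapes, and scaling convention below are illustrative assumptions.

    import numpy as np

    d_in, d_out, rank, alpha = 16, 16, 4, 8.0

    W = np.random.randn(d_out, d_in)            # pretrained weight, kept frozen
    A = np.random.randn(rank, d_in) * 0.01      # trainable low-rank factor
    B = np.zeros((d_out, rank))                 # trainable, zero-initialised so training starts from W

    def lora_forward(x):
        """Linear layer with a LoRA adapter: (W + (alpha / rank) * B A) x."""
        return W @ x + (alpha / rank) * (B @ (A @ x))

    y = lora_forward(np.random.randn(d_in))     # only A and B would be updated during fine-tuning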
@@ -12,7 +12,7 @@

\begin{description}
\item[Instruction tuning] \marginnote{Instruction tuning}
Fine-tune a model on a dataset containing various tasks expressed in natural language in the form $(\text{description}, \text{examples}, \text{solution})$, all possibly formatted using multiple templates.
Fine-tune a model on a dataset containing various tasks expressed in natural language in the form $(\text{description}, \text{examples}, \text{solution})$, all usually formatted using multiple templates.

\begin{figure}[H]
\centering
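One possible way to render a $(\text{description}, \text{examples}, \text{solution})$ triple into a training prompt is sketched below; the template is a single hypothetical choice among the multiple templates mentioned above.

    def format_sample(description, examples, solution):
        """Render one (description, examples, solution) triple with one hypothetical template."""
        examples_text = "\n".join(f"- {e}" for e in examples)
        return (f"Task: {description}\n"
                f"Examples:\n{examples_text}\n"
                f"Answer: {solution}")

    print(format_sample(
        description="Translate the sentence from English to Italian.",
        examples=["hello -> ciao"],
        solution="good morning -> buongiorno",
    ))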
@@ -61,7 +61,7 @@

\item Fine-tune the language model (i.e., train the policy) using an RL algorithm (e.g., PPO) and the learned reward model.

Given a prompt $x$ and an answer $y$, the reward $r$ used for the RL update is computed as:
Given a prompt $x$ and an answer $y$, the reward $r$ used for RL update is computed as:
\[ r = r_\theta(y \mid x) - \lambda_\text{KL} D_\text{KL}(\pi_{\text{PPO}}(y \mid x) \Vert \pi_{\text{base}}(y \mid x)) \]
where:
\begin{itemize}
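A numerical sketch of the KL-penalised reward above: the reward-model score, the per-token log-probabilities, and $\lambda_\text{KL}$ are placeholder values, and the KL term is approximated with the common single-sample estimate (sum of per-token log-probability ratios), which is an assumption not stated in the notes.

    import numpy as np

    def kl_penalised_reward(rm_score, logp_ppo, logp_base, lambda_kl=0.1):
        """r = r_theta(y|x) - lambda_KL * KL(pi_PPO || pi_base), with the KL term
        approximated by the sum of per-token log-probability ratios of the sampled answer."""
        kl_estimate = float(np.sum(np.asarray(logp_ppo) - np.asarray(logp_base)))
        return rm_score - lambda_kl * kl_estimate

    # Placeholder values for one (prompt, answer) pair.
    rm_score = 1.3                    # r_theta(y | x) from the reward model
    logp_ppo = [-0.2, -0.5, -0.1]     # log pi_PPO(y_t | x, y_<t) for each generated token
    logp_base = [-0.3, -0.4, -0.2]    # log pi_base(y_t | x, y_<t)
    print(kl_penalised_reward(rm_score, logp_ppo, logp_base))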
@@ -159,7 +159,7 @@
\begin{description}
\item[Mean average precision] \marginnote{Mean average precision}
Average AP over different queries $Q$:
\[ \texttt{MAP}_t = \frac{1}{|Q|} \sum_{q \in Q} \texttt{AP}_t(q) \]
\[ \texttt{mAP}_t = \frac{1}{|Q|} \sum_{q \in Q} \texttt{AP}_t(q) \]
\end{description}
\end{description}
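A small sketch of mAP over binary relevance judgements; it assumes one common definition of $\texttt{AP}_t$ (mean precision at the positions of the relevant results within the top $t$), which may differ from the exact definition used earlier in the notes.

    import numpy as np

    def average_precision(relevances, t):
        """AP_t for one query, from the binary relevance of its ranked results
        (mean precision at the positions of the relevant results within the top t)."""
        rel = np.asarray(relevances[:t])
        if rel.sum() == 0:
            return 0.0
        precision_at_k = np.cumsum(rel) / np.arange(1, len(rel) + 1)
        return float(precision_at_k[rel == 1].mean())

    def mean_average_precision(relevances_per_query, t):
        """mAP_t: average of AP_t over all queries in Q."""
        return float(np.mean([average_precision(r, t) for r in relevances_per_query]))

    # Binary relevance judgements of the top-4 results for two hypothetical queries.
    print(mean_average_precision([[1, 0, 1, 0], [0, 1, 1, 1]], t=4))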
@@ -267,10 +267,10 @@

\item[Mean reciprocal rank] \marginnote{Mean reciprocal rank}
Given a system that provides a ranked list of answers to a question $q_i$, the reciprocal rank for $q_i$ is:
\[ \frac{1}{\texttt{rank}_i} \]
\[ \texttt{RR} = \frac{1}{\texttt{rank}_i} \]
where $\texttt{rank}_i$ is the index of the first correct answer in the provided ranked list.
Mean reciprocal rank is computed over a set of queries $Q$:
\[ \texttt{MRR} = \frac{1}{|Q|} \sum_{i=1}^{|Q|} \frac{1}{\texttt{rank}_i} \]
\[ \texttt{mRR} = \frac{1}{|Q|} \sum_{i=1}^{|Q|} \frac{1}{\texttt{rank}_i} \]

\end{description}
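The mRR formula translates directly into code; ranks are assumed to be 1-based, and queries with no correct answer are given a reciprocal rank of 0 (an assumption made for the example).

    def mean_reciprocal_rank(first_correct_ranks):
        """mRR over a set of queries, given the 1-based rank of the first correct
        answer for each query (None if no correct answer was returned)."""
        reciprocals = [0.0 if r is None else 1.0 / r for r in first_correct_ranks]
        return sum(reciprocals) / len(reciprocals)

    # First correct answer at ranks 1, 3 and 2 for three hypothetical queries.
    print(mean_reciprocal_rank([1, 3, 2]))   # (1 + 1/3 + 1/2) / 3 ≈ 0.61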
@@ -312,7 +312,7 @@
\end{description}

\begin{remark}
The current trend is to evaluate RAG performances with another LLM.
The current trend is to evaluate RAG performance with another LLM.
\end{remark}

\begin{remark}