Fix typos <noupdate>

2026-02-04 07:41:43 +01:00 · 2025-01-21 20:16:46 +01:00
parent c9716b60ac
commit 62507994b7
1 changed files with 3 additions and 3 deletions
--- a/src/year2/machine-learning-for-computer-vision/sections/_optimizers.tex
+++ b/src/year2/machine-learning-for-computer-vision/sections/_optimizers.tex
@@ -262,11 +262,11 @@ Methods that also consider the second-order derivatives when determining the ste

        Finally, the update is defined as:
        \[
-            \vec{\theta}^{(t+1)} = \vec{\theta}^{(t)} - \frac{\texttt{lr}}{\sqrt{s^{(t)}_{\text{debiased}}} + \varepsilon} \odot g^{(t)}_{\text{debiased}}
+            \vec{\theta}^{(t+1)} = \vec{\theta}^{(t)} - \frac{\texttt{lr}}{\sqrt{\vec{s}^{(t)}_{\text{debiased}}} + \varepsilon} \odot \vec{g}^{(t)}_{\text{debiased}}
        \]

        \begin{remark}
-            It can be shown that $\frac{g^{(t)}_{\text{debiased}}}{\sqrt{s^{(t)}_{\text{debiased}}}}$ has a bounded domain, making it more controlled than RMSProp.
+            It can be shown that $\frac{\vec{g}^{(t)}_{\text{debiased}}}{\sqrt{\vec{s}^{(t)}_{\text{debiased}}}}$ takes values in a bounded range, making the update more controlled than in RMSProp.
        \end{remark}

        \begin{figure}[H]
@@ -311,7 +311,7 @@ Methods that also consider the second-order derivatives when determining the ste
    \item[Adam with weight decay (AdamW)] \marginnote{Adam with weight decay (AdamW)} 
        Modification of Adam's update rule to include weight decay:
        \[ 
-            \vec{\theta}^{(t+1)} = \vec{\theta}^{(t)} - \frac{\texttt{lr}}{\sqrt{s^{(t)}_{\text{debiased}}} + \varepsilon} \odot g^{(t)}_{\text{debiased}} - \lambda\vec{\theta}^{(t)}
+            \vec{\theta}^{(t+1)} = \vec{\theta}^{(t)} - \frac{\texttt{lr}}{\sqrt{\vec{s}^{(t)}_{\text{debiased}}} + \varepsilon} \odot \vec{g}^{(t)}_{\text{debiased}} - \lambda\vec{\theta}^{(t)}
        \]

        \begin{remark}