From 6b9393ac888787da07ed1207c2f01827226d7588 Mon Sep 17 00:00:00 2001
From: NotXia <35894453+NotXia@users.noreply.github.com>
Date: Sat, 16 Nov 2024 09:52:36 +0100
Subject: [PATCH] Fix scaling laws

---
 src/year2/natural-language-processing/sections/_llm.tex | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/year2/natural-language-processing/sections/_llm.tex b/src/year2/natural-language-processing/sections/_llm.tex
index adc57b8..b562352 100644
--- a/src/year2/natural-language-processing/sections/_llm.tex
+++ b/src/year2/natural-language-processing/sections/_llm.tex
@@ -143,12 +143,13 @@
         \end{itemize}
         By keeping two of the three factors constant, the loss $\mathcal{L}$ of an LLM can be estimated as a function of the third variable:
         \[ 
-            \mathcal{L}(N) = \left( \frac{N_c}{N} \right)^{\alpha N} 
+            \mathcal{L}(N) = \left( \frac{N_c}{N} \right)^{\alpha_N} 
             \qquad
-            \mathcal{L}(D) = \left( \frac{D_c}{D} \right)^{\alpha D} 
+            \mathcal{L}(D) = \left( \frac{D_c}{D} \right)^{\alpha_D} 
             \qquad
-            \mathcal{L}(C) = \left( \frac{C_c}{C} \right)^{\alpha C} 
+            \mathcal{L}(C) = \left( \frac{C_c}{C} \right)^{\alpha_C} 
         \]
+        where $N_c$, $D_c$, $C_c$, $\alpha_N$, $\alpha_D$, and $\alpha_C$ are empirically determined constants whose values depend on the model architecture.
 \end{description}