diff --git a/src/year1/image-processing-and-computer-vision/module2/sections/_architectures.tex b/src/year1/image-processing-and-computer-vision/module2/sections/_architectures.tex
index 4878bb4..044b549 100644
--- a/src/year1/image-processing-and-computer-vision/module2/sections/_architectures.tex
+++ b/src/year1/image-processing-and-computer-vision/module2/sections/_architectures.tex
@@ -683,7 +683,7 @@ It has the following properties:
         Variant of residual blocks that uses more layers with approximately the same number of parameters and FLOPs of the standard residual block.
         Instead of using two $3 \times 3$ convolutions, bottleneck residual network has the following structure:
         \begin{itemize}
-            \item $1 \times 1$ convolution to compress the channels of the input by an order of $4$ (and the spatial dimension by $2$ if it is the first block of a stage, as in the normal ResNet).
+            \item $1 \times 1$ convolution to compress the channels of the input by a factor of $4$ (and the spatial dimension by $2$ if it is the first block of a stage, as in normal ResNet).
             \item $3 \times 3$ convolution.
             \item $1 \times 1$ convolution to match the shape of the skip connection.
         \end{itemize}
diff --git a/src/year1/image-processing-and-computer-vision/module2/sections/_classification.tex b/src/year1/image-processing-and-computer-vision/module2/sections/_classification.tex
index 4685c1d..04c6631 100644
--- a/src/year1/image-processing-and-computer-vision/module2/sections/_classification.tex
+++ b/src/year1/image-processing-and-computer-vision/module2/sections/_classification.tex
@@ -271,7 +271,7 @@ where the $\texttt{logits} \in \mathbb{R}^{c}$ vector contains a score for each
 The prediction is obtained as the index of the maximum score.
 
 \begin{remark}
-    Predicting directly the integer encoded classes is not ideal as it would give a (probably) inexistent semantic ordering
+    Directly predicting the integer-encoded classes is not ideal as it would impose a (probably) nonexistent semantic ordering
     (e.g. if $2$ encodes bird and $3$ encodes cat, $2.5$ should not mean half bird and half cat).
 \end{remark}
 
diff --git a/src/year1/image-processing-and-computer-vision/module2/sections/_image_formation.tex b/src/year1/image-processing-and-computer-vision/module2/sections/_image_formation.tex
index a1a87ba..c73cffc 100644
--- a/src/year1/image-processing-and-computer-vision/module2/sections/_image_formation.tex
+++ b/src/year1/image-processing-and-computer-vision/module2/sections/_image_formation.tex
@@ -7,7 +7,7 @@
     \item[Camera reference frame (CRF)] \marginnote{Camera reference frame (CRF)}
         Coordinate system $(X_C, Y_C, Z_C)$ that characterizes a camera.
 
-    \item[Image reference frame (IRF)] \marginnote{Image reference frame}
+    \item[Image reference frame (IRF)] \marginnote{Image reference frame (IRF)}
         Coordinate system $(U, V)$ of the image.
         They are obtained as a perspective projection of CRF coordinates as:
         \[