diff --git a/src/year1/image-processing-and-computer-vision/module2/sections/_architectures.tex b/src/year1/image-processing-and-computer-vision/module2/sections/_architectures.tex
index 044b549..beea0f5 100644
--- a/src/year1/image-processing-and-computer-vision/module2/sections/_architectures.tex
+++ b/src/year1/image-processing-and-computer-vision/module2/sections/_architectures.tex
@@ -599,7 +599,7 @@ Different modules are used depending on the activation shape.
 \subsection{Inception-v4}
 \marginnote{Inception-v4}
 
-A larger version of Inception v3 with more complicated stem layers.
+A larger version of Inception-v3 with more complex stem layers.
 
@@ -660,7 +660,7 @@ It has the following properties:
     \item A stage is composed of residual blocks.
     \item A residual block is composed of two $3 \times 3$ convolutions followed by batch normalization.
     \item The first residual block of each stage halves the spatial dimension and doubles the number of channels (there is no pooling).
-    \item Stem layers are less aggressive than GoogLeNet (\texttt{conv + pool}. Input reduced to $56 \times 56$).
+    \item Stem layers are less aggressive than GoogLeNet's (\texttt{conv + pool}; input reduced to $56 \times 56$).
     \item Global average pooling is used instead of flattening.
 \end{itemize}
@@ -696,7 +696,7 @@ It has the following properties:
 \end{description}
 
 \begin{remark}
-    ResNet improves the results of a deeper layer but beyond a certain depth, the gain is negligible.
+    Making a ResNet deeper improves results but, beyond a certain depth, the gain is negligible.
     \begin{figure}[H]
         \centering
         \includegraphics[width=0.65\linewidth]{./img/resnet_results.png}
diff --git a/src/year1/image-processing-and-computer-vision/module2/sections/_image_formation.tex b/src/year1/image-processing-and-computer-vision/module2/sections/_image_formation.tex
index c73cffc..0a828e2 100644
--- a/src/year1/image-processing-and-computer-vision/module2/sections/_image_formation.tex
+++ b/src/year1/image-processing-and-computer-vision/module2/sections/_image_formation.tex
@@ -621,7 +621,7 @@ Therefore, the complete workflow for image formation becomes the following:
     \item[Homographies non-linear refinement]
         The homographies $\matr{H}_i$ estimated at the previous step are obtained using a linear method and need to be refined as, for each image $i$, the IRF coordinates
         $\matr{H}_i\vec{w}_j = \left( \frac{h_{i, 1}^T \tilde{\vec{w}}_j}{h_{i, 3}^T \tilde{\vec{w}}_j}, \frac{h_{i, 2}^T \tilde{\vec{w}}_j}{h_{i, 3}^T \tilde{\vec{w}}_j} \right)$
-        of the world point $\vec{w}_j$ are still not matching the known IRF coordinates $\vec{m}_{i,j}$ of the $j$-corner in the $i$-image.
+        of the world point $\vec{w}_j$ still do not match the known IRF coordinates $\vec{m}_{i,j}$ of the $j$-th corner in the $i$-th image.
         \begin{figure}[H]
             \centering
             \includegraphics[width=0.7\linewidth]{./img/_homography_refinement.pdf}
@@ -989,7 +989,7 @@ Undistorted images enjoy some properties:
 \begin{example}[Compensate pitch or yaw]
     In autonomous driving, cameras should be ideally mounted with the optical axis parallel to the road plane and aligned with the direction of motion.
-    It is usually very difficult to obtain perfect alignment physically
+    It is usually very difficult to obtain perfect physical alignment
     but a calibrated camera can help to compensate pitch (i.e. rotation around the $x$-axis) and yaw (i.e. rotation around the $y$-axis)
     by estimating the vanishing point of the lane lines.
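
The ResNet hunk above describes the block structure only in prose. As a companion (not part of the patch), here is a minimal PyTorch sketch of the residual block those bullets describe: two 3x3 convolutions each followed by batch normalization, with the first block of a stage using stride 2 to halve the spatial size and double the channels. The class name BasicBlock and the 1x1 projection shortcut are illustrative choices, not taken from the notes.

import torch
import torch.nn as nn

class BasicBlock(nn.Module):
    """Residual block per the notes: two 3x3 convs, each followed by
    batch normalization, plus a shortcut connection."""

    def __init__(self, in_channels: int, out_channels: int, stride: int = 1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        # The first block of a stage halves H and W (stride 2) and doubles
        # the channels, so the shortcut needs a 1x1 projection to match.
        self.shortcut = nn.Identity()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        return self.relu(out + self.shortcut(x))

# First block of a stage: spatial size halves, channels double.
x = torch.randn(1, 64, 56, 56)
block = BasicBlock(64, 128, stride=2)
print(block(x).shape)  # torch.Size([1, 128, 28, 28])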
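The homography-refinement hunk states that the linear estimates must be refined because the reprojected corners do not match the detected ones; the quantity being minimized is the reprojection error. A sketch of the standard least-squares objective in the notes' notation (assuming the \matr and \vec macros and amsmath are available; this formulation, typically solved with Levenberg-Marquardt as in Zhang's calibration method, is my completion and is not quoted from the patched file):

% Non-linear refinement: for each image i, minimize the reprojection
% error over all corners j (typically with Levenberg-Marquardt).
\begin{equation*}
    \matr{H}_i^{*} = \operatorname*{arg\,min}_{\matr{H}_i} \sum_{j} \left\|
        \vec{m}_{i,j} - \left(
            \frac{h_{i, 1}^T \tilde{\vec{w}}_j}{h_{i, 3}^T \tilde{\vec{w}}_j},
            \frac{h_{i, 2}^T \tilde{\vec{w}}_j}{h_{i, 3}^T \tilde{\vec{w}}_j}
        \right)
    \right\|^2
\end{equation*}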
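The pitch/yaw example says a calibrated camera can compensate these rotations from the vanishing point of the lane lines, but does not spell out the relation. A hedged sketch in my own notation (not from the notes, and up to sign conventions): with principal point $(u_0, v_0)$, focal lengths $(f_u, f_v)$, and the lane lines' vanishing point detected at pixel $(u, v)$, perfect alignment would place the vanishing point at the principal point, and for small rotations the residual yaw $\psi$ and pitch $\theta$ are approximately

% Small-angle estimates of the residual rotations from the offset of the
% lane lines' vanishing point with respect to the principal point.
\begin{equation*}
    \psi \approx \frac{u - u_0}{f_u}
    \qquad
    \theta \approx \frac{v_0 - v}{f_v}
\end{equation*}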