Fix typos <noupdate>
@@ -5,7 +5,7 @@
 
 \begin{description}
 \item[Stem layer] \marginnote{Stem layer}
-First convolutional layer of a CNN that aims to reduce the spatial size of the activations for memory and computational purposes
+First convolutional layer(s) of a CNN that aims to reduce the spatial size of the activations for memory and computational purposes
 but also to rapidly increase the receptive field.
 
 \item[Model parallelism] \marginnote{Model parallelism}
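Note: to see both effects of the stem at once, a minimal worked equation, assuming the usual output-size formula and a typical stem configuration (a $7 \times 7$ convolution with stride $2$ and padding $3$ on a $224 \times 224$ input; the configuration is an assumption, not stated in this hunk):
\[
W_\text{out} = \left\lfloor \frac{W_\text{in} - K + 2P}{S} \right\rfloor + 1 = \left\lfloor \frac{224 - 7 + 6}{2} \right\rfloor + 1 = 112
\]
One such layer quarters the number of spatial positions while every output already sees a $7 \times 7$ window of the input.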
@@ -299,6 +299,7 @@ The authors constrained the layers to:
 
 On the other hand, two activations are computed and both need to be stored for backpropagation.
 
+\indenttbox
 \begin{example}
 \phantom{}
 \begin{center}
@@ -451,7 +452,7 @@ Network that aims to optimize computing resources.
 \begin{description}
 \item[Stem layers]
 Down-sample the image from a shape of 224 to 28.
-As in ZFNet, multiple layers are used (5) and the largest convolution is of shape $7 \times 7$ and stride $2$.
+As in ZFNet, multiple layers are used (5) and the largest convolution is of shape $7 \times 7$ with stride $2$.
 
 \item[Inception module] \marginnote{Inception module}
 Main component of Inception-v1 that computes multiple convolutions on the input.
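Note: a sketch of the $224 \to 28$ claim, assuming (as in the published Inception-v1/GoogLeNet stem) that three of the five layers have stride $2$, namely the $7 \times 7$ convolution and two max-pooling layers:
\[
224 \xrightarrow{/2} 112 \xrightarrow{/2} 56 \xrightarrow{/2} 28
\]
i.e. an overall spatial reduction by a factor of $2^3 = 8$.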
@@ -621,7 +622,7 @@ A larger version of Inception v3 with more complicated stem layers.
 
 \begin{description}
 \item[Standard residual block] \marginnote{Standard residual block}
-Block that allows to easily learn the identity function through skip connections.
+Block that allows to easily learn the identity function through a skip connection.
 The output of a residual block with input $x$ and a series of convolutional layers $F$ is:
 \[ F(x; \matr{\theta}) + x \]
 
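Note: a one-line sketch of why the skip connection makes the identity easy to learn and keeps gradients alive; differentiating the block output w.r.t. its input ($\matr{I}$ is the identity matrix):
\[
\frac{\partial}{\partial x} \big( F(x; \matr{\theta}) + x \big) = \frac{\partial F}{\partial x} + \matr{I}
\]
Driving $F \approx 0$ recovers the identity function, and the $+ \matr{I}$ term propagates gradients even where $\partial F / \partial x$ is small.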
@@ -779,7 +780,7 @@ It has the following properties:
 \item The majority of the possible paths have a length of $\sim 30$.
 \item The gradient magnitude is significant at the first layers (i.e. in shorter paths).
 \end{itemize}
-By multiplying values of two points above, results show that the total gradient magnitude is significant only up until paths of length $\sim 20$.
+By multiplying the values of the two points above, results show that the total gradient magnitude is significant only up until paths of length $\sim 20$.
 
 \begin{figure}[H]
 \centering
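Note: a hedged sketch of the multiplication being described, assuming $n$ residual blocks where each path independently takes or skips each block: paths of length $k$ number $\binom{n}{k}$, so the total gradient contributed at length $k$ is roughly
\[
\binom{n}{k} \cdot \bar{g}(k)
\]
where $\bar{g}(k)$ is the average gradient magnitude of a length-$k$ path. $\binom{n}{k}$ peaks near $k = n/2$ while $\bar{g}(k)$ decays with $k$, so the product is significant only for the shorter paths.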
@@ -291,7 +291,7 @@ The prediction is obtained as the index of the maximum score.
 
 \begin{figure}[H]
 \centering
-\includegraphics[width=0.45\linewidth]{./img/data_representation_linear.png}
+\includegraphics[width=0.40\linewidth]{./img/data_representation_linear.png}
 \caption{
 \parbox[t]{0.6\linewidth}{
 Example of non-linearly separable data points that become linearly separable in polar coordinates
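Note: the change of representation is presumably the standard Cartesian-to-polar map
\[
(x, y) \mapsto (\rho, \theta) = \left( \sqrt{x^2 + y^2},\ \operatorname{atan2}(y, x) \right)
\]
under which, for instance, classes lying on concentric rings become horizontal bands, separable by a line $\rho = c$.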
@@ -386,12 +386,12 @@ The prediction is obtained as the index of the maximum score.
 
 \begin{figure}[H]
 \centering
-\begin{subfigure}{0.55\linewidth}
+\begin{subfigure}{0.6\linewidth}
 \centering
 \includegraphics[width=\linewidth]{./img/relu_separability_1.png}
 \end{subfigure}
 
-\begin{subfigure}{0.55\linewidth}
+\begin{subfigure}{0.6\linewidth}
 \centering
 \includegraphics[width=\linewidth]{./img/relu_separability_2.png}
 \end{subfigure}
@@ -442,7 +442,7 @@ Image filtering can be implemented through:
 
 Given an image of size $H \times W$, a convolution requires:
 \begin{itemize}
-\item $2$ parameters.
+\item $2$ parameters (in the case of edge detection).
 \item $3 (H \cdot (W-1)) \approx 3HW$ FLOPs.
 \end{itemize}
 
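Note: a worked check of the count, assuming a two-tap edge-detection kernel such as $[-1, +1]$ slid horizontally: each of the $H \cdot (W - 1)$ positions needs $2$ multiplications and $1$ addition, so for $H = W = 224$:
\[
3 (H \cdot (W - 1)) = 3 \cdot 224 \cdot 223 = 149\,856 \approx 3HW \approx 150\text{k FLOPs}
\]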
@@ -469,7 +469,7 @@ Image filtering can be implemented through:
 
 \begin{description}
 \item[Multi-channel convolution] \marginnote{Multi-channel convolution}
-On inputs with multiple channels (i.e. 3D inputs), different 2D convolutions are applied across the different channels.
+On inputs with multiple channels (e.g. RGB images), different 2D convolutions are applied across the different channels.
 
 Given a $C_\text{in} \times H_\text{in} \times W_\text{in}$ image $I$, a convolution kernel $K$ will have shape $C_\text{in} \times H_K \times W_K$
 and the output activation at each pixel is computed as:
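Note: the hunk cuts off before the formula itself; in its standard form (a sketch whose indexing may differ from the notes), the activation at output pixel $(i, j)$ is
\[
O(i, j) = \sum_{c=1}^{C_\text{in}} \sum_{u=1}^{H_K} \sum_{v=1}^{W_K} K(c, u, v) \cdot I(c, i + u, j + v)
\]
i.e. a single dot product between the kernel and the $C_\text{in} \times H_K \times W_K$ input window anchored at the pixel.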
@@ -84,7 +84,7 @@ is done in two steps:
 \marginnote{Roto-translation}
 
 The conversion from the world reference system to the camera reference system
-is done through a roto-translation wrt the optical center.
+is done through a roto-translation w.r.t. the optical center.
 
 Given:
 \begin{itemize}
@@ -111,7 +111,7 @@ the coordinates $\vec{M}_C$ in CRF corresponding to $\vec{M}_W$ are given by:
 \]
 
 \begin{remark}
-The coordinates $\vec{C}_W$ of the optical center $\vec{C}$ are obtained as:
+The coordinates $\vec{C}_W$ of the optical center $\vec{C} = \nullvec$ are obtained as:
 \[
 \nullvec = \matr{R}\vec{C}_W + \vec{t}
 \iff (\nullvec - \vec{t}) = \matr{R}\vec{C}_W
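Note: the derivation is cut off by the hunk; it presumably concludes, using the orthogonality of the rotation matrix ($\matr{R}^{-1} = \matr{R}^T$), with
\[
\vec{C}_W = -\matr{R}^T \vec{t}
\]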
@@ -378,7 +378,7 @@ where:
 \]
 where $p_1$ and $p_2$ are additional intrinsic parameters.
 \begin{remark}
-This approximation has empirically been shown to work.
+This approximation has been empirically shown to work.
 \end{remark}
 \end{itemize}
 
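Note: the tangential correction that $p_1$ and $p_2$ parameterize is presumably the standard Brown-Conrady one (a sketch; the notes' exact notation is not visible in this hunk):
\[
\begin{split}
\Delta u &= 2 p_1 u v + p_2 (r^2 + 2 u^2) \\
\Delta v &= p_1 (r^2 + 2 v^2) + 2 p_2 u v
\end{split}
\]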
@@ -620,7 +620,7 @@ Therefore, the complete workflow for image formation becomes the following:
 
 \item[Homographies non-linear refinement]
 The homographies $\matr{H}_i$ estimated at the previous step are obtained using a linear method and need to be refined as, for each image $i$,
-the IRF coordinates $\matr{H}_i\vec{w}_j = (\frac{h_{i, 1}^T \tilde{\vec{w}}_j}{h_{i, 3}^T \tilde{\vec{w}}_j}, \frac{h_{i, 2}^T \tilde{\vec{w}}_j}{h_{i, 3}^T \tilde{\vec{w}}_j})$
+the IRF coordinates $\matr{H}_i\vec{w}_j = \left( \frac{h_{i, 1}^T \tilde{\vec{w}}_j}{h_{i, 3}^T \tilde{\vec{w}}_j}, \frac{h_{i, 2}^T \tilde{\vec{w}}_j}{h_{i, 3}^T \tilde{\vec{w}}_j} \right)$
 of the world point $\vec{w}_j$ are still not matching the known IRF coordinates $\vec{m}_{i,j}$ of the $j$-corner in the $i$-image.
 \begin{figure}[H]
 \centering
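Note: the refinement step presumably minimizes the reprojection error as a non-linear least-squares problem (a sketch in the symbols above):
\[
\matr{H}_i \leftarrow \arg\min_{\matr{H}_i} \sum_j \left\lVert \vec{m}_{i,j} - \matr{H}_i \vec{w}_j \right\rVert^2
\]
typically solved with an iterative method such as Levenberg-Marquardt starting from the linear estimate.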
@@ -896,7 +896,7 @@ The computed input coordinates might be continuous. Possible discretization stra
 
 \subsection{Undistort warping}
 
-Once a camera has been calibrated, the lens distortion parameters can be used to obtain the undistorted image through backward warping.
+Once a camera has been calibrated using Zhang's method, the lens distortion parameters can be used to obtain the undistorted image through backward warping.
 \[
 \begin{split}
 w_u &= u_\text{undist} + (k_1 r^2 + k_2 r^4)(u_\text{undist} - u_0) \\
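Note: the split presumably continues with the symmetric $v$ component (mirroring the $u$ line above):
\[
w_v = v_\text{undist} + (k_1 r^2 + k_2 r^4)(v_\text{undist} - v_0)
\]
where $r$ is the distance of the undistorted pixel from the principal point $(u_0, v_0)$ (possibly in normalized coordinates), and $(w_u, w_v)$ are the distorted coordinates at which the captured image is sampled.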
@@ -1018,6 +1018,7 @@ Undistorted images enjoy some properties:
 
 Finally, the homography $\matr{A}\matr{R}_\text{pitch}\matr{A}^{-1}$ relates the pitched image to the ideal image.
 
+\indenttbox
 \begin{remark}
 The same procedure can be done for the yaw.
 \end{remark}
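Note: in homogeneous pixel coordinates the relation is presumably (a sketch; $\tilde{\vec{m}}$ denotes homogeneous coordinates, up to the direction chosen for $\matr{R}_\text{pitch}$)
\[
\tilde{\vec{m}}_\text{ideal} \simeq \matr{A} \matr{R}_\text{pitch} \matr{A}^{-1} \, \tilde{\vec{m}}_\text{pitched}
\]
i.e. un-project with $\matr{A}^{-1}$, undo the pitch rotation, and re-project with $\matr{A}$.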
@@ -297,7 +297,7 @@ Given $C$ classes, labels can be smoothed by assuming a small uniform noise $\va
 Dropout and batch normalization show a general pattern for regularization:
 \begin{itemize}
 \item At train time, some randomness is added.
-\item Ad test time, inference is done by averaging or approximating the output of the network.
+\item At test time, inference is done by averaging or approximating the output of the network.
 \end{itemize}
 \end{remark}
 
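Note: a minimal instance of this pattern, using dropout with keep probability $p$ (a sketch): at train time each activation $x$ is multiplied by a random mask $m \sim \text{Bernoulli}(p)$; at test time the mask is replaced by its expectation,
\[
\mathbb{E}[m \cdot x] = p \cdot x
\]
so inference approximates the average over all masked sub-networks by simply scaling activations by $p$.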