From 0969f767e9410f2959dca79b2f1e055a1afc3614 Mon Sep 17 00:00:00 2001
From: NotXia <35894453+NotXia@users.noreply.github.com>
Date: Thu, 20 Jun 2024 10:48:32 +0200
Subject: [PATCH] Fix typos

---
 .../module2/sections/_architectures.tex   |  9 +++++----
 .../module2/sections/_classification.tex  | 10 +++++-----
 .../module2/sections/_image_formation.tex | 11 ++++++-----
 .../module2/sections/_training.tex        |  2 +-
 4 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/src/year1/image-processing-and-computer-vision/module2/sections/_architectures.tex b/src/year1/image-processing-and-computer-vision/module2/sections/_architectures.tex
index 2d56ca0..2e1cd01 100644
--- a/src/year1/image-processing-and-computer-vision/module2/sections/_architectures.tex
+++ b/src/year1/image-processing-and-computer-vision/module2/sections/_architectures.tex
@@ -5,7 +5,7 @@
 \begin{description}
     \item[Stem layer] \marginnote{Stem layer}
-    First convolutional layer of a CNN that aims to reduce the spatial size of the activations for memory and computational purposes
+    First convolutional layer(s) of a CNN that aims to reduce the spatial size of the activations for memory and computational purposes
     but also to rapidly increase the receptive field.
 
     \item[Model parallelism] \marginnote{Model parallelism}
@@ -299,6 +299,7 @@ The authors constrained the layers to:
             On the other hand, two activations are computed and both need to be stored for backpropagation.
 
+        \indenttbox
         \begin{example} \phantom{}
             \begin{center}
@@ -451,7 +452,7 @@ Network that aims to optimize computing resources.
 \begin{description}
     \item[Stem layers]
         Down-sample the image from a shape of 224 to 28.
-        As in ZFNet, multiple layers are used (5) and the largest convolution is of shape $7 \times 7$ and stride $2$.
+        As in ZFNet, multiple layers are used (5) and the largest convolution is of shape $7 \times 7$ with stride $2$.
 
     \item[Inception module] \marginnote{Inception module}
         Main component of Inception-v1 that computes multiple convolutions on the input.
@@ -621,7 +622,7 @@ A larger version of Inception v3 with more complicated stem layers.
 \begin{description}
     \item[Standard residual block] \marginnote{Standard residual block}
-        Block that allows to easily learn the identity function through skip connections.
+        Block that allows the network to easily learn the identity function through a skip connection.
         The output of a residual block with input $x$ and a series of convolutional layers $F$ is:
         \[ F(x; \matr{\theta}) + x \]
@@ -779,7 +780,7 @@ It has the following properties:
     \item The majority of the possible paths have a length of $\sim 30$.
     \item The gradient magnitude is significant at the first layers (i.e. in shorter paths).
 \end{itemize}
-By multiplying values of two points above, results show that the total gradient magnitude is significant only up until paths of length $\sim 20$.
+By multiplying the values of the two points above, results show that the total gradient magnitude is significant only up until paths of length $\sim 20$.
 
 \begin{figure}[H]
     \centering
diff --git a/src/year1/image-processing-and-computer-vision/module2/sections/_classification.tex b/src/year1/image-processing-and-computer-vision/module2/sections/_classification.tex
index e219add..4685c1d 100644
--- a/src/year1/image-processing-and-computer-vision/module2/sections/_classification.tex
+++ b/src/year1/image-processing-and-computer-vision/module2/sections/_classification.tex
@@ -291,7 +291,7 @@ The prediction is obtained as the index of the maximum score.
 \begin{figure}[H]
     \centering
-    \includegraphics[width=0.45\linewidth]{./img/data_representation_linear.png}
+    \includegraphics[width=0.40\linewidth]{./img/data_representation_linear.png}
     \caption{
         \parbox[t]{0.6\linewidth}{
             Example of non-linearly separable data points that become linearly separable in polar coordinates
@@ -386,12 +386,12 @@ The prediction is obtained as the index of the maximum score.
 
 \begin{figure}[H]
     \centering
-    \begin{subfigure}{0.55\linewidth}
+    \begin{subfigure}{0.6\linewidth}
         \centering
         \includegraphics[width=\linewidth]{./img/relu_separability_1.png}
     \end{subfigure}
 
-    \begin{subfigure}{0.55\linewidth}
+    \begin{subfigure}{0.6\linewidth}
         \centering
         \includegraphics[width=\linewidth]{./img/relu_separability_2.png}
     \end{subfigure}
@@ -442,7 +442,7 @@ Image filtering can be implemented through:
 
 Given an image of size $H \times W$, a convolution requires:
 \begin{itemize}
-    \item $2$ parameters.
+    \item $2$ parameters (in the case of edge detection).
     \item $3 (H \cdot (W-1)) \approx 3HW$ FLOPs.
 \end{itemize}
 
@@ -469,7 +469,7 @@ Image filtering can be implemented through:
 \begin{description}
     \item[Multi-channel convolution] \marginnote{Multi-channel convolution}
-        On inputs with multiple channels (i.e. 3D inputs), different 2D convolutions are applied across the different channels.
+        On inputs with multiple channels (e.g. RGB images), different 2D convolutions are applied across the different channels.
 
         Given a $C_\text{in} \times H_\text{in} \times W_\text{in}$ image $I$,
         a convolution kernel $K$ will have shape $C_\text{in} \times H_K \times W_K$
         and the output activation at each pixel is computed as:
diff --git a/src/year1/image-processing-and-computer-vision/module2/sections/_image_formation.tex b/src/year1/image-processing-and-computer-vision/module2/sections/_image_formation.tex
index 964afed..a1a87ba 100644
--- a/src/year1/image-processing-and-computer-vision/module2/sections/_image_formation.tex
+++ b/src/year1/image-processing-and-computer-vision/module2/sections/_image_formation.tex
@@ -84,7 +84,7 @@ is done in two steps:
 \marginnote{Roto-translation}
 
 The conversion from the world reference system to the camera reference system
-is done through a roto-translation wrt the optical center.
+is done through a roto-translation w.r.t. the optical center.
 
 Given:
 \begin{itemize}
@@ -111,7 +111,7 @@ the coordinates $\vec{M}_C$ in CRF corresponding to $\vec{M}_W$ are given by:
 \]
 
 \begin{remark}
-    The coordinates $\vec{C}_W$ of the optical center $\vec{C}$ are obtained as:
+    The coordinates $\vec{C}_W$ of the optical center $\vec{C} = \nullvec$ are obtained as:
     \[
         \nullvec = \matr{R}\vec{C}_W + \vec{t}
         \iff
         (\nullvec - \vec{t}) = \matr{R}\vec{C}_W
@@ -378,7 +378,7 @@ where:
     \]
     where $p_1$ and $p_2$ are additional intrinsic parameters.
     \begin{remark}
-        This approximation has empirically been shown to work.
+        This approximation has been empirically shown to work.
     \end{remark}
 \end{itemize}
 
@@ -620,7 +620,7 @@ Therefore, the complete workflow for image formation becomes the following:
     \item[Homographies non-linear refinement]
         The homographies $\matr{H}_i$ estimated at the previous step are obtained using a linear method
         and need to be refined as, for each image $i$,
-        the IRF coordinates $\matr{H}_i\vec{w}_j = (\frac{h_{i, 1}^T \tilde{\vec{w}}_j}{h_{i, 3}^T \tilde{\vec{w}}_j}, \frac{h_{i, 2}^T \tilde{\vec{w}}_j}{h_{i, 3}^T \tilde{\vec{w}}_j})$
+        the IRF coordinates $\matr{H}_i\vec{w}_j = \left( \frac{h_{i, 1}^T \tilde{\vec{w}}_j}{h_{i, 3}^T \tilde{\vec{w}}_j}, \frac{h_{i, 2}^T \tilde{\vec{w}}_j}{h_{i, 3}^T \tilde{\vec{w}}_j} \right)$
         of the world point $\vec{w}_j$ are still not matching the known IRF coordinates $\vec{m}_{i,j}$ of the $j$-corner in the $i$-image.
 
 \begin{figure}[H]
     \centering
@@ -896,7 +896,7 @@ The computed input coordinates might be continuous. Possible discretization stra
 
 \subsection{Undistort warping}
 
-Once a camera has been calibrated, the lens distortion parameters can be used to obtain the undistorted image through backward warping.
+Once a camera has been calibrated using Zhang's method, the lens distortion parameters can be used to obtain the undistorted image through backward warping.
 \[
     \begin{split}
         w_u &= u_\text{undist} + (k_1 r^2 + k_2 r^4)(u_\text{undist} - u_0) \\
         w_v &= v_\text{undist} + (k_1 r^2 + k_2 r^4)(v_\text{undist} - v_0)
     \end{split}
 \]
@@ -1018,6 +1018,7 @@ Undistorted images enjoy some properties:
     Finally, the homography $\matr{A}\matr{R}_\text{pitch}\matr{A}^{-1}$ relates the pitched image to the ideal image.
 
+    \indenttbox
     \begin{remark}
         The same procedure can be done for the yaw.
     \end{remark}
diff --git a/src/year1/image-processing-and-computer-vision/module2/sections/_training.tex b/src/year1/image-processing-and-computer-vision/module2/sections/_training.tex
index 3a4cc8f..529d023 100644
--- a/src/year1/image-processing-and-computer-vision/module2/sections/_training.tex
+++ b/src/year1/image-processing-and-computer-vision/module2/sections/_training.tex
@@ -297,7 +297,7 @@ Given $C$ classes, labels can be smoothed by assuming a small uniform noise $\va
 Dropout and batch normalization show a general pattern for regularization:
 \begin{itemize}
     \item At train time, some randomness is added.
-    Ad test time, inference is done by averaging or approximating the output of the network.
+    At test time, inference is done by averaging or approximating the output of the network.
 \end{itemize}
 \end{remark}
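
Reviewer note (not part of the diff): the undistort-warping hunk in _image_formation.tex can be sanity-checked with a minimal sketch of the backward warping it describes. Everything below is illustrative rather than code from the notes: the function and parameter names are made up, only the radial coefficients k1, k2 are modelled (no tangential terms), r is assumed to be the radius of the normalized image coordinates as in Zhang's model, and nearest-neighbour rounding is used as the discretization strategy.

import numpy as np

def undistort(img, fx, fy, u0, v0, k1, k2):
    """Remove radial lens distortion through backward warping.

    For every pixel (u, v) of the undistorted output, compute the
    continuous source coordinates (w_u, w_v) in the distorted input
    with w = undist + (k1 r^2 + k2 r^4)(undist - c), then sample the
    input image there.
    """
    H, W = img.shape  # grayscale image for simplicity
    out = np.zeros_like(img)
    for v in range(H):
        for u in range(W):
            # Normalized coordinates of the undistorted pixel.
            x = (u - u0) / fx
            y = (v - v0) / fy
            r2 = x * x + y * y
            factor = k1 * r2 + k2 * r2 * r2
            # Distorted (source) coordinates, still continuous.
            w_u = u + factor * (u - u0)
            w_v = v + factor * (v - v0)
            # Nearest-neighbour discretization of the source coordinates.
            su, sv = round(w_u), round(w_v)
            if 0 <= su < W and 0 <= sv < H:
                out[v, u] = img[sv, su]
    return out

Iterating over the output pixels and sampling the input (backward warping) assigns a value to every output pixel, avoiding the holes that forward warping can leave.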