From 329a2a736c029eb427a54ef08ffb1593277de61b Mon Sep 17 00:00:00 2001
From: NotXia <35894453+NotXia@users.noreply.github.com>
Date: Sun, 16 Jun 2024 19:46:58 +0200
Subject: [PATCH] Fix typos

---
 .../module1/sections/_edge_detection.tex      |  6 +++---
 .../module1/sections/_image_acquisition.tex   |  2 +-
 .../sections/_instance_obj_detection.tex      |  9 +++++----
 .../module1/sections/_local_features.tex      | 12 +++++------
 .../module1/sections/_spatial_filtering.tex   | 20 ++++++++++---------
 5 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/src/year1/image-processing-and-computer-vision/module1/sections/_edge_detection.tex b/src/year1/image-processing-and-computer-vision/module1/sections/_edge_detection.tex
index d6908a9..0b9c2c3 100644
--- a/src/year1/image-processing-and-computer-vision/module1/sections/_edge_detection.tex
+++ b/src/year1/image-processing-and-computer-vision/module1/sections/_edge_detection.tex
@@ -72,8 +72,8 @@ In a 2D signal (e.g. an image), the gradient allows to determine the magnitude a
     \end{remark}
     \end{description}

-    \item[Discete magnitude approximation] \marginnote{Discete magnitude approximation}
-    The gradient magnitude can be approximated using the approximated partial derivatives:
+    \item[Discrete magnitude approximation] \marginnote{Discrete magnitude approximation}
+    The gradient magnitude can be approximated in different ways using the approximate partial derivatives:
     \[
         \Vert \nabla I \Vert = \sqrt{(\partial_x I)^2 + (\partial_y I)^2} \hspace{1.5em}
         \Vert \nabla I \Vert_+ = \vert \partial_x I \vert + \vert \partial_y I \vert \hspace{1.5em}
@@ -124,7 +124,7 @@ In a 2D signal (e.g. an image), the gradient allows to determine the magnitude a
     In practice, the signal of an image is not always smooth due to noise.
     Derivatives amplify noise and are therefore unable to recognize edges.

-    Smoothing the signal before computing the derivative allows to reduce the noise but also blurs the edges making it more difficult to localize them.
+    Smoothing the signal before computing the derivative reduces the noise but also blurs the edges, making them more difficult to localize.
     A solution is to smooth and differentiate in a single operation by approximating the gradient as a difference of averages.
 \end{remark}
diff --git a/src/year1/image-processing-and-computer-vision/module1/sections/_image_acquisition.tex b/src/year1/image-processing-and-computer-vision/module1/sections/_image_acquisition.tex
index b15a0cf..7979258 100644
--- a/src/year1/image-processing-and-computer-vision/module1/sections/_image_acquisition.tex
+++ b/src/year1/image-processing-and-computer-vision/module1/sections/_image_acquisition.tex
@@ -253,7 +253,7 @@ Geometric model of a pinhole camera.\\
 \subsection{Ratios and parallelism}

 Given a 3D line of length $L$ lying in a plane parallel to the image plane at distance $z$,
-then its length $l$ in the image plane is:
+its length $l$ in the image plane is:
 \[ l = L\frac{f}{z} \]
 In all the other cases (i.e.
 when the line is not parallel to the image plane),
diff --git a/src/year1/image-processing-and-computer-vision/module1/sections/_instance_obj_detection.tex b/src/year1/image-processing-and-computer-vision/module1/sections/_instance_obj_detection.tex
index 05bb18b..529d8c7 100644
--- a/src/year1/image-processing-and-computer-vision/module1/sections/_instance_obj_detection.tex
+++ b/src/year1/image-processing-and-computer-vision/module1/sections/_instance_obj_detection.tex
@@ -64,7 +64,7 @@ Possible similarity/dissimilarity functions are:
     before computing \texttt{NCC}:
     \[ \mu(\tilde{I}_{i,j}) = \frac{1}{MN} \sum_{m=0}^{M-1} \sum_{n=0}^{N-1} I(i+m, j+n) \hspace{3em} \mu(T) = \frac{1}{MN} \sum_{m=0}^{M-1} \sum_{n=0}^{N-1} T(m, n) \]
     \[
-        \texttt{NCC}(i, j) =
+        \texttt{ZNCC}(i, j) =
         \frac{ \sum\limits_{m=0}\limits^{M-1} \sum\limits_{n=0}\limits^{N-1} \Big( \big(I(i+m, j+n) - \mu(\tilde{I}_{i,j})\big) \cdot \big(T(m, n) - \mu(T)\big) \Big) }
         { \sqrt{\sum\limits_{m=0}\limits^{M-1} \sum\limits_{n=0}\limits^{N-1} \big(I(i+m, j+n) - \mu(\tilde{I}_{i,j})\big)^2} \cdot \sqrt{\sum\limits_{m=0}\limits^{M-1} \sum\limits_{n=0}\limits^{N-1} \big(T(m, n) - \mu(T)\big)^2} }
     \]
@@ -97,7 +97,7 @@ Edge-based template matching that works as follows:
         \nabla \tilde{I}_{i,j}(\tilde{P}_k) = \begin{pmatrix} \partial_x \tilde{I}_{i,j}(\tilde{P}_k) \\ \partial_y \tilde{I}_{i,j}(\tilde{P}_k) \end{pmatrix} \hspace{2em}
         \tilde{\vec{u}}_k(\tilde{P}_k) = \frac{\nabla \tilde{I}_{i,j}(\tilde{P}_k)}{\Vert \nabla \tilde{I}_{i,j}(\tilde{P}_k) \Vert}
     \]
-    \item Compute the similarity as the sum of the cosine similarities of each pair of gradients:
+    \item Compute the similarity as the mean of the cosine similarities of each pair of gradients:
     \[ S(i, j) = \frac{1}{n} \sum_{k=1}^{n} \vec{u}_k(P_k) \cdot \tilde{\vec{u}}_k(\tilde{P}_k) = \frac{1}{n} \sum_{k=1}^{n} \cos \theta_k \in [-1, 1] \]
     $S(i, j) = 1$ when the gradients perfectly match. A minimum threshold $S_\text{min}$ is used to determine if there is a match.
 \end{enumerate}
@@ -156,13 +156,14 @@ by means of a projection from the image space to a parameter space.
     For instance, consider two points $p_1$, $p_2$ in the image space and their projection in the parameter space.
     If the two lines intersect at the point $(\tilde{m}, \tilde{c})$,
-    then the line parametrized on $\tilde{m}$ and $\tilde{c}$ passes through $p_1$ and $p_2$ in the image space.
+    then the line parametrized by $\tilde{m}$ and $\tilde{c}$ passes through both $p_1$ and $p_2$ in the image space.

     \begin{figure}[H]
         \centering
         \includegraphics[width=0.4\linewidth]{./img/hough_line_parameter_space.png}
     \end{figure}

+    \indenttbox
     \begin{remark}
         By projecting $n$ points of the image space, there are at most $\frac{n(n-1)}{2}$ intersections in the parameter space.
     \end{remark}
@@ -267,7 +268,7 @@ Hough transform extended to detect an arbitrary shape.
         \item Compute its gradient direction $\varphi(\vec{x})$ discretized to match the step $\Delta \varphi$ of the R-table.
         \item For each $\vec{r}_i$ in the corresponding row of the R-table:
         \begin{enumerate}
-            \item Compute an estimate of the barycenter as $\vec{y} = \vec{x} - \vec{r}_i$.
+            \item Compute an estimate of the barycenter as $\vec{y} = \vec{x} + \vec{r}_i$.
             \item Cast a vote in the accumulator array $A[\vec{y}] \texttt{+=} 1$
         \end{enumerate}
     \end{enumerate}
diff --git a/src/year1/image-processing-and-computer-vision/module1/sections/_local_features.tex b/src/year1/image-processing-and-computer-vision/module1/sections/_local_features.tex
index 492b6fc..e4877ad 100644
--- a/src/year1/image-processing-and-computer-vision/module1/sections/_local_features.tex
+++ b/src/year1/image-processing-and-computer-vision/module1/sections/_local_features.tex
@@ -6,7 +6,7 @@
     \begin{example}[Homography]
         Align two images of the same scene to create a larger image.
-        Homography requires at least 4 correspondences.
+        A homography requires at least 4 correspondences.
         To find them, it does the following:
         \begin{itemize}
             \item Independently find salient points in the two images.
@@ -264,8 +264,8 @@ but this is not always able to capture the same features due to the details diff
     \begin{enumerate}
         \item Create a Gaussian scale-space by applying the scale-normalized Laplacian of Gaussian with different values of $\sigma$.
         \item For each pixel, find the characteristic scale and its corresponding Laplacian response across the scale-space (automatic scale selection).
-        \item Filter out the pixels whose response is lower than a threshold and apply NMS.
-        \item The remaining pixels are the centers of the blobs.
+        \item Filter out the pixels whose response is lower than a threshold and find the peaks.
+        \item The pixels found in this way are the centers of the blobs.
         It can be shown that the radius is given by $r = \sigma\sqrt{2}$.
     \end{enumerate}
 \end{description}
@@ -332,7 +332,7 @@ When detecting a peak, there are two cases:
     \]

     \begin{theorem}
-        It can be proven that the DoG kernel is a scaled version of the LoG kernel:
+        It can be proved that the DoG kernel is a scaled version of the LoG kernel:
         \[ G(x, y, k\sigma) - G(x, y, \sigma) \approx (k-1)\sigma^2 \nabla^{(2)}G(x, y, \sigma) \]

         \begin{remark}
@@ -341,7 +341,7 @@ When detecting a peak, there are two cases:
     \end{theorem}

     \item[Extrema detection] \marginnote{DoG extrema}
-    Given three DoG images with scales $\sigma_i$, $\sigma_{i-1}$ and $\sigma_{i+1}$,
+    Given three DoG images with scales $\sigma_{i-1}$, $\sigma_i$ and $\sigma_{i+1}$,
     a pixel $(x, y, \sigma_i)$ is an extrema (i.e. keypoint) iff:
     \begin{itemize}
         \item It is an extrema in a $3 \times 3$ patch centered on it (8 pixels as $(x, y, \sigma_i)$ is excluded).
@@ -407,7 +407,7 @@ After finding the keypoints, a descriptor of a keypoint is computed from the pix
     \[
         \begin{split}
             \vert \nabla L(x, y) \vert &= \sqrt{ \big( L(x+1, y) - L(x-1, y) \big)^2 + \big( L(x, y+1) - L(x, y-1) \big)^2 } \\
-            \theta_L(x, y) &= \tan^{-1}\left( \frac{L(x, y+1) - L(x, y-1)}{L(x+1, y) - L(x-1, y)} \right)
+            \theta_L(x, y) &= \arctan\left( \frac{L(x, y+1) - L(x, y-1)}{L(x+1, y) - L(x-1, y)} \right)
         \end{split}
     \]
diff --git a/src/year1/image-processing-and-computer-vision/module1/sections/_spatial_filtering.tex b/src/year1/image-processing-and-computer-vision/module1/sections/_spatial_filtering.tex
index 69fc819..1261381 100644
--- a/src/year1/image-processing-and-computer-vision/module1/sections/_spatial_filtering.tex
+++ b/src/year1/image-processing-and-computer-vision/module1/sections/_spatial_filtering.tex
@@ -5,7 +5,7 @@
 The noise added to a pixel $p$ is defined by $n_k(p)$,
 where $k$ indicates the time step (i.e. noise changes depending on the moment the image is taken).
-It is assumed that $n_k(p)$ is i.i.d and $n_k(p) \sim \mathcal{N}(0, \sigma)$.
+It is assumed that $n_k(p)$ is i.i.d. and $n_k(p) \sim \mathcal{N}(0, \sigma)$.

 The information of a pixel $p$ is therefore defined as:
 \[ I_k(p) = \tilde{I}(p) + n_k(p) \]
@@ -33,7 +33,7 @@ where $\tilde{I}(p)$ is the real information.
     Let $K_p$ be the pixels in a window around $p$ (included):
     \[
         \begin{split}
-            O(p) &= \frac{1}{\vert K_p \vert} \sum_{q \in K_p} I(p) \\
+            O(p) &= \frac{1}{\vert K_p \vert} \sum_{q \in K_p} I(q) \\
             &= \frac{1}{\vert K_p \vert} \sum_{q \in K_p} \Big( \tilde{I}(q) + n(q) \Big) \\
             &= \frac{1}{\vert K_p \vert} \sum_{q \in K_p} \tilde{I}(q) + \frac{1}{\vert K_p \vert} \sum_{q \in K_p} n(q) \\
             &\approx \frac{1}{\vert K_p \vert} \sum_{q \in K_p} \tilde{I}(q)
@@ -80,7 +80,7 @@ where $\tilde{I}(p)$ is the real information.
     \end{descriptionlist}

     \item[Dirac delta] \marginnote{Dirac delta}
-    The Dirac delta "function" $\delta$ is defined as follows \cite{wiki:dirac,book:sonka}:
+    The Dirac delta ``function'' $\delta$ is defined as follows \cite{wiki:dirac,book:sonka}:
     \[ \forall x \neq 0: \delta(x) = 0 \text{, constrained to } \int_{-\infty}^{+\infty} \delta(x) \,\text{d}x = 1 \]
     Extended to the 2-dimensional case, the definition is the following:
@@ -93,7 +93,7 @@ where $\tilde{I}(p)$ is the real information.
     \begin{remark}
         Exploiting the sifting property,
         the signal of an image can be expressed through an integral of Dirac deltas
-        (i.e. a linear combination) \cite{slides:filters,book:sonka}:
+        (i.e. a linear combination) \cite{slides:filters, book:sonka}:
         \[ i(x, y) = \int_{-\infty}^{+\infty}\int_{-\infty}^{+\infty} i(\alpha, \beta) \delta(x-\alpha, y-\beta) \,\text{d}\alpha\,\text{d}\beta \]
     \end{remark}
 \end{description}
@@ -173,7 +173,8 @@ where $\tilde{I}(p)$ is the real information.
             & \text{linearity of $T\{ \cdot \}$} \\
             %
             &= \int_{-\infty}^{+\infty}\int_{-\infty}^{+\infty} i(\alpha, \beta) h(x-\alpha, y-\beta) \,\text{d}\alpha\,\text{d}\beta
-            & \text{\small translation-equivariance of $T\{ \cdot \}$} \\
+            & \text{impulse response} \\
+            % & \text{\small translation-equivariance of $T\{ \cdot \}$} \\
             %
             &= i(x, y) * h(x, y)
             & \text{definition of convolution} \\
@@ -214,7 +215,8 @@ where $\tilde{I}(p)$ is the real information.
         \begin{align*}
             h(x, y) * i(x, y) &= \int_{-\infty}^{+\infty} \int_{-\infty}^{+\infty} i(\alpha, \beta)h(x-\alpha, y-\beta) \,\text{d}\alpha\,\text{d}\beta \\
             &= \int_{-\infty}^{+\infty} \int_{-\infty}^{+\infty} i(\alpha, \beta)h(\alpha-x, \beta-y) \,\text{d}\alpha\,\text{d}\beta
-            & \parbox[b]{0.25\textwidth}{\raggedleft signs in $h$ swappable for Dirac delta} \\
+            % & \parbox[b]{0.25\textwidth}{\raggedleft signs in $h$ swappable for Dirac delta}
+            \\
             &= h(x, y) \circ i(x, y)
         \end{align*}
     \end{remark}
@@ -336,8 +338,8 @@ where $\tilde{I}(p)$ is the real information.
     \begin{description}
         \item[Sampling]
         In practice, the kernel is created by sampling from the wanted Gaussian distribution.
-        One can notice that a higher $\sigma$ results in a more spread distribution and therefore a larger kernel is more suited,
-        on the other hand, a smaller $\sigma$ can be represented using a smaller kernel as it is more concentrated around the origin.
+        One can notice that a higher $\sigma$ results in a more spread-out distribution and therefore a larger kernel is better suited.
+        On the other hand, a smaller $\sigma$ can be represented using a smaller kernel as the distribution is more concentrated around the origin.
         As a rule-of-thumb, given $\sigma$, an ideal kernel is of size $(2\lceil 3\sigma \rceil + 1) \times (2\lceil 3\sigma \rceil + 1)$.
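
As a cross-check of the sampling rule in the hunk above, here is a minimal sketch of building such a kernel (assuming Python with NumPy; the function name gaussian_kernel is illustrative and not part of the notes):

    import numpy as np

    def gaussian_kernel(sigma):
        # Rule of thumb from the notes: side 2*ceil(3*sigma) + 1,
        # so the support covers roughly +/- 3 sigma of the distribution.
        half = int(np.ceil(3.0 * sigma))
        xs = np.arange(-half, half + 1)
        xx, yy = np.meshgrid(xs, xs)
        # Sample the (unnormalized) 2D Gaussian at integer offsets from the center.
        kernel = np.exp(-(xx**2 + yy**2) / (2.0 * sigma**2))
        # Normalize so the weights sum to 1 and smoothing preserves mean intensity.
        return kernel / kernel.sum()

For instance, sigma = 1.0 yields a 7x7 kernel while sigma = 0.5 fits in 5x5, matching the observation that a smaller sigma is concentrated around the origin and needs a smaller support.
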
@@ -448,7 +450,7 @@ where $\tilde{I}(p)$ is the real information.
         \begin{split}
             O(p) &= \sum_{q \in S_p} w(p, q) \cdot \texttt{intensity}(q) \\
             \text{where }& w(p, q) = \frac{1}{Z(p)} e^{-\frac{\Vert \mathcal{N}_p - \mathcal{N}_q \Vert_2^2}{h^2}} \\
-            & Z(p) = \sum_{q \in I} e^{\frac{\Vert \mathcal{N}_p - \mathcal{N}_q \Vert_2^2}{h^2}}
+            & Z(p) = \sum_{v \in I} e^{-\frac{\Vert \mathcal{N}_p - \mathcal{N}_v \Vert_2^2}{h^2}}
         \end{split}
     \]
 \end{minipage}
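
The $Z(p)$ fixed in the last hunk is simply the sum of the unnormalized weights, which is why its exponent must carry the same minus sign as in $w(p, q)$. A minimal sketch of the resulting non-local means step for a single pixel (assuming Python with NumPy; nlm_pixel, half_patch, half_search and the border assumption are illustrative, and for tractability the sketch normalizes over the search window $S_p$ rather than the whole image $I$):

    import numpy as np

    def nlm_pixel(img, py, px, half_patch=3, half_search=10, h=0.1):
        # Assumes (py, px) is far enough from the border that every patch fits.
        def patch(y, x):
            return img[y - half_patch:y + half_patch + 1,
                       x - half_patch:x + half_patch + 1]

        ref = patch(py, px)                  # neighborhood N_p around p
        num, Z = 0.0, 0.0
        for qy in range(py - half_search, py + half_search + 1):
            for qx in range(px - half_search, px + half_search + 1):
                d2 = np.sum((ref - patch(qy, qx)) ** 2)  # ||N_p - N_q||_2^2
                w = np.exp(-d2 / h**2)                   # unnormalized weight
                num += w * img[qy, qx]
                Z += w                                   # accumulates Z(p)
        return num / Z                                   # O(p)

Dividing the accumulated sum by Z is equivalent to weighting each pixel by w(p, q) = exp(-||N_p - N_q||^2 / h^2) / Z(p): pixels whose neighborhoods resemble the one around p dominate the average, which is what lets the filter smooth noise without averaging across dissimilar structures.
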