Mirror of https://github.com/NotXia/unibo-ai-notes.git (synced 2025-12-14 18:51:52 +01:00)

Commit: Fix typos <noupdate>
@@ -72,8 +72,8 @@ In a 2D signal (e.g. an image), the gradient allows to determine the magnitude a
 \end{remark}
 \end{description}
 
-\item[Discete magnitude approximation] \marginnote{Discete magnitude approximation}
-The gradient magnitude can be approximated using the approximated partial derivatives:
+\item[Discrete magnitude approximation] \marginnote{Discrete magnitude approximation}
+The gradient magnitude can be approximated in different ways by using the approximated partial derivatives:
 \[
 \Vert \nabla I \Vert = \sqrt{(\partial_x I)^2 + (\partial_y I)^2} \hspace{1.5em}
 \Vert \nabla I \Vert_+ = \vert \partial_x I \vert + \vert \partial_y I \vert \hspace{1.5em}
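The two magnitude approximations in the hunk above can be tried numerically. A minimal pure-Python sketch using central differences on a toy image (all names and values here are made up for illustration):

```python
import math

def grad_mag(img, x, y):
    # Central-difference approximations of the partial derivatives.
    dx = (img[y][x + 1] - img[y][x - 1]) / 2.0
    dy = (img[y + 1][x] - img[y - 1][x]) / 2.0
    l2 = math.sqrt(dx ** 2 + dy ** 2)  # ||grad I||   (Euclidean norm)
    l1 = abs(dx) + abs(dy)             # ||grad I||_+ (cheaper, never smaller)
    return l2, l1

# Vertical step edge: left half dark, right half bright.
img = [[0, 0, 10, 10],
       [0, 0, 10, 10],
       [0, 0, 10, 10]]
l2, l1 = grad_mag(img, 1, 1)  # dx = 5, dy = 0, so both norms equal 5
```

On an axis-aligned edge the two norms coincide; on diagonal edges the absolute-value form overestimates the Euclidean one.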
@@ -124,7 +124,7 @@ In a 2D signal (e.g. an image), the gradient allows to determine the magnitude a
 In practice, the signal of an image is not always smooth due to noise.
 Derivatives amplify noise and are therefore unable to recognize edges.
 
-Smoothing the signal before computing the derivative allows to reduce the noise but also blurs the edges making it more difficult to localize them.
+Smoothing the signal before computing the derivative allows to reduce the noise but also blurs the edges, making it more difficult to localize them.
 
 A solution is to smooth and differentiate in a single operation by approximating the gradient as a difference of averages.
 \end{remark}
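The "difference of averages" idea above can be sketched in 1D: average a few samples on each side of the pixel and take the difference, so the averaging damps zero-mean noise while the difference still responds to the edge (signal values are made up):

```python
def diff_of_averages(signal, x, w=2):
    # Derivative estimate at x: mean of the w samples to the right minus
    # mean of the w samples to the left. Smoothing and differentiation
    # happen in a single operation.
    right = sum(signal[x + 1 : x + 1 + w]) / w
    left = sum(signal[x - w : x]) / w
    return right - left

# Noisy step edge between indices 4 and 5.
sig = [0, 1, 0, 1, 0, 10, 11, 10, 11, 10]
resp_edge = diff_of_averages(sig, 4)  # straddles the step: large response
resp_flat = diff_of_averages(sig, 7)  # inside the noisy flat region: ~0
```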
@@ -253,7 +253,7 @@ Geometric model of a pinhole camera.\\
 \subsection{Ratios and parallelism}
 
 Given a 3D line of length $L$ lying in a plane parallel to the image plane at distance $z$,
-then its length $l$ in the image plane is:
+its length $l$ in the image plane is:
 \[ l = L\frac{f}{z} \]
 
 In all the other cases (i.e. when the line is not parallel to the image plane),
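A quick numeric check of the $l = L\frac{f}{z}$ relation above (the segment length, focal length and depth are hypothetical values chosen for the example):

```python
def projected_length(L, f, z):
    # l = L * f / z for a segment parallel to the image plane at depth z.
    return L * f / z

# Hypothetical numbers: a 2 m segment, f = 50 mm, seen from 4 m away.
l = projected_length(L=2.0, f=0.05, z=4.0)  # 0.025 m on the image plane
```

Doubling the depth halves the projected length, as the formula predicts.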
@@ -64,7 +64,7 @@ Possible similarity/dissimilarity functions are:
 before computing \texttt{NCC}:
 \[ \mu(\tilde{I}_{i,j}) = \frac{1}{MN} \sum_{m=0}^{M-1} \sum_{n=0}^{N-1} I(i+m, j+n) \hspace{3em} \mu(T) = \frac{1}{MN} \sum_{m=0}^{M-1} \sum_{n=0}^{N-1} T(m, n) \]
 \[
-\texttt{NCC}(i, j) =
+\texttt{ZNCC}(i, j) =
 \frac{ \sum\limits_{m=0}\limits^{M-1} \sum\limits_{n=0}\limits^{N-1} \Big( \big(I(i+m, j+n) - \mu(\tilde{I}_{i,j})\big) \cdot \big(T(m, n) - \mu(T)\big) \Big) }
 { \sqrt{\sum\limits_{m=0}\limits^{M-1} \sum\limits_{n=0}\limits^{N-1} \big(I(i+m, j+n) - \mu(\tilde{I}_{i,j})\big)^2} \cdot \sqrt{\sum\limits_{m=0}\limits^{M-1} \sum\limits_{n=0}\limits^{N-1} \big(T(m, n) - \mu(T)\big)^2} }
 \]
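The ZNCC formula in this hunk translates directly into code. A self-contained pure-Python sketch (template and window values are toy data; the `+100` offset shows the invariance to additive intensity changes that the zero-mean normalization buys):

```python
import math

def zncc(I, T, i, j):
    # Zero-mean NCC between template T (M x N) and the window of I at (i, j).
    M, N = len(T), len(T[0])
    win = [[I[i + m][j + n] for n in range(N)] for m in range(M)]
    mu_w = sum(map(sum, win)) / (M * N)  # mean of the window
    mu_t = sum(map(sum, T)) / (M * N)    # mean of the template
    num = den_w = den_t = 0.0
    for m in range(M):
        for n in range(N):
            dw, dt = win[m][n] - mu_w, T[m][n] - mu_t
            num += dw * dt
            den_w += dw * dw
            den_t += dt * dt
    return num / (math.sqrt(den_w) * math.sqrt(den_t))

T = [[1, 2], [3, 4]]
I = [[101, 102], [103, 104]]  # same pattern, intensities offset by +100
score = zncc(I, T, 0, 0)      # 1.0: perfect match despite the offset
```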
@@ -97,7 +97,7 @@ Edge-based template matching that works as follows:
 \nabla \tilde{I}_{i,j}(\tilde{P}_k) = \begin{pmatrix} \partial_x \tilde{I}_{i,j}(\tilde{P}_k) \\ \partial_y \tilde{I}_{i,j}(\tilde{P}_k) \end{pmatrix} \hspace{2em}
 \tilde{\vec{u}}_k(\tilde{P}_k) = \frac{\nabla \tilde{I}_{i,j}(\tilde{P}_k)}{\Vert \nabla \tilde{I}_{i,j}(\tilde{P}_k) \Vert}
 \]
-\item Compute the similarity as the sum of the cosine similarities of each pair of gradients:
+\item Compute the similarity as the mean of the cosine similarities of each pair of gradients:
 \[ S(i, j) = \frac{1}{n} \sum_{k=1}^{n} \vec{u}_k(P_k) \cdot \tilde{\vec{u}}_k(\tilde{P}_k) = \frac{1}{n} \sum_{k=1}^{n} \cos \theta_k \in [-1, 1] \]
 $S(i, j) = 1$ when the gradients perfectly match. A minimum threshold $S_\text{min}$ is used to determine if there is a match.
 \end{enumerate}
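The mean cosine similarity $S(i, j)$ described in this hunk can be sketched as follows (gradient pairs are toy data; only directions matter after normalization):

```python
import math

def similarity(grads_model, grads_image):
    # Mean cosine similarity between paired unit gradient vectors;
    # each (gx, gy) is normalized first, so the score lies in [-1, 1].
    def unit(v):
        n = math.hypot(v[0], v[1])
        return (v[0] / n, v[1] / n)
    s = 0.0
    for a, b in zip(grads_model, grads_image):
        ua, ub = unit(a), unit(b)
        s += ua[0] * ub[0] + ua[1] * ub[1]
    return s / len(grads_model)

# Same directions with different magnitudes give a perfect score.
model = [(1.0, 0.0), (0.0, 2.0)]
image = [(3.0, 0.0), (0.0, 0.5)]
s_match = similarity(model, image)                       # 1.0
s_flip = similarity(model, [(-3.0, 0.0), (0.0, -0.5)])  # -1.0 (contrast reversed)
```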
@@ -156,13 +156,14 @@ by means of a projection from the image space to a parameter space.
 For instance, consider two points $p_1$, $p_2$ in the image space and
 their projection in the parameter space.
 If the two lines intersect at the point $(\tilde{m}, \tilde{c})$,
-then the line parametrized on $\tilde{m}$ and $\tilde{c}$ passes through $p_1$ and $p_2$ in the image space.
+then the line parametrized on $\tilde{m}$ and $\tilde{c}$ passes through both $p_1$ and $p_2$ in the image space.
 
 \begin{figure}[H]
 \centering
 \includegraphics[width=0.4\linewidth]{./img/hough_line_parameter_space.png}
 \end{figure}
 
+\indenttbox
 \begin{remark}
 By projecting $n$ points of the image space, there are at most $\frac{n(n-1)}{2}$ intersections in the parameter space.
 \end{remark}
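The image-space/parameter-space duality in this hunk can be checked directly: each point $(x, y)$ maps to the line $c = y - mx$, and intersecting two such lines recovers the $(\tilde{m}, \tilde{c})$ of the unique image-space line through both points (points chosen for the example):

```python
def param_space_intersection(p1, p2):
    # Intersect the parameter-space lines c = y1 - m*x1 and c = y2 - m*x2.
    # Assumes x1 != x2 (the image line is not vertical).
    (x1, y1), (x2, y2) = p1, p2
    m = (y1 - y2) / (x1 - x2)
    c = y1 - m * x1
    return m, c

m, c = param_space_intersection((0.0, 1.0), (2.0, 5.0))  # line y = 2x + 1
```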
@@ -267,7 +268,7 @@ Hough transform extended to detect an arbitrary shape.
 \item Compute its gradient direction $\varphi(\vec{x})$ discretized to match the step $\Delta \varphi$ of the R-table.
 \item For each $\vec{r}_i$ in the corresponding row of the R-table:
 \begin{enumerate}
-\item Compute an estimate of the barycenter as $\vec{y} = \vec{x} - \vec{r}_i$.
+\item Compute an estimate of the barycenter as $\vec{y} = \vec{x} + \vec{r}_i$.
 \item Cast a vote in the accumulator array $A[\vec{y}] \texttt{+=} 1$
 \end{enumerate}
 \end{enumerate}
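The voting loop of the generalized Hough transform corrected in this hunk can be sketched as follows. This is a toy setup, not the full algorithm: the R-table, edge points and $\Delta\varphi$ discretization are invented, and $\vec{r}_i$ is stored as barycenter minus edge point so the estimate is $\vec{y} = \vec{x} + \vec{r}_i$:

```python
import math
from collections import defaultdict

def ghough_votes(edge_points, r_table, n_phi=8):
    # edge_points: (x, y, phi) with phi the gradient direction in [0, 2*pi).
    # r_table: discretized-phi row index -> list of offsets r = barycenter - point.
    acc = defaultdict(int)
    for x, y, phi in edge_points:
        row = int(phi / (2 * math.pi) * n_phi) % n_phi  # discretize phi
        for rx, ry in r_table.get(row, []):
            acc[(x + rx, y + ry)] += 1                  # A[y] += 1
    return acc

# Two edge points of a shape whose barycenter is at (15, 3).
r_table = {0: [(5, 0), (7, 0)]}
edges = [(10, 3, 0.1), (8, 3, 0.05)]
acc = ghough_votes(edges, r_table)  # the true barycenter collects 2 votes
```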
@@ -6,7 +6,7 @@
 
 \begin{example}[Homography]
 Align two images of the same scene to create a larger image.
-Homography requires at least 4 correspondences.
+An homography requires at least 4 correspondences.
 To find them, it does the following:
 \begin{itemize}
 \item Independently find salient points in the two images.
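On why 4 correspondences suffice: a homography is a 3x3 matrix defined up to scale (8 degrees of freedom) and each point correspondence constrains 2 of them. A minimal sketch of applying a homography in homogeneous coordinates (the matrix here is a hypothetical pure translation):

```python
def apply_homography(H, p):
    # Map p = (x, y) through the 3x3 matrix H using homogeneous coordinates;
    # the final division by w is what makes H projective rather than affine.
    x, y = p
    xh = H[0][0] * x + H[0][1] * y + H[0][2]
    yh = H[1][0] * x + H[1][1] * y + H[1][2]
    w = H[2][0] * x + H[2][1] * y + H[2][2]
    return (xh / w, yh / w)

# A translation by (3, 1) written as a homography.
H = [[1, 0, 3], [0, 1, 1], [0, 0, 1]]
q = apply_homography(H, (2.0, 2.0))  # (5.0, 3.0)
```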
@@ -264,8 +264,8 @@ but this is not always able to capture the same features due to the details diff
 \begin{enumerate}
 \item Create a Gaussian scale-space by applying the scale-normalized Laplacian of Gaussian with different values of $\sigma$.
 \item For each pixel, find the characteristic scale and its corresponding Laplacian response across the scale-space (automatic scale selection).
-\item Filter out the pixels whose response is lower than a threshold and apply NMS.
-\item The remaining pixels are the centers of the blobs.
+\item Filter out the pixels whose response is lower than a threshold and find the peaks.
+\item The found pixels are the centers of the blobs.
 It can be shown that the radius is given by $r = \sigma\sqrt{2}$.
 \end{enumerate}
 \end{description}
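The automatic scale selection and the $r = \sigma\sqrt{2}$ radius from this hunk can be sketched for a single pixel. The response values are invented; a real implementation would compute them with the scale-normalized LoG:

```python
import math

def characteristic_scale(responses, threshold):
    # responses: (sigma, scale-normalized LoG response) pairs for one pixel.
    # Returns (characteristic sigma, blob radius), or None when every
    # response is below the threshold (the pixel is filtered out).
    sigma, resp = max(responses, key=lambda sr: abs(sr[1]))
    if abs(resp) < threshold:
        return None
    return sigma, sigma * math.sqrt(2)  # r = sigma * sqrt(2)

resps = [(1.0, 0.2), (2.0, 1.5), (4.0, 0.7)]
sel = characteristic_scale(resps, threshold=1.0)          # peak at sigma = 2
weak = characteristic_scale([(1.0, 0.1)], threshold=1.0)  # filtered out
```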
@@ -332,7 +332,7 @@ When detecting a peak, there are two cases:
 \]
 
 \begin{theorem}
-It can be proven that the DoG kernel is a scaled version of the LoG kernel:
+It can be proved that the DoG kernel is a scaled version of the LoG kernel:
 \[ G(x, y, k\sigma) - G(x, y, \sigma) \approx (k-1)\sigma^2 \nabla^{(2)}G(x, y, \sigma) \]
 
 \begin{remark}
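The DoG-approximates-LoG theorem in this hunk can be sanity-checked numerically in 1D, where the Laplacian reduces to the second derivative of the Gaussian and has a simple closed form (the values of $\sigma$ and $k$ are chosen for the example; the approximation tightens as $k \to 1$):

```python
import math

def gauss(x, s):
    # 1D Gaussian density with standard deviation s.
    return math.exp(-x * x / (2 * s * s)) / (s * math.sqrt(2 * math.pi))

def gauss_2nd_deriv(x, s):
    # Closed-form second derivative (1D Laplacian) of the Gaussian.
    return gauss(x, s) * (x * x - s * s) / s ** 4

# Compare DoG against (k-1) * sigma^2 * Laplacian(G) at x = 0.
sigma, k = 1.6, 1.1
dog = gauss(0.0, k * sigma) - gauss(0.0, sigma)
log = (k - 1) * sigma ** 2 * gauss_2nd_deriv(0.0, sigma)
rel_err = abs(dog - log) / abs(log)  # ~9% for k = 1.1
```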
@@ -341,7 +341,7 @@ When detecting a peak, there are two cases:
 \end{theorem}
 
 \item[Extrema detection] \marginnote{DoG extrema}
-Given three DoG images with scales $\sigma_i$, $\sigma_{i-1}$ and $\sigma_{i+1}$,
+Given three DoG images with scales $\sigma_{i-1}$, $\sigma_i$ and $\sigma_{i+1}$,
 a pixel $(x, y, \sigma_i)$ is an extrema (i.e. keypoint) iff:
 \begin{itemize}
 \item It is an extrema in a $3 \times 3$ patch centered on it (8 pixels as $(x, y, \sigma_i)$ is excluded).
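The extremum test over the three adjacent DoG scales can be sketched as a 26-neighbour comparison (8 in the pixel's own image plus 9 in each neighbouring scale); the toy stack below is invented:

```python
def is_extremum(dogs, s, x, y):
    # dogs: list of DoG images (scale index s-1, s, s+1 must exist).
    # The pixel is a keypoint candidate iff it beats all 26 neighbours.
    v = dogs[s][y][x]
    neigh = [dogs[si][y + dy][x + dx]
             for si in (s - 1, s, s + 1)
             for dy in (-1, 0, 1)
             for dx in (-1, 0, 1)
             if not (si == s and dy == 0 and dx == 0)]
    return v > max(neigh) or v < min(neigh)

# Toy 3-scale stack with a single peak at the middle scale.
flat = [[0] * 3 for _ in range(3)]
mid = [[0, 0, 0], [0, 5, 0], [0, 0, 0]]
found = is_extremum([flat, mid, flat], 1, 1, 1)  # True
```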
@@ -407,7 +407,7 @@ After finding the keypoints, a descriptor of a keypoint is computed from the pix
 \[
 \begin{split}
 \vert \nabla L(x, y) \vert &= \sqrt{ \big( L(x+1, y) - L(x-1, y) \big)^2 + \big( L(x, y+1) - L(x, y-1) \big)^2 } \\
-\theta_L(x, y) &= \tan^{-1}\left( \frac{L(x, y+1) - L(x, y-1)}{L(x+1, y) - L(x-1, y)} \right)
+\theta_L(x, y) &= \arctan\left( \frac{L(x, y+1) - L(x, y-1)}{L(x+1, y) - L(x-1, y)} \right)
 \end{split}
 \]
 
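A small implementation note on the orientation formula above: in code, a two-argument arctangent (`atan2`) is typically preferred over a plain arctan, since it keeps the signs of both differences and covers the full circle. A minimal sketch on a toy image:

```python
import math

def grad_orientation(L, x, y):
    # Magnitude and orientation at (x, y) from the same finite differences
    # as the formula above, with atan2 instead of a plain arctan.
    dx = L[y][x + 1] - L[y][x - 1]
    dy = L[y + 1][x] - L[y - 1][x]
    mag = math.hypot(dx, dy)
    theta = math.atan2(dy, dx)
    return mag, theta

L = [[0, 0, 0], [0, 0, 4], [0, 0, 0]]   # brighter pixel to the right
mag, theta = grad_orientation(L, 1, 1)  # dx = 4, dy = 0 -> theta = 0
```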
@@ -5,7 +5,7 @@
 
 The noise added to a pixel $p$ is defined by $n_k(p)$,
 where $k$ indicates the time step (i.e. noise changes depending on the moment the image is taken).
-It is assumed that $n_k(p)$ is i.i.d and $n_k(p) \sim \mathcal{N}(0, \sigma)$.
+It is assumed that $n_k(p)$ is i.i.d. and $n_k(p) \sim \mathcal{N}(0, \sigma)$.
 
 The information of a pixel $p$ is therefore defined as:
 \[ I_k(p) = \tilde{I}(p) + n_k(p) \]
@@ -33,7 +33,7 @@ where $\tilde{I}(p)$ is the real information.
 Let $K_p$ be the pixels in a window around $p$ (included):
 \[
 \begin{split}
-O(p) &= \frac{1}{\vert K_p \vert} \sum_{q \in K_p} I(p) \\
+O(p) &= \frac{1}{\vert K_p \vert} \sum_{q \in K_p} I(q) \\
 &= \frac{1}{\vert K_p \vert} \sum_{q \in K_p} \Big( \tilde{I}(q) + n(q) \Big) \\
 &= \frac{1}{\vert K_p \vert} \sum_{q \in K_p} \tilde{I}(q) + \frac{1}{\vert K_p \vert} \sum_{q \in K_p} n(q) \\
 &\approx \frac{1}{\vert K_p \vert} \sum_{q \in K_p} \tilde{I}(q)
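The cancellation step in the derivation above (the noise average tending to zero) can be made concrete. A deterministic $\pm\sigma$ sequence stands in for the i.i.d. Gaussian samples so that the cancellation is exact rather than approximate; all values are made up:

```python
# Averaging K acquisitions of the same pixel: the zero-mean noise cancels
# while the true intensity survives, exactly as the last step of the
# derivation claims for the expectation.
K, sigma, true_val = 8, 2.0, 100.0
noise = [sigma if k % 2 == 0 else -sigma for k in range(K)]
samples = [true_val + n for n in noise]
estimate = sum(samples) / K                # recovers the true intensity
single_error = abs(samples[0] - true_val)  # one frame alone is off by sigma
```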
@@ -80,7 +80,7 @@ where $\tilde{I}(p)$ is the real information.
 \end{descriptionlist}
 
 \item[Dirac delta] \marginnote{Dirac delta}
-The Dirac delta "function" $\delta$ is defined as follows \cite{wiki:dirac,book:sonka}:
+The Dirac delta ``function" $\delta$ is defined as follows \cite{wiki:dirac,book:sonka}:
 \[ \forall x \neq 0: \delta(x) = 0 \text{, constrained to } \int_{-\infty}^{+\infty} \delta(x) \,\text{d}x = 1 \]
 
 Extended to the 2-dimensional case, the definition is the following:
@@ -93,7 +93,7 @@ where $\tilde{I}(p)$ is the real information.
 
 \begin{remark}
 Exploiting the sifting property, the signal of an image can be expressed through an integral of Dirac deltas
-(i.e. a linear combination) \cite{slides:filters,book:sonka}:
+(i.e. a linear combination) \cite{slides:filters, book:sonka}:
 \[ i(x, y) = \int_{-\infty}^{+\infty}\int_{-\infty}^{+\infty} i(\alpha, \beta) \delta(x-\alpha, y-\beta) \,\text{d}\alpha\,\text{d}\beta \]
 \end{remark}
 \end{description}
@@ -173,7 +173,8 @@ where $\tilde{I}(p)$ is the real information.
 & \text{linearity of $T\{ \cdot \}$} \\
 %
 &= \int_{-\infty}^{+\infty}\int_{-\infty}^{+\infty} i(\alpha, \beta) h(x-\alpha, y-\beta) \,\text{d}\alpha\,\text{d}\beta
-& \text{\small translation-equivariance of $T\{ \cdot \}$} \\
+& \text{impulse response} \\
+% & \text{\small translation-equivariance of $T\{ \cdot \}$} \\
 %
 &= i(x, y) * h(x, y)
 & \text{definition of convolution} \\
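The impulse-response step annotated in this hunk has a direct discrete analogue: convolving a unit impulse with a kernel returns the kernel itself, i.e. the kernel is the system's impulse response. A minimal 1D sketch:

```python
def conv1d(signal, kernel):
    # Full 1D discrete convolution: out[n] = sum_k signal[k] * kernel[n - k].
    # For an LSI system with impulse response h, this computes T{i} = i * h.
    n_out = len(signal) + len(kernel) - 1
    out = [0.0] * n_out
    for i, s in enumerate(signal):
        for j, k in enumerate(kernel):
            out[i + j] += s * k
    return out

h = [1.0, 2.0, 1.0]        # the impulse response of some LSI system
resp = conv1d([1.0], h)    # a unit impulse reproduces h itself
```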
@@ -214,7 +215,8 @@ where $\tilde{I}(p)$ is the real information.
 \begin{align*}
 h(x, y) * i(x, y) &= \int_{-\infty}^{+\infty} \int_{-\infty}^{+\infty} i(\alpha, \beta)h(x-\alpha, y-\beta) \,\text{d}\alpha\,\text{d}\beta \\
 &= \int_{-\infty}^{+\infty} \int_{-\infty}^{+\infty} i(\alpha, \beta)h(\alpha-x, \beta-y) \,\text{d}\alpha\,\text{d}\beta
-& \parbox[b]{0.25\textwidth}{\raggedleft signs in $h$ swappable for Dirac delta} \\
+% & \parbox[b]{0.25\textwidth}{\raggedleft signs in $h$ swappable for Dirac delta}
+\\
 &= h(x, y) \circ i(x, y)
 \end{align*}
 \end{remark}
@@ -336,8 +338,8 @@ where $\tilde{I}(p)$ is the real information.
 \begin{description}
 \item[Sampling]
 In practice, the kernel is created by sampling from the wanted Gaussian distribution.
-One can notice that a higher $\sigma$ results in a more spread distribution and therefore a larger kernel is more suited,
-on the other hand, a smaller $\sigma$ can be represented using a smaller kernel as it is more concentrated around the origin.
+One can notice that a higher $\sigma$ results in a more spread distribution and therefore a larger kernel is more suited.
+On the other hand, a smaller $\sigma$ can be represented using a smaller kernel as it is more concentrated around the origin.
 
 As a rule-of-thumb, given $\sigma$, an ideal kernel is of size $(2\lceil 3\sigma \rceil + 1) \times (2\lceil 3\sigma \rceil + 1)$.
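The sampling procedure and the $2\lceil 3\sigma \rceil + 1$ rule-of-thumb from this hunk can be sketched in 1D (a 2D kernel would be the outer product of two such vectors, since the Gaussian is separable):

```python
import math

def gaussian_kernel(sigma):
    # Sample a 1D Gaussian at integer offsets within the rule-of-thumb
    # radius ceil(3*sigma), then normalize so the weights sum to 1.
    r = math.ceil(3 * sigma)
    vals = [math.exp(-x * x / (2 * sigma * sigma)) for x in range(-r, r + 1)]
    s = sum(vals)
    return [v / s for v in vals]

k = gaussian_kernel(1.0)  # 7 taps: 2 * ceil(3) + 1
```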
@@ -448,7 +450,7 @@ where $\tilde{I}(p)$ is the real information.
 \begin{split}
 O(p) &= \sum_{q \in S_p} w(p, q) \cdot \texttt{intensity}(q) \\
 \text{where }& w(p, q) = \frac{1}{Z(p)} e^{-\frac{\Vert \mathcal{N}_p - \mathcal{N}_q \Vert_2^2}{h^2}} \\
-& Z(p) = \sum_{q \in I} e^{\frac{\Vert \mathcal{N}_p - \mathcal{N}_q \Vert_2^2}{h^2}}
+& Z(p) = \sum_{v \in I} e^{\frac{\Vert \mathcal{N}_p - \mathcal{N}_v \Vert_2^2}{h^2}}
\end{split}
 \]
 \end{minipage}
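The non-local means weights from this last hunk can be sketched for one pixel. This is a toy version under stated assumptions: patches are flat lists keyed by an invented pixel id, and the negative sign in the exponent is applied consistently in both the weight and the normalization $Z(p)$:

```python
import math

def nlm_weights(patches, p, h):
    # Non-local means weights for pixel p: a candidate q is weighted by how
    # similar its whole neighbourhood patch is to p's patch, regardless of
    # where q sits in the image.
    def d2(a, b):
        return sum((x - y) ** 2 for x, y in zip(a, b))
    raw = {q: math.exp(-d2(patches[p], patches[q]) / (h * h)) for q in patches}
    z = sum(raw.values())  # Z(p): normalization over all candidates
    return {q: w / z for q, w in raw.items()}

patches = {"p": [1.0, 2.0], "q_sim": [1.0, 2.0], "q_diff": [9.0, 9.0]}
w = nlm_weights(patches, "p", h=1.0)  # q_sim dominates, q_diff is ~ignored
```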