From 329a2a736c029eb427a54ef08ffb1593277de61b Mon Sep 17 00:00:00 2001
From: NotXia <35894453+NotXia@users.noreply.github.com>
Date: Sun, 16 Jun 2024 19:46:58 +0200
Subject: [PATCH] Fix typos

---
 .../module1/sections/_edge_detection.tex      |  6 +++---
 .../module1/sections/_image_acquisition.tex   |  2 +-
 .../sections/_instance_obj_detection.tex      |  9 +++++----
 .../module1/sections/_local_features.tex      | 12 +++++------
 .../module1/sections/_spatial_filtering.tex   | 20 ++++++++++---------
 5 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/src/year1/image-processing-and-computer-vision/module1/sections/_edge_detection.tex b/src/year1/image-processing-and-computer-vision/module1/sections/_edge_detection.tex
index d6908a9..0b9c2c3 100644
--- a/src/year1/image-processing-and-computer-vision/module1/sections/_edge_detection.tex
+++ b/src/year1/image-processing-and-computer-vision/module1/sections/_edge_detection.tex
@@ -72,8 +72,8 @@ In a 2D signal (e.g. an image), the gradient allows to determine the magnitude a
     \end{remark}
     \end{description}

-    \item[Discete magnitude approximation] \marginnote{Discete magnitude approximation}
-    The gradient magnitude can be approximated using the approximated partial derivatives:
+    \item[Discrete magnitude approximation] \marginnote{Discrete magnitude approximation}
+    The gradient magnitude can be approximated in different ways using the approximate partial derivatives:
     \[
         \Vert \nabla I \Vert = \sqrt{(\partial_x I)^2 + (\partial_y I)^2} \hspace{1.5em}
         \Vert \nabla I \Vert_+ = \vert \partial_x I \vert + \vert \partial_y I \vert \hspace{1.5em}
@@ -124,7 +124,7 @@ In a 2D signal (e.g. an image), the gradient allows to determine the magnitude a
     In practice, the signal of an image is not always smooth due to noise.
     Derivatives amplify noise and are therefore unable to recognize edges.

-    Smoothing the signal before computing the derivative allows to reduce the noise but also blurs the edges making it more difficult to localize them.
+    Smoothing the signal before computing the derivative reduces the noise but also blurs the edges, making them more difficult to localize.
     A solution is to smooth and differentiate in a single operation by approximating the gradient as a difference of averages.
 \end{remark}
diff --git a/src/year1/image-processing-and-computer-vision/module1/sections/_image_acquisition.tex b/src/year1/image-processing-and-computer-vision/module1/sections/_image_acquisition.tex
index b15a0cf..7979258 100644
--- a/src/year1/image-processing-and-computer-vision/module1/sections/_image_acquisition.tex
+++ b/src/year1/image-processing-and-computer-vision/module1/sections/_image_acquisition.tex
@@ -253,7 +253,7 @@ Geometric model of a pinhole camera.\\
 \subsection{Ratios and parallelism}

 Given a 3D line of length $L$ lying in a plane parallel to the image plane at distance $z$,
-then its length $l$ in the image plane is:
+its length $l$ in the image plane is:
 \[ l = L\frac{f}{z} \]
 In all the other cases (i.e.
 when the line is not parallel to the image plane),
diff --git a/src/year1/image-processing-and-computer-vision/module1/sections/_instance_obj_detection.tex b/src/year1/image-processing-and-computer-vision/module1/sections/_instance_obj_detection.tex
index 05bb18b..529d8c7 100644
--- a/src/year1/image-processing-and-computer-vision/module1/sections/_instance_obj_detection.tex
+++ b/src/year1/image-processing-and-computer-vision/module1/sections/_instance_obj_detection.tex
@@ -64,7 +64,7 @@ Possible similarity/dissimilarity functions are:
     before computing \texttt{NCC}:
     \[ \mu(\tilde{I}_{i,j}) = \frac{1}{MN} \sum_{m=0}^{M-1} \sum_{n=0}^{N-1} I(i+m, j+n) \hspace{3em} \mu(T) = \frac{1}{MN} \sum_{m=0}^{M-1} \sum_{n=0}^{N-1} T(m, n) \]
     \[
-        \texttt{NCC}(i, j) =
+        \texttt{ZNCC}(i, j) =
         \frac{ \sum\limits_{m=0}\limits^{M-1} \sum\limits_{n=0}\limits^{N-1} \Big( \big(I(i+m, j+n) - \mu(\tilde{I}_{i,j})\big) \cdot \big(T(m, n) - \mu(T)\big) \Big) }
         { \sqrt{\sum\limits_{m=0}\limits^{M-1} \sum\limits_{n=0}\limits^{N-1} \big(I(i+m, j+n) - \mu(\tilde{I}_{i,j})\big)^2} \cdot \sqrt{\sum\limits_{m=0}\limits^{M-1} \sum\limits_{n=0}\limits^{N-1} \big(T(m, n) - \mu(T)\big)^2} }
     \]
@@ -97,7 +97,7 @@ Edge-based template matching that works as follows:
         \nabla \tilde{I}_{i,j}(\tilde{P}_k) = \begin{pmatrix} \partial_x \tilde{I}_{i,j}(\tilde{P}_k) \\ \partial_y \tilde{I}_{i,j}(\tilde{P}_k) \end{pmatrix} \hspace{2em}
         \tilde{\vec{u}}_k(\tilde{P}_k) = \frac{\nabla \tilde{I}_{i,j}(\tilde{P}_k)}{\Vert \nabla \tilde{I}_{i,j}(\tilde{P}_k) \Vert}
     \]
-    \item Compute the similarity as the sum of the cosine similarities of each pair of gradients:
+    \item Compute the similarity as the mean of the cosine similarities of each pair of gradients:
     \[ S(i, j) = \frac{1}{n} \sum_{k=1}^{n} \vec{u}_k(P_k) \cdot \tilde{\vec{u}}_k(\tilde{P}_k) = \frac{1}{n} \sum_{k=1}^{n} \cos \theta_k \in [-1, 1] \]
     $S(i, j) = 1$ when the gradients perfectly match. A minimum threshold $S_\text{min}$ is used to determine if there is a match.
 \end{enumerate}
@@ -156,13 +156,14 @@ by means of a projection from the image space to a parameter space.
     For instance, consider two points $p_1$, $p_2$ in the image space and their projection in the parameter space.
     If the two lines intersect at the point $(\tilde{m}, \tilde{c})$,
-    then the line parametrized on $\tilde{m}$ and $\tilde{c}$ passes through $p_1$ and $p_2$ in the image space.
+    then the line parametrized by $\tilde{m}$ and $\tilde{c}$ passes through both $p_1$ and $p_2$ in the image space.

     \begin{figure}[H]
         \centering
         \includegraphics[width=0.4\linewidth]{./img/hough_line_parameter_space.png}
     \end{figure}

+    \indenttbox
     \begin{remark}
         By projecting $n$ points of the image space, there are at most $\frac{n(n-1)}{2}$ intersections in the parameter space.
     \end{remark}
@@ -267,7 +268,7 @@ Hough transform extended to detect an arbitrary shape.
         \item Compute its gradient direction $\varphi(\vec{x})$ discretized to match the step $\Delta \varphi$ of the R-table.
         \item For each $\vec{r}_i$ in the corresponding row of the R-table:
         \begin{enumerate}
-            \item Compute an estimate of the barycenter as $\vec{y} = \vec{x} - \vec{r}_i$.
+            \item Compute an estimate of the barycenter as $\vec{y} = \vec{x} + \vec{r}_i$.
             \item Cast a vote in the accumulator array $A[\vec{y}] \texttt{+=} 1$
         \end{enumerate}
     \end{enumerate}
diff --git a/src/year1/image-processing-and-computer-vision/module1/sections/_local_features.tex b/src/year1/image-processing-and-computer-vision/module1/sections/_local_features.tex
index 492b6fc..e4877ad 100644
--- a/src/year1/image-processing-and-computer-vision/module1/sections/_local_features.tex
+++ b/src/year1/image-processing-and-computer-vision/module1/sections/_local_features.tex
@@ -6,7 +6,7 @@
     \begin{example}[Homography]
         Align two images of the same scene to create a larger image.
-        Homography requires at least 4 correspondences.
+        A homography requires at least 4 correspondences.
         To find them, it does the following:
         \begin{itemize}
             \item Independently find salient points in the two images.
@@ -264,8 +264,8 @@ but this is not always able to capture the same features due to the details diff
     \begin{enumerate}
         \item Create a Gaussian scale-space by applying the scale-normalized Laplacian of Gaussian with different values of $\sigma$.
         \item For each pixel, find the characteristic scale and its corresponding Laplacian response across the scale-space (automatic scale selection).
-        \item Filter out the pixels whose response is lower than a threshold and apply NMS.
-        \item The remaining pixels are the centers of the blobs.
+        \item Filter out the pixels whose response is lower than a threshold and find the peaks.
+        \item The pixels found in this way are the centers of the blobs.
         It can be shown that the radius is given by $r = \sigma\sqrt{2}$.
     \end{enumerate}
 \end{description}
@@ -332,7 +332,7 @@ When detecting a peak, there are two cases:
     \]

     \begin{theorem}
-        It can be proven that the DoG kernel is a scaled version of the LoG kernel:
+        It can be proved that the DoG kernel is a scaled version of the LoG kernel:
         \[ G(x, y, k\sigma) - G(x, y, \sigma) \approx (k-1)\sigma^2 \nabla^{(2)}G(x, y, \sigma) \]

         \begin{remark}
@@ -341,7 +341,7 @@ When detecting a peak, there are two cases:
     \end{theorem}

     \item[Extrema detection] \marginnote{DoG extrema}
-    Given three DoG images with scales $\sigma_i$, $\sigma_{i-1}$ and $\sigma_{i+1}$,
+    Given three DoG images with scales $\sigma_{i-1}$, $\sigma_i$ and $\sigma_{i+1}$,
     a pixel $(x, y, \sigma_i)$ is an extrema (i.e. keypoint) iff:
     \begin{itemize}
         \item It is an extrema in a $3 \times 3$ patch centered on it (8 pixels as $(x, y, \sigma_i)$ is excluded).
@@ -407,7 +407,7 @@ After finding the keypoints, a descriptor of a keypoint is computed from the pix
     \[
         \begin{split}
             \vert \nabla L(x, y) \vert &= \sqrt{ \big( L(x+1, y) - L(x-1, y) \big)^2 + \big( L(x, y+1) - L(x, y-1) \big)^2 } \\
-            \theta_L(x, y) &= \tan^{-1}\left( \frac{L(x, y+1) - L(x, y-1)}{L(x+1, y) - L(x-1, y)} \right)
+            \theta_L(x, y) &= \arctan\left( \frac{L(x, y+1) - L(x, y-1)}{L(x+1, y) - L(x-1, y)} \right)
         \end{split}
     \]
diff --git a/src/year1/image-processing-and-computer-vision/module1/sections/_spatial_filtering.tex b/src/year1/image-processing-and-computer-vision/module1/sections/_spatial_filtering.tex
index 69fc819..1261381 100644
--- a/src/year1/image-processing-and-computer-vision/module1/sections/_spatial_filtering.tex
+++ b/src/year1/image-processing-and-computer-vision/module1/sections/_spatial_filtering.tex
@@ -5,7 +5,7 @@
 The noise added to a pixel $p$ is defined by $n_k(p)$,
 where $k$ indicates the time step (i.e. noise changes depending on the moment the image is taken).
-It is assumed that $n_k(p)$ is i.i.d and $n_k(p) \sim \mathcal{N}(0, \sigma)$.
+It is assumed that $n_k(p)$ is i.i.d. and $n_k(p) \sim \mathcal{N}(0, \sigma)$.

 The information of a pixel $p$ is therefore defined as:
 \[ I_k(p) = \tilde{I}(p) + n_k(p) \]
@@ -33,7 +33,7 @@ where $\tilde{I}(p)$ is the real information.
     Let $K_p$ be the pixels in a window around $p$ (included):
     \[
         \begin{split}
-            O(p) &= \frac{1}{\vert K_p \vert} \sum_{q \in K_p} I(p) \\
+            O(p) &= \frac{1}{\vert K_p \vert} \sum_{q \in K_p} I(q) \\
             &= \frac{1}{\vert K_p \vert} \sum_{q \in K_p} \Big( \tilde{I}(q) + n(q) \Big) \\
             &= \frac{1}{\vert K_p \vert} \sum_{q \in K_p} \tilde{I}(q) + \frac{1}{\vert K_p \vert} \sum_{q \in K_p} n(q) \\
             &\approx \frac{1}{\vert K_p \vert} \sum_{q \in K_p} \tilde{I}(q)
@@ -80,7 +80,7 @@ where $\tilde{I}(p)$ is the real information.
     \end{descriptionlist}

     \item[Dirac delta] \marginnote{Dirac delta}
-    The Dirac delta "function" $\delta$ is defined as follows \cite{wiki:dirac,book:sonka}:
+    The Dirac delta ``function'' $\delta$ is defined as follows \cite{wiki:dirac,book:sonka}:
     \[ \forall x \neq 0: \delta(x) = 0 \text{, constrained to } \int_{-\infty}^{+\infty} \delta(x) \,\text{d}x = 1 \]
     Extended to the 2-dimensional case, the definition is the following:
@@ -93,7 +93,7 @@ where $\tilde{I}(p)$ is the real information.
     \begin{remark}
         Exploiting the sifting property,
         the signal of an image can be expressed through an integral of Dirac deltas
-        (i.e. a linear combination) \cite{slides:filters,book:sonka}:
+        (i.e. a linear combination) \cite{slides:filters, book:sonka}:
         \[ i(x, y) = \int_{-\infty}^{+\infty}\int_{-\infty}^{+\infty} i(\alpha, \beta) \delta(x-\alpha, y-\beta) \,\text{d}\alpha\,\text{d}\beta \]
     \end{remark}
 \end{description}
@@ -173,7 +173,8 @@ where $\tilde{I}(p)$ is the real information.
             & \text{linearity of $T\{ \cdot \}$} \\
             %
             &= \int_{-\infty}^{+\infty}\int_{-\infty}^{+\infty} i(\alpha, \beta) h(x-\alpha, y-\beta) \,\text{d}\alpha\,\text{d}\beta
-            & \text{\small translation-equivariance of $T\{ \cdot \}$} \\
+            & \text{impulse response} \\
+            % & \text{\small translation-equivariance of $T\{ \cdot \}$} \\
             %
             &= i(x, y) * h(x, y)
             & \text{definition of convolution} \\
@@ -214,7 +215,8 @@ where $\tilde{I}(p)$ is the real information.
         \begin{align*}
             h(x, y) * i(x, y) &= \int_{-\infty}^{+\infty} \int_{-\infty}^{+\infty} i(\alpha, \beta)h(x-\alpha, y-\beta) \,\text{d}\alpha\,\text{d}\beta \\
             &= \int_{-\infty}^{+\infty} \int_{-\infty}^{+\infty} i(\alpha, \beta)h(\alpha-x, \beta-y) \,\text{d}\alpha\,\text{d}\beta
-            & \parbox[b]{0.25\textwidth}{\raggedleft signs in $h$ swappable for Dirac delta} \\
+            % & \parbox[b]{0.25\textwidth}{\raggedleft signs in $h$ swappable for Dirac delta}
+            \\
             &= h(x, y) \circ i(x, y)
         \end{align*}
     \end{remark}
@@ -336,8 +338,8 @@ where $\tilde{I}(p)$ is the real information.
     \begin{description}
         \item[Sampling]
         In practice, the kernel is created by sampling from the wanted Gaussian distribution.
-        One can notice that a higher $\sigma$ results in a more spread distribution and therefore a larger kernel is more suited,
-        on the other hand, a smaller $\sigma$ can be represented using a smaller kernel as it is more concentrated around the origin.
+        One can notice that a higher $\sigma$ results in a more spread-out distribution and therefore a larger kernel is better suited.
+        On the other hand, a smaller $\sigma$ can be represented using a smaller kernel as the distribution is more concentrated around the origin.
         As a rule-of-thumb, given $\sigma$, an ideal kernel is of size $(2\lceil 3\sigma \rceil + 1) \times (2\lceil 3\sigma \rceil + 1)$.
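
As a cross-check of the sampling rule in the hunk above, here is a minimal sketch of building such a kernel (assuming Python with NumPy; the function name gaussian_kernel is illustrative and not part of the notes):

    import numpy as np

    def gaussian_kernel(sigma):
        # Rule of thumb from the notes: side 2*ceil(3*sigma) + 1,
        # so the support covers roughly +/- 3 sigma of the distribution.
        half = int(np.ceil(3.0 * sigma))
        xs = np.arange(-half, half + 1)
        xx, yy = np.meshgrid(xs, xs)
        # Sample the (unnormalized) 2D Gaussian at integer offsets from the center.
        kernel = np.exp(-(xx**2 + yy**2) / (2.0 * sigma**2))
        # Normalize so the weights sum to 1 and smoothing preserves mean intensity.
        return kernel / kernel.sum()

For instance, sigma = 1.0 yields a 7x7 kernel while sigma = 0.5 fits in 5x5, matching the observation that a smaller sigma is concentrated around the origin and needs a smaller support.
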
@@ -448,7 +450,7 @@ where $\tilde{I}(p)$ is the real information.
         \begin{split}
             O(p) &= \sum_{q \in S_p} w(p, q) \cdot \texttt{intensity}(q) \\
             \text{where }& w(p, q) = \frac{1}{Z(p)} e^{-\frac{\Vert \mathcal{N}_p - \mathcal{N}_q \Vert_2^2}{h^2}} \\
-            & Z(p) = \sum_{q \in I} e^{\frac{\Vert \mathcal{N}_p - \mathcal{N}_q \Vert_2^2}{h^2}}
+            & Z(p) = \sum_{v \in I} e^{-\frac{\Vert \mathcal{N}_p - \mathcal{N}_v \Vert_2^2}{h^2}}
         \end{split}
     \]
 \end{minipage}
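
The $Z(p)$ fixed in the last hunk is simply the sum of the unnormalized weights, which is why its exponent must carry the same minus sign as in $w(p, q)$. A minimal sketch of the resulting non-local means step for a single pixel (assuming Python with NumPy; nlm_pixel, half_patch, half_search and the border assumption are illustrative, and for tractability the sketch normalizes over the search window $S_p$ rather than the whole image $I$):

    import numpy as np

    def nlm_pixel(img, py, px, half_patch=3, half_search=10, h=0.1):
        # Assumes (py, px) is far enough from the border that every patch fits.
        def patch(y, x):
            return img[y - half_patch:y + half_patch + 1,
                       x - half_patch:x + half_patch + 1]

        ref = patch(py, px)                  # neighborhood N_p around p
        num, Z = 0.0, 0.0
        for qy in range(py - half_search, py + half_search + 1):
            for qx in range(px - half_search, px + half_search + 1):
                d2 = np.sum((ref - patch(qy, qx)) ** 2)  # ||N_p - N_q||_2^2
                w = np.exp(-d2 / h**2)                   # unnormalized weight
                num += w * img[qy, qx]
                Z += w                                   # accumulates Z(p)
        return num / Z                                   # O(p)

Dividing the accumulated sum by Z is equivalent to weighting each pixel by w(p, q) = exp(-||N_p - N_q||^2 / h^2) / Z(p): pixels whose neighborhoods resemble the one around p dominate the average, which is what lets the filter smooth noise without averaging across dissimilar structures.
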