\chapter{Object detection}
\begin{description}
\item[Object detection] \marginnote{Object detection}
Given an RGB $W \times H$ image, determine a set of objects $\{ o_1, \dots, o_n \}$ contained in it. Each object $o_j$ is described by:
\begin{itemize}
\item A category $c_j \in \{ 1, \dots, C \}$, as in image classification.
\item A bounding box $BB_j = [ x_j, y_j, w_j, h_j ]$, where $x_j, w_j \in [0, W-1]$ and $y_j, h_j \in [0, H-1]$: $(x_j, y_j)$ is the center and $(w_j, h_j)$ is the size of the box.
\item A confidence score $\rho_j$.
\end{itemize}
\begin{figure}[H]
\centering
\includegraphics[width=0.45\linewidth]{./img/_object_detection_example.pdf}
\end{figure}
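For illustration, a detection as defined above can be sketched as a small Python record holding the category, the $[x, y, w, h]$ box and the confidence score (the class and field names below are purely illustrative):

\begin{verbatim}
from dataclasses import dataclass

@dataclass
class Detection:
    category: int            # c_j in {1, ..., C}
    box: tuple               # (x, y, w, h): center coordinates and size
    score: float             # confidence rho_j

# Hypothetical output of a detector on one image:
detections = [
    Detection(category=1, box=(120.0, 80.0, 60.0, 40.0), score=0.92),
    Detection(category=3, box=(300.0, 150.0, 90.0, 120.0), score=0.55),
]
\end{verbatim}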
\begin{remark}
Unlike image classification, a detection model has to:
\begin{itemize}
\item Output a variable number of results.
\item Output both categorical and spatial information.
\item Work on high resolution input images.
\end{itemize}
\end{remark}
\end{description}


\section{Metrics}
\begin{description}
\item[Intersection over union (\texttt{IoU})]
\marginnote{Intersection over union (\texttt{IoU})}
Measures the amount of overlap between two boxes, computed as the ratio between the area of their intersection and the area of their union:
\[ \texttt{IoU}(BB_i, BB_j) = \frac{\vert BB_i \cap BB_j \vert}{\vert BB_i \vert + \vert BB_j \vert - \vert BB_i \cap BB_j \vert} \]
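As a minimal sketch of this formula, \texttt{IoU} between two axis-aligned boxes in $[x, y, w, h]$ (center, size) format can be computed as follows (the function name is illustrative):

\begin{verbatim}
def iou(box_a, box_b):
    """IoU of two axis-aligned boxes given as (x_center, y_center, w, h)."""
    # Convert from center format to corner format.
    ax1, ay1 = box_a[0] - box_a[2] / 2, box_a[1] - box_a[3] / 2
    ax2, ay2 = box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2
    bx1, by1 = box_b[0] - box_b[2] / 2, box_b[1] - box_b[3] / 2
    bx2, by2 = box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2

    # Intersection rectangle (empty if the boxes do not overlap).
    inter_w = max(0.0, min(ax2, bx2) - max(ax1, bx1))
    inter_h = max(0.0, min(ay2, by2) - max(ay1, by1))
    intersection = inter_w * inter_h

    # Union area by inclusion-exclusion.
    union = box_a[2] * box_a[3] + box_b[2] * box_b[3] - intersection
    return intersection / union if union > 0 else 0.0
\end{verbatim}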
\begin{description}
\item[True/false positive criteria]
Given a threshold $\rho_\texttt{IoU}$, a detection $BB_i$ is a true positive (\texttt{TP}) w.r.t. a ground truth $\hat{BB_j}$ if it is assigned the same class and:
\[ \texttt{IoU}(BB_i, \hat{BB_j}) > \rho_\texttt{IoU} \]

\begin{remark}
Confidence can also be taken into account when determining a match, through a threshold $\rho_\text{min}$ on the confidence score.
\end{remark}
\end{description}
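One possible way to apply this criterion, assuming the illustrative \texttt{Detection} record and \texttt{iou} function sketched earlier, is a greedy matching: detections are visited in decreasing confidence order and each ground truth box can be matched at most once.

\begin{verbatim}
def match_detections(detections, ground_truths, iou_thr=0.5):
    """Label each detection as TP (True) or FP (False)."""
    matched_gt = set()
    labels = []  # one boolean per detection, in confidence order
    for det in sorted(detections, key=lambda d: d.score, reverse=True):
        best_iou, best_gt = 0.0, None
        for k, gt in enumerate(ground_truths):
            if k in matched_gt or gt.category != det.category:
                continue
            overlap = iou(det.box, gt.box)
            if overlap > best_iou:
                best_iou, best_gt = overlap, k
        if best_gt is not None and best_iou > iou_thr:
            matched_gt.add(best_gt)   # true positive: the GT box is consumed
            labels.append(True)
        else:
            labels.append(False)      # false positive
    return labels
\end{verbatim}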
\item[Recall]
Measures the fraction of ground truth objects that have been found:
\[ \texttt{recall} = \frac{\vert \texttt{TP} \vert}{\vert \text{ground truth boxes} \vert} \]

\item[Precision]
Measures the fraction of correct detections among all the predictions:
\[ \texttt{precision} = \frac{\vert \texttt{TP} \vert}{\vert \text{model detections} \vert} \]
\begin{figure}[H]
\centering
\includegraphics[width=0.7\linewidth]{./img/obj_det_recall_precision.png}
\caption{Recall and precision in different scenarios}
\end{figure}
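Continuing the sketch above, precision and recall follow directly from the TP/FP labels of the detections and the number of ground truth boxes:

\begin{verbatim}
def precision_recall(labels, num_ground_truths):
    """Precision and recall from the TP/FP labels of all detections."""
    tp = sum(labels)  # True counts as 1
    precision = tp / len(labels) if labels else 0.0
    recall = tp / num_ground_truths if num_ground_truths > 0 else 0.0
    return precision, recall
\end{verbatim}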
\item[Precision-recall curve]
Plot that relates precision and recall as the confidence threshold varies.

\begin{example}
Consider the following image and the bounding boxes found by a detector:
\begin{figure}[H]
\centering
\includegraphics[width=0.4\linewidth]{./img/_example_precision_recall_curve1.pdf}
\caption{
\parbox[t]{0.6\linewidth}{
Ground truth (yellow boxes) and predictions (orange boxes) with their confidence scores
}
}
\end{figure}

By sorting the detections by decreasing confidence score, the precision-recall curve can be plotted by varying the threshold $\rho_\text{min}$:
\begin{figure}[H]
\centering
\includegraphics[width=0.4\linewidth]{./img/_example_precision_recall_curve2.pdf}
\end{figure}

\indenttbox
\begin{remark}
As the threshold $\rho_\text{min}$ increases, recall is monotonically non-increasing, while precision can both decrease and increase.
\end{remark}
\end{example}
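Under the same illustrative assumptions as the previous sketches, the curve can be traced by sorting the detections by decreasing confidence and recomputing precision and recall on each prefix of the ranking (i.e.\ for each value of $\rho_\text{min}$):

\begin{verbatim}
def precision_recall_curve(detections, ground_truths, iou_thr=0.5):
    """List of (recall, precision) points obtained by sweeping rho_min."""
    ranked = sorted(detections, key=lambda d: d.score, reverse=True)
    labels = match_detections(ranked, ground_truths, iou_thr)
    points = []
    for i in range(1, len(ranked) + 1):
        # Keep only the i most confident detections.
        p, r = precision_recall(labels[:i], len(ground_truths))
        points.append((r, p))
    return points
\end{verbatim}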
\end{description}