diff --git a/src/year2/artificial-intelligence-in-industry/a3i.tex b/src/year2/artificial-intelligence-in-industry/a3i.tex new file mode 100644 index 0000000..369e4d9 --- /dev/null +++ b/src/year2/artificial-intelligence-in-industry/a3i.tex @@ -0,0 +1,13 @@ +\documentclass[11pt]{ainotes} + +\title{Artificial Intelligence in Industry} +\date{2024 -- 2025} +\def\lastupdate{{PLACEHOLDER-LAST-UPDATE}} +\def\giturl{{PLACEHOLDER-GIT-URL}} + +\begin{document} + + \makenotesfront + \input{./sections/_anomaly_detection.tex} + +\end{document} \ No newline at end of file diff --git a/src/year2/artificial-intelligence-in-industry/ainotes.cls b/src/year2/artificial-intelligence-in-industry/ainotes.cls new file mode 120000 index 0000000..146fd3c --- /dev/null +++ b/src/year2/artificial-intelligence-in-industry/ainotes.cls @@ -0,0 +1 @@ +../../ainotes.cls \ No newline at end of file diff --git a/src/year2/artificial-intelligence-in-industry/img/_ad_taxi_data.pdf b/src/year2/artificial-intelligence-in-industry/img/_ad_taxi_data.pdf new file mode 100644 index 0000000..4ad0298 Binary files /dev/null and b/src/year2/artificial-intelligence-in-industry/img/_ad_taxi_data.pdf differ diff --git a/src/year2/artificial-intelligence-in-industry/metadata.json b/src/year2/artificial-intelligence-in-industry/metadata.json new file mode 100644 index 0000000..31476d7 --- /dev/null +++ b/src/year2/artificial-intelligence-in-industry/metadata.json @@ -0,0 +1,11 @@ +{ + "name": "Artificial Intelligence in Industry", + "year": 2, + "semester": 1, + "pdfs": [ + { + "name": null, + "path": "a3i.pdf" + } + ] +} \ No newline at end of file diff --git a/src/year2/artificial-intelligence-in-industry/sections/_anomaly_detection.tex b/src/year2/artificial-intelligence-in-industry/sections/_anomaly_detection.tex new file mode 100644 index 0000000..c110bbe --- /dev/null +++ b/src/year2/artificial-intelligence-in-industry/sections/_anomaly_detection.tex @@ -0,0 +1,60 @@ +\chapter{Anomaly detection: Taxi calls} 
+ +\begin{description} + \item[Anomaly] \marginnote{Anomaly} + Event that deviates from the usual pattern. + + \item[Time series] \marginnote{Time series} + Data with an ordering (e.g., chronological). +\end{description} + + + +\section{Data} + +The dataset is a time series represented as a \texttt{DataFrame} with the following fields: +\begin{descriptionlist} + \item[\texttt{timestamp}] with a 30-minute granularity. + \item[\texttt{value}] number of calls. +\end{descriptionlist} + +The label is a \texttt{Series} containing the timestamps of the anomalies. + +An additional \texttt{DataFrame} contains information about the time window in which the anomalies happen: +\begin{descriptionlist} + \item[\texttt{begin}] acceptable moment from which an anomaly can be detected. + \item[\texttt{end}] acceptable moment from which there are no anomalies anymore. +\end{descriptionlist} + +\begin{figure}[H] + \centering + \includegraphics[width=0.7\linewidth]{./img/_ad_taxi_data.pdf} + \caption{Plot of the time series, anomalies, and windows} +\end{figure} + + + +\section{Approaches} + + +\subsection{Gaussian assumption} + +Assuming that the data follows a Gaussian distribution, mean and variance can be used to determine anomalies through a threshold. The $z$-score can also be used. + + +\subsection{Characterize data distribution} + +Classify a data point as an anomaly if it is too unlikely. + +\begin{description} + \item[Formalization] + Given a random variable $X$ with values $x$ to represent the number of taxi calls, we want to find its probability density function (PDF) $f(x)$. + + A value $x$ is classified as an anomaly if: + \[ f(x) \leq \varepsilon \] + where $\varepsilon$ is a threshold. + + \begin{remark} + The PDF can be reasonably used even though the dataset is discrete if its data points are sufficiently fine-grained. + \end{remark} +\end{description} \ No newline at end of file