diff --git a/src/machine-learning-and-data-mining/img/crisp.png b/src/machine-learning-and-data-mining/img/crisp.png new file mode 100644 index 0000000..779f2db Binary files /dev/null and b/src/machine-learning-and-data-mining/img/crisp.png differ diff --git a/src/machine-learning-and-data-mining/main.tex b/src/machine-learning-and-data-mining/main.tex index 8afd97d..208abc4 100644 --- a/src/machine-learning-and-data-mining/main.tex +++ b/src/machine-learning-and-data-mining/main.tex @@ -15,6 +15,7 @@ \DeclareAcronym{etl}{short=ETL, long=Extraction{,} Transformation{,} Loading} \DeclareAcronym{dfm}{short=DFM, long=Dimensional Fact Model} \DeclareAcronym{cdc}{short=CDC, long=Change Data Capture} +\DeclareAcronym{crisp}{short=CRISP-DM, long=Cross Industry Standard Process for Data Mining} \begin{document} @@ -26,5 +27,6 @@ \input{sections/_intro.tex} \input{sections/_data_warehouse.tex} \input{sections/_data_lake.tex} + \input{sections/_crisp.tex} \end{document} \ No newline at end of file diff --git a/src/machine-learning-and-data-mining/sections/_crisp.tex b/src/machine-learning-and-data-mining/sections/_crisp.tex new file mode 100644 index 0000000..aa8d5d6 --- /dev/null +++ b/src/machine-learning-and-data-mining/sections/_crisp.tex @@ -0,0 +1,57 @@ +\chapter{CRISP-DM} + +\begin{description} + \item[\Acl{crisp}] \marginnote{\acs{crisp}} + Standardized process for data mining. + \begin{figure}[ht] + \centering + \includegraphics[width=0.45\textwidth]{img/crisp.png} + \caption{\ac{crisp} workflow} + \end{figure} +\end{description} + + +\section{Business understanding} +\begin{itemize} + \item Determine the objective and the success criteria. + \marginnote{Business understanding} + \item Feasibility study. + \item Produce a plan. +\end{itemize} + +\section{Data understanding} +\begin{itemize} + \item Determine the available (raw) data. + \marginnote{Data understanding} + \item Determine the cost of the data. + \item Collect, describe, explore and verify data. +\end{itemize} + +\section{Data preparation} +\begin{itemize} + \item Data cleaning. + \marginnote{Data preparation} + \item Data transformations. +\end{itemize} + +\section{Modelling} +\begin{itemize} + \item Select modelling technique. + \marginnote{Modelling} + \item Build/train the model. +\end{itemize} + +\section{Evaluation} +\begin{itemize} + \item Evaluate results. + \marginnote{Evaluation} + \item Review process. +\end{itemize} + +\section{Deployment} +\begin{itemize} + \item Plan deployment. + \marginnote{Deployment} + \item Plan monitoring and maintenance. + \item Final report and review. +\end{itemize}