Add ML/DM association rules

2023-12-22 20:00:40 +01:00
parent c09e318212
commit 1d56ef172f
5 changed files with 357 additions and 0 deletions


@@ -63,6 +63,7 @@
\newtheorem*{example}{Example}
\theoremstyle{definition}
\newtheorem*{definition}{Def}
\newtheorem*{remark}{Remark}
\newcommand{\ubar}[1]{\text{\b{$#1$}}}
\renewcommand{\vec}[1]{{\bm{\mathbf{#1}}}}

(binary image added, 277 KiB)

(binary image added, 225 KiB)


@@ -33,5 +33,8 @@
\input{sections/_classification.tex}
\input{sections/_regression.tex}
\input{sections/_clustering.tex}
\input{sections/_association_rules.tex}
\eoc
\end{document}


@@ -0,0 +1,353 @@
\chapter{Association rules}
\section{Frequent itemset}
\begin{description}
\item[Itemset] \marginnote{Itemset}
Collection of one or more items (e.g. $\{ \text{milk}, \text{bread}, \text{diapers} \}$).
\item[$k$-itemset] \marginnote{$k$-itemset}
Itemset with $k$ items.
\item[Support count] \marginnote{Support count}
Number of occurrences of an itemset in a dataset.
\begin{example}
\phantom{}\\
\begin{minipage}{0.4\textwidth}
Given the following transactions:
\begin{center}
\begin{tabular}{|c|l|}
\hline
1 & bread, milk \\
2 & beer, bread, diaper, eggs \\
3 & beer, coke, diaper, milk \\
\textbf{4} & \textbf{beer, bread, diaper, milk} \\
\textbf{5} & \textbf{bread, coke, diaper, milk} \\
\hline
\end{tabular}
\end{center}
\end{minipage}
\begin{minipage}{0.5\textwidth}
The support count of the itemset containing bread, diaper and milk is:
\[ \sigma(\{ \text{bread}, \text{diaper}, \text{milk} \}) = 2 \]
\end{minipage}
\end{example}
\item[Association rule] \marginnote{Association rule}
Given two itemsets $A$ and $C$, an association rule has form:
\[ A \rightarrow C \]
It expresses that transactions containing $A$ tend to also contain $C$.
Note that it is a statement about co-occurrence, not a logical implication.
\item[Metrics] \phantom{}
\begin{description}
\item[Support] \marginnote{Support}
Given $N$ transactions, the support of an itemset $A$ is:
\[ \texttt{sup}(A) = \frac{\sigma(A)}{N} \]
The support of an association rule $A \rightarrow C$ is:
\[ \texttt{sup}(A \rightarrow C) = \texttt{sup}(A \cup C) = \frac{\sigma(A \cup C)}{N} \]
A rule with low support may hold simply by chance, so low-support rules are usually uninteresting.
\begin{description}
\item[Frequent itemset] \marginnote{Frequent itemset}
Itemset whose support is at least a given threshold.
\end{description}
\item[Confidence] \marginnote{Confidence}
Given an association rule $A \rightarrow C$, its confidence is given by:
\[ \texttt{conf}(A \rightarrow C) = \frac{\sigma(A \cup C)}{\sigma(A)} \in [0, 1] \]
A rule with low confidence is unreliable: the consequent rarely occurs among the transactions that contain the antecedent.
\begin{theorem}
The confidence of $A \rightarrow C$ can be computed given the supports of $A \rightarrow C$ and $A$:
\[ \texttt{conf}(A \rightarrow C) = \frac{\texttt{sup}(A \rightarrow C)}{\texttt{sup}(A)} \]
\end{theorem}
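\begin{example}
With the transactions of the previous example:
\[ \texttt{conf}(\{ \text{bread}, \text{diaper} \} \rightarrow \{ \text{milk} \}) = \frac{\sigma(\{ \text{bread}, \text{diaper}, \text{milk} \})}{\sigma(\{ \text{bread}, \text{diaper} \})} = \frac{2}{3} \]
\end{example}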
\end{description}
\item[Association rule mining] \marginnote{Association rule mining}
Given $N$ transactions and two thresholds \texttt{min\_sup} and \texttt{min\_conf},
association rule mining finds all the rules $A \rightarrow C$ such that:
\[ \begin{split}
\texttt{sup}(A \rightarrow C) &\geq \texttt{min\_sup} \\
\texttt{conf}(A \rightarrow C) &\geq \texttt{min\_conf}
\end{split} \]
This can be done in two steps:
\begin{enumerate}
\item \marginnote{Frequent itemset generation}
Determine the itemsets with $\text{support} \geq \texttt{min\_sup}$ (frequent itemsets).
\item \marginnote{Rule generation}
Determine the association rules with $\text{confidence} \geq \texttt{min\_conf}$.
\end{enumerate}
\end{description}
\section{Frequent itemset generation}
\subsection{Brute force}
Given $D$ items, there are $2^D$ possible itemsets.
Computing the support of a single itemset costs $O(NW)$, where
$N$ is the number of transactions and $W$ is the width of the largest transaction.
Listing all the itemsets and computing their supports therefore has an exponential complexity of $O(NW2^D)$.
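As a reference, the brute force search can be sketched in a few lines of Python (the helper names and the toy dataset are illustrative, not part of the original formulation):
\begin{lstlisting}[language=Python]
from itertools import combinations

def brute_force_frequent_itemsets(transactions, min_sup):
    """Enumerate all 2^D candidate itemsets and keep the frequent ones."""
    items = sorted({item for t in transactions for item in t})
    n = len(transactions)
    frequent = {}
    for k in range(1, len(items) + 1):
        for candidate in combinations(items, k):
            # Support count: one O(W) subset test per transaction.
            sigma = sum(1 for t in transactions if set(candidate) <= t)
            if sigma / n >= min_sup:
                frequent[candidate] = sigma / n
    return frequent

transactions = [
    {"bread", "milk"},
    {"beer", "bread", "diaper", "eggs"},
    {"beer", "coke", "diaper", "milk"},
    {"beer", "bread", "diaper", "milk"},
    {"bread", "coke", "diaper", "milk"},
]
print(brute_force_frequent_itemsets(transactions, min_sup=0.6))
\end{lstlisting}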
\subsection{Apriori principle}
\begin{theorem} \marginnote{Apriori principle}
If an itemset is frequent, then all of its subsets are frequent.
\begin{proof}
Every transaction that contains an itemset $Y$ also contains each of its subsets $X \subseteq Y$. By the definition of support, it follows that:
\[ \forall X, Y: (X \subseteq Y) \Rightarrow (\texttt{sup}(X) \geq \texttt{sup}(Y)) \]
In other words, the support metric is anti-monotone.
\end{proof}
\end{theorem}
\begin{corollary}
If an itemset is infrequent, then all of its supersets are infrequent.
\end{corollary}
\begin{example} \phantom{}
\begin{center}
\includegraphics[width=0.6\textwidth]{img/itemset_apriori.png}
\end{center}
\end{example}
\begin{algorithm}[H]
\caption{Apriori frequent itemset generation}
\begin{lstlisting}[mathescape=true]
def candidatesGeneration(freq_itemsets$_k$):
    candidate_itemsets$_{k+1}$ = selfJoin(freq_itemsets$_k$)
    # Prune candidates with an infrequent $k$-subset (apriori principle).
    for itemset in candidate_itemsets$_{k+1}$:
        for sub in subsetsOfSize($k$, itemset):
            if sub not in freq_itemsets$_k$:
                candidate_itemsets$_{k+1}$.remove(itemset)
    return candidate_itemsets$_{k+1}$

def aprioriItemsetGeneration(transactions, min_sup):
    freq_itemsets$_1$ = $\{ c \in \texttt{itemsetsOfSize(1, transactions)} \mid \texttt{sup(}c\texttt{)} \geq \texttt{min\_sup} \}$
    k = 1
    while freq_itemsets$_k$ is not empty:
        candidate_itemsets$_{k+1}$ = candidatesGeneration(freq_itemsets$_k$)
        freq_itemsets$_{k+1}$ = $\{ c \in \texttt{candidate\_itemsets}_{k+1} \mid \texttt{sup(}c\texttt{)} \geq \texttt{min\_sup} \}$
        k += 1
    return $\bigcup_i$ freq_itemsets$_i$
\end{lstlisting}
\end{algorithm}
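For concreteness, a minimal executable Python translation of the pseudocode above could look as follows (the set-based self join is a simplification of the usual sorted-prefix join):
\begin{lstlisting}[language=Python]
from itertools import combinations

def apriori(transactions, min_sup):
    """Level-wise search for frequent itemsets."""
    n = len(transactions)
    transactions = [frozenset(t) for t in transactions]

    def support(itemset):
        return sum(1 for t in transactions if itemset <= t) / n

    items = {item for t in transactions for item in t}
    current = {frozenset({i}) for i in items if support(frozenset({i})) >= min_sup}
    frequent = {s: support(s) for s in current}
    k = 1
    while current:
        # Self-join: merge frequent k-itemsets into (k+1)-candidates.
        candidates = {a | b for a in current for b in current if len(a | b) == k + 1}
        # Apriori pruning: every k-subset of a candidate must be frequent.
        candidates = {c for c in candidates
                      if all(frozenset(s) in current for s in combinations(c, k))}
        current = {c for c in candidates if support(c) >= min_sup}
        frequent.update((c, support(c)) for c in current)
        k += 1
    return frequent
\end{lstlisting}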
\begin{description}
\item[Complexity]
The complexity of the Apriori algorithm depends on:
\begin{itemize}
\item The choice of the support threshold.
\item The number of unique items.
\item The number and the width of the transactions.
\end{itemize}
\end{description}
\section{Rule generation}
\subsection{Brute force}
Given a frequent $k$-itemset $L$, there are $2^k-2$ possible association rules ($-2$ as $L \rightarrow \varnothing$ and $\varnothing \rightarrow L$ can be ignored).
For each possible rule, it is necessary to compute the confidence. The overall complexity is exponential.
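The enumeration itself is straightforward; a small Python sketch (with illustrative names):
\begin{lstlisting}[language=Python]
from itertools import combinations

def all_rules(itemset):
    """Yield the 2^k - 2 candidate rules A -> C with A, C non-empty."""
    itemset = frozenset(itemset)
    for r in range(1, len(itemset)):       # antecedent sizes 1 .. k-1
        for antecedent in combinations(itemset, r):
            a = frozenset(antecedent)
            yield a, itemset - a           # consequent C = L \ A

# A 3-itemset yields 2^3 - 2 = 6 candidate rules.
print(list(all_rules({"a", "b", "c"})))
\end{lstlisting}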
\subsection{Apriori principle}
\begin{theorem} \marginnote{Apriori principle}
Without loss of generality, consider the rules generated from a frequent itemset $\{ A, B, C, D \}$.
It holds that:
\[ \texttt{conf}(ABC \rightarrow D) \geq \texttt{conf}(AB \rightarrow CD) \geq \texttt{conf}(A \rightarrow BCD) \]
\begin{proof}
All three confidences share the numerator $\sigma(ABCD)$, while their denominators satisfy $\sigma(ABC) \leq \sigma(AB) \leq \sigma(A)$ by anti-monotonicity of the support.
\end{proof}
\end{theorem}
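\begin{example}
With the transactions of the first example:
\[ \texttt{conf}(\{ \text{bread}, \text{diaper} \} \rightarrow \{ \text{milk} \}) = \frac{2}{3} \geq \texttt{conf}(\{ \text{bread} \} \rightarrow \{ \text{diaper}, \text{milk} \}) = \frac{2}{4} \]
\end{example}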
\begin{example} \phantom{}
\begin{center}
\includegraphics[width=0.5\textwidth]{img/rules_apriori.png}
\end{center}
\end{example}
\section{Interestingness measures}
\begin{description}
\item[Contingency table] \marginnote{Contingency table}
Given an association rule $A \rightarrow C$, its contingency table is defined as:
\begin{center}
\def\arraystretch{1.1}
\begin{tabular}{c|c|c|c}
& $C$ & $\overline{C}$ & \\
\hline
$A$ & $\prob{A \land C}$ & $\prob{A \land \overline{C}}$ & $\prob{A}$ \\
\hline
$\overline{A}$ & $\prob{\overline{A} \land C}$ & $\prob{\overline{A} \land \overline{C}}$ & $\prob{\overline{A}}$ \\
\hline
& $\prob{C}$ & $\prob{\overline{C}}$ & $1$ \\
\end{tabular}
\end{center}
\end{description}
\begin{remark}
Confidence can be misleading.
\begin{example} \phantom{}\\
\begin{minipage}[t]{0.36\textwidth}
Given the following contingency table:
\begin{center}
\begin{tabular}{c|c|c|c}
& coffee & $\overline{\text{coffee}}$ & \\
\hline
tea & 15 & 5 & 20 \\
\hline
$\overline{\text{tea}}$ & 75 & 5 & 80 \\
\hline
& 90 & 10 & 100 \\
\end{tabular}
\end{center}
\end{minipage}
\hspace{0.5cm}
\begin{minipage}[t]{0.6\textwidth}
We have that:
\[ \texttt{conf}(\text{tea} \rightarrow \text{coffee}) = \frac{\texttt{sup}(\{ \text{tea}, \text{coffee} \})}{\texttt{sup}(\text{tea})} = \frac{15}{20} = 0.75 \]
But, we also have that:
\[ \prob{\text{coffee}} = 0.9 \hspace*{1cm} \prob{\text{coffee} \mid \overline{\text{tea}}} = \frac{75}{80} = 0.9375 \]
So, despite the high confidence of $(\text{tea} \rightarrow \text{coffee})$,
drinking coffee is actually more likely in the absence of tea ($0.9375 > 0.75$): the two items are negatively correlated.
\end{minipage}
\end{example}
\end{remark}
\subsection{Statistical-based measures}
Measures that take into account the statistical independence of the items.
\begin{description}
\item[Lift] \marginnote{Lift}
\[ \texttt{lift}(A \rightarrow C) = \frac{\texttt{conf}(A \rightarrow C)}{\texttt{sup}(C)} = \frac{\prob{A \land C}}{\prob{A}\prob{C}} \]
If $\texttt{lift}(A \rightarrow C) = 1$, then $A$ and $C$ are independent. Values above $1$ indicate a positive correlation, values below $1$ a negative one.
\item[Leverage] \marginnote{Leverage}
\[ \texttt{leve}(A \rightarrow C) = \texttt{sup}(A \cup C) - \texttt{sup}(A)\texttt{sup}(C) = \prob{A \land C} - \prob{A}\prob{C} \]
If $\texttt{leve}(A \rightarrow C) = 0$, then $A$ and $C$ are independent. Positive values indicate a positive correlation, negative values a negative one.
\item[Conviction] \marginnote{Conviction}
\[ \texttt{conv}(A \rightarrow C) = \frac{1 - \texttt{sup}(C)}{1 - \texttt{conf}(A \rightarrow C)} = \frac{\prob{A}(1-\prob{C})}{\prob{A}-\prob{A \land C}} \]
If $\texttt{conv}(A \rightarrow C) = 1$, then $A$ and $C$ are independent (the three measures are compared in the worked example below).
\end{description}
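\begin{example}
On the tea/coffee contingency table of the previous remark:
\[ \begin{split}
\texttt{lift}(\text{tea} \rightarrow \text{coffee}) &= \frac{0.75}{0.9} \approx 0.83 < 1 \\
\texttt{leve}(\text{tea} \rightarrow \text{coffee}) &= 0.15 - (0.2 \cdot 0.9) = -0.03 < 0 \\
\texttt{conv}(\text{tea} \rightarrow \text{coffee}) &= \frac{1 - 0.9}{1 - 0.75} = 0.4 < 1
\end{split} \]
All three measures reveal the negative correlation that confidence alone hides.
\end{example}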
\begin{table}[H]
\centering
\begin{tabular}{c|p{10cm}}
\hline
\textbf{Metric} & \textbf{Interpretation} \\
\hline
High support & The rule applies to many transactions. \\
\hline
High confidence & Transactions containing the antecedent are very likely to also contain the consequent. \\
\hline
High lift & Low chance that the rule is just a coincidence. \\
\hline
High conviction & The rule is violated less often compared to the case when the antecedent and consequent are independent. \\
\hline
\end{tabular}
\caption{Intuitive interpretation of the measures}
\end{table}
\section{Multi-dimensional association rules}
\begin{description}
\item[Mono-dimensional events] \marginnote{Mono-dimensional events}
Represented as transactions. Each event contains items that appear together.
\item[Multi-dimensional events] \marginnote{Multi-dimensional events}
Represented as tuples. Each event contains the values of its attributes.
\item[Mono/Multi-dimensional equivalence] \marginnote{Equivalence}
Mono-dimensional events can be converted into multi-dimensional events and vice versa (a conversion sketch in code follows the examples below).
Quantitative attributes are usually discretized before the transformation.
\begin{example}[Multi to mono] \phantom{}\\
\begin{minipage}{0.35\textwidth}
\begin{center}
\begin{tabular}{c|c|c}
\textbf{Id} & \textbf{co2} & \textbf{tin\_oxide} \\
\hline
1 & high & medium \\
2 & medium & low \\
\end{tabular}
\end{center}
\end{minipage}
$\rightarrow$
\begin{minipage}{0.48\textwidth}
\begin{center}
\begin{tabular}{c|c}
\textbf{Id} & \textbf{Transaction} \\
\hline
1 & $\{ \text{co2/high}, \text{tin\_oxide/medium} \}$ \\
2 & $\{ \text{co2/medium}, \text{tin\_oxide/low} \}$ \\
\end{tabular}
\end{center}
\end{minipage}
\end{example}
\begin{example}[Mono to multi] \phantom{}\\
\begin{minipage}{0.35\textwidth}
\begin{center}
\begin{tabular}{c|c|c|c|c}
\textbf{Id} & \textbf{a} & \textbf{b} & \textbf{c} & \textbf{d} \\
\hline
1 & yes & yes & no & no \\
2 & yes & no & yes & no \\
\end{tabular}
\end{center}
\end{minipage}
$\leftarrow$
\begin{minipage}{0.30\textwidth}
\begin{center}
\begin{tabular}{c|c}
\textbf{Id} & \textbf{Transaction} \\
\hline
1 & $\{ \text{a}, \text{b} \}$ \\
2 & $\{ \text{a}, \text{c} \}$ \\
\end{tabular}
\end{center}
\end{minipage}
\end{example}
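As an illustration, the multi-to-mono transformation can be sketched in Python (the attribute/value encoding mirrors the first example above):
\begin{lstlisting}[language=Python]
def tuples_to_transactions(rows):
    """Encode each attribute/value pair as an 'attribute/value' item."""
    return [{f"{attr}/{value}" for attr, value in row.items()} for row in rows]

rows = [
    {"co2": "high", "tin_oxide": "medium"},
    {"co2": "medium", "tin_oxide": "low"},
]
print(tuples_to_transactions(rows))
# [{'co2/high', 'tin_oxide/medium'}, {'co2/medium', 'tin_oxide/low'}]
\end{lstlisting}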
\end{description}
\section{Multi-level association rules}
Organize items into a hierarchy.
\begin{description}
\item[Specialized to general] \marginnote{Specialized to general}
Generally, the support of the rule increases.
\begin{example}
From $(\text{apple} \rightarrow \text{milk})$ to $(\text{fruit} \rightarrow \text{dairy})$
\end{example}
\item[General to specialized] \marginnote{General to specialized}
Generally, the support of the rule decreases.
\begin{example}
From $(\text{fruit} \rightarrow \text{dairy})$ to $(\text{apple} \rightarrow \text{milk})$
\end{example}
\item[Redundant level] \marginnote{Redundant level}
A specialized rule is redundant if its confidence is similar to that of its more general version.
\item[Multi-level association rule mining] \marginnote{Multi-level association rule mining}
Run association rule mining on different levels of abstraction, from general to specialized (see the sketch below).
At each level down the hierarchy, the support threshold is decreased, as specialized rules naturally have lower support.
\end{description}
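A minimal sketch of the specialized-to-general step, assuming a hypothetical parent map for the hierarchy:
\begin{lstlisting}[language=Python]
# Hypothetical hierarchy: each item maps to its parent category.
hierarchy = {"apple": "fruit", "banana": "fruit", "milk": "dairy", "butter": "dairy"}

def generalize(transactions, hierarchy):
    """Replace every item with its ancestor in the hierarchy (if any)."""
    return [{hierarchy.get(item, item) for item in t} for t in transactions]

transactions = [{"apple", "milk"}, {"banana", "butter"}, {"apple", "bread"}]
print(generalize(transactions, hierarchy))
# Every transaction supporting (apple -> milk) also supports (fruit -> dairy),
# so the support of the general rule can only be larger.
\end{lstlisting}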