From 617fd5b7bd4be32214582268e7e1f0aefd4928cb Mon Sep 17 00:00:00 2001
From: NotXia <35894453+NotXia@users.noreply.github.com>
Date: Sat, 19 Apr 2025 11:39:55 +0200
Subject: [PATCH] Add ethics2 explainability

---
 src/year2/ethics-in-ai/module3/ethics3.tex    |   1 +
 .../module3/sections/_explainability.tex      | 202 ++++++++++++++++++
 2 files changed, 203 insertions(+)
 create mode 100644 src/year2/ethics-in-ai/module3/sections/_explainability.tex

diff --git a/src/year2/ethics-in-ai/module3/ethics3.tex b/src/year2/ethics-in-ai/module3/ethics3.tex
index 553732b..0619ba2 100644
--- a/src/year2/ethics-in-ai/module3/ethics3.tex
+++ b/src/year2/ethics-in-ai/module3/ethics3.tex
@@ -10,5 +10,6 @@ \makenotesfront
     \include{./sections/_human_agency_oversight.tex}
     \include{./sections/_robustness_safety.tex}
+    \include{./sections/_explainability.tex}
 \end{document}
\ No newline at end of file
diff --git a/src/year2/ethics-in-ai/module3/sections/_explainability.tex b/src/year2/ethics-in-ai/module3/sections/_explainability.tex
new file mode 100644
index 0000000..cad7cde
--- /dev/null
+++ b/src/year2/ethics-in-ai/module3/sections/_explainability.tex
@@ -0,0 +1,202 @@
+\chapter{Explainability}
+
+
+\begin{description}
+    \item[Transparency] \marginnote{Transparency}
+    Ensure that appropriate information reaches the relevant stakeholders.
+
+    \item[Explanation] \marginnote{Explanation}
+    Evidence, support, or reasoning related to a system's output or process.
+
+    An explanation can be assessed by the following properties:
+    \begin{descriptionlist}
+        \item[Quality] Related to the accuracy of the explanation.
+        \item[Quantity] Related to the amount of information delivered.
+        \item[Relation] Whether it only contains relevant information.
+        \item[Manner] How the information is delivered.
+        \item[Context-oriented] Whether it accounts for the knowledge and capabilities of the recipient of the explanation.
+        \item[Knowledge limit] Whether it stays within the limits of the model's knowledge (e.g., its training data).
+    \end{descriptionlist}
+
+    An explanation can be:
+    \begin{descriptionlist}
+        \item[Attribute based] Describe the contribution of each input feature.
+        \item[Rule based] If-then rules based on the input features.
+        \item[Counterfactual] Determine which changes to the input features would have made the prediction different.
+        \item[Argumentation based] Produce the explanation by extracting and processing arguments.
+    \end{descriptionlist}
+\end{description}
+
+
+\section{Explanation taxonomy}
+
+
+\subsection{Global vs local}
+
+\begin{description}
+    \item[Global explanation] \marginnote{Global explanation}
+    Explain the model as a whole.
+
+    \item[Local explanation] \marginnote{Local explanation}
+    Explain the output of the model for a particular instance.
+\end{description}
+
+
+\subsection{Approaches}
+
+\begin{description}
+    \item[Model (global) explanation] \marginnote{Model (global) explanation}
+    Create an interpretable predictor that mimics the one to be explained on the entire input space.
+
+    \item[Outcome (local) explanation] \marginnote{Outcome (local) explanation}
+    Create an interpretable predictor that mimics the one to be explained on a portion of the input space (around the instance of interest).
+
+    \item[Model inspection] \marginnote{Model inspection}
+    Provide a representation (e.g., textual or visual) of specific properties or behaviors of the model.
+
+    \item[Transparent box design] \marginnote{Transparent box design}
+    Directly use an interpretable predictor.
+\end{description}
+
+
+
+\section{XAI abstract framework}
+
+\begin{description}
+    \item[Interpretation] \marginnote{Interpretation}
+    Associate a (subjective) meaning to an object.
+
+    \item[Explanation] \marginnote{Explanation}
+    Extract relevant aspects of an object to ease its interpretation.
+\end{description}
+
+\begin{description}
+    \item[XAI abstract framework] \marginnote{XAI abstract framework}
+    System composed of:
+    \begin{itemize}
+        \item A model $M$ to explain, with representation $R$,
+        \item An explanation function $E$.
+    \end{itemize}
+    The explanation function produces another model $M' = E(M)$ whose representation $R'$ is more interpretable than $R$, while keeping the performance difference between $M$ and $M'$ as small as possible.
+\end{description}
+
+
+
+\section{Explanation via feature importance}
+
+
+\begin{description}
+    \item[Feature importance explanation] \marginnote{Feature importance explanation}
+    Method that assigns an importance score to each input feature, for either local or global explanation.
+\end{description}
+
+
+\subsection{Local interpretable model-agnostic explanations (LIME)}
+
+\begin{description}
+    \item[LIME] \marginnote{LIME}
+    Model-agnostic method for post-hoc (after training) explanation. Given a model $f$ to explain and an input $\vec{x}$, LIME works as follows:
+    \begin{enumerate}
+        \item Sample $N$ points $\vec{z}_1, \dots, \vec{z}_N$ around $\vec{x}$ according to some proximity measure.
+        \item Form a dataset of the sampled points $\langle \vec{z}_i', y_i \rangle$ where $\vec{z}_i'$ is an interpretable (e.g., one-hot) encoding of $\vec{z}_i$ and $y_i = f(\vec{z}_i)$.
+        \item Train an interpretable local surrogate model $g$ on the sampled data.
+        \item Repeat with different hyperparameters of $g$ and pick the configuration that maximizes the fidelity to $f$ while minimizing the complexity of $g$ (this trade-off is formalized in a remark below).
+        \item Use the coefficients of $g$ to measure feature importance.
+    \end{enumerate}
+
+    \begin{remark}
+        A global explanation can be obtained by aggregating local explanations over multiple points.
+    \end{remark}
+\end{description}
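+
+\begin{remark}
+    Step 4 corresponds to the usual LIME objective; the symbols $G$, $\pi_{\vec{x}}$, $\mathcal{L}$, and $\Omega$ are introduced here only for convenience:
+    \[
+        g^* = \arg\min_{g \in G} \mathcal{L}(f, g, \pi_{\vec{x}}) + \Omega(g)
+    \]
+    where $G$ is the family of interpretable surrogate models, $\pi_{\vec{x}}$ is the proximity measure around $\vec{x}$, $\mathcal{L}$ measures how unfaithful $g$ is to $f$ on the sampled points (weighted by $\pi_{\vec{x}}$), and $\Omega(g)$ measures the complexity of $g$.
+\end{remark}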
+
+
+
+\section{Explanation via symbolic knowledge extraction}
+
+\begin{description}
+    \item[Symbolic knowledge extraction explanation] \marginnote{Symbolic knowledge extraction explanation}
+    Method that, given a sub-symbolic model, produces a symbolic representation of it (e.g., a rule list, decision tree, or decision table).
+
+    The expressiveness of the extracted knowledge can be:
+    \begin{descriptionlist}
+        \item[Propositional] Boolean statements and logical connectives.
+        \item[Fuzzy] Hierarchical if-then-else statements comparing variables with constants.
+        \item[Oblique] Propositional logic with arithmetic comparisons.
+        \item[M-of-N] Propositional, fuzzy, or oblique expressions extended with statements of the form $m \text{ of } \{ \phi_1, \dots, \phi_n \}$ (true when at least $m$ of the $n$ conditions hold).
+    \end{descriptionlist}
+\end{description}
+
+
+
+\section{Symbolic knowledge injection}
+
+\begin{description}
+    \item[Symbolic knowledge injection] \marginnote{Symbolic knowledge injection}
+    Method to modify a predictor so that it is consistent with some symbolic knowledge provided by the user.
+
+    Symbolic knowledge can be injected through:
+    \begin{descriptionlist}
+        \item[Guided learning]
+        Encode the input knowledge as a cost factor and include it in the training loss.
+
+        \item[Structuring]
+        Modify the architecture of the predictor to mimic the knowledge.
+
+        \item[Embedding]
+        Embed the knowledge and inject it into the training set.
+    \end{descriptionlist}
+\end{description}
+
+
+\section{Argumentation}
+
+\begin{description}
+    \item[Argumentation] \marginnote{Argumentation}
+    Approach that, given some input, extracts the arguments and their relationships, allowing one to study their properties.
+\end{description}
+
+
+\subsection{Computational argumentation}
+
+\begin{description}
+    \item[Abstract argumentation] \marginnote{Abstract argumentation}
+    Directed graph where nodes are arguments and arcs are relationships between arguments (i.e., support or attack).
+
+    There are two common approaches to assess the acceptability of arguments:
+    \begin{descriptionlist}
+        \item[Extension-based]
+        Determine extensions (i.e., sets of collectively acceptable arguments; a worked example is given at the end of this subsection):
+        \begin{descriptionlist}
+            \item[Complete] Conflict-free set of arguments that defends all of its elements and includes every argument it defends.
+            \item[Grounded] The minimal complete extension, built from the unattacked (initial) arguments and the arguments they iteratively defend.
+            \item[Stable] Conflict-free set of arguments that attacks every argument not included in it.
+            \item[Preferred] Maximal (with respect to set inclusion) set of arguments that is able to defend itself.
+        \end{descriptionlist}
+        \item[Labeling-based]
+        Assign to each argument a label representing its state (e.g., accepted, rejected, undecided).
+    \end{descriptionlist}
+
+    \item[Structured argumentation] \marginnote{Structured argumentation}
+    Explicitly model the relationship between the premises and conclusions of the arguments.
+\end{description}
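+
+\begin{remark}
+    For instance, consider the abstract argumentation framework (introduced here only as an illustration) with arguments $\{a, b, c\}$ and attacks $a \rightarrow b$, $b \rightarrow a$, and $b \rightarrow c$. Then:
+    \begin{itemize}
+        \item Complete extensions: $\emptyset$, $\{b\}$, $\{a, c\}$.
+        \item Grounded extension: $\emptyset$ (no argument is unattacked).
+        \item Preferred extensions: $\{b\}$ and $\{a, c\}$.
+        \item Stable extensions: $\{b\}$ and $\{a, c\}$ (each attacks every argument outside of it).
+    \end{itemize}
+\end{remark}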
+
+
+\subsection{Defeasible logic as argumentation}
+
+\begin{description}
+    \item[Conclusive reasoning] \marginnote{Conclusive reasoning}
+    A reasoning schema is conclusive if its conclusions are always true when the premises hold.
+
+    \item[Defeasible reasoning] \marginnote{Defeasible reasoning}
+    A reasoning schema is defeasible if, under certain conditions, its conclusions can be false even when the premises hold.
+\end{description}
+
+\begin{description}
+    \item[Defeasible logic argumentation] \marginnote{Defeasible logic argumentation}
+    Arguments are defined as proof trees. Their relationships can be (see the example below):
+    \begin{descriptionlist}
+        \item[Attack] An argument $A$ attacks a defeasible argument $B$ if the conclusion of $A$ is the complement of the conclusion of $B$ and the conclusion of $B$ is not part of a strict sub-argument of $B$.
+        \item[Support] A set of arguments $S$ supports a defeasible argument $A$ if every proper sub-argument of $A$ is in $S$.
+        \item[Undercut] A defeasible argument $A$ is undercut by a set of arguments $S$ if $S$ supports an argument $B$ that attacks $A$.
+    \end{descriptionlist}
+\end{description}
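+
+\begin{remark}
+    As an illustration, consider the classic Tweety scenario (facts and rules introduced here only as an example, with $\rightarrow$ for strict and $\Rightarrow$ for defeasible rules): the fact $\mathit{penguin}(\mathit{tweety})$, the strict rule $\mathit{penguin}(X) \rightarrow \mathit{bird}(X)$, and the defeasible rules $\mathit{bird}(X) \Rightarrow \mathit{flies}(X)$ and $\mathit{penguin}(X) \Rightarrow \neg\mathit{flies}(X)$.
+    The proof tree $A$ concluding $\mathit{flies}(\mathit{tweety})$ and the proof tree $B$ concluding $\neg\mathit{flies}(\mathit{tweety})$ attack each other: each conclusion is the complement of the other, and neither conclusion is part of a strict sub-argument (only $\mathit{bird}(\mathit{tweety})$ is derived strictly).
+\end{remark}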