mirror of
https://github.com/NotXia/unibo-ai-notes.git
synced 2025-12-16 19:32:21 +01:00
Compare commits
12 Commits
b307aa2786
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 9e9c616993 | |||
| ae243c1346 | |||
|
e0ef470c3e
|
|||
|
bc77ff5740
|
|||
|
cebd07759c
|
|||
|
e0143d5c9f
|
|||
|
cabdb4fc97
|
|||
|
234455d41d
|
|||
|
7baa556800
|
|||
|
067db624fb
|
|||
|
7545b870c9
|
|||
|
fdcbfa6e4a
|
@ -1,4 +1,5 @@
|
|||||||
\documentclass[11pt]{ainotes}
|
\documentclass[11pt]{ainotes}
|
||||||
|
\usepackage{appendix}
|
||||||
|
|
||||||
\title{Distributed Autonomous Systems}
|
\title{Distributed Autonomous Systems}
|
||||||
\date{2024 -- 2025}
|
\date{2024 -- 2025}
|
||||||
@ -53,14 +54,15 @@
|
|||||||
\begin{document}
|
\begin{document}
|
||||||
|
|
||||||
\makenotesfront
|
\makenotesfront
|
||||||
\include{./sections/_graphs.tex}
|
\input{./sections/_graphs.tex}
|
||||||
\include{./sections/_averaging_systems.tex}
|
\input{./sections/_averaging_systems.tex}
|
||||||
\include{./sections/_containment.tex}
|
\input{./sections/_containment.tex}
|
||||||
\include{./sections/_optimization.tex}
|
\input{./sections/_optimization.tex}
|
||||||
\include{./sections/_formation_control.tex}
|
\input{./sections/_formation_control.tex}
|
||||||
\include{./sections/_cooperative_robotics.tex}
|
\input{./sections/_cooperative_robotics.tex}
|
||||||
\include{./sections/_safety_controllers.tex}
|
\input{./sections/_safety_controllers.tex}
|
||||||
\include{./sections/_feedback_optimization.tex}
|
\input{./sections/_feedback_optimization.tex}
|
||||||
\include{./sections/_neural_networks.tex}
|
\input{./sections/_neural_networks.tex}
|
||||||
|
\eoc
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
@ -91,171 +91,6 @@
|
|||||||
|
|
||||||
\subsection{Consensus}
|
\subsection{Consensus}
|
||||||
|
|
||||||
% \begin{remark}
|
|
||||||
% The distributed consensus algorithm is a positive system (i.e., $\matr{A}$ is positive).
|
|
||||||
% \end{remark}
|
|
||||||
|
|
||||||
\begin{description}
|
|
||||||
\item[Positive matrix characterization]
|
|
||||||
Given $\A \in \mathbb{R}^{N \times N}$, it can be:
|
|
||||||
\begin{description}
|
|
||||||
\item[Non-negative] \marginnote{Non-negative matrix}
|
|
||||||
$\A \geq 0$.
|
|
||||||
\item[Irreducible] \marginnote{Irreducible matrix}
|
|
||||||
$\sum_{h=0}^{N-1} \A^h > 0$.
|
|
||||||
\item[Primitive] \marginnote{Primitive matrix}
|
|
||||||
$\exists h \in \{ 1, \dots, N \}: A^h > 0$.
|
|
||||||
% \begin{remark}
|
|
||||||
% A graph with a primitive adjacency matrix is connected.
|
|
||||||
% \end{remark}
|
|
||||||
\item[Positive] \marginnote{Positive matrix}
|
|
||||||
$\A > 0$.
|
|
||||||
\end{description}
|
|
||||||
\end{description}
|
|
||||||
|
|
||||||
|
|
||||||
% Equilibrium:
|
|
||||||
% \[
|
|
||||||
% \x^{k+1} = \matr{A}\x^k
|
|
||||||
% \]
|
|
||||||
% \[
|
|
||||||
% \begin{split}
|
|
||||||
% \x_\text{eq} = \matr{A} \x_\text{eq} \\
|
|
||||||
% \iff (\matr{I} - \matr{A}) \x_\text{eq} = 0
|
|
||||||
% \end{split}
|
|
||||||
% \]
|
|
||||||
% We are interested in the null space of $(\matr{I} - \matr{A})$ (at least one eigenvector).
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
\begin{theorem} \label{th:positive_matrix_digraph_connected}
|
|
||||||
Given a weighted digraph $G$ with $N \geq 2$ nodes and adjacency matrix $\A$, it holds that:
|
|
||||||
\begin{itemize}
|
|
||||||
\item $\A$ is irreducible $\iff$ $G$ is strongly connected.
|
|
||||||
\item $\A$ is primitive $\iff$ $G$ is strongly connected and aperiodic.
|
|
||||||
\end{itemize}
|
|
||||||
\end{theorem}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
\begin{theorem}[Gershgorin] \label{th:gershgorin} \marginnote{Gershgorin theorem}
|
|
||||||
For any square matrix $\A \in \mathbb{C}^{N \times N}$, it holds that the spectrum of $\A$ (i.e., set of eigenvalues) is contained in the Gershgorin disks:
|
|
||||||
\[
|
|
||||||
\text{spec}(\A) \subset \bigcup_{i=1}^{N} \left\{ s \in \mathbb{C} \,\,\bigg|\,\, |s - a_{ii}| \leq \sum_{j=1, j \neq i}^{N} |a_{ij}| \right\}
|
|
||||||
\]
|
|
||||||
In other words, it is the union of the disks with center $a_{ii}$ and radius $\sum_{j=1, j \neq i}^{N} |a_{ij}|$.
|
|
||||||
|
|
||||||
\indenttbox
|
|
||||||
\begin{remark}
|
|
||||||
This theorem provides an approximate location of the eigenvalues.
|
|
||||||
\end{remark}
|
|
||||||
|
|
||||||
\indenttbox
|
|
||||||
\begin{example}
|
|
||||||
Consider the matrix:
|
|
||||||
\[
|
|
||||||
\begin{bmatrix}
|
|
||||||
10 & 1 & 0 & 1 \\
|
|
||||||
0.2 & 8 & 0.2 & 0.2 \\
|
|
||||||
1 & 1 & 2 & 1 \\
|
|
||||||
-1 & -1 & -1 & -11
|
|
||||||
\end{bmatrix}
|
|
||||||
\]
|
|
||||||
|
|
||||||
Its eigenvalues are $\{ -10.870, 1.906, 7.918, 10.046 \}$.
|
|
||||||
|
|
||||||
The Gershgorin disks are:
|
|
||||||
\begin{figure}[H]
|
|
||||||
\centering
|
|
||||||
\includegraphics[width=0.4\linewidth]{./img/gershgorin.png}
|
|
||||||
\end{figure}
|
|
||||||
\end{example}
|
|
||||||
\end{theorem}
|
|
||||||
|
|
||||||
% \begin{lemma}
|
|
||||||
% If all the disks are within the unit disk, the eigenvalues are stable.
|
|
||||||
% \[
|
|
||||||
% \begin{bmatrix}
|
|
||||||
% \frac{1}{2} & \frac{1}{2} & 0 \\
|
|
||||||
% \frac{1}{3} & \frac{1}{3} & \frac{1}{3} \\
|
|
||||||
% 0 & \frac{3}{4} & \frac{1}{4}
|
|
||||||
% \end{bmatrix}
|
|
||||||
% \]
|
|
||||||
% \end{lemma}
|
|
||||||
|
|
||||||
|
|
||||||
\begin{theorem}[Perron-Frobenius] \label{th:perron_frobenius} \marginnote{Perron-Frobenius theorem}
|
|
||||||
Let $\A \in \R^{N \times N}$ with $N \geq 2$ be a non-negative matrix. It holds that:
|
|
||||||
\begin{itemize}
|
|
||||||
\item There exists a real eigenvalue $\lambda \geq 0$ that is dominant for all the other eigenvalues $\mu \in \text{spec}(\A) \smallsetminus \{\lambda\}$ (i.e., $\lambda \geq |\mu|$),
|
|
||||||
\item The right eigenvector $\v \in \R^N$ and left eigenvector $\w \in \R^N$ associated to $\lambda$ can be chosen to be non-negative.
|
|
||||||
\end{itemize}
|
|
||||||
If $\A \in \R^{N \times N}$ is irreducible, then:
|
|
||||||
\begin{itemize}
|
|
||||||
\item The eigenvalue $\lambda$ is strictly positive ($\lambda > 0$) and simple.
|
|
||||||
\item The right and left eigenvectors $\v$ and $\w$ associated to $\lambda$ are unique (up to scaling) and positive.
|
|
||||||
\end{itemize}
|
|
||||||
If $\A \in \R^{N \times N}$ is primitive, then:
|
|
||||||
\begin{itemize}
|
|
||||||
\item The eigenvalue $\lambda$ is strictly dominant for all $\mu \in \text{spec}(\A) \smallsetminus \{\lambda\}$ (i.e., $\lambda > |\mu|$).
|
|
||||||
\end{itemize}
|
|
||||||
\end{theorem}
|
|
||||||
|
|
||||||
\begin{lemma} \label{th:row_stochastic_unit_disk}
|
|
||||||
Given a row stochastic matrix $\A$, it holds that:
|
|
||||||
\begin{itemize}
|
|
||||||
\item $\lambda=1$ is an eigenvalue,
|
|
||||||
\item By \hyperref[th:gershgorin]{Gershgorin Theorem}, $\text{spec}(\A)$ is a subset of the unit disk (i.e., all Gershgorin disks lie inside the unit disk).
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\begin{figure}[H]
|
|
||||||
\centering
|
|
||||||
\includegraphics[width=0.2\linewidth]{./img/gershgorin_unit.png}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
\indenttbox
|
|
||||||
\begin{corollary}
|
|
||||||
The eigenvalue $\lambda=1 \geq |\mu|$ is dominant.
|
|
||||||
\end{corollary}
|
|
||||||
\end{lemma}
|
|
||||||
|
|
||||||
\begin{lemma}
|
|
||||||
Given a row stochastic and primitive matrix $\A$, by \Cref{th:row_stochastic_unit_disk} and \hyperref[th:perron_frobenius]{Perron-Frobenius Theorem} it holds that $\lambda = 1$ is simple and strictly dominant.
|
|
||||||
|
|
||||||
\indenttbox
|
|
||||||
\begin{corollary}
|
|
||||||
The consensus averaging system is marginally stable (i.e., converges but not necessarily to the origin) as the largest distinct eigenvalue is $\lambda = 1$.
|
|
||||||
\end{corollary}
|
|
||||||
\end{lemma}
|
|
||||||
|
|
||||||
|
|
||||||
% \begin{lemma}
|
|
||||||
% \[
|
|
||||||
% \x_\text{eq} = ker(\matr{I} - \A) = \{ \vec{1}\beta \mid \beta \in \R \}
|
|
||||||
% \]
|
|
||||||
|
|
||||||
% \[
|
|
||||||
% \w^T \x^{k+1} = \w^T(\A \x^{k}) = \w^T \x^k
|
|
||||||
% \]
|
|
||||||
% i.e., $\w$ is left eigenvector of $\A$ with $\lambda = 1$.
|
|
||||||
|
|
||||||
% Therefore, the above must be true for:
|
|
||||||
% \[
|
|
||||||
% \begin{split}
|
|
||||||
% \w^T \x_\text{eq} \\
|
|
||||||
% \w^T \x^{0} \\
|
|
||||||
% \end{split}
|
|
||||||
% \]
|
|
||||||
% \[
|
|
||||||
% \w^T \vec{1}\beta \Rightarrow \beta = \frac{\w^T\x^{0}}{\w^T\vec{1}}
|
|
||||||
% \]
|
|
||||||
% \end{lemma}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
\begin{theorem}[Discrete-time consensus] \marginnote{Discrete-time consensus}
|
\begin{theorem}[Discrete-time consensus] \marginnote{Discrete-time consensus}
|
||||||
Consider a discrete-time averaging system with digraph $G$ and weighted adjacency matrix $\matr{A}$. Assume $G$ strongly connected and aperiodic, and $\matr{A}$ row stochastic.
|
Consider a discrete-time averaging system with digraph $G$ and weighted adjacency matrix $\matr{A}$. Assume $G$ strongly connected and aperiodic, and $\matr{A}$ row stochastic.
|
||||||
|
|
||||||
@ -304,122 +139,6 @@
|
|||||||
% \end{gathered}
|
% \end{gathered}
|
||||||
% \]
|
% \]
|
||||||
% \end{proof}
|
% \end{proof}
|
||||||
|
|
||||||
\begin{proof}[Proof (Jordan-form approach)]
|
|
||||||
As $G$ is strongly connected and aperiodic, and $\A$ is row stochastic, it holds that:
|
|
||||||
\begin{itemize}
|
|
||||||
\item By \Cref{th:positive_matrix_digraph_connected}, $\A$ is primitive.
|
|
||||||
\item By \hyperref[th:perron_frobenius]{Perron-Frobenius Theorem} and \Cref{th:row_stochastic_unit_disk}, the eigenvalue $\lambda=1$ is strictly dominant and it is associated to the right eigenvector $\vec{1}$ (row stochasticity) and left eigenvector $\w$.
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
Consider the non-singular matrix $\matr{T} \in \R^{N \times N}$ defined as:
|
|
||||||
\[
|
|
||||||
\matr{T} = \begin{bmatrix}
|
|
||||||
\vert & \vert & & \vert \\
|
|
||||||
\vec{1} & \v^2 & \dots & \v^N \\
|
|
||||||
\vert & \vert & & \vert \\
|
|
||||||
\end{bmatrix} = \begin{bmatrix}
|
|
||||||
\vec{1} & \matr{W}_R
|
|
||||||
\end{bmatrix}
|
|
||||||
\qquad
|
|
||||||
\matr{T}^{-1} = \begin{bmatrix}
|
|
||||||
- & (\w)^T & - \\
|
|
||||||
- & (\w^2)^T & - \\
|
|
||||||
- & \vdots & - \\
|
|
||||||
- & (\w^N)^T & - \\
|
|
||||||
\end{bmatrix} = \begin{bmatrix}
|
|
||||||
\w^T \\ \matr{W}_L
|
|
||||||
\end{bmatrix}
|
|
||||||
\]
|
|
||||||
|
|
||||||
A change in coordinates defined as:
|
|
||||||
\[
|
|
||||||
\x \mapsto \tilde{\x} = \matr{T}^{-1} \x
|
|
||||||
\]
|
|
||||||
allows to obtain the Jordan form $\matr{T}^{-1}\A\matr{T}$:
|
|
||||||
\[
|
|
||||||
\matr{T}^{-1}\A\matr{T} = \begin{bmatrix}
|
|
||||||
1 & 0 & \dots \\
|
|
||||||
0 & & \\
|
|
||||||
\vdots & & \matr{J}_2 \\
|
|
||||||
\end{bmatrix}
|
|
||||||
\]
|
|
||||||
with $\matr{J}_2 \in \mathbb{R}^{(N-1) \times (N-1)}$ Schur (i.e., $\text{spec}(\matr{J}_2)$ inside the open unit disk).
|
|
||||||
|
|
||||||
The dynamics $\x^{k+1} = \A \x^k$ in the new coordinate system is:
|
|
||||||
\[
|
|
||||||
\begin{split}
|
|
||||||
\tilde{\x}^{k+1} &= \matr{T}^{-1} \x^{k+1} = \matr{T}^{-1} \A \matr{T} \tilde{\x}^k \\
|
|
||||||
&= \begin{bmatrix}
|
|
||||||
1 & 0 & \dots \\
|
|
||||||
0 & & \\
|
|
||||||
\vdots & & \matr{J}_2 \\
|
|
||||||
\end{bmatrix} \tilde{\x}^k
|
|
||||||
= \begin{bmatrix}
|
|
||||||
1 & 0 & \dots \\
|
|
||||||
0 & & \\
|
|
||||||
\vdots & & \matr{J}_2 \\
|
|
||||||
\end{bmatrix}^{k+1} \tilde{\x}^0
|
|
||||||
\end{split}
|
|
||||||
\]
|
|
||||||
Let's denote:
|
|
||||||
\[
|
|
||||||
\tilde{\x}^k = \matr{T}^{-1}\x^k = \begin{bmatrix}
|
|
||||||
\w^T\x^k \\ \matr{W}_L\x^k
|
|
||||||
\end{bmatrix}
|
|
||||||
= \begin{bmatrix}
|
|
||||||
\tilde{\x}^k_{m} \\ \tilde{\x}^k_{\bot}
|
|
||||||
\end{bmatrix}
|
|
||||||
\]
|
|
||||||
We have that:
|
|
||||||
\[
|
|
||||||
\begin{split}
|
|
||||||
\lim_{k \rightarrow \infty} \tilde{\x}^k
|
|
||||||
&= \lim_{k \rightarrow \infty} \begin{bmatrix}
|
|
||||||
1 & 0 & \dots \\
|
|
||||||
0 & & \\
|
|
||||||
\vdots & & \matr{J}_2 \\
|
|
||||||
\end{bmatrix}^k \tilde{\x}^0 \\
|
|
||||||
&= \lim_{k \rightarrow \infty} \begin{bmatrix}
|
|
||||||
1 & 0 & \dots \\
|
|
||||||
0 & & \\
|
|
||||||
\vdots & & (\matr{J}_2)^k \\
|
|
||||||
\end{bmatrix} \begin{bmatrix}
|
|
||||||
\tilde{\x}^0_{m} \\ \tilde{\x}^0_{\bot}
|
|
||||||
\end{bmatrix} \\
|
|
||||||
&= \begin{bmatrix}
|
|
||||||
1 \cdot \tilde{\x}^0_{m} \\
|
|
||||||
\lim_{k \rightarrow \infty} (\matr{J}_2)^k \tilde{\x}^0_{\bot}
|
|
||||||
\end{bmatrix} \\
|
|
||||||
&= \begin{bmatrix}
|
|
||||||
\w^T \x^0 \\
|
|
||||||
0
|
|
||||||
\end{bmatrix} \\
|
|
||||||
\end{split}
|
|
||||||
\]
|
|
||||||
Note that $\lim_{k \rightarrow \infty} \matr{J}_2^k = 0$ as it is stable (i.e., all eigenvalues are in the open unit disk $|\mu| < 1$).
|
|
||||||
|
|
||||||
In the original coordinate system, the limit is:
|
|
||||||
\[
|
|
||||||
\begin{split}
|
|
||||||
\lim_{k \rightarrow \infty} \x^k
|
|
||||||
&= \lim_{k \rightarrow \infty} \matr{T} \tilde{\x}^k \\
|
|
||||||
&= \matr{T} \lim_{k \rightarrow \infty} \tilde{\x}^k \\
|
|
||||||
&= \begin{bmatrix}
|
|
||||||
\vec{1} & \matr{W}_R
|
|
||||||
\end{bmatrix} \begin{bmatrix}
|
|
||||||
\w^T \x^0 \\
|
|
||||||
0
|
|
||||||
\end{bmatrix}
|
|
||||||
= \vec{1} (\w^T \x^0)
|
|
||||||
\end{split}
|
|
||||||
\]
|
|
||||||
|
|
||||||
\indenttbox
|
|
||||||
\begin{remark}
|
|
||||||
It is assumed that $\Vert \w \Vert = 1$ (i.e., no normalization term).
|
|
||||||
\end{remark}
|
|
||||||
\end{proof}
|
|
||||||
\end{theorem}
|
\end{theorem}
|
||||||
|
|
||||||
\begin{example}[Metropolis-Hastings weights]
|
\begin{example}[Metropolis-Hastings weights]
|
||||||
@ -435,45 +154,6 @@
|
|||||||
\end{example}
|
\end{example}
|
||||||
|
|
||||||
|
|
||||||
% \begin{proof}[Lyapunov approach]
|
|
||||||
% $\A - \vec{1}\w^T$ is rank-1. This is to change one specific eigenvalue (move 1 to 0).
|
|
||||||
|
|
||||||
% Dissensus vector represents error:
|
|
||||||
% \[
|
|
||||||
% \begin{split}
|
|
||||||
% delta^{k+1}
|
|
||||||
% = \x^{k+1} - \vec{1}\w^T \x^0 \\
|
|
||||||
% = \x^{k+1} - \vec{1}\w^T \x^{k+1} \\
|
|
||||||
% = (\matr{I} - \vec{1}\w^T) \x^{k+1} \\
|
|
||||||
% = (\matr{I} - \vec{1}\w^T) \A\x^{k} \\
|
|
||||||
% = (\A - \vec{1}\w^T) \x^{k} \\
|
|
||||||
% = (\A - \vec{1}\w^T) \delta^{k} \\
|
|
||||||
% \end{split}
|
|
||||||
% \]
|
|
||||||
|
|
||||||
% Study:
|
|
||||||
% \[
|
|
||||||
% \delta^{k+1} = (\A - \vec{1}\w^T) \delta{k}
|
|
||||||
% \]
|
|
||||||
% If $\delta^k \rightarrow 0$, then $\x^k \rightarrow\vec{1}\w^T\x^0$.
|
|
||||||
% Note $(\A - \vec{1}\w^T)$ is Schur.
|
|
||||||
|
|
||||||
% Lyapunov equation for discrete time systems:
|
|
||||||
% \[
|
|
||||||
% \bar{\A}^T \matr{P} \bar{\A} = - \matr{P} = - \matr{Q}
|
|
||||||
% \]
|
|
||||||
% where $\bar{\A}$ is the Jordan-form of $(\A - \vec{1}\w^T)$
|
|
||||||
|
|
||||||
% Select $Q_2$ to be block-diagonal and $p_1$
|
|
||||||
|
|
||||||
|
|
||||||
% \[
|
|
||||||
% V(\delta) = \delta^T (\matr{T}^{-1})^T \matr{P} \matr{T}^{-1} \delta
|
|
||||||
% \]
|
|
||||||
% \end{proof}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
\section{Discrete-time averaging algorithm over time-varying graphs}
|
\section{Discrete-time averaging algorithm over time-varying graphs}
|
||||||
|
|
||||||
@ -658,3 +338,376 @@
|
|||||||
\begin{remark}
|
\begin{remark}
|
||||||
The result also holds for unweighted digraphs as $\vec{1}$ is both a left and right eigenvector of $\matr{L}$.
|
The result also holds for unweighted digraphs as $\vec{1}$ is both a left and right eigenvector of $\matr{L}$.
|
||||||
\end{remark}
|
\end{remark}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\begin{subappendices}
|
||||||
|
|
||||||
|
\section{Appendix: Discrete-time averaging system consensus proof}
|
||||||
|
|
||||||
|
% \begin{remark}
|
||||||
|
% The distributed consensus algorithm is a positive system (i.e., $\matr{A}$ is positive).
|
||||||
|
% \end{remark}
|
||||||
|
|
||||||
|
\begin{description}
|
||||||
|
\item[Positive matrix characterization]
|
||||||
|
Given $\A \in \mathbb{R}^{N \times N}$, it can be:
|
||||||
|
\begin{description}
|
||||||
|
\item[Non-negative] \marginnote{Non-negative matrix}
|
||||||
|
$\A \geq 0$.
|
||||||
|
\item[Irreducible] \marginnote{Irreducible matrix}
|
||||||
|
$\sum_{h=0}^{N-1} \A^h > 0$.
|
||||||
|
\item[Primitive] \marginnote{Primitive matrix}
|
||||||
|
$\exists h \in \{ 1, \dots, N \}: \A^h > 0$.
|
||||||
|
% \begin{remark}
|
||||||
|
% A graph with a primitive adjacency matrix is connected.
|
||||||
|
% \end{remark}
|
||||||
|
\item[Positive] \marginnote{Positive matrix}
|
||||||
|
$\A > 0$.
|
||||||
|
\end{description}
|
||||||
|
\end{description}
|
||||||
|
|
||||||
|
|
||||||
|
% Equilibrium:
|
||||||
|
% \[
|
||||||
|
% \x^{k+1} = \matr{A}\x^k
|
||||||
|
% \]
|
||||||
|
% \[
|
||||||
|
% \begin{split}
|
||||||
|
% \x_\text{eq} = \matr{A} \x_\text{eq} \\
|
||||||
|
% \iff (\matr{I} - \matr{A}) \x_\text{eq} = 0
|
||||||
|
% \end{split}
|
||||||
|
% \]
|
||||||
|
% We are interested in the null space of $(\matr{I} - \matr{A})$ (at least one eigenvector).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\begin{theorem} \label{th:positive_matrix_digraph_connected}
|
||||||
|
Given a weighted digraph $G$ with $N \geq 2$ nodes and adjacency matrix $\A$, it holds that:
|
||||||
|
\begin{itemize}
|
||||||
|
\item $\A$ is irreducible $\iff$ $G$ is strongly connected.
|
||||||
|
\item $\A$ is primitive $\iff$ $G$ is strongly connected and aperiodic.
|
||||||
|
\end{itemize}
|
||||||
|
\end{theorem}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\begin{theorem}[Gershgorin] \label{th:gershgorin} \marginnote{Gershgorin theorem}
|
||||||
|
For any square matrix $\A \in \mathbb{C}^{N \times N}$, it holds that the spectrum of $\A$ (i.e., set of eigenvalues) is contained in the Gershgorin disks:
|
||||||
|
\[
|
||||||
|
\text{spec}(\A) \subset \bigcup_{i=1}^{N} \left\{ s \in \mathbb{C} \,\,\bigg|\,\, |s - a_{ii}| \leq \sum_{j=1, j \neq i}^{N} |a_{ij}| \right\}
|
||||||
|
\]
|
||||||
|
In other words, it is the union of the disks with center $a_{ii}$ and radius $\sum_{j=1, j \neq i}^{N} |a_{ij}|$.
|
||||||
|
|
||||||
|
\indenttbox
|
||||||
|
\begin{remark}
|
||||||
|
This theorem provides an approximate location of the eigenvalues.
|
||||||
|
\end{remark}
|
||||||
|
|
||||||
|
\indenttbox
|
||||||
|
\begin{example}
|
||||||
|
Consider the matrix:
|
||||||
|
\[
|
||||||
|
\begin{bmatrix}
|
||||||
|
10 & 1 & 0 & 1 \\
|
||||||
|
0.2 & 8 & 0.2 & 0.2 \\
|
||||||
|
1 & 1 & 2 & 1 \\
|
||||||
|
-1 & -1 & -1 & -11
|
||||||
|
\end{bmatrix}
|
||||||
|
\]
|
||||||
|
|
||||||
|
Its eigenvalues are $\{ -10.870, 1.906, 7.918, 10.046 \}$.
|
||||||
|
|
||||||
|
The Gershgorin disks are:
|
||||||
|
\begin{figure}[H]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=0.4\linewidth]{./img/gershgorin.png}
|
||||||
|
\end{figure}
|
||||||
|
\end{example}
|
||||||
|
\end{theorem}
|
||||||
|
|
||||||
|
% \begin{lemma}
|
||||||
|
% If all the disks are within the unit disk, the eigenvalues are stable.
|
||||||
|
% \[
|
||||||
|
% \begin{bmatrix}
|
||||||
|
% \frac{1}{2} & \frac{1}{2} & 0 \\
|
||||||
|
% \frac{1}{3} & \frac{1}{3} & \frac{1}{3} \\
|
||||||
|
% 0 & \frac{3}{4} & \frac{1}{4}
|
||||||
|
% \end{bmatrix}
|
||||||
|
% \]
|
||||||
|
% \end{lemma}
|
||||||
|
|
||||||
|
|
||||||
|
\begin{theorem}[Perron-Frobenius] \label{th:perron_frobenius} \marginnote{Perron-Frobenius theorem}
|
||||||
|
Let $\A \in \R^{N \times N}$ with $N \geq 2$ be a non-negative matrix. It holds that:
|
||||||
|
\begin{itemize}
|
||||||
|
\item There exists a real eigenvalue $\lambda \geq 0$ that is dominant for all the other eigenvalues $\mu \in \text{spec}(\A) \smallsetminus \{\lambda\}$ (i.e., $\lambda \geq |\mu|$),
|
||||||
|
\item The right eigenvector $\v \in \R^N$ and left eigenvector $\w \in \R^N$ associated to $\lambda$ can be chosen to be non-negative.
|
||||||
|
\end{itemize}
|
||||||
|
If $\A \in \R^{N \times N}$ is irreducible, then:
|
||||||
|
\begin{itemize}
|
||||||
|
\item The eigenvalue $\lambda$ is strictly positive ($\lambda > 0$) and simple.
|
||||||
|
\item The right and left eigenvectors $\v$ and $\w$ associated to $\lambda$ are unique (up to scaling) and positive.
|
||||||
|
\end{itemize}
|
||||||
|
If $\A \in \R^{N \times N}$ is primitive, then:
|
||||||
|
\begin{itemize}
|
||||||
|
\item The eigenvalue $\lambda$ is strictly dominant for all $\mu \in \text{spec}(\A) \smallsetminus \{\lambda\}$ (i.e., $\lambda > |\mu|$).
|
||||||
|
\end{itemize}
|
||||||
|
\end{theorem}
|
||||||
|
|
||||||
|
\begin{lemma} \label{th:row_stochastic_unit_disk}
|
||||||
|
Given a row stochastic matrix $\A$, it holds that:
|
||||||
|
\begin{itemize}
|
||||||
|
\item $\lambda=1$ is an eigenvalue,
|
||||||
|
\item By \hyperref[th:gershgorin]{Gershgorin Theorem}, $\text{spec}(\A)$ is a subset of the unit disk (i.e., all Gershgorin disks lie inside the unit disk).
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\begin{figure}[H]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=0.2\linewidth]{./img/gershgorin_unit.png}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\indenttbox
|
||||||
|
\begin{corollary}
|
||||||
|
The eigenvalue $\lambda=1 \geq |\mu|$ is dominant.
|
||||||
|
\end{corollary}
|
||||||
|
\end{lemma}
|
||||||
|
|
||||||
|
\begin{lemma}
|
||||||
|
Given a row stochastic and primitive matrix $\A$, by \Cref{th:row_stochastic_unit_disk} and \hyperref[th:perron_frobenius]{Perron-Frobenius Theorem} it holds that $\lambda = 1$ is simple and strictly dominant.
|
||||||
|
|
||||||
|
\indenttbox
|
||||||
|
\begin{corollary}
|
||||||
|
The consensus averaging system is marginally stable (i.e., converges but not necessarily to the origin) as the largest distinct eigenvalue is $\lambda = 1$.
|
||||||
|
\end{corollary}
|
||||||
|
\end{lemma}
|
||||||
|
|
||||||
|
|
||||||
|
% \begin{lemma}
|
||||||
|
% \[
|
||||||
|
% \x_\text{eq} = ker(\matr{I} - \A) = \{ \vec{1}\beta \mid \beta \in \R \}
|
||||||
|
% \]
|
||||||
|
|
||||||
|
% \[
|
||||||
|
% \w^T \x^{k+1} = \w^T(\A \x^{k}) = \w^T \x^k
|
||||||
|
% \]
|
||||||
|
% i.e., $\w$ is left eigenvector of $\A$ with $\lambda = 1$.
|
||||||
|
|
||||||
|
% Therefore, the above must be true for:
|
||||||
|
% \[
|
||||||
|
% \begin{split}
|
||||||
|
% \w^T \x_\text{eq} \\
|
||||||
|
% \w^T \x^{0} \\
|
||||||
|
% \end{split}
|
||||||
|
% \]
|
||||||
|
% \[
|
||||||
|
% \w^T \vec{1}\beta \Rightarrow \beta = \frac{\w^T\x^{0}}{\w^T\vec{1}}
|
||||||
|
% \]
|
||||||
|
% \end{lemma}
|
||||||
|
|
||||||
|
\begin{theorem}[Discrete-time consensus] \marginnote{Discrete-time consensus}
|
||||||
|
Consider a discrete-time averaging system with digraph $G$ and weighted adjacency matrix $\matr{A}$. Assume $G$ strongly connected and aperiodic, and $\matr{A}$ row stochastic.
|
||||||
|
|
||||||
|
It holds that there exists a left eigenvector $\vec{w} \in \mathbb{R}^N$, $\vec{w} > 0$ such that the consensus converges to:
|
||||||
|
\[
|
||||||
|
\lim_{k \rightarrow \infty} \vec{x}^k
|
||||||
|
= \vec{1}\frac{\vec{w}^T \vec{x}^0}{\vec{w}^T\vec{1}}
|
||||||
|
= \begin{bmatrix} 1 \\ \vdots \\ 1 \end{bmatrix} \frac{\sum_{i=1}^N w_i x_i^0}{\sum_{j=1}^N w_j}
|
||||||
|
= \begin{bmatrix} 1 \\ \vdots \\ 1 \end{bmatrix} \sum_{i=1}^N \frac{w_i}{\sum_{j=1}^N w_j} x_i^0
|
||||||
|
\]
|
||||||
|
where $\tilde{w}_i = \frac{w_i}{\sum_{j=1}^N w_j}$ are all normalized and sum to 1 (i.e., they produce a convex combination).
|
||||||
|
|
||||||
|
Moreover, if $\matr{A}$ is doubly stochastic, then it holds that the consensus is the average as $\vec{w} = \vec{1}$:
|
||||||
|
\[
|
||||||
|
\lim_{k \rightarrow \infty} \vec{x}^k = \vec{1} \frac{1}{N} \sum_{i=1}^N x_i^0
|
||||||
|
\]
|
||||||
|
|
||||||
|
% \begin{proof}[Sketch of proof]
|
||||||
|
% Let $\matr{T} = \begin{bmatrix} \vec{1} & \vec{v}^2 & \cdots & \vec{v}^N \end{bmatrix}$ be a change in coordinates that transforms an adjacency matrix into its Jordan form $\matr{J}$:
|
||||||
|
% \[ \matr{J} = \matr{T}^{-1} \matr{A} \matr{T} \]
|
||||||
|
% As $\lambda=1$ is a simple eigenvalue (\Cref{th:strongly_connected_eigenvalues}), it holds that:
|
||||||
|
% \[
|
||||||
|
% \matr{J} = \begin{bmatrix}
|
||||||
|
% 1 & 0 & \cdots & 0 \\
|
||||||
|
% 0 & & & \\
|
||||||
|
% \vdots & & \matr{J}_2 & \\
|
||||||
|
% 0 & & & \\
|
||||||
|
% \end{bmatrix}
|
||||||
|
% \]
|
||||||
|
% where the eigenvalues of $\matr{J}_2 \in \mathbb{R}^{(N-1) \times (N-1)}$ lie inside the open unit disk.
|
||||||
|
|
||||||
|
% Let $\vec{x}^k = \matr{T}\bar{\vec{x}}^k$, then we have that:
|
||||||
|
% \[
|
||||||
|
% \begin{split}
|
||||||
|
% &\vec{x}^{k+1} = \matr{A} \vec{x}^{k} \\
|
||||||
|
% &\iff \matr{T} \bar{\vec{x}}^{k+1} = \matr{A} (\matr{T} \bar{\vec{x}}^k) \\
|
||||||
|
% &\iff \bar{\vec{x}}^{k+1} = \matr{T}^{-1} \matr{A} (\matr{T} \bar{\vec{x}}^k) = \matr{J}\bar{\vec{x}}^k
|
||||||
|
% \end{split}
|
||||||
|
% \]
|
||||||
|
% Therefore:
|
||||||
|
% \[
|
||||||
|
% \begin{gathered}
|
||||||
|
% \lim_{k \rightarrow \infty} \bar{\vec{x}}^k = \bar{x}_1^0 \begin{bmatrix} 1 \\ 0 \\ \vdots \\ 0 \end{bmatrix} \\
|
||||||
|
% \bar{x}_1^{k+1} = \bar{x}_1^k \quad \forall k \geq 0 \\
|
||||||
|
% \lim_{k \rightarrow \infty} \bar{x}_i^{k} = 0 \quad \forall i = 2, \dots, N \\
|
||||||
|
% \end{gathered}
|
||||||
|
% \]
|
||||||
|
% \end{proof}
|
||||||
|
|
||||||
|
\begin{proof}[Proof (Jordan-form approach)]
|
||||||
|
As $G$ is strongly connected and aperiodic, and $\A$ is row stochastic, it holds that:
|
||||||
|
\begin{itemize}
|
||||||
|
\item By \Cref{th:positive_matrix_digraph_connected}, $\A$ is primitive.
|
||||||
|
\item By \hyperref[th:perron_frobenius]{Perron-Frobenius Theorem} and \Cref{th:row_stochastic_unit_disk}, the eigenvalue $\lambda=1$ is strictly dominant and it is associated to the right eigenvector $\vec{1}$ (row stochasticity) and left eigenvector $\w$.
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
Consider the non-singular matrix $\matr{T} \in \R^{N \times N}$ defined as:
|
||||||
|
\[
|
||||||
|
\matr{T} = \begin{bmatrix}
|
||||||
|
\vert & \vert & & \vert \\
|
||||||
|
\vec{1} & \v^2 & \dots & \v^N \\
|
||||||
|
\vert & \vert & & \vert \\
|
||||||
|
\end{bmatrix} = \begin{bmatrix}
|
||||||
|
\vec{1} & \matr{W}_R
|
||||||
|
\end{bmatrix}
|
||||||
|
\qquad
|
||||||
|
\matr{T}^{-1} = \begin{bmatrix}
|
||||||
|
- & (\w)^T & - \\
|
||||||
|
- & (\w^2)^T & - \\
|
||||||
|
- & \vdots & - \\
|
||||||
|
- & (\w^N)^T & - \\
|
||||||
|
\end{bmatrix} = \begin{bmatrix}
|
||||||
|
\w^T \\ \matr{W}_L
|
||||||
|
\end{bmatrix}
|
||||||
|
\]
|
||||||
|
|
||||||
|
A change in coordinates defined as:
|
||||||
|
\[
|
||||||
|
\x \mapsto \tilde{\x} = \matr{T}^{-1} \x
|
||||||
|
\]
|
||||||
|
allows obtaining the Jordan form $\matr{T}^{-1}\A\matr{T}$:
|
||||||
|
\[
|
||||||
|
\matr{T}^{-1}\A\matr{T} = \begin{bmatrix}
|
||||||
|
1 & 0 & \dots \\
|
||||||
|
0 & & \\
|
||||||
|
\vdots & & \matr{J}_2 \\
|
||||||
|
\end{bmatrix}
|
||||||
|
\]
|
||||||
|
with $\matr{J}_2 \in \mathbb{R}^{(N-1) \times (N-1)}$ Schur (i.e., $\text{spec}(\matr{J}_2)$ inside the open unit disk).
|
||||||
|
|
||||||
|
The dynamics $\x^{k+1} = \A \x^k$ in the new coordinate system is:
|
||||||
|
\[
|
||||||
|
\begin{split}
|
||||||
|
\tilde{\x}^{k+1} &= \matr{T}^{-1} \x^{k+1} = \matr{T}^{-1} \A \matr{T} \tilde{\x}^k \\
|
||||||
|
&= \begin{bmatrix}
|
||||||
|
1 & 0 & \dots \\
|
||||||
|
0 & & \\
|
||||||
|
\vdots & & \matr{J}_2 \\
|
||||||
|
\end{bmatrix} \tilde{\x}^k
|
||||||
|
= \begin{bmatrix}
|
||||||
|
1 & 0 & \dots \\
|
||||||
|
0 & & \\
|
||||||
|
\vdots & & \matr{J}_2 \\
|
||||||
|
\end{bmatrix}^{k+1} \tilde{\x}^0
|
||||||
|
\end{split}
|
||||||
|
\]
|
||||||
|
Let's denote:
|
||||||
|
\[
|
||||||
|
\tilde{\x}^k = \matr{T}^{-1}\x^k = \begin{bmatrix}
|
||||||
|
\w^T\x^k \\ \matr{W}_L\x^k
|
||||||
|
\end{bmatrix}
|
||||||
|
= \begin{bmatrix}
|
||||||
|
\tilde{\x}^k_{m} \\ \tilde{\x}^k_{\bot}
|
||||||
|
\end{bmatrix}
|
||||||
|
\]
|
||||||
|
We have that:
|
||||||
|
\[
|
||||||
|
\begin{split}
|
||||||
|
\lim_{k \rightarrow \infty} \tilde{\x}^k
|
||||||
|
&= \lim_{k \rightarrow \infty} \begin{bmatrix}
|
||||||
|
1 & 0 & \dots \\
|
||||||
|
0 & & \\
|
||||||
|
\vdots & & \matr{J}_2 \\
|
||||||
|
\end{bmatrix}^k \tilde{\x}^0 \\
|
||||||
|
&= \lim_{k \rightarrow \infty} \begin{bmatrix}
|
||||||
|
1 & 0 & \dots \\
|
||||||
|
0 & & \\
|
||||||
|
\vdots & & (\matr{J}_2)^k \\
|
||||||
|
\end{bmatrix} \begin{bmatrix}
|
||||||
|
\tilde{\x}^0_{m} \\ \tilde{\x}^0_{\bot}
|
||||||
|
\end{bmatrix} \\
|
||||||
|
&= \begin{bmatrix}
|
||||||
|
1 \cdot \tilde{\x}^0_{m} \\
|
||||||
|
\lim_{k \rightarrow \infty} (\matr{J}_2)^k \tilde{\x}^0_{\bot}
|
||||||
|
\end{bmatrix} \\
|
||||||
|
&= \begin{bmatrix}
|
||||||
|
\w^T \x^0 \\
|
||||||
|
0
|
||||||
|
\end{bmatrix} \\
|
||||||
|
\end{split}
|
||||||
|
\]
|
||||||
|
Note that $\lim_{k \rightarrow \infty} \matr{J}_2^k = 0$ as it is stable (i.e., all eigenvalues are in the open unit disk $|\mu| < 1$).
|
||||||
|
|
||||||
|
In the original coordinate system, the limit is:
|
||||||
|
\[
|
||||||
|
\begin{split}
|
||||||
|
\lim_{k \rightarrow \infty} \x^k
|
||||||
|
&= \lim_{k \rightarrow \infty} \matr{T} \tilde{\x}^k \\
|
||||||
|
&= \matr{T} \lim_{k \rightarrow \infty} \tilde{\x}^k \\
|
||||||
|
&= \begin{bmatrix}
|
||||||
|
\vec{1} & \matr{W}_R
|
||||||
|
\end{bmatrix} \begin{bmatrix}
|
||||||
|
\w^T \x^0 \\
|
||||||
|
0
|
||||||
|
\end{bmatrix}
|
||||||
|
= \vec{1} (\w^T \x^0)
|
||||||
|
\end{split}
|
||||||
|
\]
|
||||||
|
|
||||||
|
\indenttbox
|
||||||
|
\begin{remark}
|
||||||
|
It is assumed that $\w$ is scaled such that $\w^T \vec{1} = 1$ (as required by $\matr{T}^{-1}\matr{T} = \matr{I}$), so that no normalization term is needed.
|
||||||
|
\end{remark}
|
||||||
|
\end{proof}
|
||||||
|
|
||||||
|
% \begin{proof}[Lyapunov approach]
|
||||||
|
% $\A - \vec{1}\w^T$ is rank-1. This is to change one specific eigenvalue (move 1 to 0).
|
||||||
|
|
||||||
|
% Dissensus vector represents error:
|
||||||
|
% \[
|
||||||
|
% \begin{split}
|
||||||
|
% delta^{k+1}
|
||||||
|
% = \x^{k+1} - \vec{1}\w^T \x^0 \\
|
||||||
|
% = \x^{k+1} - \vec{1}\w^T \x^{k+1} \\
|
||||||
|
% = (\matr{I} - \vec{1}\w^T) \x^{k+1} \\
|
||||||
|
% = (\matr{I} - \vec{1}\w^T) \A\x^{k} \\
|
||||||
|
% = (\A - \vec{1}\w^T) \x^{k} \\
|
||||||
|
% = (\A - \vec{1}\w^T) \delta^{k} \\
|
||||||
|
% \end{split}
|
||||||
|
% \]
|
||||||
|
|
||||||
|
% Study:
|
||||||
|
% \[
|
||||||
|
% \delta^{k+1} = (\A - \vec{1}\w^T) \delta{k}
|
||||||
|
% \]
|
||||||
|
% If $\delta^k \rightarrow 0$, then $\x^k \rightarrow\vec{1}\w^T\x^0$.
|
||||||
|
% Note $(\A - \vec{1}\w^T)$ is Schur.
|
||||||
|
|
||||||
|
% Lyapunov equation for discrete time systems:
|
||||||
|
% \[
|
||||||
|
% \bar{\A}^T \matr{P} \bar{\A} = - \matr{P} = - \matr{Q}
|
||||||
|
% \]
|
||||||
|
% where $\bar{\A}$ is the Jordan-form of $(\A - \vec{1}\w^T)$
|
||||||
|
|
||||||
|
% Select $Q_2$ to be block-diagonal and $p_1$
|
||||||
|
|
||||||
|
|
||||||
|
% \[
|
||||||
|
% V(\delta) = \delta^T (\matr{T}^{-1})^T \matr{P} \matr{T}^{-1} \delta
|
||||||
|
% \]
|
||||||
|
% \end{proof}
|
||||||
|
\end{theorem}
|
||||||
|
|
||||||
|
\end{subappendices}
|
||||||
@ -102,7 +102,7 @@
|
|||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item An estimate $\z_i^k$ of its optimal position $\z_i^*$,
|
\item An estimate $\z_i^k$ of its optimal position $\z_i^*$,
|
||||||
\item An estimate $\s_i^k$ of the aggregation function $\sigma(\z^k) = \frac{1}{N} \sum_{j=1}^{N} \phi_j(\z_j^k)$,
|
\item An estimate $\s_i^k$ of the aggregation function $\sigma(\z^k) = \frac{1}{N} \sum_{j=1}^{N} \phi_j(\z_j^k)$,
|
||||||
\item An estimate $\v_i^k$ of the gradient with respect to the second argument of the loss $\sum_{j=1}^{N} \nabla_{[\sigma(\z^k)]} l_j(\z_j^k, \sigma(\z^k))$.
|
\item An estimate $\v_i^k$ of the gradient with respect to the second argument of the loss $\frac{1}{N} \sum_{j=1}^{N} \nabla_{[\sigma(\z^k)]} l_j(\z_j^k, \sigma(\z^k))$.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
The step is based on the centralized gradient method using the local estimates:
|
The step is based on the centralized gradient method using the local estimates:
|
||||||
|
|||||||
@ -173,7 +173,7 @@
|
|||||||
Use $\x_i$ to approximate $h_i(\u_i)$ and dynamic average consensus for the aggregation function. The dynamics is:
|
Use $\x_i$ to approximate $h_i(\u_i)$ and dynamic average consensus for the aggregation function. The dynamics is:
|
||||||
\[
|
\[
|
||||||
\begin{split}
|
\begin{split}
|
||||||
\dot{\u}_i &= -\delta_1 \nabla h_i(\u_i) \left( \nabla_{[\x_i]} l_i(\x_i, \phi_i(\x_i)+\w_i) + \left( \nabla_{[\phi_i(\x_i)+\w_i]} l_i(\x_i, \phi_i(\x_i)+\w_i) + \v_i \right) \nabla \phi_i(\x_i) \right) \\
|
\dot{\u}_i &= -\delta_1 \nabla h_i(\u_i) \Big( \nabla_{[\x_i]} l_i(\x_i, \phi_i(\x_i)+\w_i) + \left( \nabla_{[\phi_i(\x_i)+\w_i]} l_i(\x_i, \phi_i(\x_i)+\w_i) + \v_i \right) \nabla \phi_i(\x_i) \Big) \\
|
||||||
\delta_2 \dot{\w}_i &= - \sum_{j \in \mathcal{N}_i} a_{ij} (\w_i - \w_j) - \sum_{j \in \mathcal{N}_i} a_{ij} (\phi_i(\x_i) - \phi_i(\x_j)) \\
|
\delta_2 \dot{\w}_i &= - \sum_{j \in \mathcal{N}_i} a_{ij} (\w_i - \w_j) - \sum_{j \in \mathcal{N}_i} a_{ij} (\phi_i(\x_i) - \phi_j(\x_j)) \\
|
||||||
\delta_2 \dot{\v}_i &= - \sum_{j \in \mathcal{N}_i} a_{ij} (\v_i - \v_j) - \sum_{j \in \mathcal{N}_i} a_{ij} (\nabla_{[\phi_i(\x_i)+\w_i]} l_i(\x_i, \phi_i(\x_i)+\w_i) - \nabla_{[\phi_j(\x_j)+\w_j]} l_j(\x_j, \phi_j(\x_j)+\w_j)) \\
|
\delta_2 \dot{\v}_i &= - \sum_{j \in \mathcal{N}_i} a_{ij} (\v_i - \v_j) - \sum_{j \in \mathcal{N}_i} a_{ij} (\nabla_{[\phi_i(\x_i)+\w_i]} l_i(\x_i, \phi_i(\x_i)+\w_i) - \nabla_{[\phi_j(\x_j)+\w_j]} l_j(\x_j, \phi_j(\x_j)+\w_j)) \\
|
||||||
\end{split}
|
\end{split}
|
||||||
|
|||||||
@ -5,7 +5,7 @@
|
|||||||
|
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\item[Mass-spring system] \marginnote{Mass-spring system}
|
\item[Mass-spring system] \marginnote{Mass-spring system}
|
||||||
System of $N$ masses where each mass $i$ has a position $x_i \in \mathbb{R}$ and is connected through a sprint to mass $i-1$ and $i+1$. Each spring has an elastic constant $a_{j, i} = a_{i, j} > 0$.
|
System of $N$ masses where each mass $i$ has a position $x_i \in \mathbb{R}$ and is connected through a spring to mass $i-1$ and $i+1$. Each spring has an elastic constant $a_{j, i} = a_{i, j} > 0$.
|
||||||
|
|
||||||
\begin{figure}[H]
|
\begin{figure}[H]
|
||||||
\centering
|
\centering
|
||||||
|
|||||||
@ -115,9 +115,9 @@
|
|||||||
\begin{remark}
|
\begin{remark}
|
||||||
It holds that:
|
It holds that:
|
||||||
\[
|
\[
|
||||||
\matr{D}^\text{IN} = \text{diag}(\matr{A}^T \vec{1})
|
\matr{D}^\text{OUT} = \text{diag}(\matr{A}^T \vec{1})
|
||||||
\quad
|
\quad
|
||||||
\matr{D}^\text{OUT} = \text{diag}(\matr{A} \vec{1})
|
\matr{D}^\text{IN} = \text{diag}(\matr{A} \vec{1})
|
||||||
\]
|
\]
|
||||||
where $\vec{1}$ is a vector of ones.
|
where $\vec{1}$ is a vector of ones.
|
||||||
\end{remark}
|
\end{remark}
|
||||||
|
|||||||
@ -611,7 +611,7 @@
|
|||||||
|
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\item[Distributed gradient algorithm] \marginnote{Distributed gradient algorithm}
|
\item[Distributed gradient algorithm] \marginnote{Distributed gradient algorithm}
|
||||||
Method that estimates a (more precise) set of parameters as a weighted sum those of its neighbors' (self-loop included):
|
Method that estimates a (more precise) set of parameters as a weighted sum of those of its neighbors (self-loop included):
|
||||||
\[
|
\[
|
||||||
\vec{v}_i^{k+1} = \sum_{j \in \mathcal{N}_i} a_{ij} \z_j^k
|
\vec{v}_i^{k+1} = \sum_{j \in \mathcal{N}_i} a_{ij} \z_j^k
|
||||||
\]
|
\]
|
||||||
@ -749,12 +749,33 @@
|
|||||||
\rho \Vert \z_i^{k+1} - \z^* \Vert \leq \rho^k \Vert \z_i^0 - \z^* \Vert
|
\Vert \z_i^{k} - \z^* \Vert \leq \rho^k \Vert \z_i^0 - \z^* \Vert
|
||||||
\]
|
\]
|
||||||
|
|
||||||
{
|
|
||||||
\indenttbox
|
\indenttbox
|
||||||
\begin{remark}
|
\begin{remark}
|
||||||
It can be shown that gradient tracking also works with non-convex optimization and, under the correct assumptions, converges to a stationary point.
|
It can be shown that gradient tracking also works with non-convex optimization and, under the correct assumptions, converges to a stationary point.
|
||||||
\end{remark}
|
\end{remark}
|
||||||
}
|
\end{theorem}
|
||||||
|
\end{description}
|
||||||
|
|
||||||
|
|
||||||
|
\begin{subappendices}
|
||||||
|
|
||||||
|
\section{Appendix: Gradient tracking optimality and stability proof}
|
||||||
|
|
||||||
|
\begin{theorem}[Gradient tracking algorithm optimality] \marginnote{Gradient tracking algorithm optimality}
|
||||||
|
If:
|
||||||
|
\begin{itemize}
|
||||||
|
\item $\matr{A}$ is the adjacency matrix of an undirected and connected communication graph $G$ such that it is doubly stochastic and $a_{ij} > 0$.
|
||||||
|
\item Each cost function $l_i$ is $\mu$-strongly convex and its gradient $L$-Lipschitz continuous.
|
||||||
|
\end{itemize}
|
||||||
|
Then, there exists $\alpha^* > 0$ such that, for any choice of the step size $\alpha \in (0, \alpha^*)$, the sequence of local solutions $\{ \z_i^k \}_{k \in \mathbb{N}}$ of each agent generated by the gradient tracking algorithm asymptotically converges to a consensual optimal solution $\z^*$:
|
||||||
|
\[ \lim_{k \rightarrow \infty} \Vert \z_i^k - \z^* \Vert = 0 \]
|
||||||
|
|
||||||
|
Moreover, the convergence rate is linear and stability is exponential:
|
||||||
|
\[
|
||||||
|
\exists \rho \in (0,1): \Vert \z_i^{k+1} - \z^* \Vert \leq \rho \Vert \z_i^{k} - \z^* \Vert
|
||||||
|
\,\,\land\,\,
|
||||||
|
\Vert \z_i^{k} - \z^* \Vert \leq \rho^k \Vert \z_i^0 - \z^* \Vert
|
||||||
|
\]
|
||||||
|
|
||||||
\begin{proof}
|
\begin{proof}
|
||||||
Consider the gradient tracking algorithm written in matrix form:
|
Consider the gradient tracking algorithm written in matrix form:
|
||||||
@ -819,6 +840,24 @@
|
|||||||
Then, it must be that $\z_\text{eq} = \vec{1}\beta_2$ is an optimum with $\beta_2 = z^*$.
|
Then, it must be that $\z_\text{eq} = \vec{1}\beta_2$ is an optimum with $\beta_2 = z^*$.
|
||||||
|
|
||||||
\item[Stability]
|
\item[Stability]
|
||||||
|
Apply the following change in coordinates:
|
||||||
|
\[
|
||||||
|
\begin{bmatrix}
|
||||||
|
\z^k \\ \xi^k
|
||||||
|
\end{bmatrix}
|
||||||
|
\mapsto
|
||||||
|
\begin{bmatrix}
|
||||||
|
\tilde{\z}^k \\ \tilde{\xi}^k
|
||||||
|
\end{bmatrix}
|
||||||
|
=
|
||||||
|
\begin{bmatrix}
|
||||||
|
\z^k - \vec{1}z^* \\
|
||||||
|
\xi^k - \alpha \nabla \vec{l}(\vec{1}z^*)
|
||||||
|
\end{bmatrix}
|
||||||
|
\]
|
||||||
|
So that the equilibrium of the system is shifted to $0$.
|
||||||
|
|
||||||
|
Then, exploit strong convexity to re-formulate the overall system in such a way that the Lyapunov theorem can be applied to prove exponential stability.
|
||||||
% Change in coordinates to avoid having $\z^{k+1}$ in $\s^{k}$. The (non-linear) transformation is:
|
% Change in coordinates to avoid having $\z^{k+1}$ in $\s^{k}$. The (non-linear) transformation is:
|
||||||
% \[
|
% \[
|
||||||
% \begin{bmatrix}
|
% \begin{bmatrix}
|
||||||
@ -854,7 +893,7 @@
|
|||||||
% \begin{bmatrix}
|
% \begin{bmatrix}
|
||||||
% \z^k \\ \vec{\xi}^k
|
% \z^k \\ \vec{\xi}^k
|
||||||
% \end{bmatrix}
|
% \end{bmatrix}
|
||||||
% - alpha \begin{bmatrix}
|
% - \alpha \begin{bmatrix}
|
||||||
% \matr{I} \\ \A \matr{I}
|
% \matr{I} \\ \A \matr{I}
|
||||||
% \end{bmatrix}
|
% \end{bmatrix}
|
||||||
% \nabla \vec{l}(\z^k)
|
% \nabla \vec{l}(\z^k)
|
||||||
@ -902,7 +941,14 @@
|
|||||||
% \u^k = \nabla \vec{l}(\vec{y}^k)
|
% \u^k = \nabla \vec{l}(\vec{y}^k)
|
||||||
% \end{gathered}
|
% \end{gathered}
|
||||||
% \]
|
% \]
|
||||||
|
|
||||||
|
|
||||||
|
% Take Lyapunov function $V(\tilde{\z}, \xi^k)$ and check whether:
|
||||||
|
% \[
|
||||||
|
% V(\tilde{\z}^{k+1}, \tilde{\xi}^{k+1}) - V(\tilde{\z}^{k}, \tilde{\xi}^{k}) \leq 0
|
||||||
|
% \]
|
||||||
\end{description}
|
\end{description}
|
||||||
\end{proof}
|
\end{proof}
|
||||||
\end{theorem}
|
\end{theorem}
|
||||||
\end{description}
|
|
||||||
|
\end{subappendices}
|
||||||
|
|||||||
@ -9,7 +9,7 @@
|
|||||||
\]
|
\]
|
||||||
with $\x(t) \in \mathbb{R}^n$, $\u(t) \in U \subseteq \mathbb{R}^m$, $f(\x(t)) \in \mathbb{R}^n$, and $g(\x(t)) \in \mathbb{R}^{n \times m}$.
|
with $\x(t) \in \mathbb{R}^n$, $\u(t) \in U \subseteq \mathbb{R}^m$, $f(\x(t)) \in \mathbb{R}^n$, and $g(\x(t)) \in \mathbb{R}^{n \times m}$.
|
||||||
|
|
||||||
$f(\x(t))$ can be seen as the drift of the system and $\u(t)$ a coefficient that controls how much $g(\x(t))$ is injected into $f(\x(t))$.
|
$f(\x(t))$ can be seen as the drift of the system and $\u(t)$ as a coefficient that controls how much $g(\x(t))$ is injected into $f(\x(t))$.
|
||||||
|
|
||||||
The overall system can be interpreted as composed of:
|
The overall system can be interpreted as composed of:
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
@ -191,7 +191,7 @@
|
|||||||
- &\gamma(V_{ij}^{s}(\x_i, \x_j)) \leq 0 \\
|
- &\gamma(V_{ij}^{s}(\x_i, \x_j)) \leq 0 \\
|
||||||
&\forall j \in \mathcal{N}_i, \forall i \in \{1, \dots, N\} \Big\}
|
&\forall j \in \mathcal{N}_i, \forall i \in \{1, \dots, N\} \Big\}
|
||||||
\end{aligned} \\
|
\end{aligned} \\
|
||||||
= \Big\{ \u \in \mathbb{R}^{dN} \mid -2(\x_i, \x_j)^T \u_i - 2(\x_j-\x_i)^T \u_j - \gamma(V_{ij}^s(\x_i, \x_j)) \leq 0 \,\,\forall j \in \mathcal{N}_i, \forall i \in \{1, \dots, N\} \Big\}
|
= \Big\{ \u \in \mathbb{R}^{dN} \mid -2(\x_i-\x_j)^T \u_i - 2(\x_j-\x_i)^T \u_j - \gamma(V_{ij}^s(\x_i, \x_j)) \leq 0 \,\,\forall j \in \mathcal{N}_i, \forall i \in \{1, \dots, N\} \Big\}
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\]
|
\]
|
||||||
|
|
||||||
@ -208,7 +208,7 @@
|
|||||||
\arg\min_{\u \in \mathbb{R}^N} \sum_{i=1}^{N} \Vert \u_i - \u_i^\text{ref} \Vert^2 \\
|
\arg\min_{\u \in \mathbb{R}^{dN}} \sum_{i=1}^{N} \Vert \u_i - \u_i^\text{ref} \Vert^2 \\
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
\text{subject to }
|
\text{subject to }
|
||||||
&-2(\x_i, \x_j)^T \u_i - 2(\x_j-\x_i)^T \u_j - \gamma(V_{ij}^s(\x_i, \x_j)) \leq 0 \\
|
&-2(\x_i-\x_j)^T \u_i - 2(\x_j-\x_i)^T \u_j - \gamma(V_{ij}^s(\x_i, \x_j)) \leq 0 \\
|
||||||
& \Vert \u_i \Vert \leq \u_i^\text{max} \\
|
& \Vert \u_i \Vert \leq \u_i^\text{max} \\
|
||||||
& \forall j \in \mathcal{N}_i, \forall i \in \{ 1, \dots, N \}
|
& \forall j \in \mathcal{N}_i, \forall i \in \{ 1, \dots, N \}
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
@ -217,7 +217,7 @@
|
|||||||
where $\u_i^\text{ref}$ is the reference input of the high level controller and $\u_i^\text{max}$ is the bound.
|
where $\u_i^\text{ref}$ is the reference input of the high level controller and $\u_i^\text{max}$ is the bound.
|
||||||
|
|
||||||
\begin{remark}
|
\begin{remark}
|
||||||
The policy should be computed continuously for each $x_i(t)$.
|
The policy should be computed continuously for each $\x_i(t)$.
|
||||||
\end{remark}
|
\end{remark}
|
||||||
|
|
||||||
\item[Decentralized safety controller] \marginnote{Decentralized safety controller}
|
\item[Decentralized safety controller] \marginnote{Decentralized safety controller}
|
||||||
@ -255,13 +255,13 @@
|
|||||||
\begin{split}
|
\begin{split}
|
||||||
\dot{\vec{p}}_x &= v \cos(\theta) \\
|
\dot{\vec{p}}_x &= v \cos(\theta) \\
|
||||||
\dot{\vec{p}}_y &= v \sin(\theta) \\
|
\dot{\vec{p}}_y &= v \sin(\theta) \\
|
||||||
\theta &= \omega \\
|
\dot{\theta} &= \omega \\
|
||||||
\end{split}
|
\end{split}
|
||||||
\]
|
\]
|
||||||
where:
|
where:
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item $(\vec{p}_x, \vec{p}_y)$ is the position of the center of mass,
|
\item $(\vec{p}_x, \vec{p}_y)$ is the position of the center of mass,
|
||||||
\item $\theta$ is the orientation,
|
\item $\theta$ is the orientation,
|
||||||
\item $v$ is the linear velocity,
|
\item $v$ is the linear velocity,
|
||||||
\item $\omega$ is the angular velocity.
|
\item $\omega$ is the angular velocity.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|||||||
@ -10,10 +10,6 @@
|
|||||||
{
|
{
|
||||||
"name": "Ethics module 2",
|
"name": "Ethics module 2",
|
||||||
"path": "module2/ethics2.pdf"
|
"path": "module2/ethics2.pdf"
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Ethics module 3",
|
|
||||||
"path": "module3/ethics3.pdf"
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@ -18,7 +18,7 @@
|
|||||||
AI systems could undermine this right if used for surveillance, profiling, automated assessment, manipulation, or interference.
|
AI systems could undermine this right if used for surveillance, profiling, automated assessment, manipulation, or interference.
|
||||||
|
|
||||||
\item[Right to equality and non-discrimination]
|
\item[Right to equality and non-discrimination]
|
||||||
Digitally disadvantaged individuals might be excluded from accessing AI system or exploited. AI systems themselves can be biased and reproduce existing discriminatory practices.
|
Digitally disadvantaged individuals might be excluded from accessing AI systems or exploited by them. AI systems themselves can be biased and reproduce existing discriminatory practices.
|
||||||
|
|
||||||
\item[Right to privacy]
|
\item[Right to privacy]
|
||||||
Related to the right of a person to make autonomous decisions, and to have control of the data collected and how it is processed.
|
Related to the right of a person to make autonomous decisions, and to have control of the data collected and how it is processed.
|
||||||
|
|||||||
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\item[Morality] \marginnote{Morality}
|
\item[Morality] \marginnote{Morality}
|
||||||
There is no widely agreed definition of morality. On a high level, it refers to norms to determine which actions are right and wrong.
|
There is no widely agreed-upon definition of morality. At a high level, it refers to norms to determine which actions are right or wrong.
|
||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
|
|
||||||
@ -48,7 +48,7 @@
|
|||||||
Field that uses scientific techniques to study how people reason and act.
|
Field that uses scientific techniques to study how people reason and act.
|
||||||
|
|
||||||
\item[Meta-ethics] \marginnote{Meta-ethics}
|
\item[Meta-ethics] \marginnote{Meta-ethics}
|
||||||
Field that analysis the language, concepts, and methods of reasoning in normative ethics. Some research questions are ``can ethical judgments be true or false?'', ``does morality correspond to facts in the world?'', \dots
|
Field that analyzes the language, concepts, and methods of reasoning in normative ethics. Some research questions are ``can ethical judgments be true or false?'', ``does morality correspond to facts in the world?'', \dots
|
||||||
\end{description}
|
\end{description}
|
||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
@ -133,7 +133,7 @@
|
|||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item It is too demanding on the individuals as it requires constant self-sacrifice.
|
\item It is too demanding on the individuals as it requires constant self-sacrifice.
|
||||||
\item It does not provide a decision procedure or a way to assess decisions.
|
\item It does not provide a decision procedure or a way to assess decisions.
|
||||||
\item It has no room for impartiality (i.e., a family member is as important as a stranger).
|
\item It has no room for partiality (i.e., a family member is as important as a stranger).
|
||||||
\item If the majority of society is against a minority group, unjust actions against the minority increases the overall world's well-being.
|
\item If the majority of society is against a minority group, unjust actions against the minority increase the world's overall well-being.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{remark}
|
\end{remark}
|
||||||
@ -239,7 +239,7 @@
|
|||||||
\end{remark}
|
\end{remark}
|
||||||
|
|
||||||
\item[Categorical imperatives] \marginnote{Categorical imperatives}
|
\item[Categorical imperatives] \marginnote{Categorical imperatives}
|
||||||
Imperatives that do not depend on a single individual but are applicable to every rational beings. Categorical imperatives command to do things that one might want or not want to do. Disregarding them makes one irrational.
|
Imperatives that do not depend on a single individual but are applicable to every rational being. Categorical imperatives command to do things that one might want or not want to do. Disregarding them makes one irrational.
|
||||||
|
|
||||||
\begin{remark}
|
\begin{remark}
|
||||||
According to Kant's \textit{argument for the irrationality of immorality}, moral duties are categorical imperatives:
|
According to Kant's \textit{argument for the irrationality of immorality}, moral duties are categorical imperatives:
|
||||||
@ -293,7 +293,7 @@
|
|||||||
|
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\item[Proceduralism] \marginnote{Proceduralism}
|
\item[Proceduralism] \marginnote{Proceduralism}
|
||||||
Approach to ethics that does not start by make assumptions on any basic moral views but rather follows a procedure to show that they are morally right.
|
Approach to ethics that does not start by making assumptions on any basic moral views but rather follows a procedure to show that they are morally right.
|
||||||
|
|
||||||
\begin{remark}
|
\begin{remark}
|
||||||
The golden rule, rule consequentialism, Kant's principle of universalizability are all instances of proceduralism.
|
The golden rule, rule consequentialism, Kant's principle of universalizability are all instances of proceduralism.
|
||||||
@ -316,17 +316,17 @@
|
|||||||
\end{remark}
|
\end{remark}
|
||||||
|
|
||||||
\item[Contractarianism (moral)] \marginnote{Contractarianism (moral)}
|
\item[Contractarianism (moral)] \marginnote{Contractarianism (moral)}
|
||||||
Ethical theory which states that actions are morally right if and only if they would be accepted by free, equal, and rational people, on the condition that everyone obey to these rules.
|
Ethical theory which states that actions are morally right if and only if they would be accepted by free, equal, and rational people, on the condition that everyone obeys these rules.
|
||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\item[Prisoner's dilemma] \marginnote{Prisoner's dilemma}
|
\item[Prisoner's dilemma] \marginnote{Prisoner's dilemma}
|
||||||
Situation where the best outcome would be obtained if everyone stops pursuing their self-interest.
|
Situation where the best outcome would be obtained if everyone stops pursuing their own self-interest.
|
||||||
|
|
||||||
\begin{table}[H]
|
\begin{table}[H]
|
||||||
\caption{
|
\caption{
|
||||||
\parbox[t]{0.7\linewidth}{
|
\parbox[t]{0.7\linewidth}{
|
||||||
Scenario that the dilemma takes inspiration from: two prisoners are interrogated separately, they can either stay silent (cooperate) or snitch the other (betray). The numbers are the years in prison each of them would get.
|
Scenario that the dilemma takes inspiration from: two prisoners are interrogated separately; they can either stay silent (cooperate) or snitch on the other (betray). The numbers are the years in prison each of them would get.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
\centering
|
\centering
|
||||||
@ -354,7 +354,7 @@
|
|||||||
\begin{description}
|
\begin{description}
|
||||||
\item[Contractarianism characteristics] \phantom{}
|
\item[Contractarianism characteristics] \phantom{}
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Morality is a social phenomenon: moral rules are basically rules of cooperation. There are no self-regarding moral duties, so any action that do not have bearing on others is morally right.
|
\item Morality is a social phenomenon: moral rules are basically rules of cooperation. There are no self-regarding moral duties, so any action that does not have bearing on others is morally right.
|
||||||
\item Basic moral rules are justified.
|
\item Basic moral rules are justified.
|
||||||
\begin{descriptionlist}
|
\begin{descriptionlist}
|
||||||
\item[Veil of ignorance] \marginnote{Veil of ignorance}
|
\item[Veil of ignorance] \marginnote{Veil of ignorance}
|
||||||
@ -363,7 +363,7 @@
|
|||||||
\item Each person will prioritize basic liberties, which will match those of everyone.
|
\item Each person will prioritize basic liberties, which will match those of everyone.
|
||||||
\item Social and economic inequalities are allowed if everyone has equal access to those positions and the benefits should be aimed to the least advantaged members of society.
|
\item Social and economic inequalities are allowed if everyone has equal access to those positions and the benefits should be aimed at the least advantaged members of society.
|
||||||
\end{enumerate}
|
\end{enumerate}
|
||||||
Overall, what will be selected is going match the basic moral rules.
|
Overall, what will be selected is going to match the basic moral rules.
|
||||||
\end{descriptionlist}
|
\end{descriptionlist}
|
||||||
\item There is a procedure to determine if an action is right or wrong: ask whether free, equal, and rational people would agree to rules that allow that action.
|
\item There is a procedure to determine if an action is right or wrong: ask whether free, equal, and rational people would agree to rules that allow that action.
|
||||||
\item Contractarianism justifies the origin of morality as originated from the same society we live in, but in a more rational and free version.
|
\item Contractarianism justifies the origin of morality as originated from the same society we live in, but in a more rational and free version.
|
||||||
|
|||||||
@ -28,7 +28,7 @@
|
|||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
|
|
||||||
\subsection{Opportunities and risks of AI for society}
|
\subsection{Opportunities and risks of AI for society}
|
||||||
|
|
||||||
This chapter identifies four opportunity-risk points of AI systems:
|
This chapter identifies four opportunity-risk points of AI systems:
|
||||||
\begin{descriptionlist}
|
\begin{descriptionlist}
|
||||||
@ -54,7 +54,6 @@ This chapter identifies four opportunity-risk points of AI systems:
|
|||||||
\subsection{Unified framework of principles for AI in society}
|
\subsection{Unified framework of principles for AI in society}
|
||||||
|
|
||||||
This chapter groups and presents the common principles used by different organizations and initiatives.
|
This chapter groups and presents the common principles used by different organizations and initiatives.
|
||||||
|
|
||||||
Most of them overlap with the principles of bioethics:
|
Most of them overlap with the principles of bioethics:
|
||||||
\begin{descriptionlist}
|
\begin{descriptionlist}
|
||||||
\item[Beneficence] \marginnote{Beneficence}
|
\item[Beneficence] \marginnote{Beneficence}
|
||||||
@ -84,7 +83,7 @@ This chapter presents 20 action points of four types:
|
|||||||
\item[Assessment] \phantom{}
|
\item[Assessment] \phantom{}
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Assess the capabilities of existing institutions in dealing with harms caused by AI systems.
|
\item Assess the capabilities of existing institutions in dealing with harms caused by AI systems.
|
||||||
\item Assess which task should not be delegated to AI systems.
|
\item Assess which tasks should not be delegated to AI systems.
|
||||||
\item Assess whether current regulations are sufficiently grounded in ethics.
|
\item Assess whether current regulations are sufficiently grounded in ethics.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
@ -148,7 +147,7 @@ This chapter presents 20 action points of four types:
|
|||||||
\end{remark}
|
\end{remark}
|
||||||
|
|
||||||
\item[Ethical] \marginnote{Ethical}
|
\item[Ethical] \marginnote{Ethical}
|
||||||
AI must be in line with ethical principles and values (i.e., moral AI) for which laws might be lacking or unsuited for the purpose.
|
AI must be in line with ethical principles and values (i.e., moral AI) for which laws might be lacking or be unsuited for the purpose.
|
||||||
|
|
||||||
\item[Robust] \marginnote{Robust}
|
\item[Robust] \marginnote{Robust}
|
||||||
AI must be technically and socially robust in order to minimize intentional or unintentional harm.
|
AI must be technically and socially robust in order to minimize intentional or unintentional harm.
|
||||||
@ -217,10 +216,10 @@ The concept of AI ethics presented in the framework is rooted to the fundamental
|
|||||||
Seen as legally enforceable rights, fundamental rights can be considered as part of the \textsc{lawful} AI component. Seen as the rights of everyone, from a moral status, they fall within the \textsc{ethical} AI component.
|
Seen as legally enforceable rights, fundamental rights can be considered as part of the \textsc{lawful} AI component. Seen as the rights of everyone, from a moral status, they fall within the \textsc{ethical} AI component.
|
||||||
\end{remark}
|
\end{remark}
|
||||||
|
|
||||||
This chapter describes four ethical principle for trustworthy AI based on fundamental rights:
|
This chapter describes four ethical principles for trustworthy AI based on fundamental rights:
|
||||||
\begin{descriptionlist}
|
\begin{descriptionlist}
|
||||||
\item[Principle of respect for human autonomy] \marginnote{Principle of respect for human autonomy}
|
\item[Principle of respect for human autonomy] \marginnote{Principle of respect for human autonomy}
|
||||||
AI users should keep full self-determination. AI systems should be human-centric leaving room for human choices and they should not manipulate them.
|
AI users should keep full self-determination. AI systems should be human-centric, leaving room for human choices, and should not manipulate their users.
|
||||||
|
|
||||||
% AI should empower individuals and not control and restrict freedom. Vulnerable groups need extra protection.
|
% AI should empower individuals and not control and restrict freedom. Vulnerable groups need extra protection.
|
||||||
|
|
||||||
@ -284,7 +283,7 @@ The main requirements the framework defines are:
|
|||||||
\item Possible unintended uses or abuse of the system should be taken into account and mitigated.
|
\item Possible unintended uses or abuse of the system should be taken into account and mitigated.
|
||||||
\item There should be fallback plans in case of problems (e.g., switching from a statistical to a rule-based algorithm, asking a human, \dots).
|
\item There should be fallback plans in case of problems (e.g., switching from a statistical to a rule-based algorithm, asking a human, \dots).
|
||||||
\item There should be an explicit evaluation process to assess the accuracy of the AI system and determine its error rate.
|
\item There should be an explicit evaluation process to assess the accuracy of the AI system and determine its error rate.
|
||||||
\item The output of an AI system should be reliable (robust to a wide range of inputs) and reproducible.
|
\item The output of an AI system should be reliable (i.e., robust to a wide range of inputs) and reproducible.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
\item[Privacy and data governance] \marginnote{Privacy and data governance}
|
\item[Privacy and data governance] \marginnote{Privacy and data governance}
|
||||||
@ -326,7 +325,7 @@ The main requirements the framework defines are:
|
|||||||
The impact of AI systems should also consider society in general and the environment (principles of fairness and prevention of harm):
|
The impact of AI systems should also consider society in general and the environment (principles of fairness and prevention of harm):
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item The environmental impact of the lifecycle of an AI system should be assessed.
|
\item The environmental impact of the lifecycle of an AI system should be assessed.
|
||||||
\item The effects of AI systems on people's physical and mental well-being, as well as institutions, democracy, and society should be assessed and monitored.
|
\item The effects of AI systems on people's physical and mental well-being, as well as on institutions, democracy, and society should be assessed and monitored.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
% AI should not have negative impacts on the society and environment.
|
% AI should not have negative impacts on the society and environment.
|
||||||
|
|
||||||
@ -379,7 +378,7 @@ The chapter also describes some technical and non-technical methods to ensure tr
|
|||||||
Organizations should appoint a person or a board for decisions regarding ethics.
|
Organizations should appoint a person or a board for decisions regarding ethics.
|
||||||
|
|
||||||
\item[Education and awareness]
|
\item[Education and awareness]
|
||||||
Educate, and train involved stakeholders.
|
Educate and train involved stakeholders.
|
||||||
|
|
||||||
\item[Stakeholder participation and social dialogue]
|
\item[Stakeholder participation and social dialogue]
|
||||||
Ensure open discussions between stakeholders and involve the general public.
|
Ensure open discussions between stakeholders and involve the general public.
|
||||||
|
|||||||
@ -122,7 +122,7 @@
|
|||||||
\item[Trolley problem] \marginnote{Trolley problem}
|
\item[Trolley problem] \marginnote{Trolley problem}
|
||||||
A trolley is headed towards a path where it will kill five people. If a lever is pulled, the trolley will be diverted and kill one person.
|
A trolley is headed towards a path where it will kill five people. If a lever is pulled, the trolley will be diverted and kill one person.
|
||||||
|
|
||||||
The dilemma is whether to do nothing and kill five people or pull the lever and kill one.
|
The dilemma is whether to do nothing and kill five people or pull the lever and actively kill one.
|
||||||
|
|
||||||
\begin{figure}[H]
|
\begin{figure}[H]
|
||||||
\centering
|
\centering
|
||||||
@ -130,7 +130,7 @@
|
|||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
\item[Trolley problem (fat person)] \marginnote{Trolley problem (fat person)}
|
\item[Trolley problem (fat person)] \marginnote{Trolley problem (fat person)}
|
||||||
Variation of the trolley problem where the trolley goes towards a single path that it will kill some people and can be stopped by pushing a fat person on the track.
|
Variation of the trolley problem where the trolley goes towards a single path that will kill some people and can be stopped by pushing a fat person on the track.
|
||||||
|
|
||||||
This scenario tests whether a direct physical involvement affecting someone not in danger changes the morality in the decision.
|
This scenario tests whether a direct physical involvement affecting someone not in danger changes the morality in the decision.
|
||||||
\end{description}
|
\end{description}
|
||||||
@ -145,8 +145,8 @@
|
|||||||
Consider the following scenarios:
|
Consider the following scenarios:
|
||||||
\begin{enumerate}[label=(\Alph*)]
|
\begin{enumerate}[label=(\Alph*)]
|
||||||
\item The car can either kill many pedestrians crossing the street or a single person on the side of the road.
|
\item The car can either kill many pedestrians crossing the street or a single person on the side of the road.
|
||||||
\item The car can either kill a single pedestrian crossing the street or hit a wall killing its passengers.
|
\item The car can either kill a single pedestrian crossing the street or hit a wall killing its driver.
|
||||||
\item The car can either kill many pedestrians crossing the street or hit a wall killing its passengers.
|
\item The car can either kill many pedestrians crossing the street or hit a wall killing its driver.
|
||||||
\end{enumerate}
|
\end{enumerate}
|
||||||
|
|
||||||
\begin{figure}[H]
|
\begin{figure}[H]
|
||||||
|
|||||||
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\item[CLAUDETTE] \marginnote{CLAUDETTE}
|
\item[CLAUDETTE] \marginnote{CLAUDETTE}
|
||||||
Clause detector (CLAUDETTE) is a system to classify clauses in terms of services or privacy policies as:
|
Clause detector (CLAUDETTE) is a system to classify clauses in a terms of service or privacy policy as:
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item \textsc{Clearly fair},
|
\item \textsc{Clearly fair},
|
||||||
\item \textsc{Potentially unfair},
|
\item \textsc{Potentially unfair},
|
||||||
@ -35,6 +35,13 @@
|
|||||||
\item \textsc{potentially unfair}, if the provider can unilaterally modify the terms of service or the service.
|
\item \textsc{potentially unfair}, if the provider can unilaterally modify the terms of service or the service.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
|
\item[Unilateral termination] \marginnote{Unilateral termination}
|
||||||
|
A clause is classified as:
|
||||||
|
\begin{itemize}
|
||||||
|
\item \textsc{potentially unfair}, if the provider has the right to suspend or terminate the service and the reasons are specified.
|
||||||
|
\item \textsc{clearly unfair}, if the provider can suspend or terminate the service for any reason.
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
\item[Jurisdiction clause] \marginnote{Jurisdiction clause}
|
\item[Jurisdiction clause] \marginnote{Jurisdiction clause}
|
||||||
A clause is classified as:
|
A clause is classified as:
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
@ -65,13 +72,6 @@
|
|||||||
\item \textsc{clearly unfair}, if the provider is never liable (intentional damage included).
|
\item \textsc{clearly unfair}, if the provider is never liable (intentional damage included).
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
\item[Unilateral termination] \marginnote{Unilateral termination}
|
|
||||||
A clause is classified as:
|
|
||||||
\begin{itemize}
|
|
||||||
\item \textsc{potentially unfair}, if the provider has the right to suspend or terminate the service and the reasons are specified.
|
|
||||||
\item \textsc{clearly unfair}, if the provider can suspend or terminate the service for any reason.
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\item[Content removal] \marginnote{Content removal}
|
\item[Content removal] \marginnote{Content removal}
|
||||||
A clause is classified as:
|
A clause is classified as:
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
|
|||||||
@ -16,15 +16,15 @@
|
|||||||
|
|
||||||
\begin{description}
|
\begin{description}
|
||||||
\item[Historical bias] \marginnote{Historical bias}
|
\item[Historical bias] \marginnote{Historical bias}
|
||||||
System trained on intrinsically biased data will reproduce the same biased behavior.
|
Systems trained on intrinsically biased data will reproduce the same biased behavior.
|
||||||
|
|
||||||
\begin{remark}
|
\begin{remark}
|
||||||
Data can be biased because it comes from past human judgement or by the hierarchies of the society (e.g., systems working on marginalized languages will most likely have lower performance compared to a widespread language).
|
Data can be biased because it comes from past human judgement or by the hierarchies of society (e.g., systems working on marginalized languages will most likely have lower performance compared to a widespread language).
|
||||||
\end{remark}
|
\end{remark}
|
||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
\begin{example}[Amazon AI recruiting tool]
|
\begin{example}[Amazon AI recruiting tool]
|
||||||
Tool that Amazon used in the past to review job applications. It was heavily biased towards male applicants and, even with the gender removed, it was able to infer it from the other features.
|
Tool that Amazon was using in the past to review job applications. It was heavily biased towards male applicants and, even with the gender removed, it was able to infer it from the other features.
|
||||||
|
|
||||||
\begin{figure}[H]
|
\begin{figure}[H]
|
||||||
\centering
|
\centering
|
||||||
@ -34,7 +34,7 @@
|
|||||||
\end{example}
|
\end{example}
|
||||||
|
|
||||||
\begin{example}[UK AI visa and asylum system]
|
\begin{example}[UK AI visa and asylum system]
|
||||||
System used by the UK government to assess visa and asylum applications. It was found that:
|
System used by the UK government to assess visa and asylum applications. It was found out that:
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item The system ranked applications based on nationality.
|
\item The system ranked applications based on nationality.
|
||||||
\item Applicants from certain countries were automatically flagged as high risk.
|
\item Applicants from certain countries were automatically flagged as high risk.
|
||||||
@ -66,7 +66,7 @@
|
|||||||
|
|
||||||
In the context of language models, some systems implement a refusal mechanism to prevent a biased response. However:
|
In the context of language models, some systems implement a refusal mechanism to prevent a biased response. However:
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Using a different prompt on the same topic might bypass the filter.
|
\item Using a different prompt of the same topic might bypass the filter.
|
||||||
\item Refusal might be applied unequally depending on demographics or domain.
|
\item Refusal might be applied unequally depending on demographics or domain.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{example}
|
\end{example}
|
||||||
@ -168,10 +168,10 @@
|
|||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item The overall accuracy is moderate-low ($61.2\%$),
|
\item The overall accuracy is moderate-low ($61.2\%$),
|
||||||
\item Black defendants were more likely labeled with a high level of risk, leading to a higher probability of high risk misclassification ($45\%$ blacks vs $23\%$ whites).
|
\item Black defendants were more likely labeled with a high level of risk, leading to a higher probability of high risk misclassification ($45\%$ blacks vs $23\%$ whites).
|
||||||
\item White defendants were more likely labeled with a low level of risk, leading to a higher probability of low risk misclassification ($48\%$ blacks vs $28\%$ whites).
|
\item White defendants were more likely labeled with a low level of risk, leading to a higher probability of low risk misclassification ($48\%$ whites vs $28\%$ blacks).
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
Northpointe, the software house of COMPAS, stated that ProPublic made several statistical and technical errors as:
|
Northpointe, the software house of COMPAS, stated that ProPublica made several statistical and technical errors, such as:
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item The accuracy of COMPAS is higher than human judgement.
|
\item The accuracy of COMPAS is higher than human judgement.
|
||||||
\item The general recidivism risk scale is equally accurate for blacks and whites,
|
\item The general recidivism risk scale is equally accurate for blacks and whites,
|
||||||
@ -202,7 +202,7 @@
|
|||||||
\item The training data is composed of 3000 defendants divided into 1500 blues (1000 previous offenders) and 1500 greens (500 previous offenders).
|
\item The training data is composed of 3000 defendants divided into 1500 blues (1000 previous offenders) and 1500 greens (500 previous offenders).
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
Therefore, the real aggregated outcomes are:
|
Therefore, the ground-truth aggregated outcomes are:
|
||||||
\begin{center}
|
\begin{center}
|
||||||
\footnotesize
|
\footnotesize
|
||||||
\begin{tabular}{c|cc}
|
\begin{tabular}{c|cc}
|
||||||
@ -336,7 +336,7 @@
|
|||||||
\item[Conditional use error/false rate] \marginnote{Conditional use error/false rate}
|
\item[Conditional use error/false rate] \marginnote{Conditional use error/false rate}
|
||||||
The proportion of incorrect predictions should be equal for each class within each group.
|
The proportion of incorrect predictions should be equal for each class within each group.
|
||||||
\begin{example}[SAPMOC]
|
\begin{example}[SAPMOC]
|
||||||
SAPMOC satisfies conditional use error/:
|
SAPMOC satisfies conditional use error:
|
||||||
\begin{center}
|
\begin{center}
|
||||||
\footnotesize
|
\footnotesize
|
||||||
\begin{tabular}{c|cc}
|
\begin{tabular}{c|cc}
|
||||||
|
|||||||
@ -137,7 +137,7 @@ Processing of personal data is lawful if at least one of the following condition
|
|||||||
As a rule of thumb, legitimate interests of the controller can be pursued if only a reasonably limited amount of personal data is used.
|
As a rule of thumb, legitimate interests of the controller can be pursued if only a reasonably limited amount of personal data is used.
|
||||||
\end{remark}
|
\end{remark}
|
||||||
\begin{example}
|
\begin{example}
|
||||||
The gym one is subscribed to can send (contextual) advertisements by email to pursue economic interests.
|
The gym one is subscribed to can send (contextual) advertisement by email to pursue economic interests.
|
||||||
\end{example}
|
\end{example}
|
||||||
\begin{remark}
|
\begin{remark}
|
||||||
Targeted advertising is in principle prohibited. However, companies commonly pair legitimate interest with the request for consent.
|
Targeted advertising is in principle prohibited. However, companies commonly pair legitimate interest with the request for consent.
|
||||||
@ -407,7 +407,7 @@ There are two main opinions on AI systems:
|
|||||||
Agreement of the data subject that allows the processing of their personal data. Consent should be:
|
Agreement of the data subject that allows the processing of their personal data. Consent should be:
|
||||||
\begin{descriptionlist}
|
\begin{descriptionlist}
|
||||||
\item[Freely given]
|
\item[Freely given]
|
||||||
The data subject have the choice to give consent for profiling
|
The data subject has the choice to give consent for profiling.
|
||||||
|
|
||||||
\begin{remark}
|
\begin{remark}
|
||||||
A common practice is the ``take-or-leave'' approach, which is illegal.
|
A common practice is the ``take-or-leave'' approach, which is illegal.
|
||||||
@ -432,7 +432,7 @@ There are two main opinions on AI systems:
|
|||||||
\end{remark}
|
\end{remark}
|
||||||
|
|
||||||
\item[Unambiguously provided]
|
\item[Unambiguously provided]
|
||||||
Consent should be explicitly provided by the data subject through a statement of affirmative action.
|
Consent should be explicitly provided by the data subject through a statement or affirmative action.
|
||||||
\begin{remark}
|
\begin{remark}
|
||||||
An illegal practice in many privacy policies is to state that the terms may change and that continuing to use the service implies an implicit acceptance of the new terms.
|
An illegal practice in many privacy policies is to state that the terms may change and that continuing to use the service implies an implicit acceptance of the new terms.
|
||||||
\end{remark}
|
\end{remark}
|
||||||
@ -465,7 +465,7 @@ When personal data is collected, the controller should provide the data subject
|
|||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item The identity of the controller, its representative (when applicable), and its contact details should be available.
|
\item The identity of the controller, its representative (when applicable), and its contact details should be available.
|
||||||
\item Contact details of the data officer (referee of the company that ensures that the GDPR is respected) should be available.
|
\item Contact details of the data officer (referee of the company that ensures that the GDPR is respected) should be available.
|
||||||
\item Purposes and legal basis of the processing.
|
\item Purposes and legal basis of processing.
|
||||||
\item Categories of data collected.
|
\item Categories of data collected.
|
||||||
\item Recipients or categories of recipients.
|
\item Recipients or categories of recipients.
|
||||||
\item Period of time or the criteria to determine how long the data is stored.
|
\item Period of time or the criteria to determine how long the data is stored.
|
||||||
@ -486,7 +486,7 @@ Moreover, in case of automated decision-making, the following information should
|
|||||||
|
|
||||||
\subsection{Right to access (article 15)} \marginnote{Right to access}
|
\subsection{Right to access (article 15)} \marginnote{Right to access}
|
||||||
|
|
||||||
Data subjects have the right to have confirmation from the controller on whether their data has been processed and access both input and inferred personal data.
|
Data subjects have the right to have confirmation from the controller on whether their data has been processed and can access both input and inferred personal data.
|
||||||
|
|
||||||
This right is limited if it affects the rights or freedoms of others.
|
This right is limited if it affects the rights or freedoms of others.
|
||||||
|
|
||||||
@ -512,7 +512,7 @@ Data subjects have the right to have their own personal data erased without dela
|
|||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item The data is no longer necessary for the purpose it was collected for.
|
\item The data is no longer necessary for the purpose it was collected for.
|
||||||
\begin{example}
|
\begin{example}
|
||||||
An e-shop cannot delete the address until the order is arrived.
|
An e-shop cannot delete the address until the order has arrived.
|
||||||
\end{example}
|
\end{example}
|
||||||
|
|
||||||
\item The data subject has withdrawn its consent, unless there is another legal basis.
|
\item The data subject has withdrawn its consent, unless there is another legal basis.
|
||||||
|
|||||||
8
src/year2/ethics-in-ai/module2/sections/_gen_ai.tex
Normal file
8
src/year2/ethics-in-ai/module2/sections/_gen_ai.tex
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
\begin{remark}
|
||||||
|
Transformative rule: Two purposes of a work:
|
||||||
|
\begin{itemize}
|
||||||
|
\item Original purpose of the work (e.g., creative work)
|
||||||
|
\item Training
|
||||||
|
\end{itemize}
|
||||||
|
It is allowed if the original purpose of the work and the training purpose are different.
|
||||||
|
\end{remark}
|
||||||
Reference in New Issue
Block a user