mirror of
https://github.com/NotXia/unibo-ai-notes.git
synced 2025-12-14 18:51:52 +01:00
Add DAS appendices
This commit is contained in:
@ -1,4 +1,5 @@
|
|||||||
\documentclass[11pt]{ainotes}
|
\documentclass[11pt]{ainotes}
|
||||||
|
\usepackage{appendix}
|
||||||
|
|
||||||
\title{Distributed Autonomous Systems}
|
\title{Distributed Autonomous Systems}
|
||||||
\date{2024 -- 2025}
|
\date{2024 -- 2025}
|
||||||
@ -53,14 +54,15 @@
|
|||||||
\begin{document}
|
\begin{document}
|
||||||
|
|
||||||
\makenotesfront
|
\makenotesfront
|
||||||
\include{./sections/_graphs.tex}
|
\input{./sections/_graphs.tex}
|
||||||
\include{./sections/_averaging_systems.tex}
|
\input{./sections/_averaging_systems.tex}
|
||||||
\include{./sections/_containment.tex}
|
\input{./sections/_containment.tex}
|
||||||
\include{./sections/_optimization.tex}
|
\input{./sections/_optimization.tex}
|
||||||
\include{./sections/_formation_control.tex}
|
\input{./sections/_formation_control.tex}
|
||||||
\include{./sections/_cooperative_robotics.tex}
|
\input{./sections/_cooperative_robotics.tex}
|
||||||
\include{./sections/_safety_controllers.tex}
|
\input{./sections/_safety_controllers.tex}
|
||||||
\include{./sections/_feedback_optimization.tex}
|
\input{./sections/_feedback_optimization.tex}
|
||||||
\include{./sections/_neural_networks.tex}
|
\input{./sections/_neural_networks.tex}
|
||||||
|
\eoc
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
@ -91,171 +91,6 @@
|
|||||||
|
|
||||||
\subsection{Consensus}
|
\subsection{Consensus}
|
||||||
|
|
||||||
% \begin{remark}
|
|
||||||
% The distributed consensus algorithm is a positive system (i.e., $\matr{A}$ is positive).
|
|
||||||
% \end{remark}
|
|
||||||
|
|
||||||
\begin{description}
|
|
||||||
\item[Positive matrix characterization]
|
|
||||||
Given $\A \in \mathbb{R}^{N \times N}$, it can be:
|
|
||||||
\begin{description}
|
|
||||||
\item[Non-negative] \marginnote{Non-negative matrix}
|
|
||||||
$\A \geq 0$.
|
|
||||||
\item[Irreducible] \marginnote{Irreducible matrix}
|
|
||||||
$\sum_{h=0}^{N-1} \A^h > 0$.
|
|
||||||
\item[Primitive] \marginnote{Primitive matrix}
|
|
||||||
$\exists h \in \{ 1, \dots, N \}: A^h > 0$.
|
|
||||||
% \begin{remark}
|
|
||||||
% A graph with a primitive adjacency matrix is connected.
|
|
||||||
% \end{remark}
|
|
||||||
\item[Positive] \marginnote{Positive matrix}
|
|
||||||
$\A > 0$.
|
|
||||||
\end{description}
|
|
||||||
\end{description}
|
|
||||||
|
|
||||||
|
|
||||||
% Equilibrium:
|
|
||||||
% \[
|
|
||||||
% \x^{k+1} = \matr{A}\x^k
|
|
||||||
% \]
|
|
||||||
% \[
|
|
||||||
% \begin{split}
|
|
||||||
% \x_\text{eq} = \matr{A} \x_\text{eq} \\
|
|
||||||
% \iff (\matr{I} - \matr{A}) \x_\text{eq} = 0
|
|
||||||
% \end{split}
|
|
||||||
% \]
|
|
||||||
% We are interested in the null space of $(\matr{I} - \matr{A})$ (at least one eigenvector).
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
\begin{theorem} \label{th:positive_matrix_digraph_connected}
|
|
||||||
Given a weighted digraph $G$ with $N \geq 2$ nodes and adjacency matrix $\A$, it holds that:
|
|
||||||
\begin{itemize}
|
|
||||||
\item $\A$ is irreducible $\iff$ $G$ is strongly connected.
|
|
||||||
\item $\A$ is primitive $\iff$ $G$ is strongly connected and aperiodic.
|
|
||||||
\end{itemize}
|
|
||||||
\end{theorem}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
\begin{theorem}[Gershgorin] \label{th:gershgorin} \marginnote{Gershgorin theorem}
|
|
||||||
For any square matrix $\A \in \mathbb{C}^{N \times N}$, it holds that the spectrum of $\A$ (i.e., set of eigenvalues) is contained in the Gershgorin disks:
|
|
||||||
\[
|
|
||||||
\text{spec}(\A) \subset \bigcup_{i=1}^{N} \left\{ s \in \mathbb{C} \,\,\bigg|\,\, |s - a_{ii}| \leq \sum_{j=1, j \neq i}^{N} |a_{ij}| \right\}
|
|
||||||
\]
|
|
||||||
In other words, it is the union of the disks with center $a_{ii}$ and radius $\sum_{j=1, j \neq i}^{N} |a_{ij}|$.
|
|
||||||
|
|
||||||
\indenttbox
|
|
||||||
\begin{remark}
|
|
||||||
This theorem provides an approximate location of the eigenvalues.
|
|
||||||
\end{remark}
|
|
||||||
|
|
||||||
\indenttbox
|
|
||||||
\begin{example}
|
|
||||||
Consider the matrix:
|
|
||||||
\[
|
|
||||||
\begin{bmatrix}
|
|
||||||
10 & 1 & 0 & 1 \\
|
|
||||||
0.2 & 8 & 0.2 & 0.2 \\
|
|
||||||
1 & 1 & 2 & 1 \\
|
|
||||||
-1 & -1 & -1 & -11
|
|
||||||
\end{bmatrix}
|
|
||||||
\]
|
|
||||||
|
|
||||||
Its eigenvalues are $\{ -10.870, 1.906, 7.918, 10.046 \}$.
|
|
||||||
|
|
||||||
The Gershgorin disks are:
|
|
||||||
\begin{figure}[H]
|
|
||||||
\centering
|
|
||||||
\includegraphics[width=0.4\linewidth]{./img/gershgorin.png}
|
|
||||||
\end{figure}
|
|
||||||
\end{example}
|
|
||||||
\end{theorem}
|
|
||||||
|
|
||||||
% \begin{lemma}
|
|
||||||
% If all the disks are within the unit disk, the eigenvalues are stable.
|
|
||||||
% \[
|
|
||||||
% \begin{bmatrix}
|
|
||||||
% \frac{1}{2} & \frac{1}{2} & 0 \\
|
|
||||||
% \frac{1}{3} & \frac{1}{3} & \frac{1}{3} \\
|
|
||||||
% 0 & \frac{3}{4} & \frac{1}{4}
|
|
||||||
% \end{bmatrix}
|
|
||||||
% \]
|
|
||||||
% \end{lemma}
|
|
||||||
|
|
||||||
|
|
||||||
\begin{theorem}[Perron-Frobenius] \label{th:perron_frobenius} \marginnote{Perron-Frobenius theorem}
|
|
||||||
Let $\A \in \R^{N \times N}$ with $N \geq 2$ be a non-negative matrix. It holds that:
|
|
||||||
\begin{itemize}
|
|
||||||
\item There exists a real eigenvalue $\lambda \geq 0$ that is dominant for all the other eigenvalues $\mu \in \text{spec}(\A) \smallsetminus \{\lambda\}$ (i.e., $\lambda \geq |\mu|$),
|
|
||||||
\item The right eigenvector $\v \in \R^N$ and left eigenvector $\w \in \R^N$ associated to $\lambda$ can be chosen to be non-negative.
|
|
||||||
\end{itemize}
|
|
||||||
If $\A \in \R^{N \times N}$ is irreducible, then:
|
|
||||||
\begin{itemize}
|
|
||||||
\item The eigenvalue $\lambda$ is strictly positive ($\lambda > 0$) and simple.
|
|
||||||
\item The right and left eigenvalues $\v$ and $\w$ associated to $\lambda$ are unique and positive.
|
|
||||||
\end{itemize}
|
|
||||||
If $\A \in \R^{N \times N}$ is primitive, then:
|
|
||||||
\begin{itemize}
|
|
||||||
\item The eigenvalue $\lambda$ is strictly dominant for all $\mu \in \text{spec}(\A) \smallsetminus \{\lambda\}$ (i.e., $\lambda > |\mu|$).
|
|
||||||
\end{itemize}
|
|
||||||
\end{theorem}
|
|
||||||
|
|
||||||
\begin{lemma} \label{th:row_stochastic_unit_disk}
|
|
||||||
Given a row stochastic matrix $\A$, it holds that:
|
|
||||||
\begin{itemize}
|
|
||||||
\item $\lambda=1$ is an eigenvalue,
|
|
||||||
\item By \hyperref[th:gershgorin]{Gershgorin Theorem}, $\text{spec}(\A)$ is a subset of the unit disk (i.e., all Gershgorin disks lie inside the unit disk).
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\begin{figure}[H]
|
|
||||||
\centering
|
|
||||||
\includegraphics[width=0.2\linewidth]{./img/gershgorin_unit.png}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
\indenttbox
|
|
||||||
\begin{corollary}
|
|
||||||
The eigenvalue $\lambda=1 \geq |\mu|$ is dominant.
|
|
||||||
\end{corollary}
|
|
||||||
\end{lemma}
|
|
||||||
|
|
||||||
\begin{lemma}
|
|
||||||
Given a row stochastic and primitive matrix $\A$, by \Cref{th:row_stochastic_unit_disk} and \hyperref[th:perron_frobenius]{Perron-Frobenius Theorem} it holds that $\lambda = 1$ is simple and strictly dominant.
|
|
||||||
|
|
||||||
\indenttbox
|
|
||||||
\begin{corollary}
|
|
||||||
The consensus averaging system is marginally stable (i.e., converges but not necessarily to the origin) as the largest distinct eigenvalue is $\lambda = 1$.
|
|
||||||
\end{corollary}
|
|
||||||
\end{lemma}
|
|
||||||
|
|
||||||
|
|
||||||
% \begin{lemma}
|
|
||||||
% \[
|
|
||||||
% \x_\text{eq} = ker(\matr{I} - \A) = \{ \vec{1}\beta \mid \beta \in \R \}
|
|
||||||
% \]
|
|
||||||
|
|
||||||
% \[
|
|
||||||
% \w^T \x^{k+1} = \w^T(\A \x^{k}) = \w^T \x^k
|
|
||||||
% \]
|
|
||||||
% i.e., $\w$ is left eigenvector of $\A$ with $\lambda = 1$.
|
|
||||||
|
|
||||||
% Therefore, the above must be true for:
|
|
||||||
% \[
|
|
||||||
% \begin{split}
|
|
||||||
% \w^T \x_\text{eq} \\
|
|
||||||
% \w^T \x^{0} \\
|
|
||||||
% \end{split}
|
|
||||||
% \]
|
|
||||||
% \[
|
|
||||||
% \w^T \vec{1}\beta \Rightarrow \beta = \frac{\w^T\x^{0}}{\w^T\vec{1}}
|
|
||||||
% \]
|
|
||||||
% \end{lemma}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
\begin{theorem}[Discrete-time consensus] \marginnote{Discrete-time consensus}
|
\begin{theorem}[Discrete-time consensus] \marginnote{Discrete-time consensus}
|
||||||
Consider a discrete-time averaging system with digraph $G$ and weighted adjacency matrix $\matr{A}$. Assume $G$ strongly connected and aperiodic, and $\matr{A}$ row stochastic.
|
Consider a discrete-time averaging system with digraph $G$ and weighted adjacency matrix $\matr{A}$. Assume $G$ strongly connected and aperiodic, and $\matr{A}$ row stochastic.
|
||||||
|
|
||||||
@ -304,122 +139,6 @@
|
|||||||
% \end{gathered}
|
% \end{gathered}
|
||||||
% \]
|
% \]
|
||||||
% \end{proof}
|
% \end{proof}
|
||||||
|
|
||||||
\begin{proof}[Proof (Jordan-form approach)]
|
|
||||||
As is $G$ strongly connected and aperiodic, and $\A$ is row stochastic, it holds that:
|
|
||||||
\begin{itemize}
|
|
||||||
\item By \Cref{th:positive_matrix_digraph_connected}, $\A$ is primitive.
|
|
||||||
\item By \hyperref[th:perron_frobenius]{Perron-Frobenius Theorem} and \Cref{th:row_stochastic_unit_disk}, the eigenvalue $\lambda=1$ is strictly dominant and it is associated to the right eigenvector $\vec{1}$ (row stochasticity) and left eigenvector $\w$.
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
Consider the non-singular matrix $\matr{T} \in \R^{N \times N}$ defined as:
|
|
||||||
\[
|
|
||||||
\matr{T} = \begin{bmatrix}
|
|
||||||
\vert & \vert & & \vert \\
|
|
||||||
\vec{1} & \v^2 & \dots & \v^N \\
|
|
||||||
\vert & \vert & & \vert \\
|
|
||||||
\end{bmatrix} = \begin{bmatrix}
|
|
||||||
\vec{1} & \matr{W}_R
|
|
||||||
\end{bmatrix}
|
|
||||||
\qquad
|
|
||||||
\matr{T}^{-1} = \begin{bmatrix}
|
|
||||||
- & (\w)^T & - \\
|
|
||||||
- & (\w^2)^T & - \\
|
|
||||||
- & \vdots & - \\
|
|
||||||
- & (\w^N)^T & - \\
|
|
||||||
\end{bmatrix} = \begin{bmatrix}
|
|
||||||
\w^T \\ \matr{W}_L
|
|
||||||
\end{bmatrix}
|
|
||||||
\]
|
|
||||||
|
|
||||||
A change in coordinates defined as:
|
|
||||||
\[
|
|
||||||
\x \mapsto \tilde{\x} = \matr{T}^{-1} \x
|
|
||||||
\]
|
|
||||||
allows to obtain the Jordan form $\matr{T}^{-1}\A\matr{T}$:
|
|
||||||
\[
|
|
||||||
\matr{T}^{-1}\A\matr{T} = \begin{bmatrix}
|
|
||||||
1 & 0 & \dots \\
|
|
||||||
0 & & \\
|
|
||||||
\vdots & & \matr{J}_2 \\
|
|
||||||
\end{bmatrix}
|
|
||||||
\]
|
|
||||||
with $\matr{J}_2 \in \mathbb{R}^{(N-1) \times (N-1)}$ Schur (i.e., $\text{spec}(\matr{J}_2)$ inside the open unit disk).
|
|
||||||
|
|
||||||
The dynamics $\x^{k+1} = \A \x^k$ in the new coordinate system is:
|
|
||||||
\[
|
|
||||||
\begin{split}
|
|
||||||
\tilde{\x}^{k+1} &= \matr{T}^{-1} \x^{k+1} = \matr{T}^{-1} \A \matr{T} \tilde{\x}^k \\
|
|
||||||
&= \begin{bmatrix}
|
|
||||||
1 & 0 & \dots \\
|
|
||||||
0 & & \\
|
|
||||||
\vdots & & \matr{J}_2 \\
|
|
||||||
\end{bmatrix} \tilde{\x}^k
|
|
||||||
= \begin{bmatrix}
|
|
||||||
1 & 0 & \dots \\
|
|
||||||
0 & & \\
|
|
||||||
\vdots & & \matr{J}_2 \\
|
|
||||||
\end{bmatrix}^{k+1} \tilde{\x}^0
|
|
||||||
\end{split}
|
|
||||||
\]
|
|
||||||
Let's denote:
|
|
||||||
\[
|
|
||||||
\tilde{\x}^k = \matr{T}^{-1}\x^k = \begin{bmatrix}
|
|
||||||
\w^T\x^k \\ \matr{W}_L\x^k
|
|
||||||
\end{bmatrix}
|
|
||||||
= \begin{bmatrix}
|
|
||||||
\tilde{\x}^k_{m} \\ \tilde{\x}^k_{\bot}
|
|
||||||
\end{bmatrix}
|
|
||||||
\]
|
|
||||||
We have that:
|
|
||||||
\[
|
|
||||||
\begin{split}
|
|
||||||
\lim_{k \rightarrow \infty} \tilde{\x}^k
|
|
||||||
&= \lim_{k \rightarrow \infty} \begin{bmatrix}
|
|
||||||
1 & 0 & \dots \\
|
|
||||||
0 & & \\
|
|
||||||
\vdots & & \matr{J}_2 \\
|
|
||||||
\end{bmatrix}^k \tilde{\x}^0 \\
|
|
||||||
&= \lim_{k \rightarrow \infty} \begin{bmatrix}
|
|
||||||
1 & 0 & \dots \\
|
|
||||||
0 & & \\
|
|
||||||
\vdots & & (\matr{J}_2)^k \\
|
|
||||||
\end{bmatrix} \begin{bmatrix}
|
|
||||||
\tilde{\x}^0_{m} \\ \tilde{\x}^0_{\bot}
|
|
||||||
\end{bmatrix} \\
|
|
||||||
&= \begin{bmatrix}
|
|
||||||
1 \cdot \tilde{\x}^0_{m} \\
|
|
||||||
\lim_{k \rightarrow \infty} (\matr{J}_2)^k \tilde{\x}^0_{\bot}
|
|
||||||
\end{bmatrix} \\
|
|
||||||
&= \begin{bmatrix}
|
|
||||||
\w^T \x^0 \\
|
|
||||||
0
|
|
||||||
\end{bmatrix} \\
|
|
||||||
\end{split}
|
|
||||||
\]
|
|
||||||
Note that $\lim_{k \rightarrow \infty} \matr{J}_2^k = 0$ as it is stable (i.e., all eigenvalues are in the open unit disk $|\mu| < 1$).
|
|
||||||
|
|
||||||
In the original coordinate system, the limit is:
|
|
||||||
\[
|
|
||||||
\begin{split}
|
|
||||||
\lim_{k \rightarrow \infty} \x^k
|
|
||||||
&= \lim_{k \rightarrow \infty} \matr{T} \tilde{\x}^k \\
|
|
||||||
&= \matr{T} \lim_{k \rightarrow \infty} \tilde{\x}^k \\
|
|
||||||
&= \begin{bmatrix}
|
|
||||||
\vec{1} & \matr{W}_R
|
|
||||||
\end{bmatrix} \begin{bmatrix}
|
|
||||||
\w^T \x^0 \\
|
|
||||||
0
|
|
||||||
\end{bmatrix}
|
|
||||||
= \vec{1} (\w^T \x^0)
|
|
||||||
\end{split}
|
|
||||||
\]
|
|
||||||
|
|
||||||
\indenttbox
|
|
||||||
\begin{remark}
|
|
||||||
It is assumed that $\Vert \w \Vert = 1$ (i.e., no normalization term).
|
|
||||||
\end{remark}
|
|
||||||
\end{proof}
|
|
||||||
\end{theorem}
|
\end{theorem}
|
||||||
|
|
||||||
\begin{example}[Metropolis-Hasting weights]
|
\begin{example}[Metropolis-Hasting weights]
|
||||||
@ -435,45 +154,6 @@
|
|||||||
\end{example}
|
\end{example}
|
||||||
|
|
||||||
|
|
||||||
% \begin{proof}[Lyapunov approach]
|
|
||||||
% $\A - \vec{1}\w^T$ is rank-1. This is to change one specific eigenvalue (move 1 to 0).
|
|
||||||
|
|
||||||
% Dissensus vector represents error:
|
|
||||||
% \[
|
|
||||||
% \begin{split}
|
|
||||||
% delta^{k+1}
|
|
||||||
% = \x^{k+1} - \vec{1}\w^T \x^0 \\
|
|
||||||
% = \x^{k+1} - \vec{1}\w^T \x^{k+1} \\
|
|
||||||
% = (\matr{I} - \vec{1}\w^T) \x^{k+1} \\
|
|
||||||
% = (\matr{I} - \vec{1}\w^T) \A\x^{k} \\
|
|
||||||
% = (\A - \vec{1}\w^T) \x^{k} \\
|
|
||||||
% = (\A - \vec{1}\w^T) \delta^{k} \\
|
|
||||||
% \end{split}
|
|
||||||
% \]
|
|
||||||
|
|
||||||
% Study:
|
|
||||||
% \[
|
|
||||||
% \delta^{k+1} = (\A - \vec{1}\w^T) \delta{k}
|
|
||||||
% \]
|
|
||||||
% If $\delta^k \rightarrow 0$, then $\x^k \rightarrow\vec{1}\w^T\x^0$.
|
|
||||||
% Note $(\A - \vec{1}\w^T)$ is Schur.
|
|
||||||
|
|
||||||
% Lyapunov equation for discrete time systems:
|
|
||||||
% \[
|
|
||||||
% \bar{\A}^T \matr{P} \bar{\A} = - \matr{P} = - \matr{Q}
|
|
||||||
% \]
|
|
||||||
% where $\bar{\A}$ is the Jordan-form of $(\A - \vec{1}\w^T)$
|
|
||||||
|
|
||||||
% Select $Q_2$ to be block-diagonal and $p_1$
|
|
||||||
|
|
||||||
|
|
||||||
% \[
|
|
||||||
% V(\delta) = \delta^T (\matr{T}^{-1})^T \matr{P} \matr{T}^{-1} \delta
|
|
||||||
% \]
|
|
||||||
% \end{proof}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
\section{Discrete-time averaging algorithm over time-varying graphs}
|
\section{Discrete-time averaging algorithm over time-varying graphs}
|
||||||
|
|
||||||
@ -657,4 +337,377 @@
|
|||||||
|
|
||||||
\begin{remark}
|
\begin{remark}
|
||||||
The result also holds for unweighted digraphs as $\vec{1}$ is both a left and right eigenvector of $\matr{L}$.
|
The result also holds for unweighted digraphs as $\vec{1}$ is both a left and right eigenvector of $\matr{L}$.
|
||||||
\end{remark}
|
\end{remark}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\begin{subappendices}
|
||||||
|
|
||||||
|
\section{Appendix: Discrete-time averaging system consensus proof}
|
||||||
|
|
||||||
|
% \begin{remark}
|
||||||
|
% The distributed consensus algorithm is a positive system (i.e., $\matr{A}$ is positive).
|
||||||
|
% \end{remark}
|
||||||
|
|
||||||
|
\begin{description}
|
||||||
|
\item[Positive matrix characterization]
|
||||||
|
Given $\A \in \mathbb{R}^{N \times N}$, it can be:
|
||||||
|
\begin{description}
|
||||||
|
\item[Non-negative] \marginnote{Non-negative matrix}
|
||||||
|
$\A \geq 0$.
|
||||||
|
\item[Irreducible] \marginnote{Irreducible matrix}
|
||||||
|
$\sum_{h=0}^{N-1} \A^h > 0$.
|
||||||
|
\item[Primitive] \marginnote{Primitive matrix}
|
||||||
|
$\exists h \in \{ 1, \dots, N \}: \A^h > 0$.
|
||||||
|
% \begin{remark}
|
||||||
|
% A graph with a primitive adjacency matrix is connected.
|
||||||
|
% \end{remark}
|
||||||
|
\item[Positive] \marginnote{Positive matrix}
|
||||||
|
$\A > 0$.
|
||||||
|
\end{description}
|
||||||
|
\end{description}
|
||||||
|
|
||||||
|
|
||||||
|
% Equilibrium:
|
||||||
|
% \[
|
||||||
|
% \x^{k+1} = \matr{A}\x^k
|
||||||
|
% \]
|
||||||
|
% \[
|
||||||
|
% \begin{split}
|
||||||
|
% \x_\text{eq} = \matr{A} \x_\text{eq} \\
|
||||||
|
% \iff (\matr{I} - \matr{A}) \x_\text{eq} = 0
|
||||||
|
% \end{split}
|
||||||
|
% \]
|
||||||
|
% We are interested in the null space of $(\matr{I} - \matr{A})$ (at least one eigenvector).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\begin{theorem} \label{th:positive_matrix_digraph_connected}
|
||||||
|
Given a weighted digraph $G$ with $N \geq 2$ nodes and adjacency matrix $\A$, it holds that:
|
||||||
|
\begin{itemize}
|
||||||
|
\item $\A$ is irreducible $\iff$ $G$ is strongly connected.
|
||||||
|
\item $\A$ is primitive $\iff$ $G$ is strongly connected and aperiodic.
|
||||||
|
\end{itemize}
|
||||||
|
\end{theorem}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\begin{theorem}[Gershgorin] \label{th:gershgorin} \marginnote{Gershgorin theorem}
|
||||||
|
For any square matrix $\A \in \mathbb{C}^{N \times N}$, it holds that the spectrum of $\A$ (i.e., set of eigenvalues) is contained in the Gershgorin disks:
|
||||||
|
\[
|
||||||
|
\text{spec}(\A) \subset \bigcup_{i=1}^{N} \left\{ s \in \mathbb{C} \,\,\bigg|\,\, |s - a_{ii}| \leq \sum_{j=1, j \neq i}^{N} |a_{ij}| \right\}
|
||||||
|
\]
|
||||||
|
In other words, it is the union of the disks with center $a_{ii}$ and radius $\sum_{j=1, j \neq i}^{N} |a_{ij}|$.
|
||||||
|
|
||||||
|
\indenttbox
|
||||||
|
\begin{remark}
|
||||||
|
This theorem provides an approximate location of the eigenvalues.
|
||||||
|
\end{remark}
|
||||||
|
|
||||||
|
\indenttbox
|
||||||
|
\begin{example}
|
||||||
|
Consider the matrix:
|
||||||
|
\[
|
||||||
|
\begin{bmatrix}
|
||||||
|
10 & 1 & 0 & 1 \\
|
||||||
|
0.2 & 8 & 0.2 & 0.2 \\
|
||||||
|
1 & 1 & 2 & 1 \\
|
||||||
|
-1 & -1 & -1 & -11
|
||||||
|
\end{bmatrix}
|
||||||
|
\]
|
||||||
|
|
||||||
|
Its eigenvalues are $\{ -10.870, 1.906, 7.918, 10.046 \}$.
|
||||||
|
|
||||||
|
The Gershgorin disks are:
|
||||||
|
\begin{figure}[H]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=0.4\linewidth]{./img/gershgorin.png}
|
||||||
|
\end{figure}
|
||||||
|
\end{example}
|
||||||
|
\end{theorem}
|
||||||
|
|
||||||
|
% \begin{lemma}
|
||||||
|
% If all the disks are within the unit disk, the eigenvalues are stable.
|
||||||
|
% \[
|
||||||
|
% \begin{bmatrix}
|
||||||
|
% \frac{1}{2} & \frac{1}{2} & 0 \\
|
||||||
|
% \frac{1}{3} & \frac{1}{3} & \frac{1}{3} \\
|
||||||
|
% 0 & \frac{3}{4} & \frac{1}{4}
|
||||||
|
% \end{bmatrix}
|
||||||
|
% \]
|
||||||
|
% \end{lemma}
|
||||||
|
|
||||||
|
|
||||||
|
\begin{theorem}[Perron-Frobenius] \label{th:perron_frobenius} \marginnote{Perron-Frobenius theorem}
|
||||||
|
Let $\A \in \R^{N \times N}$ with $N \geq 2$ be a non-negative matrix. It holds that:
|
||||||
|
\begin{itemize}
|
||||||
|
\item There exists a real eigenvalue $\lambda \geq 0$ that is dominant for all the other eigenvalues $\mu \in \text{spec}(\A) \smallsetminus \{\lambda\}$ (i.e., $\lambda \geq |\mu|$),
|
||||||
|
\item The right eigenvector $\v \in \R^N$ and left eigenvector $\w \in \R^N$ associated to $\lambda$ can be chosen to be non-negative.
|
||||||
|
\end{itemize}
|
||||||
|
If $\A \in \R^{N \times N}$ is irreducible, then:
|
||||||
|
\begin{itemize}
|
||||||
|
\item The eigenvalue $\lambda$ is strictly positive ($\lambda > 0$) and simple.
|
||||||
|
\item The right and left eigenvectors $\v$ and $\w$ associated to $\lambda$ are unique (up to scaling) and positive.
|
||||||
|
\end{itemize}
|
||||||
|
If $\A \in \R^{N \times N}$ is primitive, then:
|
||||||
|
\begin{itemize}
|
||||||
|
\item The eigenvalue $\lambda$ is strictly dominant for all $\mu \in \text{spec}(\A) \smallsetminus \{\lambda\}$ (i.e., $\lambda > |\mu|$).
|
||||||
|
\end{itemize}
|
||||||
|
\end{theorem}
|
||||||
|
|
||||||
|
\begin{lemma} \label{th:row_stochastic_unit_disk}
|
||||||
|
Given a row stochastic matrix $\A$, it holds that:
|
||||||
|
\begin{itemize}
|
||||||
|
\item $\lambda=1$ is an eigenvalue,
|
||||||
|
\item By \hyperref[th:gershgorin]{Gershgorin Theorem}, $\text{spec}(\A)$ is a subset of the unit disk (i.e., all Gershgorin disks lie inside the unit disk).
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\begin{figure}[H]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=0.2\linewidth]{./img/gershgorin_unit.png}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\indenttbox
|
||||||
|
\begin{corollary}
|
||||||
|
The eigenvalue $\lambda=1 \geq |\mu|$ is dominant.
|
||||||
|
\end{corollary}
|
||||||
|
\end{lemma}
|
||||||
|
|
||||||
|
\begin{lemma}
|
||||||
|
Given a row stochastic and primitive matrix $\A$, by \Cref{th:row_stochastic_unit_disk} and \hyperref[th:perron_frobenius]{Perron-Frobenius Theorem} it holds that $\lambda = 1$ is simple and strictly dominant.
|
||||||
|
|
||||||
|
\indenttbox
|
||||||
|
\begin{corollary}
|
||||||
|
The consensus averaging system is marginally stable (i.e., converges but not necessarily to the origin) as the largest distinct eigenvalue is $\lambda = 1$.
|
||||||
|
\end{corollary}
|
||||||
|
\end{lemma}
|
||||||
|
|
||||||
|
|
||||||
|
% \begin{lemma}
|
||||||
|
% \[
|
||||||
|
% \x_\text{eq} = ker(\matr{I} - \A) = \{ \vec{1}\beta \mid \beta \in \R \}
|
||||||
|
% \]
|
||||||
|
|
||||||
|
% \[
|
||||||
|
% \w^T \x^{k+1} = \w^T(\A \x^{k}) = \w^T \x^k
|
||||||
|
% \]
|
||||||
|
% i.e., $\w$ is left eigenvector of $\A$ with $\lambda = 1$.
|
||||||
|
|
||||||
|
% Therefore, the above must be true for:
|
||||||
|
% \[
|
||||||
|
% \begin{split}
|
||||||
|
% \w^T \x_\text{eq} \\
|
||||||
|
% \w^T \x^{0} \\
|
||||||
|
% \end{split}
|
||||||
|
% \]
|
||||||
|
% \[
|
||||||
|
% \w^T \vec{1}\beta \Rightarrow \beta = \frac{\w^T\x^{0}}{\w^T\vec{1}}
|
||||||
|
% \]
|
||||||
|
% \end{lemma}
|
||||||
|
|
||||||
|
\begin{theorem}[Discrete-time consensus] \marginnote{Discrete-time consensus}
|
||||||
|
Consider a discrete-time averaging system with digraph $G$ and weighted adjacency matrix $\matr{A}$. Assume $G$ strongly connected and aperiodic, and $\matr{A}$ row stochastic.
|
||||||
|
|
||||||
|
It holds that there exists a left eigenvector $\vec{w} \in \mathbb{R}^N$, $\vec{w} > 0$ such that the consensus converges to:
|
||||||
|
\[
|
||||||
|
\lim_{k \rightarrow \infty} \vec{x}^k
|
||||||
|
= \vec{1}\frac{\vec{w}^T \vec{x}^0}{\vec{w}^T\vec{1}}
|
||||||
|
= \begin{bmatrix} 1 \\ \vdots \\ 1 \end{bmatrix} \frac{\sum_{i=1}^N w_i x_i^0}{\sum_{j=1}^N w_j}
|
||||||
|
= \begin{bmatrix} 1 \\ \vdots \\ 1 \end{bmatrix} \sum_{i=1}^N \frac{w_i}{\sum_{j=1}^N w_j} x_i^0
|
||||||
|
\]
|
||||||
|
where $\tilde{w}_i = \frac{w_i}{\sum_{j=1}^N w_j}$ are all normalized and sum to 1 (i.e., they produce a convex combination).
|
||||||
|
|
||||||
|
Moreover, if $\matr{A}$ is doubly stochastic, then it holds that the consensus is the average as $\vec{w} = \vec{1}$:
|
||||||
|
\[
|
||||||
|
\lim_{k \rightarrow \infty} \vec{x}^k = \vec{1} \frac{1}{N} \sum_{i=1}^N x_i^0
|
||||||
|
\]
|
||||||
|
|
||||||
|
% \begin{proof}[Sketch of proof]
|
||||||
|
% Let $\matr{T} = \begin{bmatrix} \vec{1} & \vec{v}^2 & \cdots & \vec{v}^N \end{bmatrix}$ be a change in coordinates that transforms an adjacency matrix into its Jordan form $\matr{J}$:
|
||||||
|
% \[ \matr{J} = \matr{T}^{-1} \matr{A} \matr{T} \]
|
||||||
|
% As $\lambda=1$ is a simple eigenvalue (\Cref{th:strongly_connected_eigenvalues}), it holds that:
|
||||||
|
% \[
|
||||||
|
% \matr{J} = \begin{bmatrix}
|
||||||
|
% 1 & 0 & \cdots & 0 \\
|
||||||
|
% 0 & & & \\
|
||||||
|
% \vdots & & \matr{J}_2 & \\
|
||||||
|
% 0 & & & \\
|
||||||
|
% \end{bmatrix}
|
||||||
|
% \]
|
||||||
|
% where the eigenvalues of $\matr{J}_2 \in \mathbb{R}^{(N-1) \times (N-1)}$ lie inside the open unit disk.
|
||||||
|
|
||||||
|
% Let $\vec{x}^k = \matr{T}\bar{\vec{x}}^k$, then we have that:
|
||||||
|
% \[
|
||||||
|
% \begin{split}
|
||||||
|
% &\vec{x}^{k+1} = \matr{A} \vec{x}^{k} \\
|
||||||
|
% &\iff \matr{T} \bar{\vec{x}}^{k+1} = \matr{A} (\matr{T} \bar{\vec{x}}^k) \\
|
||||||
|
% &\iff \bar{\vec{x}}^{k+1} = \matr{T}^{-1} \matr{A} (\matr{T} \bar{\vec{x}}^k) = \matr{J}\bar{\vec{x}}^k
|
||||||
|
% \end{split}
|
||||||
|
% \]
|
||||||
|
% Therefore:
|
||||||
|
% \[
|
||||||
|
% \begin{gathered}
|
||||||
|
% \lim_{k \rightarrow \infty} \bar{\vec{x}}^k = \bar{x}_1^0 \begin{bmatrix} 1 \\ 0 \\ \vdots \\ 0 \end{bmatrix} \\
|
||||||
|
% \bar{x}_1^{k+1} = \bar{x}_1^k \quad \forall k \geq 0 \\
|
||||||
|
% \lim_{k \rightarrow \infty} \bar{x}_i^{k} = 0 \quad \forall i = 2, \dots, N \\
|
||||||
|
% \end{gathered}
|
||||||
|
% \]
|
||||||
|
% \end{proof}
|
||||||
|
|
||||||
|
\begin{proof}[Proof (Jordan-form approach)]
|
||||||
|
As $G$ is strongly connected and aperiodic, and $\A$ is row stochastic, it holds that:
|
||||||
|
\begin{itemize}
|
||||||
|
\item By \Cref{th:positive_matrix_digraph_connected}, $\A$ is primitive.
|
||||||
|
\item By \hyperref[th:perron_frobenius]{Perron-Frobenius Theorem} and \Cref{th:row_stochastic_unit_disk}, the eigenvalue $\lambda=1$ is strictly dominant and it is associated to the right eigenvector $\vec{1}$ (row stochasticity) and left eigenvector $\w$.
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
Consider the non-singular matrix $\matr{T} \in \R^{N \times N}$ defined as:
|
||||||
|
\[
|
||||||
|
\matr{T} = \begin{bmatrix}
|
||||||
|
\vert & \vert & & \vert \\
|
||||||
|
\vec{1} & \v^2 & \dots & \v^N \\
|
||||||
|
\vert & \vert & & \vert \\
|
||||||
|
\end{bmatrix} = \begin{bmatrix}
|
||||||
|
\vec{1} & \matr{W}_R
|
||||||
|
\end{bmatrix}
|
||||||
|
\qquad
|
||||||
|
\matr{T}^{-1} = \begin{bmatrix}
|
||||||
|
- & (\w)^T & - \\
|
||||||
|
- & (\w^2)^T & - \\
|
||||||
|
- & \vdots & - \\
|
||||||
|
- & (\w^N)^T & - \\
|
||||||
|
\end{bmatrix} = \begin{bmatrix}
|
||||||
|
\w^T \\ \matr{W}_L
|
||||||
|
\end{bmatrix}
|
||||||
|
\]
|
||||||
|
|
||||||
|
A change in coordinates defined as:
|
||||||
|
\[
|
||||||
|
\x \mapsto \tilde{\x} = \matr{T}^{-1} \x
|
||||||
|
\]
|
||||||
|
allows us to obtain the Jordan form $\matr{T}^{-1}\A\matr{T}$:
|
||||||
|
\[
|
||||||
|
\matr{T}^{-1}\A\matr{T} = \begin{bmatrix}
|
||||||
|
1 & 0 & \dots \\
|
||||||
|
0 & & \\
|
||||||
|
\vdots & & \matr{J}_2 \\
|
||||||
|
\end{bmatrix}
|
||||||
|
\]
|
||||||
|
with $\matr{J}_2 \in \mathbb{R}^{(N-1) \times (N-1)}$ Schur (i.e., $\text{spec}(\matr{J}_2)$ inside the open unit disk).
|
||||||
|
|
||||||
|
The dynamics $\x^{k+1} = \A \x^k$ in the new coordinate system is:
|
||||||
|
\[
|
||||||
|
\begin{split}
|
||||||
|
\tilde{\x}^{k+1} &= \matr{T}^{-1} \x^{k+1} = \matr{T}^{-1} \A \matr{T} \tilde{\x}^k \\
|
||||||
|
&= \begin{bmatrix}
|
||||||
|
1 & 0 & \dots \\
|
||||||
|
0 & & \\
|
||||||
|
\vdots & & \matr{J}_2 \\
|
||||||
|
\end{bmatrix} \tilde{\x}^k
|
||||||
|
= \begin{bmatrix}
|
||||||
|
1 & 0 & \dots \\
|
||||||
|
0 & & \\
|
||||||
|
\vdots & & \matr{J}_2 \\
|
||||||
|
\end{bmatrix}^{k+1} \tilde{\x}^0
|
||||||
|
\end{split}
|
||||||
|
\]
|
||||||
|
Let's denote:
|
||||||
|
\[
|
||||||
|
\tilde{\x}^k = \matr{T}^{-1}\x^k = \begin{bmatrix}
|
||||||
|
\w^T\x^k \\ \matr{W}_L\x^k
|
||||||
|
\end{bmatrix}
|
||||||
|
= \begin{bmatrix}
|
||||||
|
\tilde{\x}^k_{m} \\ \tilde{\x}^k_{\bot}
|
||||||
|
\end{bmatrix}
|
||||||
|
\]
|
||||||
|
We have that:
|
||||||
|
\[
|
||||||
|
\begin{split}
|
||||||
|
\lim_{k \rightarrow \infty} \tilde{\x}^k
|
||||||
|
&= \lim_{k \rightarrow \infty} \begin{bmatrix}
|
||||||
|
1 & 0 & \dots \\
|
||||||
|
0 & & \\
|
||||||
|
\vdots & & \matr{J}_2 \\
|
||||||
|
\end{bmatrix}^k \tilde{\x}^0 \\
|
||||||
|
&= \lim_{k \rightarrow \infty} \begin{bmatrix}
|
||||||
|
1 & 0 & \dots \\
|
||||||
|
0 & & \\
|
||||||
|
\vdots & & (\matr{J}_2)^k \\
|
||||||
|
\end{bmatrix} \begin{bmatrix}
|
||||||
|
\tilde{\x}^0_{m} \\ \tilde{\x}^0_{\bot}
|
||||||
|
\end{bmatrix} \\
|
||||||
|
&= \begin{bmatrix}
|
||||||
|
1 \cdot \tilde{\x}^0_{m} \\
|
||||||
|
\lim_{k \rightarrow \infty} (\matr{J}_2)^k \tilde{\x}^0_{\bot}
|
||||||
|
\end{bmatrix} \\
|
||||||
|
&= \begin{bmatrix}
|
||||||
|
\w^T \x^0 \\
|
||||||
|
0
|
||||||
|
\end{bmatrix} \\
|
||||||
|
\end{split}
|
||||||
|
\]
|
||||||
|
Note that $\lim_{k \rightarrow \infty} \matr{J}_2^k = 0$ as it is stable (i.e., all eigenvalues are in the open unit disk $|\mu| < 1$).
|
||||||
|
|
||||||
|
In the original coordinate system, the limit is:
|
||||||
|
\[
|
||||||
|
\begin{split}
|
||||||
|
\lim_{k \rightarrow \infty} \x^k
|
||||||
|
&= \lim_{k \rightarrow \infty} \matr{T} \tilde{\x}^k \\
|
||||||
|
&= \matr{T} \lim_{k \rightarrow \infty} \tilde{\x}^k \\
|
||||||
|
&= \begin{bmatrix}
|
||||||
|
\vec{1} & \matr{W}_R
|
||||||
|
\end{bmatrix} \begin{bmatrix}
|
||||||
|
\w^T \x^0 \\
|
||||||
|
0
|
||||||
|
\end{bmatrix}
|
||||||
|
= \vec{1} (\w^T \x^0)
|
||||||
|
\end{split}
|
||||||
|
\]
|
||||||
|
|
||||||
|
\indenttbox
|
||||||
|
\begin{remark}
|
||||||
|
It is assumed that $\w^T \vec{1} = 1$ (i.e., $\Vert \w \Vert_1 = 1$ as $\w > 0$), so that no normalization term is needed.
|
||||||
|
\end{remark}
|
||||||
|
\end{proof}
|
||||||
|
|
||||||
|
% \begin{proof}[Lyapunov approach]
|
||||||
|
% $\A - \vec{1}\w^T$ is rank-1. This is to change one specific eigenvalue (move 1 to 0).
|
||||||
|
|
||||||
|
% Dissensus vector represents error:
|
||||||
|
% \[
|
||||||
|
% \begin{split}
|
||||||
|
% delta^{k+1}
|
||||||
|
% = \x^{k+1} - \vec{1}\w^T \x^0 \\
|
||||||
|
% = \x^{k+1} - \vec{1}\w^T \x^{k+1} \\
|
||||||
|
% = (\matr{I} - \vec{1}\w^T) \x^{k+1} \\
|
||||||
|
% = (\matr{I} - \vec{1}\w^T) \A\x^{k} \\
|
||||||
|
% = (\A - \vec{1}\w^T) \x^{k} \\
|
||||||
|
% = (\A - \vec{1}\w^T) \delta^{k} \\
|
||||||
|
% \end{split}
|
||||||
|
% \]
|
||||||
|
|
||||||
|
% Study:
|
||||||
|
% \[
|
||||||
|
% \delta^{k+1} = (\A - \vec{1}\w^T) \delta{k}
|
||||||
|
% \]
|
||||||
|
% If $\delta^k \rightarrow 0$, then $\x^k \rightarrow\vec{1}\w^T\x^0$.
|
||||||
|
% Note $(\A - \vec{1}\w^T)$ is Schur.
|
||||||
|
|
||||||
|
% Lyapunov equation for discrete time systems:
|
||||||
|
% \[
|
||||||
|
% \bar{\A}^T \matr{P} \bar{\A} = - \matr{P} = - \matr{Q}
|
||||||
|
% \]
|
||||||
|
% where $\bar{\A}$ is the Jordan-form of $(\A - \vec{1}\w^T)$
|
||||||
|
|
||||||
|
% Select $Q_2$ to be block-diagonal and $p_1$
|
||||||
|
|
||||||
|
|
||||||
|
% \[
|
||||||
|
% V(\delta) = \delta^T (\matr{T}^{-1})^T \matr{P} \matr{T}^{-1} \delta
|
||||||
|
% \]
|
||||||
|
% \end{proof}
|
||||||
|
\end{theorem}
|
||||||
|
|
||||||
|
\end{subappendices}
|
||||||
@ -749,160 +749,206 @@
|
|||||||
\rho \Vert \z_i^{k+1} - \z^* \Vert \leq \rho^k \Vert \z_i^0 - \z^* \Vert
|
\rho \Vert \z_i^{k+1} - \z^* \Vert \leq \rho^k \Vert \z_i^0 - \z^* \Vert
|
||||||
\]
|
\]
|
||||||
|
|
||||||
{
|
\indenttbox
|
||||||
\indenttbox
|
\begin{remark}
|
||||||
\begin{remark}
|
It can be shown that gradient tracking also works with non-convex optimization and, under the correct assumptions, converges to a stationary point.
|
||||||
It can be shown that gradient tracking also works with non-convex optimization and, under the correct assumptions, converges to a stationary point.
|
\end{remark}
|
||||||
\end{remark}
|
|
||||||
}
|
|
||||||
|
|
||||||
\begin{proof}
|
|
||||||
Consider the gradient tracking algorithm written in matrix form:
|
|
||||||
\[
|
|
||||||
\begin{aligned}
|
|
||||||
\z^{k+1} &= \A \z^k - \alpha \s^k \\
|
|
||||||
\s^{k+1} &= \A \s^k + (\nabla \vec{l}(\z^{k+1}) - \nabla \vec{l}(\z^k))
|
|
||||||
\end{aligned}
|
|
||||||
\]
|
|
||||||
where $\nabla \vec{l}(\z^k) = \begin{bmatrix} l_1(\z^k_1) & \dots & l_N(\z^k_N) \end{bmatrix}$.
|
|
||||||
|
|
||||||
% \begin{remark}
|
|
||||||
% In the vector case, the Kronecker product should be applied on $\A$.
|
|
||||||
% \end{remark}
|
|
||||||
|
|
||||||
\begin{description}
|
|
||||||
\item[Equilibrium]
|
|
||||||
We want to find the equilibrium points $(\z_\text{eq}, \s_\text{eq})$ that satisfies:
|
|
||||||
\[
|
|
||||||
\begin{aligned}
|
|
||||||
\s_\text{eq} &= \A \s_\text{eq} + \nabla \vec{l}(\z_\text{eq}) - \nabla \vec{l}(\z_\text{eq}) &\iff& (\matr{I} - \A) \s_\text{eq} = 0 \\
|
|
||||||
\z_\text{eq} &= \A\z_\text{eq} - \alpha \s_\text{eq} &\iff& (\matr{I} - \A) \z_\text{eq} = -\alpha \s_\text{eq} \\
|
|
||||||
\end{aligned}
|
|
||||||
\]
|
|
||||||
It must be that:
|
|
||||||
\begin{itemize}
|
|
||||||
\item $\s_\text{eq} \in \text{ker}(\matr{I} - \A) = \{ \vec{1}\beta_1 \mid \beta_1 \in \R \}$ (as $\A$ is doubly stochastic).
|
|
||||||
\item $(\matr{I} - \A) \z_\text{eq} = - \alpha \vec{1} \beta_1$. As $\vec{1} (-\alpha \beta_1) \in \text{ker}(\matr{I} - \A)$, it must be that $\beta_1 = 0$ (as the image cannot be mapped into the kernel).
|
|
||||||
\end{itemize}
|
|
||||||
Therefore, we end up with:
|
|
||||||
\[
|
|
||||||
\begin{split}
|
|
||||||
\s_\text{eq} &= \vec{1}\beta_1 = 0 \\
|
|
||||||
\z_\text{eq} &= \A\z_\text{eq} - \alpha 0 = \matr{1} \beta_2 \quad \text{ i.e., eigenvector of $\A$} \\
|
|
||||||
\end{split}
|
|
||||||
\]
|
|
||||||
|
|
||||||
In addition, by pre-multiplying the equation of $\s$ by $\vec{1}^T$, we obtain:
|
|
||||||
\[
|
|
||||||
\begin{split}
|
|
||||||
\vec{1}^T \s^{k+1} &= \vec{1}^T \A \s^k + \vec{1}^T \nabla \vec{l}(\z^{k+1}) - \vec{1}^T \nabla \vec{l}(\z^{k}) \\
|
|
||||||
&= \vec{1}^T \s^k + \vec{1}^T \nabla \vec{l}(\z^{k+1}) - \vec{1}^T \nabla \vec{l}(\z^{k})
|
|
||||||
\end{split}
|
|
||||||
\]
|
|
||||||
Which shows the following invariance condition:
|
|
||||||
\[
|
|
||||||
\begin{aligned}
|
|
||||||
\vec{1}^T \s^{k+1} - \vec{1}^T \nabla \vec{l}(\z^{k+1})
|
|
||||||
&= \vec{1}^T \s^k - \vec{1}^T \nabla \vec{l}(\z^{k}) \\
|
|
||||||
&= \vec{1}^T \s_\text{eq} - \vec{1}^T \nabla \vec{l}(\z_\text{eq}) \\
|
|
||||||
&= \vec{1}^T \s^0 - \vec{1}^T \nabla \vec{l}(\z^{0}) \\
|
|
||||||
\end{aligned}
|
|
||||||
\]
|
|
||||||
Thus, we have that:
|
|
||||||
\[
|
|
||||||
\begin{split}
|
|
||||||
\vec{1}^T \s_\text{eq} - \vec{1}^T \nabla \vec{l}(\z_\text{eq})
|
|
||||||
&= \vec{1}^T \s^0 - \vec{1}^T \nabla \vec{l}(\z^{0}) \\
|
|
||||||
\iff 0 - \vec{1}^T \nabla \vec{l}(\vec{1}\beta_2) &= 0 \\
|
|
||||||
\end{split}
|
|
||||||
\]
|
|
||||||
Then, it must be that $\z_\text{eq} = \vec{1}\beta_2$ is an optimum with $\beta_2 = z^*$.
|
|
||||||
|
|
||||||
\item[Stability]
|
|
||||||
% Change in coordinates to avoid having $\z^{k+1}$ in $\s^{k}$. The (non-linear) transformation is:
|
|
||||||
% \[
|
|
||||||
% \begin{bmatrix}
|
|
||||||
% \z^k \\ \s^k
|
|
||||||
% \end{bmatrix}
|
|
||||||
% \mapsto
|
|
||||||
% \begin{bmatrix}
|
|
||||||
% \z^k \\ \vec{\xi}^k
|
|
||||||
% \end{bmatrix}
|
|
||||||
% =
|
|
||||||
% \begin{bmatrix}
|
|
||||||
% \z^k \\ \alpha (\nabla \vec{l}(\z^k) - \s^k)
|
|
||||||
% \end{bmatrix}
|
|
||||||
% \]
|
|
||||||
|
|
||||||
% \[
|
|
||||||
% \begin{split}
|
|
||||||
% \z^{k+1}
|
|
||||||
% &= \A\z^k - \alpha ( \frac{1}{\alpha} \vec{\xi}^k + \nabla \vec{l}(\z^k) ) \\
|
|
||||||
% \vec{\xi}^k
|
|
||||||
% &= \alpha \nabla \vec{l}(\z^{k+1}) - \alpha (\A \s^k + \nabla \vec{l}(\z^{k+1}) - \nabla \vec{l} (\z^k)) \\
|
|
||||||
% &= - \alpha \A (-\frac{1}{\alpha} \xi^k + \nabla \vec{l}(\z^k)) + \alpha \nabla \vec{l}(\z^k) \\
|
|
||||||
% &= \A \vec{\xi}^k - \alpha(\A - \vec{I}) \nabla \vec{l}(\z^k)
|
|
||||||
% \end{split}
|
|
||||||
% \]
|
|
||||||
|
|
||||||
% In matrix form:
|
|
||||||
% \[
|
|
||||||
% \begin{bmatrix}
|
|
||||||
% \z^{k+1} \\ \vec{\xi}^{k+1} = \begin{bmatrix}
|
|
||||||
% \A & \matr{I} \\ 0 & \A
|
|
||||||
% \end{bmatrix}
|
|
||||||
% \begin{bmatrix}
|
|
||||||
% \z^k \\ \vec{\xi}^k
|
|
||||||
% \end{bmatrix}
|
|
||||||
% - alpha \begin{bmatrix}
|
|
||||||
% \matr{I} \\ \A \matr{I}
|
|
||||||
% \end{bmatrix}
|
|
||||||
% \nabla \vec{l}(\z^k)
|
|
||||||
% \end{bmatrix}
|
|
||||||
% \]
|
|
||||||
% The initialization is:
|
|
||||||
% \[
|
|
||||||
% \begin{split}
|
|
||||||
% \z^0 \in \R^N \\
|
|
||||||
% \vec{\xi}^{0} = \alpha (\nabla \vec{l}(\z^0) - \s^0) = 0
|
|
||||||
% \end{split}
|
|
||||||
% \]
|
|
||||||
% The equilibrium has been shifted to:
|
|
||||||
% \[
|
|
||||||
% \begin{split}
|
|
||||||
% \z_\text{eq} = \vec{1} \z^* \\
|
|
||||||
% \vec{\xi}_\text{eq} = \alpha \nabla l(\vec{1} \z^*) = \alpha \begin{bmatrix}
|
|
||||||
% \nabla l_1(\z^*) \\ \vdots \\ \nabla l_N(\z^*)
|
|
||||||
% \end{bmatrix}
|
|
||||||
% \end{split}
|
|
||||||
% \]
|
|
||||||
|
|
||||||
|
|
||||||
% \[
|
|
||||||
% \begin{gathered}
|
|
||||||
% \begin{bmatrix}
|
|
||||||
% \z^{k+1} \\ \vec{\xi}^{k+1} = \begin{bmatrix}
|
|
||||||
% \A & \matr{I} \\ 0 & \A
|
|
||||||
% \end{bmatrix}
|
|
||||||
% \begin{bmatrix}
|
|
||||||
% \z^k \\ \vec{\xi}^k
|
|
||||||
% \end{bmatrix}
|
|
||||||
% \begin{bmatrix}
|
|
||||||
% \matr{I} \\ \A \matr{I}
|
|
||||||
% \end{bmatrix}
|
|
||||||
% \u^k
|
|
||||||
% \end{bmatrix} \\
|
|
||||||
% \vec{y}^k = \begin{bmatrix}
|
|
||||||
% \matr{I} & 0
|
|
||||||
% \end{bmatrix}
|
|
||||||
% \begin{bmatrix}
|
|
||||||
% \z^k \\ \vec{\xi}^{k}
|
|
||||||
% \end{bmatrix} \\
|
|
||||||
% -- \\
|
|
||||||
% \u^k = \nabla \vec{l}(\vec{y}^k)
|
|
||||||
% \end{gathered}
|
|
||||||
% \]
|
|
||||||
\end{description}
|
|
||||||
\end{proof}
|
|
||||||
\end{theorem}
|
\end{theorem}
|
||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
|
|
||||||
|
\begin{subappendices}
|
||||||
|
|
||||||
|
\section{Appendix: Gradient tracking optimality and stability proof}
|
||||||
|
|
||||||
|
\begin{theorem}[Gradient tracking algorithm optimality] \marginnote{Gradient tracking algorithm optimality}
|
||||||
|
If:
|
||||||
|
\begin{itemize}
|
||||||
|
\item $\matr{A}$ is the adjacency matrix of an undirected and connected communication graph $G$ such that it is doubly stochastic and $a_{ij} > 0$.
|
||||||
|
\item Each cost function $l_i$ is $\mu$-strongly convex and its gradient is $L$-Lipschitz continuous.
|
||||||
|
\end{itemize}
|
||||||
|
Then, there exists $\alpha^* > 0$ such that, for any choice of the step size $\alpha \in (0, \alpha^*)$, the sequence of local solutions $\{ \z_i^k \}_{k \in \mathbb{N}}$ of each agent generated by the gradient tracking algorithm asymptotically converges to a consensual optimal solution $\z^*$:
|
||||||
|
\[ \lim_{k \rightarrow \infty} \Vert \z_i^k - \z^* \Vert = 0 \]
|
||||||
|
|
||||||
|
Moreover, the convergence rate is linear and stability is exponential:
|
||||||
|
\[
|
||||||
|
\exists \rho \in (0,1): \Vert \z_i^{k+1} - \z^* \Vert \leq \rho \Vert \z_i^{k} - \z^* \Vert
|
||||||
|
\,\,\land\,\,
|
||||||
|
\Vert \z_i^{k} - \z^* \Vert \leq \rho^k \Vert \z_i^0 - \z^* \Vert
|
||||||
|
\]
|
||||||
|
|
||||||
|
\begin{proof}
|
||||||
|
Consider the gradient tracking algorithm written in matrix form:
|
||||||
|
\[
|
||||||
|
\begin{aligned}
|
||||||
|
\z^{k+1} &= \A \z^k - \alpha \s^k \\
|
||||||
|
\s^{k+1} &= \A \s^k + (\nabla \vec{l}(\z^{k+1}) - \nabla \vec{l}(\z^k))
|
||||||
|
\end{aligned}
|
||||||
|
\]
|
||||||
|
where $\nabla \vec{l}(\z^k) = \begin{bmatrix} \nabla l_1(\z^k_1) & \dots & \nabla l_N(\z^k_N) \end{bmatrix}^T$.
|
||||||
|
|
||||||
|
% \begin{remark}
|
||||||
|
% In the vector case, the Kronecker product should be applied on $\A$.
|
||||||
|
% \end{remark}
|
||||||
|
|
||||||
|
\begin{description}
|
||||||
|
\item[Equilibrium]
|
||||||
|
We want to find the equilibrium points $(\z_\text{eq}, \s_\text{eq})$ that satisfy:
|
||||||
|
\[
|
||||||
|
\begin{aligned}
|
||||||
|
\s_\text{eq} &= \A \s_\text{eq} + \nabla \vec{l}(\z_\text{eq}) - \nabla \vec{l}(\z_\text{eq}) &\iff& (\matr{I} - \A) \s_\text{eq} = 0 \\
|
||||||
|
\z_\text{eq} &= \A\z_\text{eq} - \alpha \s_\text{eq} &\iff& (\matr{I} - \A) \z_\text{eq} = -\alpha \s_\text{eq} \\
|
||||||
|
\end{aligned}
|
||||||
|
\]
|
||||||
|
It must be that:
|
||||||
|
\begin{itemize}
|
||||||
|
\item $\s_\text{eq} \in \text{ker}(\matr{I} - \A) = \{ \vec{1}\beta_1 \mid \beta_1 \in \R \}$ (as $\A$ is doubly stochastic).
|
||||||
|
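\item $(\matr{I} - \A) \z_\text{eq} = - \alpha \vec{1} \beta_1$. As $\A$ is doubly stochastic, $\vec{1}^T (\matr{I} - \A) = 0$, so pre-multiplying both sides by $\vec{1}^T$ gives $0 = -\alpha N \beta_1$. Therefore, it must be that $\beta_1 = 0$ (i.e., the image and the kernel of $\matr{I} - \A$ only intersect in $0$).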
|
||||||
|
\end{itemize}
|
||||||
|
Therefore, we end up with:
|
||||||
|
\[
|
||||||
|
\begin{split}
|
||||||
|
\s_\text{eq} &= \vec{1}\beta_1 = 0 \\
|
||||||
|
\z_\text{eq} &= \A\z_\text{eq} - \alpha 0 = \vec{1} \beta_2 \quad \text{ i.e., eigenvector of $\A$} \\
|
||||||
|
\end{split}
|
||||||
|
\]
|
||||||
|
|
||||||
|
In addition, by pre-multiplying the equation of $\s$ by $\vec{1}^T$, we obtain:
|
||||||
|
\[
|
||||||
|
\begin{split}
|
||||||
|
\vec{1}^T \s^{k+1} &= \vec{1}^T \A \s^k + \vec{1}^T \nabla \vec{l}(\z^{k+1}) - \vec{1}^T \nabla \vec{l}(\z^{k}) \\
|
||||||
|
&= \vec{1}^T \s^k + \vec{1}^T \nabla \vec{l}(\z^{k+1}) - \vec{1}^T \nabla \vec{l}(\z^{k})
|
||||||
|
\end{split}
|
||||||
|
\]
|
||||||
|
Which shows the following invariance condition:
|
||||||
|
\[
|
||||||
|
\begin{aligned}
|
||||||
|
\vec{1}^T \s^{k+1} - \vec{1}^T \nabla \vec{l}(\z^{k+1})
|
||||||
|
&= \vec{1}^T \s^k - \vec{1}^T \nabla \vec{l}(\z^{k}) \\
|
||||||
|
&= \vec{1}^T \s_\text{eq} - \vec{1}^T \nabla \vec{l}(\z_\text{eq}) \\
|
||||||
|
&= \vec{1}^T \s^0 - \vec{1}^T \nabla \vec{l}(\z^{0}) \\
|
||||||
|
\end{aligned}
|
||||||
|
\]
|
||||||
|
Thus, we have that:
|
||||||
|
\[
|
||||||
|
\begin{split}
|
||||||
|
\vec{1}^T \s_\text{eq} - \vec{1}^T \nabla \vec{l}(\z_\text{eq})
|
||||||
|
&= \vec{1}^T \s^0 - \vec{1}^T \nabla \vec{l}(\z^{0}) \\
|
||||||
|
\iff 0 - \vec{1}^T \nabla \vec{l}(\vec{1}\beta_2) &= 0 \\
|
||||||
|
\end{split}
|
||||||
|
\]
|
||||||
|
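Here, the right-hand side is $0$ as the algorithm is initialized with $\s^0 = \nabla \vec{l}(\z^0)$, and the condition reads $\sum_{i=1}^{N} \nabla l_i(\beta_2) = 0$. Then, it must be that $\z_\text{eq} = \vec{1}\beta_2$ is an optimum with $\beta_2 = z^*$.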
|
||||||
|
|
||||||
|
\item[Stability]
|
||||||
|
Let $\xi^k = \alpha (\nabla \vec{l}(\z^k) - \s^k)$ and apply the following change in coordinates:
|
||||||
|
\[
|
||||||
|
\begin{bmatrix}
|
||||||
|
\z^k \\ \xi^k
|
||||||
|
\end{bmatrix}
|
||||||
|
\mapsto
|
||||||
|
\begin{bmatrix}
|
||||||
|
\tilde{\z}^k \\ \tilde{\xi}^k
|
||||||
|
\end{bmatrix}
|
||||||
|
=
|
||||||
|
\begin{bmatrix}
|
||||||
|
\z^k - \vec{1}z^* \\
|
||||||
|
\xi^k - \alpha \nabla \vec{l}(\vec{1}z^*)
|
||||||
|
\end{bmatrix}
|
||||||
|
\]
|
||||||
|
So that the equilibrium of the system is shifted to $0$.
|
||||||
|
|
||||||
|
Then, exploit strong convexity to re-formulate the overall system in such a way that the Lyapunov theorem can be applied to prove exponential stability.
|
||||||
|
% Change in coordinates to avoid having $\z^{k+1}$ in $\s^{k}$. The (non-linear) transformation is:
|
||||||
|
% \[
|
||||||
|
% \begin{bmatrix}
|
||||||
|
% \z^k \\ \s^k
|
||||||
|
% \end{bmatrix}
|
||||||
|
% \mapsto
|
||||||
|
% \begin{bmatrix}
|
||||||
|
% \z^k \\ \vec{\xi}^k
|
||||||
|
% \end{bmatrix}
|
||||||
|
% =
|
||||||
|
% \begin{bmatrix}
|
||||||
|
% \z^k \\ \alpha (\nabla \vec{l}(\z^k) - \s^k)
|
||||||
|
% \end{bmatrix}
|
||||||
|
% \]
|
||||||
|
|
||||||
|
% \[
|
||||||
|
% \begin{split}
|
||||||
|
% \z^{k+1}
|
||||||
|
% &= \A\z^k - \alpha ( -\frac{1}{\alpha} \vec{\xi}^k + \nabla \vec{l}(\z^k) ) \\
|
||||||
|
% \vec{\xi}^{k+1}
|
||||||
|
% &= \alpha \nabla \vec{l}(\z^{k+1}) - \alpha (\A \s^k + \nabla \vec{l}(\z^{k+1}) - \nabla \vec{l} (\z^k)) \\
|
||||||
|
% &= - \alpha \A (-\frac{1}{\alpha} \xi^k + \nabla \vec{l}(\z^k)) + \alpha \nabla \vec{l}(\z^k) \\
|
||||||
|
% &= \A \vec{\xi}^k - \alpha(\A - \vec{I}) \nabla \vec{l}(\z^k)
|
||||||
|
% \end{split}
|
||||||
|
% \]
|
||||||
|
|
||||||
|
% In matrix form:
|
||||||
|
% \[
|
||||||
|
% \begin{bmatrix}
|
||||||
|
% \z^{k+1} \\ \vec{\xi}^{k+1} = \begin{bmatrix}
|
||||||
|
% \A & \matr{I} \\ 0 & \A
|
||||||
|
% \end{bmatrix}
|
||||||
|
% \begin{bmatrix}
|
||||||
|
% \z^k \\ \vec{\xi}^k
|
||||||
|
% \end{bmatrix}
|
||||||
|
% - \alpha \begin{bmatrix}
|
||||||
|
% \matr{I} \\ \A \matr{I}
|
||||||
|
% \end{bmatrix}
|
||||||
|
% \nabla \vec{l}(\z^k)
|
||||||
|
% \end{bmatrix}
|
||||||
|
% \]
|
||||||
|
% The initialization is:
|
||||||
|
% \[
|
||||||
|
% \begin{split}
|
||||||
|
% \z^0 \in \R^N \\
|
||||||
|
% \vec{\xi}^{0} = \alpha (\nabla \vec{l}(\z^0) - \s^0) = 0
|
||||||
|
% \end{split}
|
||||||
|
% \]
|
||||||
|
% The equilibrium has been shifted to:
|
||||||
|
% \[
|
||||||
|
% \begin{split}
|
||||||
|
% \z_\text{eq} = \vec{1} \z^* \\
|
||||||
|
% \vec{\xi}_\text{eq} = \alpha \nabla l(\vec{1} \z^*) = \alpha \begin{bmatrix}
|
||||||
|
% \nabla l_1(\z^*) \\ \vdots \\ \nabla l_N(\z^*)
|
||||||
|
% \end{bmatrix}
|
||||||
|
% \end{split}
|
||||||
|
% \]
|
||||||
|
|
||||||
|
|
||||||
|
% \[
|
||||||
|
% \begin{gathered}
|
||||||
|
% \begin{bmatrix}
|
||||||
|
% \z^{k+1} \\ \vec{\xi}^{k+1} = \begin{bmatrix}
|
||||||
|
% \A & \matr{I} \\ 0 & \A
|
||||||
|
% \end{bmatrix}
|
||||||
|
% \begin{bmatrix}
|
||||||
|
% \z^k \\ \vec{\xi}^k
|
||||||
|
% \end{bmatrix}
|
||||||
|
% \begin{bmatrix}
|
||||||
|
% \matr{I} \\ \A \matr{I}
|
||||||
|
% \end{bmatrix}
|
||||||
|
% \u^k
|
||||||
|
% \end{bmatrix} \\
|
||||||
|
% \vec{y}^k = \begin{bmatrix}
|
||||||
|
% \matr{I} & 0
|
||||||
|
% \end{bmatrix}
|
||||||
|
% \begin{bmatrix}
|
||||||
|
% \z^k \\ \vec{\xi}^{k}
|
||||||
|
% \end{bmatrix} \\
|
||||||
|
% -- \\
|
||||||
|
% \u^k = \nabla \vec{l}(\vec{y}^k)
|
||||||
|
% \end{gathered}
|
||||||
|
% \]
|
||||||
|
|
||||||
|
|
||||||
|
% Take Lyapunov function $V(\tilde{\z}, \xi^k)$ and check whether:
|
||||||
|
% \[
|
||||||
|
% V(\tilde{\z}^{k+1}, \tilde{\xi}^{k+1}) - V(\tilde{\z}^{k}, \tilde{\xi}^{k}) \leq 0
|
||||||
|
% \]
|
||||||
|
\end{description}
|
||||||
|
\end{proof}
|
||||||
|
\end{theorem}
|
||||||
|
|
||||||
|
\end{subappendices}
|
||||||
|
|||||||
Reference in New Issue
Block a user