unibo-ai-notes/src/year2/distributed-autonomous-systems/sections/_averaging_systems.tex

\chapter{Averaging systems}


\begin{description}
    \item[Distributed algorithm] \marginnote{Distributed algorithm}
        Given a network of $N$ agents that communicate according to a (fixed) digraph $G$ (each agent receives messages from its in-neighbors), a distributed algorithm computes:
        \[ x_i^{k+1} = \stf_i(x_i^k, \{ x_j^k \}_{j \in \mathcal{N}_i^\text{IN}}) \quad \forall i \in \{ 1, \dots, N \} \]
        where $x_i^k$ is the state of agent $i$ at time $k$ and $\stf_i$ is a local state transition function that depends on the current input states.

        \begin{remark}
            Out-neighbors can also be used.
        \end{remark}

        \begin{remark}
            If all nodes have a self-loop, the notation can be compacted as:
            \[
            x_i^{k+1} = \stf_i(\{ x_j \}_{j \in \mathcal{N}_i^\text{IN}})
            \quad
            \text{or}
            \quad
            x_i^{k+1} = \stf_i(\{ x_j \}_{j \in \mathcal{N}_i^\text{OUT}})
            \]
        \end{remark}
\end{description}


\section{Discrete-time averaging algorithm}

\begin{description}
    \item[Linear averaging distributed algorithm (in-neighbors)] \marginnote{Linear averaging distributed algorithm (in-neighbors)}
        Given the communication digraph with self-loops $G^\text{comm} = (I, E)$ (i.e., $(j, i) \in E$ indicates that $j$ sends messages to $i$), a linear averaging distributed algorithm is defined as:
        \[ x_i^{k+1} = \sum_{j \in \mathcal{N}_i^\text{IN}} a_{ij} x_j^k \quad i \in \{1, \dots, N\} \]
        where $a_{ij} > 0$ is the weight of the edge $(j, i) \in E$.

        \begin{description}
            \item[Linear time-invariant (LTI) autonomous system] \marginnote{Linear time-invariant (LTI) autonomous system}
                By defining $a_{ij} = 0$ for $(j, i) \notin E$, the formulation becomes:
                \[ x_i^{k+1} = \sum_{j=1}^N a_{ij} x_j^k  \quad i \in \{ 1, \dots, N \} \]

                In matrix form, it becomes:
                \[ \vec{x}^{k+1} = \matr{A}^T \vec{x}^k \]
                where $\matr{A}$ is the adjacency matrix of $G^\text{comm}$.

                \begin{remark}
                    This model is inconsistent with respect to graph theory as weights are inverted (i.e., $a_{ij}$ refers to the edge $(j, i)$).
                \end{remark}
        \end{description}

    \item[Linear averaging distributed algorithm (out-neighbors)] \marginnote{Linear averaging distributed algorithm (out-neighbors)}
        Given a fixed sensing digraph with self-loops $G^\text{sens} = (I, E)$ (i.e., $(i, j) \in E$ indicates that $j$ sends messages to $i$), the algorithm is defined as:
        \[ x_i^{k+1} = \sum_{j \in \mathcal{N}_i^\text{OUT}} a_{ij} x_j^k = \sum_{j=1}^{N} a_{ij} x_j^k \]
        In matrix form, it becomes:
        \[ \vec{x}^{k+1} = \matr{A} \vec{x}^k \]
        where $\matr{A}$ is the weighted adjacency matrix of $G^\text{sens}$.
\end{description}


\subsection{Stochastic matrices}

\begin{description}
    \item[Row stochastic] \marginnote{Row stochastic}
        Given a square matrix $\matr{A}$, it is row stochastic if its rows sum to 1:
        \[ \matr{A}\vec{1} = \vec{1} \]

    \item[Column stochastic] \marginnote{Column stochastic}
        Given a square matrix $\matr{A}$, it is column stochastic if its columns sum to 1:
        \[ \matr{A}^T\vec{1} = \vec{1} \]

    \item[Doubly stochastic] \marginnote{Doubly stochastic}
        Given a square matrix $\matr{A}$, it is doubly stochastic if it is both row and column stochastic.
\end{description}

\begin{lemma}
    An adjacency matrix $\matr{A}$ is doubly stochastic if it is row stochastic and the graph $G$ associated to it is weight balanced and has positive weights.
\end{lemma}

\begin{lemma} \phantomsection\label{th:strongly_connected_eigenvalues}
    Given a digraph $G$ with adjacency matrix $\matr{A}$, if $G$ is strongly connected and aperiodic, and $\matr{A}$ is row stochastic, its eigenvalues are such that:
    \begin{itemize}
        \item $\lambda = 1$ is a simple eigenvalue (i.e., algebraic multiplicity of 1),
        \item All others $\mu$ are $|\mu| < 1$.
    \end{itemize}

    \indenttbox
    \begin{remark}
        For the lemma to hold, it is necessary and sufficient that $G$ contains a globally reachable node and the subgraph of globally reachable nodes is aperiodic.
    \end{remark}
\end{lemma}


\subsection{Consensus}

\begin{theorem}[Discrete-time consensus] \marginnote{Discrete-time consensus}
    Consider a discrete-time averaging system with digraph $G$ and weighted adjacency matrix $\matr{A}$. Assume $G$ strongly connected and aperiodic, and $\matr{A}$ row stochastic.

    It holds that there exists a left eigenvector $\vec{w} \in \mathbb{R}^N$, $\vec{w} > 0$ such that the consensus converges to:
        \[
            \lim_{k \rightarrow \infty} \vec{x}^k
            = \vec{1}\frac{\vec{w}^T \vec{x}^0}{\vec{w}^T\vec{1}}
            = \begin{bmatrix} 1 \\ \vdots \\ 1 \end{bmatrix} \frac{\sum_{i=1}^N w_i x_i^0}{\sum_{j=1}^N w_j}
            = \begin{bmatrix} 1 \\ \vdots \\ 1 \end{bmatrix} \sum_{i=1}^N \frac{w_i}{\sum_{j=1}^N w_j} x_i^0
        \]
        where $\tilde{w}_i = \frac{w_i}{\sum_{i=j}^N w_j}$ are all normalized and sum to 1 (i.e., they produce a convex combination).

    Moreover, if $\matr{A}$ is doubly stochastic, then it holds that the consensus is the average:
    \[
        \lim_{k \rightarrow \infty} \vec{x}^k = \vec{1} \frac{1}{N} \sum_{i=1}^N x_i^0
    \]

    \begin{proof}[Sketch of proof]
        Let $\matr{T} = \begin{bmatrix} \vec{1} & \vec{v}^2 & \cdots & \vec{v}^N \end{bmatrix}$ be a change in coordinates that transforms an adjacency matrix into its Jordan form $\matr{J}$:
        \[ \matr{J} = \matr{T}^{-1} \matr{A} \matr{T} \]
        As $\lambda=1$ is a simple eigenvalue (\Cref{th:strongly_connected_eigenvalues}), it holds that:
        \[
            \matr{J} = \begin{bmatrix}
                1 & 0 & \cdots & 0 \\
                0 & & &  \\
                \vdots & & \matr{J}_2 &  \\
                0 & & &  \\
            \end{bmatrix}
        \]
        where the eigenvalues of $\matr{J}_2 \in \mathbb{R}^{(N-1) \times (N-1)}$ lie inside the open unit disk.

        Let $\vec{x}^k = \matr{T}\bar{\vec{x}}^k$, then we have that:
        \[
            \begin{split}
                &\vec{x}^{k+1} = \matr{A} \vec{x}^{k} \\
                &\iff \matr{T} \bar{\vec{x}}^{k+1} = \matr{A} (\matr{T} \bar{\vec{x}}^k) \\
                &\iff \bar{\vec{x}}^{k+1} = \matr{T}^{-1} \matr{A} (\matr{T} \bar{\vec{x}}^k) = \matr{J}\bar{\vec{x}}^k
            \end{split}
        \]
        Therefore:
        \[
            \begin{gathered}
                \lim_{k \rightarrow \infty} \bar{\vec{x}}^k = \bar{x}_1^0 \begin{bmatrix} 1 \\ 0 \\ \vdots \\ 0 \end{bmatrix} \\
                \bar{x}_1^{k+1} = \bar{x}_1^k \quad \forall k \geq 0 \\
                \lim_{k \rightarrow \infty} \bar{x}_i^{k} = 0 \quad \forall i = 2, \dots, N \\
            \end{gathered}
        \]
    \end{proof}
\end{theorem}

\begin{example}[Metropolis-Hasting weights]
    Given an undirected unweighted graph $G$ with edges of degrees $d_1, \dots, d_n$, Metropolis-Hasting weights are defined as:
    \[
        a_{ij} = \begin{cases}
            \frac{1}{1+\max\{ d_i, d_j \}} & \text{if $(i, j) \in E$ and $i \neq j$} \\
            1 - \sum_{h \in \mathcal{N}_i \smallsetminus \{i\}} a_{ih} & \text{if $i=j$} \\
            0 & \text{otherwise}
        \end{cases}
    \]
    The matrix $\matr{A}$ of Metropolis-Hasting weights is symmetric and doubly stochastic.
\end{example}


\section{Discrete-time averaging algorithm over time-varying graphs}


\subsection{Time-varying digraphs}

\begin{description}
    \item[Time-varying digraph] \marginnote{Time-varying digraph}
        Graph $G=(I, E(k))$ that changes at each iteration $k$. It can be described by a sequence $\{ G(k) \}_{k \geq 0}$.

    \item[Jointly strongly connected digraph] \marginnote{Jointly strongly connected digraph}
        Time-varying digraph that is asymptotically strongly connected:
        \[ \forall k \geq 0: \bigcup_{\tau=k}^{+\infty} G(\tau) \text{ is strongly connected} \]

    \item[Uniformly jointly strongly/$B$-strongly connected digraph]   \marginnote{Uniformly jointly strongly/$B$-strongly connected digraph}
        Time-varying digraph that is strongly connected in $B$ steps:
        \[ \forall k \geq 0, \exists B \in \mathbb{N}: \bigcup_{\tau=k}^{k+B} G(\tau) \text{ is strongly connected} \]
\end{description}

\begin{remark}
    (Uniformly) jointly strongly connected digraph can be disconnected at some time steps $k$.
\end{remark}

\begin{description}
    \item[Averaging distributed algorithm] \marginnote{Averaging distributed algorithm over time-varying digraph}
        Given a time-varying digraph $\{ G(k) \}_{k \geq 0}$ (always with self-loops), in- and out-neighbors distributed algorithms can be formulated as:
        \[
            x_i^{k+1} = \sum_{j \in \mathcal{N}_i^\text{IN}(k)} a_{ij}(k) x_j^k
            \quad
            x_i^{k+1} = \sum_{j \in \mathcal{N}_i^\text{OUT}(k)} a_{ij}(k) x_j^k
        \]

        \begin{description}
            \item[Linear time-varying (LTV) discrete-time system] \marginnote{Linear time-varying (LTV) discrete-time system}
                In matrix form, it can be formulated as:
                \[ \vec{x}^{k+1} = \matr{A}(k) \vec{x}^k \]
        \end{description}
\end{description}


\subsection{Consensus}

\begin{theorem}[Discrete-time consensus over time-varying graphs] \marginnote{Discrete-time consensus over time-varying graphs}
    Consider a time-varying discrete-time average system with digraphs $\{G(k)\}_{k \geq 0}$ (all with self-loops) and weighted adjacency matrices $\{\matr{A}(k)\}_{k \geq 0}$. Assume:
    \begin{itemize}
        \item Each non-zero edge weight $a_{ij}(k)$, self-loops included, are larger than a constant $\varepsilon > 0$,
        \item There exists $B \in \mathbb{N}$ such that $\{G(k)\}_{k \geq 0}$ is $B$-strongly connected.
    \end{itemize}

    It holds that there exists a vector $\vec{w} \in \mathbb{R}^N$, $\vec{w} > 0$ such that the consensus converges to:
        \[
            \lim_{k \rightarrow \infty} \vec{x}^k
            = \vec{1}\frac{\vec{w}^T \vec{x}^0}{\vec{w}^T\vec{1}}
        \]

    Moreover, if each $\matr{A}(k)$ is doubly stochastic, it holds that the consensus is the average:
    \[
        \lim_{k \rightarrow \infty} \vec{x}^k = \vec{1} \frac{1}{N} \sum_{i=1}^N x_i^0
    \]
\end{theorem}


\section{Continuous-time averaging algorithm}

\subsection{Laplacian dynamics}

\begin{description}
    \item[Network of dynamic systems] \marginnote{Network of dynamic systems}
        Network described by the ODEs:
        \[ \dot{x}_i(t) = u_i(t) \quad \forall i \in \{ 1, \dots, N \} \]
        with states $x_i \in \mathbb{R}$, inputs $u_i \in \mathbb{R}$, and communication following a digraph $G$.

    \item[Laplacian dynamics system] \marginnote{Laplacian dynamics system}
        Consider a network of dynamic systems where $u_i$ is defined as a proportional controller (i.e., only communicating $(i, j)$ have a non-zero weight $a_{ij}$):
        \[
            \begin{split}
                u_i(t)
                    &= - \sum_{j \in \mathcal{N}_i^\text{OUT}} a_{ij} \Big( x_i(t) - x_j(t) \Big) \\
                    &= - \sum_{j=1}^{N} a_{ij} \Big( x_i(t) - x_j(t) \Big)
            \end{split}
        \]

        \begin{remark}
            With this formulation, consensus can be seen as the problem of minimizing the error defined as the difference between the states of two nodes.
        \end{remark}

        \begin{remark}
            A definition with in-neighbors also exists.
        \end{remark}

        % \[
        %     \dot{x}_i(t) =
        %     -\sum_{j \in \mathcal{N}_i^\text{OUT}} a_{ij} (x_i(t) - x_j(t))
        %     -\sum_{j=1}^N a_{ij} (x_i(t) - x_j(t))
        % \]
        % $a_{ij} = 0$ if $(i, j) \notin E$.

        \begin{theorem}[Linear time invariant (LTI) continuous-time system] \phantomsection\label{th:lti_continuous} \marginnote{Linear time invariant (LTI) continuous-time system}
            With $\vec{x} = \begin{bmatrix} x_1 & \dots & x_N \end{bmatrix}^T$, the system can be written in matrix form as:
            \[ \dot{\vec{x}}(t) = - \matr{L} \vec{x}(t) \]
            where $\matr{L}$ is the Laplacian associated with the communication digraph $G$.

            \begin{proof}
                The system is defined as:
                \[
                    \dot{x}_i(t) = - \sum_{j=1}^{N} a_{ij} \Big( x_i(t) - x_j(t) \Big)
                \]
                By rearranging, we have that:
                \[
                    \begin{split}
                        \dot{x}_i(t)
                            &= - \left( \sum_{j=1}^{N} a_{ij} \right) x_i(t) + \sum_{j=1}^{N} a_{ij} x_j(t) \\
                            &= -\outdeg[i] x_i(t) + (\matr{A}\vec{x}(t))_i
                    \end{split}
                \]
                Which in matrix form is:
                \[
                    \begin{split}
                        \dot{\vec{x}}(t)
                            &= - \matr{D}^\text{OUT} \vec{x}(t) + \matr{A} \vec{x}(t) \\
                            &= - (\matr{D}^\text{OUT} - \matr{A}) \vec{x}(t)
                    \end{split}
                \]
                By definition, $\matr{L} = \matr{D}^\text{OUT} - \matr{A}$. Therefore, we have that:
                \[ \dot{\vec{x}}(t) = - \matr{L} \vec{x}(t) \]
            \end{proof}
        \end{theorem}

        \begin{remark}
            By \Cref{th:lti_continuous}, row/column stochasticity is not required for consensus. Instead, the requirement is for the matrix to be the Laplacian.
        \end{remark}
\end{description}


\subsection{Consensus}

\begin{lemma}
    It holds that:
    \[
        \matr{L}\vec{1}
        = \matr{D}^\text{OUT} \vec{1} - \matr{A}\vec{1}
        = \begin{bmatrix} \outdeg[1] \\ \vdots \\ \outdeg[i] \end{bmatrix} - \begin{bmatrix} \outdeg[1] \\ \vdots \\ \outdeg[i] \end{bmatrix}
        = 0
    \]
\end{lemma}

\begin{lemma} \phantomsection\label{th:weighted_laplacian_eigenvalues}
    The Laplacian $\matr{L}$ of a weighted digraph has an eigenvalue $\lambda=0$ and all the others have strictly positive real part.
\end{lemma}

\begin{lemma}
    Given a weighted digraph $G$ with Laplacian $\matr{L}$, the following are equivalent:
    \begin{itemize}
        \item $G$ is weight balanced.
        \item $\vec{1}$ is a left eigenvector of $\matr{L}$: $\vec{1}^T\matr{L} = 0$ with eigenvalue $0$.
    \end{itemize}
\end{lemma}

\begin{lemma} \phantomsection\label{th:connected_simple_eigenvalue}
    If a weighted digraph $G$ is strongly connected, then $\lambda = 0$ is a simple eigenvalue of $\matr{L}$.
\end{lemma}

\begin{theorem}[Continuous-time consensus] \marginnote{Continuous-time consensus}
    Consider a continuous-time average system with a strongly connected weighted digraph $G$ and Laplacian $\matr{L}$. Assume that the system follows the Laplacian dynamics $\dot{\vec{x}}(t) = - \matr{L}\vec{x}(t)$ for $t \geq 0$.

    It holds that there exists a left eigenvector $\vec{w}$ of $\matr{L}$ with eigenvalue $\lambda=0$ such that the consensus converges to:
    \[
        \lim_{t \rightarrow \infty} \vec{x}(t) = \vec{1} \left( \frac{\vec{w}^T \vec{x}(0)}{\vec{w}^T \vec{1}} \right)
    \]

    Moreover, if $G$ is weight balanced, then it holds that the consensus is the average:
    \[
        \lim_{t \rightarrow \infty} \vec{x}(t) = \vec{1} \frac{\sum_{i=1}^N x_i(0)}{N}
    \]

    % \begin{proof}

    % \end{proof}
\end{theorem}

\begin{remark}
    The result also holds for unweighted digraphs as $\vec{1}$ is both a left and right eigenvector of $\matr{L}$.
\end{remark}