Rearranged sections

2025-04-06 15:42:39 +02:00
parent f7e815cc8c
commit fe682f2ff9


@@ -430,7 +430,43 @@
\end{description}
\subsection{Optimization methods}
\subsection{Learning paradigms}
\begin{description}
\item[Federated learning] \marginnote{Federated learning}
Problem in which $N$ agents, each with its own local and private data $\mathcal{D}^{i}$, want to learn a common set of parameters $\z^*$ by minimizing the same loss function evaluated on their respective data points:
\[
\min_\z \sum_{i=1}^{N} l(\z; \mathcal{D}^i)
\]
A centralized parameter server (master) is responsible for aggregating the estimates of the agents (e.g., by selecting a subset of agents and averaging their estimates); a minimal sketch of one such round is given after this description.
% \[
% \z^{t+1} = \z^k - \alpha \sum_{i \in I_k} \nabla l(\z; \mathcal{D}^i, p^i)
% \]
\item[Distributed learning] \marginnote{Distributed learning}
Federated learning where there is no centralized entity and agents communicate only with their neighbors.
\end{description}
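A minimal sketch of one federated round under the following assumptions: each selected agent takes a local gradient step on its private data and the master aggregates by averaging. The least-squares loss, the synthetic data, and the step size below are placeholders, not part of the formulation above.
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)

# Placeholder private datasets D^i: one least-squares problem per agent.
N, d = 5, 3
data = [(rng.normal(size=(20, d)), rng.normal(size=20)) for _ in range(N)]

def local_gradient(z, D):
    # Gradient of the placeholder loss l(z; D^i) = 0.5 * ||A z - b||^2.
    A, b = D
    return A.T @ (A @ z - b)

def federated_round(z, data, alpha=0.01, selected=None):
    # The master broadcasts z, the selected agents take a local gradient
    # step on their own data, and the master averages their estimates.
    if selected is None:
        selected = range(len(data))
    estimates = [z - alpha * local_gradient(z, data[i]) for i in selected]
    return np.mean(estimates, axis=0)

z = np.zeros(d)
for _ in range(100):
    z = federated_round(z, data)
\end{verbatim}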
\begin{figure}[H]
\centering
\begin{subfigure}{0.4\linewidth}
\centering
\includegraphics[width=0.55\linewidth]{./img/federated_learning.png}
\caption{Federated learning}
\end{subfigure}
\begin{subfigure}{0.4\linewidth}
\centering
\includegraphics[width=0.7\linewidth]{./img/distributed_learning.png}
\caption{Distributed learning}
\end{subfigure}
\end{figure}
\section{Federated learning}
\subsection{Batch gradient method}
\begin{description}
\item[Batch gradient method] \marginnote{Batch gradient method}
@@ -442,7 +478,12 @@
\begin{remark}
Computing the full gradient in this way can be expensive, since every agent's gradient must be evaluated at every iteration; see the sketch after this description.
\end{remark}
\end{description}
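A minimal sketch of the batch update, reusing the same placeholder least-squares loss as above; every agent's gradient is computed at every iteration, which is what makes the method expensive.
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)
N, d = 5, 3
# Placeholder private datasets D^i (least-squares problems).
data = [(rng.normal(size=(20, d)), rng.normal(size=20)) for _ in range(N)]

def local_gradient(z, D):
    # Gradient of the placeholder loss l(z; D^i) = 0.5 * ||A z - b||^2.
    A, b = D
    return A.T @ (A @ z - b)

z, alpha = np.zeros(d), 0.01
for k in range(200):
    # Batch step: sum the gradients of all N agents before updating.
    z = z - alpha * sum(local_gradient(z, D) for D in data)
\end{verbatim}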
\subsection{Incremental gradient method}
\begin{description}
\item[Incremental gradient method] \marginnote{Incremental gradient method}
At each iteration $k$, compute the descent direction using only the loss of a single agent $i^k$ (see the sketch after this description):
\[
@@ -469,6 +510,8 @@
\end{description}
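A minimal sketch of the incremental update, assuming a cyclic selection rule for $i^k$ (the definition does not fix the rule) and the same placeholder loss as above.
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)
N, d = 5, 3
# Placeholder private datasets D^i (least-squares problems).
data = [(rng.normal(size=(20, d)), rng.normal(size=20)) for _ in range(N)]

def local_gradient(z, D):
    # Gradient of the placeholder loss l(z; D^i) = 0.5 * ||A z - b||^2.
    A, b = D
    return A.T @ (A @ z - b)

z, alpha = np.zeros(d), 0.01
for k in range(500):
    i_k = k % N                                   # cyclic selection rule (one possible choice)
    z = z - alpha * local_gradient(z, data[i_k])  # only agent i^k contributes at iteration k
\end{verbatim}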
\subsection{Stochastic gradient descent}
\begin{description}
\item[Stochastic gradient descent (SGD)] \marginnote{Stochastic gradient descent (SGD)}
Instance of the incremental gradient method in which the selection rule follows an unknown probability distribution.
@@ -507,7 +550,12 @@
\item[Mini-batch SGD] \marginnote{Mini-batch SGD}
SGD where the update at each time step $k$ is based on a set $\mathcal{I}^k \subset \{ 1, \dots, N \}$ of realizations of $\mathcal{W}$ (see the sketch after this description):
\[ \z^{k+1} = \z^k - \alpha \sum_{i \in \mathcal{I}^k} \nabla l(\z^k, w^i) \]
\end{description}
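A minimal sketch of mini-batch SGD, drawing $\mathcal{I}^k$ uniformly at random (an assumption) over placeholder realizations $w^i$; setting the batch size to $1$ recovers plain SGD.
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)
N, d, B = 50, 3, 8   # N realizations of W, mini-batch size |I^k| = B
# Placeholder realizations w^i = (a^i, b^i) for a scalar least-squares loss.
W = [(rng.normal(size=d), rng.normal()) for _ in range(N)]

def grad_l(z, w):
    # Gradient of the placeholder loss l(z, w^i) = 0.5 * (a^T z - b)^2.
    a, b = w
    return (a @ z - b) * a

z, alpha = np.zeros(d), 0.05
for k in range(300):
    I_k = rng.choice(N, size=B, replace=False)         # draw the mini-batch I^k at random
    z = z - alpha * sum(grad_l(z, W[i]) for i in I_k)  # B = 1 gives plain SGD
\end{verbatim}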
\subsection{Adaptive momentum}
\begin{description}
\item[Adaptive momentum (ADAM)] \marginnote{Adaptive momentum (ADAM)}
Method based on the first and second moments of the gradient:
\[
@@ -533,39 +581,6 @@
\subsection{Learning paradigms}
\begin{description}
\item[Federated learning] \marginnote{Federated learning}
Problem in which $N$ agents, each with its own local and private data $\mathcal{D}^{i}$, want to learn a common set of parameters $\z^*$ by minimizing the same loss function evaluated on their respective data points:
\[
\min_\z \sum_{i=1}^{N} l(\z; \mathcal{D}^i)
\]
A centralized parameter server (master) is responsible for aggregating the estimates of the agents (e.g., by selecting a subset of agents and averaging their estimates).
% \[
% \z^{t+1} = \z^k - \alpha \sum_{i \in I_k} \nabla l(\z; \mathcal{D}^i, p^i)
% \]
\item[Distributed learning] \marginnote{Distributed learning}
Federated learning where there is no centralized entity and agents communicate only with their neighbors.
\end{description}
\begin{figure}[H]
\centering
\begin{subfigure}{0.45\linewidth}
\centering
\includegraphics[width=0.55\linewidth]{./img/federated_learning.png}
\caption{Federated learning}
\end{subfigure}
\begin{subfigure}{0.45\linewidth}
\centering
\includegraphics[width=0.7\linewidth]{./img/distributed_learning.png}
\caption{Distributed learning}
\end{subfigure}
\end{figure}
\section{Distributed cost-coupled/consensus optimization}
\begin{description}
@@ -642,9 +657,6 @@
Then, each agent converges to an optimal solution $\z^*$.
\end{theorem}
\subsection{Gradient tracking algorithm}
\begin{theorem}
The distributed gradient algorithm does not converge with a constant step size.
@@ -672,6 +684,8 @@
\end{theorem}
\subsection{Gradient tracking algorithm}
\begin{description}
\item[Dynamic average consensus] \marginnote{Dynamic average consensus}
Consensus algorithm where each agent measures a signal $r_i^k$ and wants to estimate the average signal of all agents: