mirror of
https://github.com/NotXia/unibo-ai-notes.git
synced 2025-12-14 18:51:52 +01:00
Rearranged sections
This commit is contained in:
@ -430,7 +430,43 @@
|
||||
\end{description}
|
||||
|
||||
|
||||
\subsection{Optimization methods}
|
||||
\subsection{Learning paradigms}
|
||||
|
||||
\begin{description}
|
||||
\item[Federated learning] \marginnote{Federated learning}
|
||||
Problem where $N$ agents with their local and private data $\mathcal{D}^{i}$ want to learn a common set of parameters $\z^*$ based on the same loss function (evaluated on different data points):
|
||||
\[
|
||||
\min_\z \sum_{i=1}^{N} l(\z; \mathcal{D}^i)
|
||||
\]
|
||||
A centralized parameter server (master) is responsible for aggregating the estimates of the agents (e.g., pick some nodes and average them).
|
||||
% \[
|
||||
% \z^{k+1} = \z^k - \alpha \sum_{i \in I_k} \nabla l(\z; \mathcal{D}^i, p^i)
|
||||
% \]
|
||||
|
||||
\item[Distributed learning] \marginnote{Distributed learning}
|
||||
Federated learning where there is no centralized entity and agents communicate with their neighbors only.
|
||||
\end{description}
|
||||
|
||||
\begin{figure}[H]
|
||||
\centering
|
||||
\begin{subfigure}{0.4\linewidth}
|
||||
\centering
|
||||
\includegraphics[width=0.55\linewidth]{./img/federated_learning.png}
|
||||
\caption{Federated learning}
|
||||
\end{subfigure}
|
||||
\begin{subfigure}{0.4\linewidth}
|
||||
\centering
|
||||
\includegraphics[width=0.7\linewidth]{./img/distributed_learning.png}
|
||||
\caption{Distributed learning}
|
||||
\end{subfigure}
|
||||
\end{figure}
|
||||
|
||||
|
||||
|
||||
\section{Federated learning}
|
||||
|
||||
|
||||
\subsection{Batch gradient method}
|
||||
|
||||
\begin{description}
|
||||
\item[Batch gradient method] \marginnote{Batch gradient method}
|
||||
@ -442,7 +478,12 @@
|
||||
\begin{remark}
|
||||
Computing the full gradient in this way at every iteration can be expensive.
|
||||
\end{remark}
|
||||
\end{description}
|
||||
|
||||
|
||||
\subsection{Incremental gradient method}
|
||||
|
||||
\begin{description}
|
||||
\item[Incremental gradient method] \marginnote{Incremental gradient method}
|
||||
At each iteration $k$, compute the direction by considering the loss of a single agent $i^k$:
|
||||
\[
|
||||
@ -469,6 +510,8 @@
|
||||
\end{description}
|
||||
|
||||
|
||||
\subsection{Stochastic gradient descent}
|
||||
|
||||
\begin{description}
|
||||
\item[Stochastic gradient descent (SGD)] \marginnote{Stochastic gradient descent (SGD)}
|
||||
Instance of incremental gradient method where the selection rule follows an unknown distribution.
|
||||
@ -507,7 +550,12 @@
|
||||
\item[Mini-batch SGD] \marginnote{Mini-batch SGD}
|
||||
SGD where the update at each time step $k$ is based on a set $\mathcal{I}^k \subset \{ 1, \dots, N \}$ of realizations of $\mathcal{W}$:
|
||||
\[ \z^{k+1} = \z^k - \alpha \sum_{i \in \mathcal{I}^k} \nabla l(\z^k; w^i) \]
|
||||
\end{description}
|
||||
|
||||
|
||||
\subsection{Adaptive momentum}
|
||||
|
||||
\begin{description}
|
||||
\item[Adaptive momentum (ADAM)] \marginnote{Adaptive momentum (ADAM)}
|
||||
Method based on the first and second moments of the gradient:
|
||||
\[
|
||||
@ -533,39 +581,6 @@
|
||||
|
||||
|
||||
|
||||
\subsection{Learning paradigms}
|
||||
|
||||
\begin{description}
|
||||
\item[Federated learning] \marginnote{Federated learning}
|
||||
Problem where $N$ agents with their local and private data $\mathcal{D}^{i}$ want to learn a common set of parameters $\z^*$ based on the same loss function (evaluated on different data points):
|
||||
\[
|
||||
\min_\z \sum_{i=1}^{N} l(\z; \mathcal{D}^i)
|
||||
\]
|
||||
A centralized parameter server (master) is responsible for aggregating the estimates of the agents (e.g., pick some nodes and average them).
|
||||
% \[
|
||||
% \z^{k+1} = \z^k - \alpha \sum_{i \in I_k} \nabla l(\z; \mathcal{D}^i, p^i)
|
||||
% \]
|
||||
|
||||
\item[Distributed learning] \marginnote{Distributed learning}
|
||||
Federated learning where there is no centralized entity and agents communicate with their neighbors only.
|
||||
\end{description}
|
||||
|
||||
\begin{figure}[H]
|
||||
\centering
|
||||
\begin{subfigure}{0.45\linewidth}
|
||||
\centering
|
||||
\includegraphics[width=0.55\linewidth]{./img/federated_learning.png}
|
||||
\caption{Federated learning}
|
||||
\end{subfigure}
|
||||
\begin{subfigure}{0.45\linewidth}
|
||||
\centering
|
||||
\includegraphics[width=0.7\linewidth]{./img/distributed_learning.png}
|
||||
\caption{Distributed learning}
|
||||
\end{subfigure}
|
||||
\end{figure}
|
||||
|
||||
|
||||
|
||||
\section{Distributed cost-coupled/consensus optimization}
|
||||
|
||||
\begin{description}
|
||||
@ -642,9 +657,6 @@
|
||||
Then, each agent converges to an optimal solution $\z^*$.
|
||||
\end{theorem}
|
||||
|
||||
|
||||
\subsection{Gradient tracking algorithm}
|
||||
|
||||
\begin{theorem}
|
||||
The distributed gradient algorithm does not converge with a constant step size.
|
||||
|
||||
@ -672,6 +684,8 @@
|
||||
\end{theorem}
|
||||
|
||||
|
||||
\subsection{Gradient tracking algorithm}
|
||||
|
||||
\begin{description}
|
||||
\item[Dynamic average consensus] \marginnote{Dynamic average consensus}
|
||||
Consensus algorithm where each agent measures a signal $r_i^k$ and wants to estimate the average signal of all agents:
|
||||
|
||||
Reference in New Issue
Block a user