mirror of
https://github.com/NotXia/unibo-ai-notes.git
synced 2025-12-14 18:51:52 +01:00
Rearranged sections
This commit is contained in:
@ -430,7 +430,43 @@
|
||||
\end{description}
|
||||
|
||||
|
||||
\subsection{Optimization methods}
|
||||
\subsection{Learning paradigms}
|
||||
|
||||
\begin{description}
|
||||
\item[Federated learning] \marginnote{Federated learning}
|
||||
Problem where $N$ agents with their local and private data $\mathcal{D}^{i}$ want to learn a common set of parameters $\z^*$ based on the same loss function (evaluated on different data points):
|
||||
\[
|
||||
\min_\z \sum_{i=1}^{N} l(\z; \mathcal{D}^i)
|
||||
\]
|
||||
A centralized parameter server (master) is responsible for aggregating the estimates of the agents (e.g., pick some nodes and average them).
|
||||
% \[
|
||||
% \z^{k+1} = \z^k - \alpha \sum_{i \in I_k} \nabla l(\z; \mathcal{D}^i, p^i)
|
||||
% \]
|
||||
|
||||
\item[Distributed learning] \marginnote{Distributed learning}
|
||||
Federated learning where there is no centralized entity and agents communicate with their neighbors only.
|
||||
\end{description}
|
||||
|
||||
\begin{figure}[H]
|
||||
\centering
|
||||
\begin{subfigure}{0.4\linewidth}
|
||||
\centering
|
||||
\includegraphics[width=0.55\linewidth]{./img/federated_learning.png}
|
||||
\caption{Federated learning}
|
||||
\end{subfigure}
|
||||
\begin{subfigure}{0.4\linewidth}
|
||||
\centering
|
||||
\includegraphics[width=0.7\linewidth]{./img/distributed_learning.png}
|
||||
\caption{Distributed learning}
|
||||
\end{subfigure}
|
||||
\end{figure}
|
||||
|
||||
|
||||
|
||||
\section{Federated learning}
|
||||
|
||||
|
||||
\subsection{Batch gradient method}
|
||||
|
||||
\begin{description}
|
||||
\item[Batch gradient method] \marginnote{Batch gradient method}
|
||||
@ -442,7 +478,12 @@
|
||||
\begin{remark}
|
||||
Computing the full gradient in this way at every iteration can be expensive.
|
||||
\end{remark}
|
||||
\end{description}
|
||||
|
||||
|
||||
\subsection{Incremental gradient method}
|
||||
|
||||
\begin{description}
|
||||
\item[Incremental gradient method] \marginnote{Incremental gradient method}
|
||||
At each iteration $k$, compute the direction by considering the loss of a single agent $i^k$:
|
||||
\[
|
||||
@ -469,6 +510,8 @@
|
||||
\end{description}
|
||||
|
||||
|
||||
\subsection{Stochastic gradient descent}
|
||||
|
||||
\begin{description}
|
||||
\item[Stochastic gradient descent (SGD)] \marginnote{Stochastic gradient descent (SGD)}
|
||||
Instance of incremental gradient method where the selection rule follows an unknown distribution.
|
||||
@ -507,7 +550,12 @@
|
||||
\item[Mini-batch SGD] \marginnote{Mini-batch SGD}
|
||||
SGD where the update at each time step $k$ is based on a set $\mathcal{I}^k \subset \{ 1, \dots, N \}$ of realizations of $\mathcal{W}$:
|
||||
\[ \z^{k+1} = \z^k - \alpha \sum_{i \in \mathcal{I}^k} \nabla l(\z^k; w^i) \]
|
||||
\end{description}
|
||||
|
||||
|
||||
\subsection{Adaptive momentum}
|
||||
|
||||
\begin{description}
|
||||
\item[Adaptive momentum (ADAM)] \marginnote{Adaptive momentum (ADAM)}
|
||||
Method based on the first and second moments of the gradient:
|
||||
\[
|
||||
@ -533,39 +581,6 @@
|
||||
|
||||
|
||||
|
||||
\subsection{Learning paradigms}
|
||||
|
||||
\begin{description}
|
||||
\item[Federated learning] \marginnote{Federated learning}
|
||||
Problem where $N$ agents with their local and private data $\mathcal{D}^{i}$ want to learn a common set of parameters $\z^*$ based on the same loss function (evaluated on different data points):
|
||||
\[
|
||||
\min_\z \sum_{i=1}^{N} l(\z; \mathcal{D}^i)
|
||||
\]
|
||||
A centralized parameter server (master) is responsible for aggregating the estimates of the agents (e.g., pick some nodes and average them).
|
||||
% \[
|
||||
% \z^{k+1} = \z^k - \alpha \sum_{i \in I_k} \nabla l(\z; \mathcal{D}^i, p^i)
|
||||
% \]
|
||||
|
||||
\item[Distributed learning] \marginnote{Distributed learning}
|
||||
Federated learning where there is no centralized entity and agents communicate with their neighbors only.
|
||||
\end{description}
|
||||
|
||||
\begin{figure}[H]
|
||||
\centering
|
||||
\begin{subfigure}{0.45\linewidth}
|
||||
\centering
|
||||
\includegraphics[width=0.55\linewidth]{./img/federated_learning.png}
|
||||
\caption{Federated learning}
|
||||
\end{subfigure}
|
||||
\begin{subfigure}{0.45\linewidth}
|
||||
\centering
|
||||
\includegraphics[width=0.7\linewidth]{./img/distributed_learning.png}
|
||||
\caption{Distributed learning}
|
||||
\end{subfigure}
|
||||
\end{figure}
|
||||
|
||||
|
||||
|
||||
\section{Distributed cost-coupled/consensus optimization}
|
||||
|
||||
\begin{description}
|
||||
@ -642,9 +657,6 @@
|
||||
Then, each agent converges to an optimal solution $\z^*$.
|
||||
\end{theorem}
|
||||
|
||||
|
||||
\subsection{Gradient tracking algorithm}
|
||||
|
||||
\begin{theorem}
|
||||
The distributed gradient algorithm does not converge with a constant step size.
|
||||
|
||||
@ -672,6 +684,8 @@
|
||||
\end{theorem}
|
||||
|
||||
|
||||
\subsection{Gradient tracking algorithm}
|
||||
|
||||
\begin{description}
|
||||
\item[Dynamic average consensus] \marginnote{Dynamic average consensus}
|
||||
Consensus algorithm where each agent measures a signal $r_i^k$ and wants to estimate the average signal of all agents:
|
||||
|
||||
Reference in New Issue
Block a user