\chapter{Linear programming}
\begin{description}
\item[Linear programming (LP)] \marginnote{Linear programming (LP)}
Optimization problem defined through a system of linear constraints (equalities and inequalities)
and an objective function.
Geometrically, equalities represent hyperplanes (lines in the Cartesian plane) and inequalities represent half-spaces.
\begin{remark}
LP is useful for the optimal allocation of limited resources.
\end{remark}
\item[Feasible solution] \marginnote{Feasible solution}
Assignment satisfying all the constraints.
In the Cartesian plane, it is represented by any point within
the intersection of all half-spaces defined by the inequalities.
\item[Feasible region] \marginnote{Feasible region}
Set of all feasible solutions.
It is a convex polyhedron that can be empty, bounded or unbounded.
\begin{remark}
If an optimal solution exists, at least one lies at an intersection point of the constraints (i.e. a vertex of the polyhedron) on the boundary of the feasible region.
The number of vertices is finite but may grow exponentially with the size of the problem.
\end{remark}
\item[Canonical form] \marginnote{Canonical form}
A linear programming problem is in canonical form if it is defined as:
\[
\begin{split}
\max \sum_{j=1}^{n} c_j x_j \text{ subject to } &\sum_{j=1}^{n} a_{i,j} x_j \leq b_i \text{ for } 1 \leq i \leq m \,\,\land \\
& x_j \geq 0 \text{ for } 1 \leq j \leq n
\end{split}
\]
where:
\begin{itemize}
\item $m$ is the number of constraints.
\item $n$ is the number of non-negative variables.
\item $a_{i,j}, b_i, c_j \in \mathbb{R}$ are known parameters (given by the definition of the problem).
\item $\sum_{j=1}^{n} c_j x_j$ is the objective function to maximize.
\item $\sum_{j=1}^{n} a_{i,j} x_j \leq b_i$ are $m$ linear inequalities.
\end{itemize}
In matrix form:
\[ \max \{ \vec{cx} \} \text{ subject to } \matr{A}\vec{x} \leq \vec{b} \land \vec{x} \geq \nullvec \]
where:
\begin{itemize}
\item $\vec{c} = \begin{bmatrix} c_1 & \hdots & c_n \end{bmatrix}$.
\item $\vec{x} = \begin{bmatrix} x_1 & \hdots & x_n \end{bmatrix}^T$.
\item $\vec{b} = \begin{bmatrix} b_1 & \hdots & b_m \end{bmatrix}^T$.
\item $\matr{A} = \begin{bmatrix} a_{1,1} & \hdots & a_{1,n} \\ \vdots & \ddots & \vdots \\ a_{m,1} & \hdots & a_{m,n} \end{bmatrix}$
\end{itemize}
\item[Standard form] \marginnote{Standard form}
A linear programming problem is in standard form if it only has equality constraints (excluding the non-negativity constraints on single variables):
\[
\begin{split}
\max \sum_{j=1}^{n} c_j x_j \text{ subject to } &\sum_{j=1}^{n} a_{i,j} x_j = b_i \text{ for } 1 \leq i \leq m \,\,\land \\
& x_j \geq 0 \text{ for } 1 \leq j \leq n
\end{split}
\]
In matrix form: $\max \{ \vec{cx} \} \text{ subject to } \matr{A}\vec{x} = \vec{b} \land \vec{x} \geq \nullvec$.
\begin{remark}[Canonical to standard form]
Any LP problem with $m$ constraints in canonical form has an equivalent standard form with $m$ slack variables $y_1, \dots, y_m \geq 0$
such that:
\[
\forall i \in \{ 1, \dots, m \}:
\left( \sum_{j=1}^{n} a_{i,j} x_j \leq b_i \right) \Rightarrow
\left( \sum_{j=1}^{n} a_{i,j} x_j + y_i= b_i \land y_i \geq 0 \right)
\]
\end{remark}
\end{description}
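As a concrete illustration of the canonical-to-standard conversion, the following minimal Python sketch (using \texttt{numpy}; function and variable names are illustrative) appends one slack column per constraint, using the data of the brewery example below:
\begin{verbatim}
import numpy as np

def canonical_to_standard(A, b, c):
    """Convert max{cx : Ax <= b, x >= 0} into standard form
    by appending one slack variable per constraint."""
    m, n = A.shape
    A_std = np.hstack([A, np.eye(m)])          # [A | I]
    c_std = np.concatenate([c, np.zeros(m)])   # slacks have zero cost
    return A_std, b, c_std

# Brewery data: 5A + 15B <= 480, 4A + 4B <= 160, 35A + 20B <= 1190
A = np.array([[5., 15.], [4., 4.], [35., 20.]])
b = np.array([480., 160., 1190.])
c = np.array([13., 23.])
A_std, b_std, c_std = canonical_to_standard(A, b, c)
print(A_std)   # columns: A, B, S_C, S_H, S_M
\end{verbatim}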
\begin{example}[Brewery problem]
The definition of the problem in canonical form is the following:
\begin{center}
\begin{tabular}{lccccc}
$\max$ & $13A$ & $+$ & $23B$ \\
subj. to & $5A$ & $+$ & $15B$ & $\leq$ & 480 \\
& $4A$ & $+$ & $4B$ & $\leq$ & 160 \\
& $35A$ & $+$ & $20B$ & $\leq$ & 1190 \\
& $A$ & , & $B$ & $\geq$ & 0 \\
\end{tabular}
\end{center}
\begin{figure}[H]
\centering
\includegraphics[width=0.3\linewidth]{./img/lp_brewery_problem.png}
\caption{Feasible region and vertices of the polyhedron}
\end{figure}
In standard form, it becomes:
\begin{center}
\begin{tabular}{lccccccccccc}
$\max$ & $13A$ & $+$ & $23B$ \\
subj. to & $5A$ & $+$ & $15B$ & $+$ & $S_C$ & & & & & $=$ & 480 \\
& $4A$ & $+$ & $4B$ & & & $+$ & $S_H$ & & & $=$ & 160 \\
& $35A$ & $+$ & $20B$ & & & & & $+$ & $S_M$ & $=$ & 1190 \\
& $A$ & , & $B$ & , & $S_C$ & , & $S_H$ & , & $S_M$ & $\geq$ & 0 \\
\end{tabular}
\end{center}
\end{example}
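The same model can be cross-checked with an off-the-shelf LP solver. A minimal sketch using \texttt{scipy.optimize.linprog} (which minimizes, so the objective is negated); the optimum found here matches the one derived with the simplex algorithm later on:
\begin{verbatim}
from scipy.optimize import linprog

c = [-13, -23]                       # max 13A + 23B == min -13A - 23B
A_ub = [[5, 15], [4, 4], [35, 20]]
b_ub = [480, 160, 1190]

res = linprog(c, A_ub=A_ub, b_ub=b_ub, bounds=[(0, None)] * 2)
print(res.x)     # optimal vertex: A = 12, B = 28
print(-res.fun)  # optimal objective value: 800
\end{verbatim}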
\section{Simplex algorithm}
Algorithm that starts from an extreme point of the polyhedron and iteratively moves to a neighboring vertex as long as the objective function does not decrease.
\subsection{Basis}
\begin{description}
\item[Basis] \marginnote{Basis}
Given an LP problem $\mathcal{P}$ in standard form with $m$ constraints and $n$ variables (note: in standard form, it holds that $m \leq n$) and
its constraint matrix $\matr{A} \in \mathbb{R}^{m \times n}$,
an (ordered) basis $\calB = \{ x_{i_1}, \dots, x_{i_m} \}$ is a subset of $m$ of the $n$ variables such that
the columns $\matr{A}_{i_1}, \dots, \matr{A}_{i_m}$ of $\matr{A}$ form an $m \times m$ invertible matrix $\matr{A}_\calB$.
Variables in $\calB$ are basic variables while
$\calN = \{ x_1, \dots, x_n \} \smallsetminus \calB$ are non-basic variables.
$\mathcal{P}$ can be rewritten by separating basic and non-basic variables:
\[
\max \{ \vec{c}_\calB \vec{x}_\calB + \vec{c}_\calN\vec{x}_\calN \} \text{ subject to }
\matr{A}_\calB \vec{x}_\calB + \matr{A}_\calN \vec{x}_\calN = \vec{b} \,\land\, \vec{x}_\calB, \vec{x}_\calN \geq \nullvec
\]
\item[Basic solution] \marginnote{Basic solution}
By constraining $\vec{x}_\calN = \nullvec$, an LP problem becomes:
\[
\max \{ \vec{c}_\calB \vec{x}_\calB \} \text{ subject to }
\matr{A}_\calB \vec{x}_\calB = \vec{b} \,\land\, \vec{x}_\calB \geq \nullvec
\]
As $\matr{A}_\calB$ is invertible by definition, it holds that:
\[
\vec{x}_\calB = \matr{A}_\calB^{-1} \vec{b} \hspace{2em}\text{ and }\hspace{2em}
\max \{ \vec{c}_\calB \vec{x}_\calB \} = \max \{ \vec{c}_\calB \matr{A}_\calB^{-1} \vec{b} \}
\]
$\vec{x}_\calB$ is a basic solution for $\calB$.
\item[Basic feasible solution (BFS)] \marginnote{Basic feasible solution (BFS)}
Given a basic solution $\vec{x}_\calB$ for $\calB$,
it is feasible iff:
\[ \forall_{i=1}^m \vec{x}_{\calB_i} \geq 0 \]
\begin{description}
\item[Non-degenerate BFS] \marginnote{Non-degenerate BFS}
A basic feasible solution is non-degenerate iff $\forall_{i=1}^m \vec{x}_{\calB_i} > 0$.
\begin{remark}
A non-degenerate BFS is represented by a unique basis.
\end{remark}
\end{description}
\begin{remark}
The simplex algorithm iteratively moves through basic feasible solutions $\tilde{\vec{x}}_1, \tilde{\vec{x}}_2, \dots$
such that $\vec{c}\tilde{\vec{x}}_k \geq \vec{c}\tilde{\vec{x}}_{k-1}$.
\end{remark}
\end{description}
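Numerically, a basic solution is obtained by selecting $m$ columns of $\matr{A}$ and solving the resulting square system. A minimal sketch on the standard-form brewery matrix (the choice of column indices is illustrative):
\begin{verbatim}
import numpy as np

# Standard-form brewery constraints; columns: A, B, S_C, S_H, S_M
A = np.array([[ 5., 15., 1., 0., 0.],
              [ 4.,  4., 0., 1., 0.],
              [35., 20., 0., 0., 1.]])
b = np.array([480., 160., 1190.])

def basic_solution(A, b, basis):
    """x_B = A_B^{-1} b for the given column indices;
    the basic solution is feasible iff x_B >= 0."""
    x_B = np.linalg.solve(A[:, basis], b)   # A_B must be invertible
    return x_B, bool(np.all(x_B >= 0))

print(basic_solution(A, b, [2, 3, 4]))  # slack basis: (480, 160, 1190)
print(basic_solution(A, b, [0, 1, 4]))  # basis {A, B, S_M}: (12, 28, 210)
\end{verbatim}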
\subsection{Tableau}
\marginnote{Tableau}
Tabular representation to describe the steps of the simplex algorithm.
A variable $Z$ is introduced to represent the value of the objective function (which can be seen as a conversion of the objective function into a constraint).
The tableau of an LP problem in standard form is divided into three sections:
\begin{enumerate}
\item Objective function, where the coefficients of the variables are called reduced costs\marginnote{Reduced costs}.
\item Equality constraints.
\item Variable constraints.
\end{enumerate}
\begin{example}[Brewery problem]
In standard form, the brewery problem is defined as:
\begin{center}
\begin{tabular}{lccccccccccc}
$\max$ & $13A$ & $+$ & $23B$ \\
subj. to & $5A$ & $+$ & $15B$ & $+$ & $S_C$ & & & & & $=$ & 480 \\
& $4A$ & $+$ & $4B$ & & & $+$ & $S_H$ & & & $=$ & 160 \\
& $35A$ & $+$ & $20B$ & & & & & $+$ & $S_M$ & $=$ & 1190 \\
& $A$ & , & $B$ & , & $S_C$ & , & $S_H$ & , & $S_M$ & $\geq$ & 0 \\
\end{tabular}
\end{center}
As a tableau, assuming an initial basis $\calB = \{ S_C, S_H, S_M \}$, the problem is represented as:
\begin{center}
\begin{tabular}{cccccccccccccc}
\toprule
$13A$ & $+$ & $23B$ & & & & & & & $-$ & $Z$ & $=$ & 0 \\
\midrule
$5A$ & $+$ & $15B$ & $+$ & $S_C$ & & & & & & & $=$ & 480 \\
$4A$ & $+$ & $4B$ & & & $+$ & $S_H$ & & & & & $=$ & 160 \\
$35A$ & $+$ & $20B$ & & & & & $+$ & $S_M$ & & & $=$ & 1190 \\
\midrule
$A$ & , & $B$ & , & $S_C$ & , & $S_H$ & , & $S_M$ & & & $\geq$ & 0 \\
\bottomrule
\end{tabular}
\end{center}
The reduced costs are $\{ 13, 23, 0, 0, 0 \}$.
\end{example}
\subsection{Pivoting}
Given a basis $\calB$, it is possible to insert a new variable $x^\text{in} \notin \calB$ into it and remove an old one $x^\text{out} \in \calB$
to increase (or leave unchanged) the objective function:
\begin{descriptionlist}
\item[Entering variable] \marginnote{Entering variable}
$x^\text{in}$ should be the variable in $\calN$ with the largest positive reduced cost (i.e. the one that improves the objective function at the highest rate).
\item[Leaving variable] \marginnote{Leaving variable}
$x^\text{out}$ should be chosen to ensure that the new basis $\calB' = \calB \cup \{ x^\text{in} \} \smallsetminus \{ x^\text{out} \}$
is still a feasible basis.
\begin{description}
\item[Minimum ratio rule] \marginnote{Minimum ratio rule}
For each constraint $i$ (i.e. $i$-th row of the system $\matr{A}\vec{x} = \vec{b}$) with a positive coefficient of the entering variable, it is possible to compute the ratio:
\[ \frac{\vec{b}_i}{\vec{\alpha}_i^\text{in}} \]
where:
\begin{itemize}
\item $\vec{\alpha}_i^\text{in}$ is the coefficient associated to the entering variable $x^\text{in}$ in the $i$-th constraint.
\item $\vec{b}_i$ is the known term of the $i$-th constraint.
\end{itemize}
The index $i$ of the leaving variable $x^\text{out} = \calB_i$ is determined as:
\[ \arg\min_i \frac{\vec{b}_i}{\vec{\alpha}_i^\text{in}} \]
\end{description}
\end{descriptionlist}
Once $x^\text{in}$ and $x^\text{out} = \calB_i$ have been determined,
$x^\text{in}$ is isolated in the equation of the $i$-th constraint and substituted into all the other equations.
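A minimal \texttt{numpy} sketch of the pivot step (entering variable by largest reduced cost, leaving variable by the minimum ratio rule); the dense-tableau layout is illustrative. Run on the brewery tableau, it reproduces the iterations of the example below:
\begin{verbatim}
import numpy as np

def pivot(T):
    """One simplex pivot on T = [A | b] with the objective row last
    ([reduced costs | -Z]). Returns False at optimality."""
    cost = T[-1, :-1]
    if np.all(cost <= 0):
        return False                    # all reduced costs <= 0
    j = int(np.argmax(cost))            # entering variable
    col, rhs = T[:-1, j], T[:-1, -1]
    ratios = np.where(col > 0, rhs / np.where(col > 0, col, 1), np.inf)
    if np.all(np.isinf(ratios)):
        raise ValueError("unbounded")   # no positive coefficient
    i = int(np.argmin(ratios))          # leaving variable (min ratio)
    T[i] /= T[i, j]                     # isolate x_in in row i
    for r in range(T.shape[0]):         # substitute in all other rows
        if r != i:
            T[r] -= T[r, j] * T[i]
    return True

# Brewery tableau; columns: A, B, S_C, S_H, S_M, rhs
T = np.array([[ 5., 15., 1., 0., 0.,  480.],
              [ 4.,  4., 0., 1., 0.,  160.],
              [35., 20., 0., 0., 1., 1190.],
              [13., 23., 0., 0., 0.,    0.]])
while pivot(T):
    pass
print(-T[-1, -1])   # optimal objective value: 800
\end{verbatim}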
\begin{example}[Brewery problem]
The initial tableau of the brewery problem with $\calB = \{ S_C, S_H, S_M \}$ and $\calN = \{ A, B \}$ is:
\begin{center}
\begin{tabular}{cccccccccccccc}
\toprule
$13A$ & $+$ & $23B$ & & & & & & & $-$ & $Z$ & $=$ & 0 \\
\midrule
$5A$ & $+$ & $15B$ & $+$ & $S_C$ & & & & & & & $=$ & 480 \\
$4A$ & $+$ & $4B$ & & & $+$ & $S_H$ & & & & & $=$ & 160 \\
$35A$ & $+$ & $20B$ & & & & & $+$ & $S_M$ & & & $=$ & 1190 \\
\midrule
$A$ & , & $B$ & , & $S_C$ & , & $S_H$ & , & $S_M$ & & & $\geq$ & 0 \\
\bottomrule
\end{tabular}
\end{center}
\begin{enumerate}
\item
It can be easily seen that $x^\text{in} = B$ should be the entering variable.
For the leaving variable, the ratios are:
\[ \arg\min \left\{ \frac{480}{15}, \frac{160}{4}, \frac{1190}{20} \right\} = \arg\min \left\{ 32, 40, 59.5 \right\} = 1 \]
Therefore, the leaving variable is $x^\text{out} = \calB_1 = S_C$.
We now isolate $B$ from the first constraint:
\[
\begin{split}
5A + 15B + S_C = 480 &\iff \frac{1}{3}A + B + \frac{1}{15}S_C = 32 \\
&\iff B = 32 - \frac{1}{3}A - \frac{1}{15}S_C \\
\end{split}
\]
The tableau with $\calB' = \{ B, S_H, S_M \}$ and $\calN' = \{ A, S_C \}$ is updated as:
\begin{center}
\begin{tabular}{cccccccccccccc}
\toprule
$\frac{16}{3}A$ & & & $-$ & $\frac{23}{15}S_C$ & & & & & $-$ & $Z$ & $=$ & $-736$ \\
\midrule
$\frac{1}{3}A$ & $+$ & $B$ & $+$ & $\frac{1}{15}S_C$ & & & & & & & $=$ & $32$ \\
$\frac{8}{3}A$ & & & $-$ & $\frac{4}{15}S_C$ & $+$ & $S_H$ & & & & & $=$ & $32$ \\
$\frac{85}{3}A$ & & & $-$ & $\frac{4}{3}S_C$ & & & $+$ & $S_M$ & & & $=$ & $550$ \\
\midrule
$A$ & , & $B$ & , & $S_C$ & , & $S_H$ & , & $S_M$ & & & $\geq$ & 0 \\
\bottomrule
\end{tabular}
\end{center}
\item
Now, $x^\text{in} = A$ is the variable that increases the objective function the most.
For the leaving variable, the ratios are:
\[ \arg\min \left\{ \frac{32}{1/3}, \frac{32}{8/3}, \frac{550}{85/3} \right\} = 2 \]
Therefore, the leaving variable is $x^\text{out} = \calB'_2 = S_H$.
Isolating $A$ in the second constraint yields:
\[
\begin{split}
A = \frac{3}{8} (32 + \frac{4}{15}S_C - S_H) \,\iff\, A = 12 + \frac{1}{10}S_C - \frac{3}{8}S_H
\end{split}
\]
The tableau with $\calB'' = \{ A, B, S_M \}$ and $\calN'' = \{ S_C, S_H \}$ is updated as:
\begin{center}
\begin{tabular}{cccccccccccccc}
\toprule
& & & $-$ & $S_C$ & $-$ & $2S_H$ & & & $-$ & $Z$ & $=$ & $-800$ \\
\midrule
& & $B$ & $+$ & $\frac{1}{10}S_C$ & $+$ & $\frac{1}{8}S_H$ & & & & & $=$ & $28$ \\
$A$ & & & $-$ & $\frac{1}{10}S_C$ & $+$ & $\frac{3}{8}S_H$ & & & & & $=$ & $12$ \\
& & & $-$ & $\frac{25}{6}S_C$ & $-$ & $\frac{85}{8}S_H$ & $+$ & $S_M$ & & & $=$ & $210$ \\
\midrule
$A$ & , & $B$ & , & $S_C$ & , & $S_H$ & , & $S_M$ & & & $\geq$ & 0 \\
\bottomrule
\end{tabular}
\end{center}
We cannot continue since all the reduced costs $\{ 0, 0, -1, -2, 0 \}$ are $\leq 0$ (i.e. the objective function cannot be improved anymore).
\end{enumerate}
\end{example}
\subsection{Optimality}
\marginnote{Optimality}
When every possible pivot worsens (or leaves unchanged) the objective function, the current assignment is optimal.
In the tableau, this happens when all the reduced costs are $\leq 0$.
\begin{remark}
Having all reduced costs $\leq 0$ is a sufficient condition for optimality.
Moreover, for any optimal solution there is at least one basis whose reduced costs are all $\leq 0$, so the simplex algorithm can always certify optimality.
\end{remark}
\begin{remark}
It is not a necessary condition: an optimal solution may also be represented by a basis with some reduced cost $> 0$.
\end{remark}
\begin{example}[Brewery problem]
The tableau at the last iteration, with $\calB'' = \{ A, B, S_M \}$ and $\calN'' = \{ S_C, S_H \}$, is the following:
\begin{center}
\begin{tabular}{cccccccccccccc}
\toprule
& & & $-$ & $S_C$ & $-$ & $2S_H$ & & & $-$ & $Z$ & $=$ & $-800$ \\
\midrule
& & $B$ & $+$ & $\frac{1}{10}S_C$ & $+$ & $\frac{1}{8}S_H$ & & & & & $=$ & $28$ \\
$A$ & & & $-$ & $\frac{1}{10}S_C$ & $+$ & $\frac{3}{8}S_H$ & & & & & $=$ & $12$ \\
& & & $-$ & $\frac{25}{6}S_C$ & $-$ & $\frac{85}{8}S_H$ & $+$ & $S_M$ & & & $=$ & $210$ \\
\midrule
$A$ & , & $B$ & , & $S_C$ & , & $S_H$ & , & $S_M$ & & & $\geq$ & 0 \\
\bottomrule
\end{tabular}
\end{center}
All reduced costs are $\leq 0$ and an optimal solution has been reached:
\begin{itemize}
\item $S_C = 0$ and $S_H = 0$ (variables in $\calN''$).
\item $A = 12$, $B = 28$ and $S_M = 210$ (variables in $\calB''$. Obtained by isolating them in the constraints).
\item $- S_C - 2S_H - Z = -800 \iff Z = 800 - S_C - 2S_H \iff Z = 800$ (objective function).
\end{itemize}
\end{example}
\begin{remark}
The points in the feasible region of a problem $\mathcal{P}$ are:
\[ \mathcal{F}_\mathcal{P} = \{ \vec{x} \in \mathbb{R}^n \mid \matr{A}\vec{x} \leq \vec{b} \land \vec{x} \geq \nullvec \} \]
\end{remark}
\begin{description}
\item[Optimal region] \marginnote{Optimal region}
For an LP problem $\mathcal{P}$, its set of optimal solutions is defined as:
\[ \mathcal{O}_\mathcal{P} = \{ \vec{x}^* \in \mathcal{F}_\mathcal{P} \mid \forall \vec{x} \in \mathcal{F}_\mathcal{P}: \vec{c}\vec{x}^* \geq \vec{c}\vec{x} \} \]
Trivially, it holds that $\mathcal{O}_\mathcal{P} \subseteq \mathcal{F}_\mathcal{P}$ and
$\mathcal{F}_\mathcal{P} = \varnothing \Rightarrow \mathcal{O}_\mathcal{P} = \varnothing$.
\begin{theorem}
$\mathcal{O}_\mathcal{P}$ is convex (any convex combination of two optimal solutions is still optimal), so if $\mathcal{O}_\mathcal{P}$ is finite, then $\vert \mathcal{O}_\mathcal{P} \vert = 1$
(conversely, if $\vert \mathcal{O}_\mathcal{P} \vert > 1$, then $\mathcal{O}_\mathcal{P}$ is infinite).
\end{theorem}
\end{description}
\begin{description}
\item[Unbounded problem] \marginnote{Unbounded problem}
An LP problem $\mathcal{P}$ is unbounded if its objective function can grow arbitrarily large over the feasible region, so that no optimal solution exists (this requires $\mathcal{F}_\mathcal{P}$ to be an unbounded polyhedron, although the converse does not hold).
With the tableau formulation, if a column has reduced cost $> 0$ and all the constraint coefficients are $\leq 0$, then the problem is unbounded.
\begin{example}
Given the following tableau:
\begin{center}
\begin{tabular}{cccccccccccc}
\toprule
& $x$ & $-$ & $y$ & & & & & $-$ & $Z$ & $=$ & $-1$ \\
\midrule
$-$ & $x$ & $-$ & $y$ & $+$ & $S_1$ & & & & & $=$ & $0$ \\
$-$ & $2x$ & $-$ & $y$ & & & $+$ & $S_2$ & & & $=$ & $1$ \\
\midrule
& $x$ & , & $y$ & , & $S_1$ & , & $S_2$ & & & $\geq$ & 0 \\
\bottomrule
\end{tabular}
\end{center}
The unboundedness of the problem can be detected from the first column.
\begin{figure}[H]
\centering
\includegraphics[width=0.3\linewidth]{./img/lp_unbounded.png}
\end{figure}
\end{example}
\end{description}
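The same diagnosis can be made numerically. A minimal sketch on the problem underlying the example ($\max\, x - y$ with the slacks and the constant term dropped), assuming SciPy's HiGHS backend, where status code 3 means unbounded:
\begin{verbatim}
from scipy.optimize import linprog

# max x - y == min -x + y, s.t. -x - y <= 0, -2x - y <= 1, x, y >= 0
res = linprog(c=[-1, 1],
              A_ub=[[-1, -1], [-2, -1]],
              b_ub=[0, 1],
              bounds=[(0, None)] * 2)
print(res.status)   # 3: the problem is unbounded
\end{verbatim}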
\subsection{Algorithm}
Given an LP problem $\mathcal{P}$ in standard form, the steps of the simplex algorithm are:
\begin{enumerate}
\item Set $k=0$, find a feasible basis $\calB_k$ and reformulate $\mathcal{P}$ according to it.
\item If the basic feasible solution is optimal, return.
\item If $\mathcal{P}$ is unbounded, return.
\item Select an entering variable $x^\text{in}$.
\item Select a leaving variable $x^\text{out}$.
\item Let $\calB_{k+1} = \calB_k \cup \{ x^\text{in} \} \smallsetminus \{ x^\text{out} \}$ and reformulate $\mathcal{P}$ according to the new basis.
\item Set $k = k + 1$ and go to Point 2.
\end{enumerate}
\begin{description}
\item[Properties] \phantom{}
\begin{itemize}
\item If all basic feasible solutions are non-degenerate, the simplex algorithm always terminates (as the objective value strictly improves at each iteration).
\item In the general case, the algorithm might cycle by revisiting the same bases indefinitely (anti-cycling pivot rules, such as Bland's rule, guarantee termination).
\item The worst-case time complexity is $O(2^n)$. The average case is polynomial.
\end{itemize}
\end{description}
\begin{remark}
If the problem has many vertices, the interior point method (polynomial complexity) or approximation algorithms should be preferred.
\end{remark}
\subsection{Two-phase method}
\marginnote{Two-phase method}
Method that solves an LP problem by first finding an initial feasible basis $\calB_0$ and determining if the LP problem is unsatisfiable.
Given an LP problem $\mathcal{P}$ ($\max\{ \vec{cx} \} \text{ subject to } \matr{A}\vec{x} = \vec{b} \land \vec{x} \geq \nullvec$ with $m$ constraints and $n$ variables),
the two-phase method works as follows:
\begin{descriptionlist}
\item[Phase 1]
Define a new artificial problem $\mathcal{P}'$ from $\mathcal{P}$ with new variables $s_1, \dots, s_m$ as follows:
\[
\begin{split}
\max\left\{ -\sum_{i=1}^{m} s_i \right\} \text{ subject to }
&\sum_{j=1}^{n} a_{i,j} x_j + s_i = b_i \text{ for } i \in \{ k \in \{ 1, \dots, m \} \mid b_k \geq 0 \} \,\land \\
&\sum_{j=1}^{n} a_{i,j} x_j - s_i = b_i \text{ for } i \in \{ k \in \{ 1, \dots, m \} \mid b_k < 0 \} \,\land \\
& s_i, x_j \geq 0
\end{split}
\]
\begin{remark}
It holds that $-\sum_{i=1}^{m} s_i \leq 0$ and
$\calB' = \{ s_1, \dots, s_m \}$ is always a feasible basis, corresponding to the basic feasible solution $x_j = 0$, $s_i = \vert b_i \vert$.
\end{remark}
The problem $\mathcal{P}'$ with basis $\calB'$ can be solved through the simplex method.
\begin{theorem}
Let $\mathcal{F}_\mathcal{P}$ be the feasible region of $\mathcal{P}$. It holds that:
\[ \mathcal{F}_\mathcal{P} \neq \varnothing \,\iff\, \sum_{i=1}^{m} s_i = 0 \text{ at the optimum of } \mathcal{P}' \]
In other words:
\begin{itemize}
\item If the optimal value of $\mathcal{P}'$ is $< 0$, then $\mathcal{P}$ is unsatisfiable.
\item Otherwise, the basis $\calB_{\mathcal{P}'}$ corresponding to the optimal solution of $\mathcal{P}'$
can be used as the initial basis of $\mathcal{P}$, after removing the variables $s_i$.
\end{itemize}
\end{theorem}
\item[Phase 2]
Solve $\mathcal{P}$ through the simplex algorithm using as initial basis $\calB_{\mathcal{P}'}$.
\end{descriptionlist}
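A minimal sketch of Phase 1, given standard-form data $\matr{A}, \vec{b}$: the artificial objective $\min \sum_i s_i$ (equivalent to $\max -\sum_i s_i$) is solved with \texttt{scipy.optimize.linprog}, and a strictly positive optimum certifies that $\mathcal{P}$ is unsatisfiable:
\begin{verbatim}
import numpy as np
from scipy.optimize import linprog

def phase_one(A, b):
    """Feasibility check of {Ax = b, x >= 0}. Artificial variables
    enter with +1 when b_i >= 0 and -1 otherwise, so that
    x = 0, s_i = |b_i| is an initial BFS."""
    m, n = A.shape
    S = np.diag(np.where(b >= 0, 1.0, -1.0))       # signed identity
    c = np.concatenate([np.zeros(n), np.ones(m)])  # min sum(s)
    res = linprog(c, A_eq=np.hstack([A, S]), b_eq=b,
                  bounds=[(0, None)] * (n + m))
    return res.fun < 1e-9

# x1 + x2 = 1 and x1 + x2 = 3 cannot hold simultaneously
A = np.array([[1., 1.], [1., 1.]])
print(phase_one(A, np.array([1., 3.])))   # False: P is unsatisfiable
print(phase_one(A, np.array([1., 1.])))   # True: a feasible basis exists
\end{verbatim}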
\subsection{Duality}
\begin{description}
\item[Dual problem] \marginnote{Dual problem}
Given the primal problem $\mathcal{P}$ defined as:
\[ \mathcal{P}: \max \{\vec{cx}\} \text{ subject to } \matr{A}\vec{x} = \vec{b} \land \vec{x} \geq \nullvec \]
with $\vec{b} \in \mathbb{R}^m$, $\vec{x} \in \mathbb{R}^n$, $\matr{A} \in \mathbb{R}^{m \times n}$,
its dual problem $\mathcal{D}(\mathcal{P})$ is defined as:
\[ \mathcal{D}(\mathcal{P}): \min\{\vec{by}\} \text{ subject to } \matr{A}^T\vec{y} \geq \vec{c} \land \vec{y} \geq \nullvec \]
where:
\begin{itemize}
\item $\vec{y} \in \mathbb{R}^m$ has a variable $\vec{y}_i$ for each constraint $\sum_{j=1}^{n} \matr{A}_{i,j} \vec{x}_j = \vec{b}_i$ of $\mathcal{P}$,
\item $\sum_{i=1}^{m} \matr{A}_{j, i} \vec{y}_i \geq \vec{c}_j$ is a new constraint defined for each variable $\vec{x}_j$ of $\mathcal{P}$.
\end{itemize}
\begin{remark}
When the standard form is obtained from a canonical form, the dual constraints associated with the slack variables are exactly the sign constraints $\vec{y}_i \geq 0$ (as in the example below).
Therefore, it is not strictly necessary to state $\vec{y} \geq \nullvec$ explicitly.
\end{remark}
\begin{example}
Given the primal problem:
\begin{center}
\begin{tabular}{lccccccccccc}
\toprule
$\max$ & $1x_1$ & $+$ & $1x_2$ & $+$ & \color{lightgray}$0x_3$ & $+$ & \color{lightgray}$0x_4$ & $+$ & \color{lightgray}$0x_5$ \\
\midrule
subj. to & $3x_1$ & $+$ & $2x_2$ & $+$ & $1x_3$ & $+$ & \color{lightgray}$0x_4$ & $+$ & \color{lightgray}$0x_5$ & $=$ & 5 \\
& $4x_1$ & $+$ & $5x_2$ & $+$ & \color{lightgray}$0x_3$ & $+$ & $1x_4$ & $+$ & \color{lightgray}$0x_5$ & $=$ & 4 \\
& \color{lightgray}$0x_1$ & $+$ & $1x_2$ & $+$ & \color{lightgray}$0x_3$ & $+$ & \color{lightgray}$0x_4$ & $+$ & $1x_5$ & $=$ & 2 \\
\midrule
& $x_1$ & , & $x_2$ & , & $x_3$ & , & $x_4$ & , & $x_5$ & $\geq$ & 0 \\
\bottomrule
\end{tabular}
\end{center}
Its dual is:
\begin{center}
\begin{tabular}{lccccccc}
\toprule
$\min$ & $5y_1$ & $+$ & $4y_2$ & $+$ & $2y_3$ \\
\midrule
subj. to & $3y_1$ & $+$ & $4y_2$ & $+$ & \color{lightgray}$0y_3$ & $\geq$ & 1 \\
& $2y_1$ & $+$ & $5y_2$ & $+$ & $1y_3$ & $\geq$ & 1 \\
& $1y_1$ & $+$ & \color{lightgray}$0y_2$ & $+$ & \color{lightgray}$0y_3$ & $\geq$ & 0 \\
& \color{lightgray}$0y_1$ & $+$ & $1y_2$ & $+$ & \color{lightgray}$0y_3$ & $\geq$ & 0 \\
& \color{lightgray}$0y_1$ & $+$ & \color{lightgray}$0y_2$ & $+$ & $1y_3$ & $\geq$ & 0 \\
\bottomrule
\end{tabular}
\end{center}
\end{example}
\end{description}
\begin{theorem}
For any primal problem $\mathcal{P}$, it holds that $\mathcal{D}(\mathcal{D}(\mathcal{P})) = \mathcal{P}$.
\end{theorem}
\begin{theorem}[Weak duality] \marginnote{Weak duality}
The cost of any feasible solution of the primal $\mathcal{P}$ is less than or equal to the cost of any feasible solution of the dual $\mathcal{D}(\mathcal{P})$:
\[ \forall \vec{x} \in \mathcal{F}_{\mathcal{P}}, \forall \vec{y} \in \mathcal{F}_{\mathcal{D}(\mathcal{P})}: \vec{cx} \leq \vec{by} \]
In other words, $\vec{by}$ is an upper bound for $\mathcal{P}$ and $\vec{cx}$ is a lower bound for $\mathcal{D}(\mathcal{P})$.
\begin{corollary}
If $\mathcal{P}$ is unbounded, then $\mathcal{D}(\mathcal{P})$ is unfeasible:
\[ \mathcal{F}_{\mathcal{P}} \neq \varnothing \land \mathcal{O}_{\mathcal{P}} = \varnothing \,\,\Rightarrow\,\, \mathcal{F}_{\mathcal{D}(\mathcal{P})} = \varnothing \]
\end{corollary}
\begin{corollary}
If $\mathcal{D}(\mathcal{P})$ is unbounded, then $\mathcal{P}$ is unfeasible:
\[ \mathcal{F}_{\mathcal{D}(\mathcal{P})} \neq \varnothing \land \mathcal{O}_{\mathcal{D}(\mathcal{P})} = \varnothing \,\,\Rightarrow\,\, \mathcal{F}_{\mathcal{P}} = \varnothing \]
\end{corollary}
\end{theorem}
\begin{theorem}[Strong duality] \marginnote{Strong duality}
If the primal and the dual are feasible, then they have the same optimal values:
\[
\Big( \mathcal{F}_{\mathcal{P}} \neq \varnothing \land \mathcal{F}_{\mathcal{D}(\mathcal{P})} \neq \varnothing \Big) \Rightarrow
\Big( \forall \vec{x}^* \in \mathcal{O}_\mathcal{P}, \forall \vec{y}^* \in \mathcal{O}_{\mathcal{D}(\mathcal{P})}: \vec{cx}^* = \vec{by}^* \Big)
\]
\end{theorem}
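Both theorems can be verified numerically on the canonical-form pair $\max\{\vec{cx} \mid \matr{A}\vec{x} \leq \vec{b}, \vec{x} \geq \nullvec\}$ and $\min\{\vec{by} \mid \matr{A}^T\vec{y} \geq \vec{c}, \vec{y} \geq \nullvec\}$. A minimal sketch on the problem underlying the example above (slack columns dropped):
\begin{verbatim}
from scipy.optimize import linprog

A = [[3, 2], [4, 5], [0, 1]]
b = [5, 4, 2]
c = [1, 1]

# Primal: max cx s.t. Ax <= b, x >= 0 (negate c, linprog minimizes)
primal = linprog([-v for v in c], A_ub=A, b_ub=b)
# Dual: min by s.t. A^T y >= c, y >= 0 (flip signs to get <= form)
AT = [[row[j] for row in A] for j in range(2)]
dual = linprog(b, A_ub=[[-v for v in row] for row in AT],
               b_ub=[-v for v in c])
print(-primal.fun, dual.fun)   # both 1.0: strong duality holds
\end{verbatim}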
\begin{description}
\item[Dual simplex] \marginnote{Dual simplex}
It moves from an optimal (but possibly unfeasible) basis to a feasible basis, while preserving optimality.
\begin{remark}
The traditional primal simplex moves from feasible to optimal basis, while preserving feasibility.
\end{remark}
\item[Properties]
\phantom{}
\begin{itemize}
\item By duality, finding a feasible solution and finding an optimal solution have the same computational complexity.
\item The dual allows proving the unfeasibility of the primal.
\item Primal and dual together bound the value of the objective function.
\end{itemize}
\end{description}
\subsection{Sensitivity analysis}
\marginnote{Sensitivity analysis}
Study how the optimal solution of a problem $\mathcal{P}$ is affected if $\mathcal{P}$ is perturbed.
Given a problem $\mathcal{P}$ with optimal solution $\vec{x}^*$, a perturbed problem $\bar{\mathcal{P}}$ can be obtained by altering:
\begin{descriptionlist}
\item[Known terms]
Change of form:
\[ \vec{b} \leadsto \bar{\vec{b}} = \vec{b} + \Delta \vec{b} \]
This can affect the feasibility and optimality of $\vec{x}^*$.
\begin{remark}
Changing the known terms of $\mathcal{P}$ changes the objective function of $\mathcal{D}(\mathcal{P})$.
\end{remark}
\item[Objective function coefficients]
Change of form:
\[ \vec{c} \leadsto \bar{\vec{c}} = \vec{c} + \Delta \vec{c} \]
This can affect the optimality of $\vec{x}^*$.
\item[Constraint coefficients]
Change of form:
\[ \matr{A} \leadsto \bar{\matr{A}} = \matr{A} + \Delta \matr{A} \]
\begin{itemize}
\item If the change involves a variable $\vec{x}^*_j = 0$, then feasibility is not changed but optimality can be affected.
\item If the change involves a variable $\vec{x}^*_j \neq 0$, the problem needs to be re-solved.
\end{itemize}
\end{descriptionlist}
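On the brewery problem, the effect of perturbing the known terms is captured by the dual values (shadow prices). A sketch, assuming SciPy's HiGHS backend, which exposes the duals of the inequality constraints as \texttt{res.ineqlin.marginals}:
\begin{verbatim}
from scipy.optimize import linprog

c = [-13, -23]                        # max 13A + 23B
A_ub = [[5, 15], [4, 4], [35, 20]]

res = linprog(c, A_ub=A_ub, b_ub=[480, 160, 1190],
              bounds=[(0, None)] * 2)
print(res.ineqlin.marginals)          # shadow prices, here (-1, -2, 0)
                                      # (minimization sign convention)

# Perturb the first known term: 480 -> 490
pert = linprog(c, A_ub=A_ub, b_ub=[490, 160, 1190],
               bounds=[(0, None)] * 2)
print(-res.fun, -pert.fun)            # 800 -> 810: +1 per unit of b_1
\end{verbatim}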
\section{(Mixed) integer linear programming}
\begin{description}
\item[Integer linear programming (ILP)] \marginnote{Integer linear programming (ILP)}
Linear programming problem where variables are all integers:
\[
\begin{split}
\max \sum_{j=1}^{n} c_j x_j \text{ subject to } &\sum_{j=1}^{n} a_{i,j} x_j = b_i \,\,\text{ for } 1 \leq i \leq m \,\,\land \\
& x_j \geq 0 \land x_j \in \mathbb{Z} \,\,\text{ for } 1 \leq j \leq n
\end{split}
\]
\item[Mixed-integer linear programming (MILP)] \marginnote{Mixed-integer linear programming (MILP)}
Linear programming problem with $n$ variables where only $k < n$ of them are constrained to be integer.
\end{description}
\begin{theorem}
Finding a feasible solution of a mixed-integer linear programming problem is NP-complete.
\begin{proof}
\phantom{}
\begin{descriptionlist}
\item[MILP is in NP]
A certificate contains an assignment of the variables: it is sufficient to check that all constraints are satisfied.
Both steps are polynomial.
\item[MILP is NP-hard]
Any SAT problem can be poly-reduced to MILP: each propositional variable becomes a 0--1 variable and each clause a linear inequality (e.g. $x_1 \vee \lnot x_2$ becomes $x_1 + (1 - x_2) \geq 1$).
\end{descriptionlist}
\end{proof}
\end{theorem}
\begin{remark}
Due to its NP-completeness, MILP problems can be solved by:
\begin{descriptionlist}
\item[Exact algorithms] Guarantee optimal solution with exponential time complexity.
\item[Approximate algorithms] Guarantee polynomial time complexity but might provide sub-optimal solutions with an approximation factor $\rho$.
\item[Heuristic algorithms] Empirically find a good solution in a reasonable time (no theoretical guarantees).
\end{descriptionlist}
\end{remark}
\subsection{Linear relaxation}
\marginnote{Linear relaxation}
Given a MILP problem $\mathcal{P}$, its linear relaxation $\mathcal{L}(\mathcal{P})$ removes the constraints $x_j \in \mathbb{Z}$.
However, solving $\mathcal{L}(\mathcal{P})$ as an LP problem and rounding the solution does not guarantee feasibility or optimality.
\begin{theorem}
It holds that $\mathcal{F}_{\mathcal{L}(\mathcal{P})} = \varnothing \Rightarrow \mathcal{F}_\mathcal{P} = \varnothing$.
Therefore, the linear relaxation of a MILP problem can be used to verify unsatisfiability.
\end{theorem}
\begin{remark}
If $\mathcal{F}_{\mathcal{L}(\mathcal{P})}$ is unbounded, then $\mathcal{P}$ can either be bounded, unbounded or unsatisfiable.
\end{remark}
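A small instance showing the pitfall, as a sketch: for $\max x_2$ subject to $-x_1 + x_2 \leq 0.5$ and $x_1 + x_2 \leq 3.5$ with $x_1, x_2 \in \mathbb{Z}_{\geq 0}$, the relaxation peaks at the fractional vertex $(1.5, 2)$, both roundings of $x_1$ yield unfeasible points, and the integer optimum only reaches $x_2 = 1$:
\begin{verbatim}
from scipy.optimize import linprog

c = [0, -1]                        # max x2
A_ub = [[-1, 1], [1, 1]]
b_ub = [0.5, 3.5]

relaxed = linprog(c, A_ub=A_ub, b_ub=b_ub)     # x = (1.5, 2.0)
integer = linprog(c, A_ub=A_ub, b_ub=b_ub,
                  integrality=[1, 1])          # integer optimum: x2 = 1
print(relaxed.x, integer.x)
\end{verbatim}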
\subsection{Branch-and-bound}
\marginnote{Branch-and-bound}
Given an ILP problem $\mathcal{P}$, the branch-and-bound algorithm solves it with a divide-and-conquer approach.
The algorithm does the following:
\begin{enumerate}
\item Set the current best optimal value $z^* = -\infty$ and put $\mathcal{P}$ as the root of a search tree.
\item Solve $\mathcal{P}_0 = \mathcal{L}(\mathcal{P})$ to obtain a solution $\{ x_1 = \beta_1, \dots, x_n = \beta_n \}$.
\item If each $\beta_i$ is an integer, the solution is optimal: terminate.
\item Pick a variable $x_k$ such that its assignment $\beta_k \notin \mathbb{Z}$ and branch the problem:
\[
\begin{cases}
\mathcal{P}_1 = \mathcal{P}_0 \cup \{ x_k \leq \lfloor \beta_k \rfloor \} \\
\mathcal{P}_2 = \mathcal{P}_0 \cup \{ x_k \geq \lceil \beta_k \rceil \}
\end{cases}
\]
\item Add $\mathcal{P}_1$ and $\mathcal{P}_2$ as children of $\mathcal{P}$ in the search tree.
Solve the linear relaxations $\mathcal{L}(\mathcal{P}_1)$ and $ \mathcal{L}(\mathcal{P}_2)$:
\begin{itemize}
\item If $\mathcal{L}(\mathcal{P}_k)$ has an integral solution, it is optimal for the subproblem.
The best optimal value $z^*$ is updated if the current objective value $z_k$ is higher.
In the search tree, $\mathcal{P}_k$ becomes a leaf.
\item If $\mathcal{L}(\mathcal{P}_k)$ is unfeasible, or its optimal value is $\leq z^*$ (bounding), $\mathcal{P}_k$ becomes a leaf.
\item Otherwise, continue branching as in Point 4.
\end{itemize}
\end{enumerate}
\begin{description}
\item[Fathomed node] Leaf of the search tree (a subproblem that requires no further branching).
\item[Incumbent solution] Best integral solution found so far; at termination, the incumbent is optimal.
\end{description}
\begin{example}[Bakery problem]
Consider the problem $\mathcal{P}$:
\begin{center}
\begin{tabular}{lccccc}
$\max$ & $400B$ & $+$ & $450C$ \\
subj. to & $250B$ & $+$ & $200C$ & $\leq$ & $4000$ \\
& $2B$ & & & $\leq$ & 6 \\
& $75B$ & $+$ & $150C$ & $\leq$ & $2000$ \\
& $100B$ & $+$ & $150C$ & $\leq$ & $500$ \\
& & & $75C$ & $\leq$ & $500$ \\
& $B$ & , & $C$ & $\in$ & $\{ 1, 2, \dots, 100\}$ \\
\end{tabular}
\end{center}
\begin{enumerate}
\item The solution of $\mathcal{P}_0 = \mathcal{L}(\mathcal{P})$ is $\{ B = 3, C = \frac{4}{3} \}$.
\begin{figure}[H]
\centering
\includegraphics[width=0.55\linewidth]{./img/bakery_1.png}
\end{figure}
\item We have to branch on the variable $C$:
\[
\begin{cases}
\mathcal{P}_1 = \mathcal{P}_0 \cup \{ C \leq \lfloor \frac{4}{3} \rfloor \} = \mathcal{P}_0 \cup \{ C \leq 1 \} \\
\mathcal{P}_2 = \mathcal{P}_0 \cup \{ C \geq \lceil \frac{4}{3} \rceil \} = \mathcal{P}_0 \cup \{ C \geq 2 \}
\end{cases}
\]
\begin{figure}[H]
\centering
\begin{subfigure}{0.2\linewidth}
\centering
\includegraphics[width=\linewidth]{./img/bakery_tree2.png}
\end{subfigure}
\begin{subfigure}{0.55\linewidth}
\centering
\includegraphics[width=\linewidth]{./img/bakery_2.png}
\end{subfigure}
\end{figure}
\item The solution of $\mathcal{L}(\mathcal{P}_1)$ is $\{ B=3, C=1 \}$ and has objective value $z_1 = 1650$.
As it is integral, the current best solution is updated to $z^* = 1650$ and no further branching is needed.
\item The solution of $\mathcal{L}(\mathcal{P}_2)$ is $\{ B=2, C=2 \}$ and has objective value $z_2 = 1700$.
As it is integral, the current best solution is updated to $z^* = 1700$ and no further branching is needed.
\begin{figure}[H]
\centering
\begin{subfigure}{0.35\linewidth}
\centering
\includegraphics[width=\linewidth]{./img/bakery_tree3.png}
\end{subfigure}
\begin{subfigure}{0.55\linewidth}
\centering
\includegraphics[width=\linewidth]{./img/bakery_3.png}
\end{subfigure}
\end{figure}
\item The leaf containing $\mathcal{P}_2$ holds the incumbent solution: $\{ B=2, C=2 \}$ with $z^* = 1700$ is optimal.
\end{enumerate}
\end{example}
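The result can be cross-checked with a MILP solver. A minimal sketch with \texttt{scipy.optimize.milp}:
\begin{verbatim}
import numpy as np
from scipy.optimize import milp, LinearConstraint, Bounds

c = np.array([-400, -450])     # max 400B + 450C
A = np.array([[250, 200], [2, 0], [75, 150], [100, 150], [0, 75]])
b_ub = np.array([4000, 6, 2000, 500, 500])

res = milp(c,
           constraints=LinearConstraint(A, ub=b_ub),
           integrality=np.ones(2),    # both variables are integer
           bounds=Bounds(1, 100))     # domain {1, ..., 100}
print(res.x, -res.fun)               # (2, 2) with objective 1700
\end{verbatim}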
Possible techniques to improve branch-and-bound are:
\begin{descriptionlist}
\item[Presolve] \marginnote{Presolve}
Reformulate the problem $\mathcal{P}$ before solving it to reduce
the size of $\mathcal{F}_{\mathcal{L}(\mathcal{P})}$ (without altering $\mathcal{F}_{\mathcal{P}}$).
\begin{descriptionlist}
\item[Bounds tightening] \marginnote{Bounds tightening}
Infer stronger constraints.
\begin{example}
$\{ x_1 + x_2 \geq 20, x_1 \leq 10 \} \,\,\models\,\, x_2 \geq 10$.
\end{example}
\item[Problem reduction] \marginnote{Problem reduction}
Infer the assignment to variables.
\begin{example}
$\{ x_1 + x_2 \leq 0.8 \} \,\,\models\,\, x_1 = x_2 = 0$ (for integer $x_1, x_2 \geq 0$).
\end{example}
\end{descriptionlist}
\item[Cutting planes] \marginnote{Cutting planes}
Add constraints to reduce the space of non-integral solutions ($\mathcal{F}_{\mathcal{L}(\mathcal{P})} \smallsetminus \mathcal{F}_{\mathcal{P}}$).
Given a MILP problem $\mathcal{P}$, a cut is an inequality:
\[
\begin{aligned}
\vec{px} \leq \vec{q} \text{ such that }
&\forall \vec{y} \in \mathcal{F}_\mathcal{P}: \vec{py} \leq \vec{q} \,\,\land\,\, &\text{\small(feasible solutions inside the cut)}\\
&\forall \vec{z} \in \mathcal{O}_{\mathcal{L}(\mathcal{P})}: \vec{pz} > \vec{q} &\text{\small(non-integral solutions outside the cut)}
\end{aligned}
\]
\begin{figure}[H]
\centering
\includegraphics[width=0.39\linewidth]{./img/cutting_planes.png}
\end{figure}
\begin{theorem}
There always exists a (possibly non-unique) cut separating the optimal solution in
$\mathcal{F}_{\mathcal{L}(\mathcal{P})} \smallsetminus \mathcal{F}_{\mathcal{P}}$ from
$\mathcal{F}_{\mathcal{P}}$.
\end{theorem}
\begin{remark}
The cut can be done when branching or as a standalone operation (branch-and-cut).
\end{remark}
\begin{description}
\item[Gomory's cut] \marginnote{Gomory's cut}
Consider the optimal solution of $\mathcal{L}(\mathcal{P})$
with basis $\calB^* = \{ x_{i_1}, \dots, x_{i_m} \}$ and non-basic variables $\calN^* = \{ x_{i_{m+1}}, \dots, x_{i_n} \}$.
The cut aims to separate a non-integral vertex of the polytope and all the other feasible integer points.
% \begin{remark}
% As $x_{i_{m+1}}^* = 0, \dots, x_{i_n}^* = 0$, basic variables can be rewritten as:
% \[ \forall k \in \{ 1, \dots, m \}: x_{i_k} = \beta_k + \sum_{j=1}^{n-m} \alpha_{k,j} x_{i_{m+j}} \]
% \end{remark}
If there is a $k$ such that $x_{i_k} = \beta_k \notin \mathbb{Z}$,
then $\vec{x}^* \in \mathcal{F}_{\mathcal{L}(\mathcal{P})} \smallsetminus \mathcal{F}_{\mathcal{P}}$
and it can be separated from the feasible integer points in $\mathcal{F}_{\mathcal{P}}$.
$x_{i_k}$ can be written in basic form as:
\[
\begin{split}
x_{i_k} &= \beta_k + \sum_{j=1}^{n-m} \alpha_{k,j} x_{i_{m+j}} \\
% \iff
% \underbrace{ \beta_k + \sum_{j=1}^{n-m} \lfloor \alpha_{k,j} \rfloor x_{i_{m+j}} - \lfloor \beta_k \rfloor }_{\mathclap{\text{Integer part}}} &=
% \underbrace{ \beta_k - \lfloor \beta_k \rfloor - \sum_{j=1}^{n-m} (\alpha_{k,j} - \lfloor \alpha_{k,j} \rfloor) x_{i_{m+j}} }_{\mathclap{\text{Fractional part}}}
% = (\beta_k - \lfloor \beta_k \rfloor) + \sum_{j=1}^{n-m} \lfloor -\alpha_{k,j} \rfloor x_{i_{m+j}} \\
\end{split}
\]
The cut has form:
\[
\begin{gathered}
\sum_{j=1}^{n-m} (-\alpha_{k,j} - \lfloor -\alpha_{k,j} \rfloor) x_{i_{m+j}} \geq (\beta_k - \lfloor \beta_k \rfloor) \\
\iff -(\beta_k - \lfloor \beta_k \rfloor) + \sum_{j=1}^{n-m} (-\alpha_{k,j} - \lfloor -\alpha_{k,j} \rfloor) x_{i_{m+j}} \geq 0 \\
\end{gathered}
\]
% where:
% \begin{itemize}
% \item $f_k = \beta_k - \lfloor \beta_k \rfloor$ is the mantissa of $\beta_k$.
% \item $f_{k,j} = -\alpha_{k,j} - \lfloor -\alpha_{k,j} \rfloor$ is the mantissa of $-\alpha_{k,j}$.
% \end{itemize}
The problem is then extended with the cut as:
\[
\mathcal{L}(\mathcal{P}) \cup \left\{ y_k = -(\beta_k - \lfloor \beta_k \rfloor) + \sum_{j=1}^{n-m} (-\alpha_{k,j} - \lfloor -\alpha_{k,j} \rfloor) x_{i_{m+j}}
\land y_k \geq 0 \right\}
\]
where $y_k$ is a new slack variable.
The old optimal solution is unfeasible for the updated problem, but it remains feasible for the dual, so the dual simplex can be used to re-optimize.
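A minimal sketch computing the cut coefficients from a row written in the notation above ($x_{i_k} = \beta_k + \sum_j \alpha_{k,j} x_{i_{m+j}}$):
\begin{verbatim}
import math

def gomory_cut(beta, alpha):
    """Given the row x_B = beta + sum(alpha_j * x_Nj) with beta
    fractional, return (f0, f) for the cut sum(f_j * x_Nj) >= f0."""
    f0 = beta - math.floor(beta)              # fractional part of beta
    f = [-a - math.floor(-a) for a in alpha]  # fractional parts of -alpha_j
    return f0, f

# e.g. x_B = 2.5 - 0.75 x_1 + 1.25 x_2
print(gomory_cut(2.5, [-0.75, 1.25]))  # cut: 0.75 x_1 + 0.75 x_2 >= 0.5
\end{verbatim}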
\item[Benders' decomposition] \marginnote{Benders' decomposition}
Consider a problem with $m$ inequalities and $n$ variables.
The variables can be partitioned into $\vec{x} \in \mathbb{R}^p$ and $\vec{y} \in \mathbb{R}^{n-p}$ for some $p \in \{ 1, \dots, n \}$.
The problem can be rewritten as:
\begin{center}
\begin{tabular}{lccccc}
$\min_{\vec{x}, \vec{y}}$ & $\vec{c}^T\vec{x}$ & $+$ & $\vec{d}^T\vec{y}$ \\
subj. to & $\matr{A}\vec{x}$ & $+$ & $\matr{B}\vec{y}$ & $\geq$ & $\vec{b}$ \\
& $\vec{x} \geq \nullvec$ & $,$ & $\vec{y} \in \mathcal{Y}$
\end{tabular}
\end{center}
where $\mathcal{Y} \subseteq \mathbb{R}^{n-p}$ is the feasible set of $\vec{y}$.
For any $\bar{\vec{y}} \in \mathcal{Y}$, the residual problem is defined as:
\begin{center}
\begin{tabular}{lccccc}
$\min_{\vec{x}}$ & $\vec{c}^T\vec{x}$ & $+$ & $\vec{d}^T\bar{\vec{y}}$ \\
subj. to & $\matr{A}\vec{x}$ & $\geq$ & $\vec{b} - \matr{B}\bar{\vec{y}}$ & $,$ & $\vec{x} \geq \nullvec$ \\
\end{tabular}
\end{center}
The dual of the residual problem is:
\begin{center}
\begin{tabular}{lccccc}
$\max_{\vec{u}}$ & $(\vec{b} - \matr{B}\bar{\vec{y}})^T \vec{u}$ & $+$ & $\overbrace{\vec{d}^T\bar{\vec{y}}}^{\mathclap{\text{Constant}}}$ \\
subj. to & $\matr{A}^T\vec{u} \leq \vec{c}$ & $,$ & $\vec{u} \geq \nullvec$ \\
\end{tabular}
\end{center}
Therefore, the original problem (master problem) becomes a min-max problem:
\[
\min_{\vec{y} \in \mathcal{Y}} \left[
\vec{d}^T\vec{y} + \max_{\vec{u} \geq \nullvec} \left\{ (\vec{b} - \matr{B}\vec{y})^T \vec{u} \mid \matr{A}^T\vec{u} \leq \vec{c} \right\}
\right]
\]
Given an initial $\bar{\vec{y}} \in \mathcal{Y}$, the method does the following:
\begin{enumerate}
\item Initialize an empty set of cuts $\mathcal{C}$.
\item Solve the linear relaxation of the $\max$ sub-problem (dual of residual):
\begin{itemize}
\item If it is unbounded, the residual problem is unfeasible. Add a feasibility cut to $\mathcal{C}$ to exclude $\bar{\vec{y}}$.
\item If it is unfeasible, the residual problem is unbounded or unfeasible. Terminate.
\item If $\bar{\vec{u}}$ is optimal, it is optimal for the residual problem too.
Add the cut $\vec{c}^T\vec{x} + \cancel{\vec{d}^T\vec{y}} \geq (\vec{b} - \matr{B}\vec{y})^T \bar{\vec{u}} + \cancel{\vec{d}^T\vec{y}}$
(by weak duality) to $\mathcal{C}$.
\end{itemize}
\item Solve the updated master problem to get a new solution $\bar{\vec{y}}$.
\item If the gap between the lower and upper bounds is below a threshold, stop and solve the residual problem to recover $\bar{\vec{x}}$.
Otherwise, go to Point 2 with the new $\bar{\vec{y}}$.
\end{enumerate}
\end{description}
\item[Heuristics] \marginnote{Heuristics}
Empirical methods to guide the search.
\begin{itemize}
\item Local search.
\item Meta-heuristics.
\item MILP heuristics:
\begin{itemize}
\item Rounding.
\item Diving: rounding and re-solving by fixing some variables.
\item Sub-MIPing: solving by fixing some variables.
\end{itemize}
\end{itemize}
\item[Warm start] \marginnote{Warm start}
Search from a given initial total or partial assignment of the variables.
\end{descriptionlist}
\section{Non-linear programming}
\marginnote{Non-linear programming}
Problem of form:
\[
\begin{split}
\min f(\vec{x}) \,\,\text{ subj. to }\,\,& g_i(\vec{x}) \leq 0 \hspace{1em}\text{ for $i=1, \dots, m$} \\
& h_j(\vec{x}) = 0 \hspace{1em}\text{ for $j=1, \dots, p$}
\end{split}
\]
where $\vec{x} \in \mathbb{R}^n$ and $f$, $g_i$, $h_j$ are non-linear functions.
\begin{remark}
Non-linear problems are solved using optimization methods (e.g. gradient descent, Newton's method, \dots).
\end{remark}
\section{Linearization}
Methods to linearize constraints.
They usually work if the domains of the variables are bounded.
\begin{description}
\item[Reification] \marginnote{Reification}
Linearize logical combinations of linear constraints.
Given a constraint $C(x_1, \dots, x_k)$, a new boolean variable $b \in \{0, 1\}$ is introduced to reify it.
Depending on the type of reification, $b$ behaves as follows:
\begin{descriptionlist}
\item[Integer full-reification] \marginnote{Full-reification}
$(b = 1) \iff C(x_1, \dots, x_k)$
\item[Integer half-reification] \marginnote{Half-reification}
$(b = 1) \Rightarrow C(x_1, \dots, x_k)$
\end{descriptionlist}
Given the reifications $b_i$ of some constraints $C_i$, the logical combination is modeled by adding new constraints on $b_i$.
\begin{example}
$\bigvee_i C_i$ is modeled by imposing $\sum_{i} b_i \geq 1$.
\end{example}
\begin{description}
\item[Big-M trick] \marginnote{Big-M trick}
Half-reification of bounded linear inequalities.
Given a disjunction of constraints $C_1 \vee \dots \vee C_m$, it is modeled as follows:
\begin{enumerate}
\item Introduce $m$ new boolean variables $b_1, \dots, b_m$ and impose $\sum_{i=1}^{m} b_i \geq 1$.
\item For each $C_i \equiv \sum_{j} \alpha_{i,j} x_j \leq \beta_i$, add a new constraint:
\[ \sum_{j} \alpha_{i,j} x_j - \beta_i \leq M_i \cdot (1-b_i) \]
where $M_i$ is a "big enough" constant.
In this way:
\begin{itemize}
\item $(b_i=0) \Rightarrow \sum_{j} \alpha_{i,j} x_j - \beta_i \leq M_i$ is always satisfied (as $M_i$ is big enough for any assignment of $x_j$).
\item $(b_i=1) \Rightarrow \sum_{j} \alpha_{i,j} x_j - \beta_i \leq 0$ is the original constraint.
\end{itemize}
\end{enumerate}
\begin{description}
\item[Big-M number]
Constant $M$ for the constraints.
Assuming that each variable is bounded ($x_j \in \{l_j, \dots, u_j\}$),
the constant big-M for the constraint $C_i$ can be defined as:
\[ M_i = -\beta_i + \sum_{j} \max \{ (\alpha_{i,j} l_j), (\alpha_{i,j} u_j) \} \]
\end{description}
\begin{example}
Given the variables $x \in \{0, \dots, 30\}$, $y \in \{ -5, \dots, -2 \}$, $z \in \{ -6, \dots, 7 \}$ and the constraint:
\[ (5x \leq 18) \vee (-y +2z \leq 3) \]
Its linearization is done by adding two boolean variables $b_1$, $b_2$ and the constraints:
\begin{itemize}
\item $b_1 + b_2 \geq 1$
\item $5x - 18 \leq (\max\{ 5 \cdot 0, 5 \cdot 30 \} - 18)(1 - b_1)$
\item $-y + 2z - 3 \leq (\max\{ (-1)(-5), (-1)(-2) \} + \max\{ 2 \cdot (-6), 2 \cdot 7 \} - 3)(1 - b_2)$
\end{itemize}
\end{example}
\end{description}
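A minimal sketch computing the big-M constants of the example above:
\begin{verbatim}
def big_m(alpha, beta, bounds):
    """M = -beta + sum_j max(alpha_j * l_j, alpha_j * u_j), so that
    alpha.x - beta <= M holds for every x within the bounds."""
    return -beta + sum(max(a * l, a * u)
                       for a, (l, u) in zip(alpha, bounds))

x, y, z = (0, 30), (-5, -2), (-6, 7)
print(big_m([5], 18, [x]))         # 5x <= 18: M_1 = 150 - 18 = 132
print(big_m([-1, 2], 3, [y, z]))   # -y + 2z <= 3: M_2 = 5 + 14 - 3 = 16
\end{verbatim}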
\item[Min/max constraints] \marginnote{Min/max constraints}
Given the variables $x_1, \dots, x_k$ such that $x_i \in \{ l_i, \dots, u_i \}$,
$\min$/$\max$ constraints of form:
\[ y = [\cdot] \{ x_1, \dots, x_k \} \]
are modeled as follows:
\begin{descriptionlist}
\item[min]
Let $l_{\min} = \min\{ l_1, \dots, l_k \}$.
Add $k$ new boolean variables $b_1, \dots, b_k$ and impose:
\[ \sum_{i=1}^{k} b_i = 1 \,\land\, \bigwedge_{i=1}^{k} \Big( \big( y \leq x_i \big) \land \big( y \geq x_i - (u_i - l_{\min})(1 - b_i) \big) \Big) \]
In this way, $(b_i = 1) \Rightarrow (y = x_i = \min\{ x_1, \dots, x_k \})$.
\item[max]
Let $u_{\max} = \max\{ u_1, \dots, u_k \}$.
Add $k$ new boolean variables $b_1, \dots, b_k$ and impose:
\[ \sum_{i=1}^{k} b_i = 1 \,\land\, \bigwedge_{i=1}^{k} \Big( \big( y \geq x_i \big) \land \big( y \leq x_i + (u_{\max}-l_i)(1 - b_i) \big) \Big) \]
In this way, $(b_i = 1) \Rightarrow (y = x_i = \max\{ x_1, \dots, x_k \})$.
\end{descriptionlist}
\begin{remark}
This approach can also be applied to $y = \vert x \vert$, $y \neq x$, $y = kx$.
\end{remark}
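The $\min$ encoding can be validated by brute force on small integer domains: for every assignment of the $x_i$, the values of $y$ admitted by the constraints must be exactly $\{\min_i x_i\}$. A sketch (the domains are illustrative):
\begin{verbatim}
from itertools import product

def admitted_ys(xs, bounds):
    """y values satisfying the min-encoding for a fixed assignment xs,
    trying each choice of the indicator b (exactly one b_i = 1)."""
    l_min = min(l for l, _ in bounds)
    u_max = max(u for _, u in bounds)
    ys = set()
    for pick in range(len(xs)):             # b_pick = 1, others 0
        for y in range(l_min, u_max + 1):
            ok = all(y <= x for x in xs)    # y <= x_i for every i
            ok &= all(y >= x - (u - l_min) * (i != pick)
                      for i, (x, (_, u)) in enumerate(zip(xs, bounds)))
            if ok:
                ys.add(y)
    return ys

bounds = [(0, 3), (1, 4)]
for xs in product(range(0, 4), range(1, 5)):
    assert admitted_ys(xs, bounds) == {min(xs)}
print("min encoding verified on all assignments")
\end{verbatim}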
\item[Unary encoding] \marginnote{Unary encoding}
Encoding of the domain of a variable.
Given a variable $x$ with domain $\mathcal{D}(x)$,
its unary encoding introduces $\vert \mathcal{D}(x) \vert$ new binary variables $b_k^x$ and imposes:
\[ \sum_{k \in \mathcal{D}(x)} b_k^x = 1 \land \sum_{k \in \mathcal{D}(x)} k \cdot b_k^x = x \]
In this way: $b_k^x = 1 \iff x = k$.
\begin{remark}
This encoding tightens the search space of the linear relaxation of the problem and encodes global constraints more naturally.
On the other hand, it might introduce lots of new binary variables.
\end{remark}
\begin{example}[\texttt{all\_different}]
The encoding of $\texttt{all\_different}(x_1, \dots, x_n)$ is done as follows:
\begin{itemize}
\item Encode each variable through unary encoding.
\item For $j \in \bigcup_{1 \leq h < k \leq n} (\mathcal{D}(x_h) \cap \mathcal{D}(x_k))$ add the constraint:
\[ \sum_{i=1}^{n} \alpha_{i,j} b_j^{x_i} \leq 1 \]
where $\alpha_{i,j} = \begin{cases}
1 & \text{if $j \in \mathcal{D}(x_i)$} \\
0 & \text{otherwise}
\end{cases}$
\end{itemize}
For instance, consider the variables $x \in \{ 2, \dots, 11 \}$, $y \in \{ -5, \dots, 4 \}$, $z \in \{ 3, \dots, 5 \}$
constrained with $\texttt{all\_different}(x, y, z)$.
We encode them using unary encoding and constrain $b_j$ for
$j \in (\{ 2, \dots, 11 \} \cap \{ -5, \dots, 4 \}) \cup (\{ 2, \dots, 11 \} \cap \{ 3, \dots, 5 \}) \cup (\{ -5, \dots, 4 \} \cap \{ 3, \dots, 5 \}) = \{ 2, \dots, 5 \}$:
\begin{center}
\begin{tabular}{ccccccc}
$1 \cdot b_2^x$ & $+$ & $1 \cdot b_2^y$ & $+$ & \color{lightgray}$0 \cdot b_2^z$ & $\leq$ & $1$ \\
$1 \cdot b_3^x$ & $+$ & $1 \cdot b_3^y$ & $+$ & $1 \cdot b_3^z$ & $\leq$ & $1$ \\
$1 \cdot b_4^x$ & $+$ & $1 \cdot b_4^y$ & $+$ & $1 \cdot b_4^z$ & $\leq$ & $1$ \\
$1 \cdot b_5^x$ & $+$ & \color{lightgray}$0 \cdot b_5^y$ & $+$ & $1 \cdot b_5^z$ & $\leq$ & $1$ \\
\end{tabular}
\end{center}
\end{example}
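A minimal sketch emitting the mutual-exclusion rows of this encoding for arbitrary domains; on the domains of the example, it reproduces the table above:
\begin{verbatim}
from itertools import combinations

def all_different_rows(domains):
    """For each value j shared by at least two domains, list the
    variables whose unary bits must satisfy sum_i b_j^{x_i} <= 1."""
    shared = set()
    for d1, d2 in combinations(domains.values(), 2):
        shared |= set(d1) & set(d2)
    return {j: [v for v, d in domains.items() if j in d]
            for j in sorted(shared)}

domains = {'x': range(2, 12), 'y': range(-5, 5), 'z': range(3, 6)}
for j, vs in all_different_rows(domains).items():   # j in {2, 3, 4, 5}
    print("sum of b_%d over %s <= 1" % (j, vs))
\end{verbatim}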
\begin{example}[Array]
$z = [x_1, \dots, x_n][y]$ can be encoded as $z = \sum_{i=1}^{n} b_i^y x_i$.
\end{example}
\begin{example}[Bounded non-linearity]
$z = xy$ with $y \in \{ l_y, \dots, u_y \}$ can be encoded as $z = [xl_y, \dots, xu_y][y-l_y+1]$
\end{example}
\end{description}