From b40908d7251933df5dc32553822d83066f41b756 Mon Sep 17 00:00:00 2001
From: NotXia <35894453+NotXia@users.noreply.github.com>
Date: Fri, 18 Oct 2024 20:25:22 +0200
Subject: [PATCH] Add NLP vector semantics

---
 src/year2/natural-language-processing/nlp.tex |   1 +
 .../sections/_classification.tex              |   2 +-
 .../sections/_semantics.tex                   | 258 ++++++++++++++++++
 3 files changed, 260 insertions(+), 1 deletion(-)
 create mode 100644 src/year2/natural-language-processing/sections/_semantics.tex

diff --git a/src/year2/natural-language-processing/nlp.tex b/src/year2/natural-language-processing/nlp.tex
index 674312e..982b0ca 100644
--- a/src/year2/natural-language-processing/nlp.tex
+++ b/src/year2/natural-language-processing/nlp.tex
@@ -11,5 +11,6 @@
     \input{./sections/_basic_text.tex}
     \input{./sections/_language_models.tex}
     \input{./sections/_classification.tex}
+    \input{./sections/_semantics.tex}
 \end{document}
\ No newline at end of file
diff --git a/src/year2/natural-language-processing/sections/_classification.tex b/src/year2/natural-language-processing/sections/_classification.tex
index d3410af..df96ee3 100644
--- a/src/year2/natural-language-processing/sections/_classification.tex
+++ b/src/year2/natural-language-processing/sections/_classification.tex
@@ -401,7 +401,7 @@ Logistic regression has the following properties:
 
 
-\section{Affective meaning}
+\section{Affective meaning} \label{sec:affective_meaning}
 
 The affective meaning of a text corpus can vary depending on:
 \begin{descriptionlist}
diff --git a/src/year2/natural-language-processing/sections/_semantics.tex b/src/year2/natural-language-processing/sections/_semantics.tex
new file mode 100644
index 0000000..694f74d
--- /dev/null
+++ b/src/year2/natural-language-processing/sections/_semantics.tex
@@ -0,0 +1,258 @@
+\chapter{Semantics}
+
+
+\section{Traditional semantic representation}
+
+\begin{description}
+    \item[Lemma/citation form] \marginnote{Lemma}
+        Canonical (dictionary) form of a word.
+
+        \begin{example}
+            The word \texttt{pipe}.
+        \end{example}
+
+    \item[Word sense] \marginnote{Word sense}
+        Discrete representation of one aspect of the meaning of a word.
+
+        \begin{description}
+            \item[Polysemous lemma] Lemma with multiple senses.
+                \begin{example}
+                    Possible senses of the word \texttt{pipe} are: the musical instrument, the conduit used to transport material, \dots.
+                \end{example}
+        \end{description}
+
+    \item[Supersense] \marginnote{Supersense}
+        Broad semantic category that groups word senses.
+
+    \item[Word sense disambiguation (WSD)] \marginnote{Word sense disambiguation (WSD)}
+        Task of determining which sense of a word is being used in a given context.
+\end{description}
+
+
+\subsection{Sense relations}
+
+\begin{description}
+    \item[Synonym] \marginnote{Synonym}
+        Relation of (near) identity between two senses of two different words (i.e., same propositional meaning).
+
+        \begin{remark}[Principle of contrast]
+            A difference in linguistic form is probably due to some, possibly subtle, difference in meaning.
+        \end{remark}
+
+    \item[Antonym] \marginnote{Antonym}
+        Relation of opposition, with respect to one feature of meaning, between two senses. More specifically, antonyms can be:
+        \begin{itemize}
+            \item An opposition between two ends of a scale (e.g., \texttt{long}/\texttt{short}).
+            \item A reversive (e.g., \texttt{up}/\texttt{down}).
+        \end{itemize}
+
+    \item[Subordination] \marginnote{Subordination}
+        Specificity (i.e., is-a) relation between two senses.
+
+        \begin{example}
+            \texttt{car} is a subordinate of \texttt{vehicle}.
+        \end{example}
+
+    \item[Superordination] \marginnote{Superordination}
+        Generalization relation between two senses.
+
+        \begin{example}
+            \texttt{furniture} is a superordinate of \texttt{lamp}.
+        \end{example}
+
+    \item[Meronym] \marginnote{Meronym}
+        Part-of relation between two senses.
+\end{description}
+
+\begin{remark}
+    Relations among word senses can be seen as a graph.
+\end{remark}
+
+
+\subsection{Common ontologies}
+
+\begin{description}
+    \item[WordNet] \marginnote{WordNet}
+        Database of semantic relations of English words (a query example is sketched below).
+
+    \item[BabelNet] \marginnote{BabelNet}
+        Multilingual database of semantic relations.
+\end{description}
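+
+For instance, WordNet can be queried programmatically. The following is a minimal sketch using NLTK's WordNet interface (assuming the \texttt{nltk} package is installed and its WordNet data has been downloaded); it lists the senses of \texttt{pipe} and the direct superordinates of one sense of \texttt{dog}:
+\begin{verbatim}
+from nltk.corpus import wordnet as wn
+
+# List the senses (synsets) of the lemma "pipe".
+for synset in wn.synsets("pipe"):
+    print(synset.name(), "-", synset.definition())
+
+# Superordination: direct hypernyms of the first sense of "dog".
+print(wn.synset("dog.n.01").hypernyms())
+\end{verbatim}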
+
+
+\subsection{Word relations}
+
+\begin{description}
+    \item[Word similarity] \marginnote{Word similarity}
+        Measure of how similar the meanings of two words are (i.e., a relation between words rather than between senses).
+
+        \begin{remark}
+            Working with words is easier than working with senses.
+        \end{remark}
+
+        \begin{example}
+            \texttt{cat} and \texttt{dog} are not synonyms but have a similar meaning (i.e., both are pets).
+        \end{example}
+
+    \item[Word relatedness] \marginnote{Word relatedness}
+        Measure of how strongly two words are associated through the contexts in which they occur.
+
+        \begin{example}
+            \texttt{car}/\texttt{bike} are similar, while \texttt{car}/\texttt{fuel} are related but not similar.
+        \end{example}
+
+        \begin{description}
+            \item[Semantic field] \marginnote{Semantic field}
+                Words that cover a particular domain and have structured relations with each other.
+
+                \begin{example}
+                    In the context of a hospital, \texttt{surgeon}, \texttt{scalpel}, \texttt{nurse}, \texttt{anesthetic}, and \texttt{hospital} belong to the same semantic field.
+                \end{example}
+
+                \begin{description}
+                    \item[Topic model] \marginnote{Topic model}
+                        Unsupervised method to determine the topics of a document based on how words are used in their context.
+                \end{description}
+
+            \item[Semantic frames] \marginnote{Semantic frames}
+                Words that describe the perspectives or participants of a particular type of event.
+
+                \begin{example}
+                    In a commercial transaction, a \texttt{buyer} trades \texttt{money} with a \texttt{seller} in return for some \texttt{good or service}.
+                \end{example}
+
+                \begin{description}
+                    \item[Semantic role labeling (SRL)] \marginnote{Semantic role labeling (SRL)}
+                        Task of determining the frame evoked by a sentence and the semantic role of each participant.
+                \end{description}
+        \end{description}
+\end{description}
+
+
+\section{Vector semantics}
+
+\begin{description}
+    \item[Connotation] \marginnote{Connotation}
+        Affective meaning of a word.
+
+        \begin{remark}
+            As described in \Cref{sec:affective_meaning}, emotions can be represented in a vector space. Therefore, word meanings can also be represented as vectors.
+        \end{remark}
+
+    % \item[Vector semantics] \marginnote{Vector semantics}
+    %     Define a word by its environment or distribution in language use.
+
+    \item[Vector semantics intuitions]
+        Vector semantics rests on two intuitions:
+        \begin{descriptionlist}
+            \item[Distributionalism intuition] \marginnote{Distributionalism intuition}
+                The meaning of a word is defined by its environment or distribution (i.e., its neighboring words). Words with a similar distribution are likely to have a similar meaning.
+
+            \item[Vector intuition] \marginnote{Vector intuition}
+                Define the meaning of a word as a point in an $N$-dimensional space.
+        \end{descriptionlist}
+
+    \item[Embedding] \marginnote{Embedding}
+        Vector representation of a word where words with a similar meaning are nearby in the vector space.
+
+        Two common embedding models are:
+        \begin{descriptionlist}
+            \item[TF-IDF] \marginnote{TF-IDF}
+                Sparse embedding based on the counts of co-occurring words.
+
+            \item[Word2vec] \marginnote{Word2vec}
+                Dense embedding learned by training a classifier to distinguish nearby and far-away words.
+        \end{descriptionlist}
+\end{description}
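+
+\begin{example}
+    As a toy illustration of the two intuitions (with hypothetical coordinates), consider a 2-dimensional embedding space in which $\texttt{cat} = (0.9, 0.2)$, $\texttt{dog} = (0.8, 0.3)$, and $\texttt{car} = (0.1, 0.9)$.
+    \texttt{cat} and \texttt{dog} occur in similar contexts (e.g., ``feed the \dots''), so their vectors end up close to each other, while \texttt{car} lies in a different region of the space.
+\end{example}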
+
+
+\subsection{Co-occurrence (sparse) embeddings}
+
+\begin{description}
+    \item[Co-occurrence matrix] \marginnote{Co-occurrence matrix}
+        Matrix representing how frequently each word occurs together with the others.
+
+        Different design choices can be considered:
+        \begin{itemize}
+            \item Matrix design.
+            \item Reweighting.
+            \item Dimensionality reduction.
+            \item Vector comparison metric.
+        \end{itemize}
+
+    \item[Matrix design]
+        Shape and content of the co-occurrence matrix.
+
+        \begin{description}
+            \item[Term-document matrix] \marginnote{Term-document matrix}
+                Given a vocabulary $V$ and a set of documents $D$, a term-document matrix has shape $|V| \times |D|$ and counts the occurrences of each word in each document.
+
+                \begin{remark}
+                    This representation allows encoding both documents (i.e., by considering the matrix column-wise) and words (i.e., by considering the matrix row-wise).
+                \end{remark}
+
+                \begin{example}
+                    An excerpt of a possible term-document matrix for Shakespeare's plays is:
+                    \begin{table}[H]
+                        \centering
+                        \footnotesize
+                        \begin{tabular}{ccccc}
+                            \toprule
+                            & \textit{As You Like It} & \textit{Twelfth Night} & \textit{Julius Caesar} & \textit{Henry V} \\
+                            \midrule
+                            \texttt{battle} & 1 & 0 & 7 & 13 \\
+                            \texttt{good} & 114 & 80 & 62 & 89 \\
+                            \texttt{fool} & 36 & 58 & 1 & 4 \\
+                            \texttt{wit} & 20 & 15 & 2 & 3 \\
+                            \bottomrule
+                        \end{tabular}
+                    \end{table}
+                    The representation of the document \textit{As You Like It} is $[1, 114, 36, 20]$, while the representation of the word \texttt{battle} is $[1, 0, 7, 13]$.
+                \end{example}
+
+            \item[Word-word matrix] \marginnote{Word-word matrix}
+                Given a vocabulary $V$, a word-word matrix has shape $|V| \times |V|$. Rows represent target words and columns represent context words.
+                Given a training corpus, the word at each row is represented by counting its co-occurrences with the other words within a context window of $N$ words (a small construction example is sketched below).
+
+                \begin{remark}
+                    A larger context window captures more semantic information. A smaller window captures more syntactic information.
+                \end{remark}
+
+                \begin{example}
+                    A possible word-word matrix is:
+                    \begin{table}[H]
+                        \centering
+                        \footnotesize
+                        \begin{tabular}{ccccccccc}
+                            \toprule
+                            & \texttt{aardvark} & \dots & \texttt{computer} & \texttt{data} & \texttt{result} & \texttt{pie} & \texttt{sugar} & \dots \\
+                            \midrule
+                            \texttt{cherry} & 0 & \dots & 2 & 8 & 9 & 442 & 25 & \dots \\
+                            \texttt{strawberry} & 0 & \dots & 0 & 0 & 1 & 60 & 19 & \dots \\
+                            \texttt{digital} & 0 & \dots & 1670 & 1683 & 85 & 5 & 4 & \dots \\
+                            \texttt{information} & 0 & \dots & 3325 & 3982 & 378 & 5 & 13 & \dots \\
+                            \bottomrule
+                        \end{tabular}
+                    \end{table}
+                \end{example}
+        \end{description}
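+
+        The following is a minimal sketch (in Python, on a hypothetical toy corpus) of how such window-based co-occurrence counts can be collected:
+        \begin{verbatim}
+from collections import Counter
+
+def cooccurrence_counts(corpus, window=2):
+    # Count how often each context word appears within
+    # `window` positions of each target word.
+    counts = {}
+    for sentence in corpus:
+        tokens = sentence.lower().split()
+        for i, target in enumerate(tokens):
+            context = tokens[max(0, i - window):i] + tokens[i + 1:i + 1 + window]
+            counts.setdefault(target, Counter()).update(context)
+    return counts
+
+corpus = ["I like deep learning", "I like NLP", "I enjoy flying"]
+print(cooccurrence_counts(corpus)["i"])
+# Counter({'like': 2, 'deep': 1, 'nlp': 1, 'enjoy': 1, 'flying': 1})
+\end{verbatim}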
+
+    \item[Reweighting]
+        Rescale the values of the components of the vectors (e.g., conversion into probabilities, length normalization, TF-IDF, \dots).
+
+        \begin{remark}[Frequency paradox]
+            Raw frequencies are not an ideal representation for words as they are skewed and not discriminative. Moreover, overly frequent words (e.g., stop words) provide little contextual information.
+        \end{remark}
+
+    \item[Dimensionality reduction]
+        Reduce the dimensionality of the embeddings.
+
+    \item[Vector comparison]
+        Metric to determine the similarity (or distance) of two embeddings.
+
+        \begin{description}
+            \item[Dot product] $\vec{w} \cdot \vec{v} = \sum_{i=1}^{n} w_i v_i$.
+            \item[Length] Compare the lengths $|\vec{v}| = \sqrt{\sum_{i=1}^{n} v_i^2}$ of the vectors.
+            \item[Cosine similarity] $\frac{\vec{w} \cdot \vec{v}}{|\vec{w}| \, |\vec{v}|}$ (i.e., the dot product normalized by the lengths of the vectors, so that frequent words do not dominate).
+        \end{description}
+\end{description}
\ No newline at end of file
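+
+The effect of the cosine normalization can be checked directly on the word-word matrix example above:
+\begin{example}
+    Restricting the word-word matrix to the dimensions \texttt{computer}, \texttt{data}, and \texttt{pie} for readability, $\texttt{cherry} = [2, 8, 442]$, $\texttt{digital} = [1670, 1683, 5]$, and $\texttt{information} = [3325, 3982, 5]$. The cosine similarities are:
+    \[
+        \cos(\texttt{cherry}, \texttt{information}) = \frac{2 \cdot 3325 + 8 \cdot 3982 + 442 \cdot 5}{\sqrt{2^2 + 8^2 + 442^2} \, \sqrt{3325^2 + 3982^2 + 5^2}} \approx 0.018
+    \]
+    \[
+        \cos(\texttt{digital}, \texttt{information}) = \frac{1670 \cdot 3325 + 1683 \cdot 3982 + 5 \cdot 5}{\sqrt{1670^2 + 1683^2 + 5^2} \, \sqrt{3325^2 + 3982^2 + 5^2}} \approx 0.996
+    \]
+    Hence, \texttt{information} is much closer to \texttt{digital} than to \texttt{cherry} in this space.
+\end{example}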