man/association-score-functions.Rd - KorAP/RKorAPClient - Gitiles

 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/association-scores.R
 \name{association-score-functions}
 \alias{association-score-functions}
 \alias{defaultAssociationScoreFunctions}
 \alias{pmi}
 \alias{mi2}
 \alias{mi3}
 \alias{logDice}
 \alias{ll}
 \title{Association score functions}
 \usage{
 defaultAssociationScoreFunctions()

 pmi(O1, O2, O, N, E, window_size)

 mi2(O1, O2, O, N, E, window_size)

 mi3(O1, O2, O, N, E, window_size)

 logDice(O1, O2, O, N, E, window_size)

 ll(O1, O2, O, N, E, window_size)
 }
 \arguments{
 \item{O1}{observed absolute frequency of node}

 \item{O2}{observed absolute frequency of collocate}

 \item{O}{observed absolute frequency of collocation}

 \item{N}{corpus size}

 \item{E}{expected absolute frequency of collocation (already adjusted to window size)}

 \item{window_size}{total window size around node (left neighbour count + right neighbour count)}
 }
 \value{
 association score
 }
 \description{
 Functions to calculate different collocation association scores between
 a node (target word) and words in a window around it.
 The functions are primarily used by \code{\link[=collocationScoreQuery]{collocationScoreQuery()}}.

 \strong{pmi}: pointwise mutual information

 \strong{mi2}: pointwise mutual information squared (Daille 1994), also referred to as mutual dependency
 (Thanopoulos et al. 2002)

 \strong{mi3}: pointwise mutual information cubed (Daille 1994), also referred to as log-frequency biased mutual dependency
 (Thanopoulos et al. 2002)

 \strong{logDice}: log-Dice coefficient, a heuristic measure that is popular in lexicography (Rychlý 2008)

 \strong{ll}: log-likelihood (Dunning 1993) using Stefan Evert's (2004) simplified implementation
 }
 \examples{
 \dontrun{

 new("KorAPConnection", verbose = TRUE) \%>\%
 collocationScoreQuery("Perlen", c("verziertes", "Säue"),
   scoreFunctions = append(defaultAssociationScoreFunctions(),
      list(localMI = function(O1, O2, O, N, E, window_size) {
                        O * log2(O/E)
                     })))
 }

 }
 \references{
 Daille, B. (1994): Approche mixte pour l’extraction automatique de terminologie: statistiques lexicales et filtres linguistiques. PhD thesis, Université Paris 7.

 Thanopoulos, A., Fakotakis, N., Kokkinakis, G. (2002): Comparative evaluation of collocation extraction metrics. In: Proc. of LREC 2002: 620–625.

 Rychlý, Pavel (2008):  A lexicographer-friendly association score. In Proceedings of Recent Advances in Slavonic Natural Language Processing, RASLAN, 6–9. \url{https://www.fi.muni.cz/usr/sojka/download/raslan2008/13.pdf}.

 Dunning, T. (1993): Accurate methods for the statistics of surprise and coincidence. Comput. Linguist. 19, 1 (March 1993), 61–74.

 Evert, Stefan (2004): The Statistics of Word Cooccurrences: Word Pairs and Collocations. PhD dissertation, IMS, University of Stuttgart. Published in 2005, URN urn:nbn:de:bsz:93-opus-23714.
 Free PDF available from \url{https://purl.org/stefan.evert/PUB/Evert2004phd.pdf}.
 }
 \seealso{
 Other collocation analysis functions:
 \code{\link{collocationAnalysis,KorAPConnection-method}},
 \code{\link{collocationScoreQuery,KorAPConnection-method}},
 \code{\link{synsemanticStopwords}()}
 }
 \concept{association-score-functions}
 \concept{collocation analysis functions}
	% Generated by roxygen2: do not edit by hand
	% Please edit documentation in R/association-scores.R
	\name{association-score-functions}
	\alias{association-score-functions}
	\alias{defaultAssociationScoreFunctions}
	\alias{pmi}
	\alias{mi2}
	\alias{mi3}
	\alias{logDice}
	\alias{ll}
	\title{Association score functions}
	\usage{
	defaultAssociationScoreFunctions()

	pmi(O1, O2, O, N, E, window_size)

	mi2(O1, O2, O, N, E, window_size)

	mi3(O1, O2, O, N, E, window_size)

	logDice(O1, O2, O, N, E, window_size)

	ll(O1, O2, O, N, E, window_size)
	}
	\arguments{
	\item{O1}{observed absolute frequency of node}

	\item{O2}{observed absolute frequency of collocate}

	\item{O}{observed absolute frequency of collocation}

	\item{N}{corpus size}

	\item{E}{expected absolute frequency of collocation (already adjusted to window size)}

	\item{window_size}{total window size around node (left neighbour count + right neighbour count)}
	}
	\value{
	association score
	}
	\description{
	Functions to calculate different collocation association scores between
	a node (target word) and words in a window around it.
	The functions are primarily used by \code{\link[=collocationScoreQuery]{collocationScoreQuery()}}.

	\strong{pmi}: pointwise mutual information

	\strong{mi2}: pointwise mutual information squared (Daille 1994), also referred to as mutual dependency
	(Thanopoulos et al. 2002)

	\strong{mi3}: pointwise mutual information cubed (Daille 1994), also referred to as log-frequency biased mutual dependency
	(Thanopoulos et al. 2002)

	\strong{logDice}: log-Dice coefficient, a heuristic measure that is popular in lexicography (Rychlý 2008)

	\strong{ll}: log-likelihood (Dunning 1993) using Stefan Evert's (2004) simplified implementation
	}
	\examples{
	\dontrun{

	new("KorAPConnection", verbose = TRUE) \%>\%
	collocationScoreQuery("Perlen", c("verziertes", "Säue"),
	scoreFunctions = append(defaultAssociationScoreFunctions(),
	list(localMI = function(O1, O2, O, N, E, window_size) {
	O * log2(O/E)
	})))
	}

	}
	\references{
	Daille, B. (1994): Approche mixte pour l’extraction automatique de terminologie: statistiques lexicales et filtres linguistiques. PhD thesis, Université Paris 7.

	Thanopoulos, A., Fakotakis, N., Kokkinakis, G. (2002): Comparative evaluation of collocation extraction metrics. In: Proc. of LREC 2002: 620–625.

	Rychlý, Pavel (2008): A lexicographer-friendly association score. In Proceedings of Recent Advances in Slavonic Natural Language Processing, RASLAN, 6–9. \url{https://www.fi.muni.cz/usr/sojka/download/raslan2008/13.pdf}.

	Dunning, T. (1993): Accurate methods for the statistics of surprise and coincidence. Comput. Linguist. 19, 1 (March 1993), 61–74.

	Evert, Stefan (2004): The Statistics of Word Cooccurrences: Word Pairs and Collocations. PhD dissertation, IMS, University of Stuttgart. Published in 2005, URN urn:nbn:de:bsz:93-opus-23714.
	Free PDF available from \url{https://purl.org/stefan.evert/PUB/Evert2004phd.pdf}.
	}
	\seealso{
	Other collocation analysis functions:
	\code{\link{collocationAnalysis,KorAPConnection-method}},
	\code{\link{collocationScoreQuery,KorAPConnection-method}},
	\code{\link{synsemanticStopwords}()}
	}
	\concept{association-score-functions}
	\concept{collocation analysis functions}