| % Generated by roxygen2: do not edit by hand |
| % Please edit documentation in R/association-scores.R |
| \name{association-score-functions} |
| \alias{association-score-functions} |
| \alias{defaultAssociationScoreFunctions} |
| \alias{pmi} |
| \alias{mi2} |
| \alias{mi3} |
| \alias{logDice} |
| \alias{ll} |
| \title{Association score functions} |
| \usage{ |
| defaultAssociationScoreFunctions() |
| |
| pmi(O1, O2, O, N, E, window_size) |
| |
| mi2(O1, O2, O, N, E, window_size) |
| |
| mi3(O1, O2, O, N, E, window_size) |
| |
| logDice(O1, O2, O, N, E, window_size) |
| |
| ll(O1, O2, O, N, E, window_size) |
| } |
| \arguments{ |
| \item{O1}{observed absolute frequency of node} |
| |
| \item{O2}{observed absolute frequency of collocate} |
| |
| \item{O}{observed absolute frequency of collocation} |
| |
| \item{N}{corpus size} |
| |
| \item{E}{expected absolute frequency of collocation (already adjusted to window size)} |
| |
| \item{window_size}{total window size around node (left neighbour count + right neighbour count)} |
| } |
| \value{ |
| association score |
| } |
| \description{ |
| Functions to calculate different collocation association scores between |
| a node (target word) and words in a window around it. |
| The functions are primarily used by \code{\link[=collocationScoreQuery]{collocationScoreQuery()}}. |
| |
| \strong{pmi}: pointwise mutual information |
| |
| \strong{mi2}: pointwise mutual information squared (Daille 1994), also referred to as mutual dependency |
| (Thanopoulos et al. 2002) |
| |
| \strong{mi3}: pointwise mutual information cubed (Daille 1994), also referred to as log-frequency biased mutual dependency |
| (Thanopoulos et al. 2002) |
| |
| \strong{logDice}: log-Dice coefficient, a heuristic measure that is popular in lexicography (Rychlý 2008) |
| |
| \strong{ll}: log-likelihood (Dunning 1993) using Stefan Evert's (2004) simplified implementation |
| } |
| \examples{ |
| \dontrun{ |
| |
| new("KorAPConnection", verbose = TRUE) \%>\% |
| collocationScoreQuery("Perlen", c("verziertes", "Säue"), |
| scoreFunctions = append(defaultAssociationScoreFunctions(), |
| list(localMI = function(O1, O2, O, N, E, window_size) { |
| O * log2(O/E) |
| }))) |
| } |
| |
| } |
| \references{ |
| Daille, B. (1994): Approche mixte pour l’extraction automatique de terminologie: statistiques lexicales et filtres linguistiques. PhD thesis, Université Paris 7. |
| |
| Thanopoulos, A., Fakotakis, N., Kokkinakis, G. (2002): Comparative evaluation of collocation extraction metrics. In: Proc. of LREC 2002: 620–625. |
| |
| Rychlý, Pavel (2008): A lexicographer-friendly association score. In Proceedings of Recent Advances in Slavonic Natural Language Processing, RASLAN, 6–9. \url{https://www.fi.muni.cz/usr/sojka/download/raslan2008/13.pdf}. |
| |
| Dunning, T. (1993): Accurate methods for the statistics of surprise and coincidence. Comput. Linguist. 19, 1 (March 1993), 61–74. |
| |
| Evert, Stefan (2004): The Statistics of Word Cooccurrences: Word Pairs and Collocations. PhD dissertation, IMS, University of Stuttgart. Published in 2005, URN urn:nbn:de:bsz:93-opus-23714. |
| Free PDF available from \url{https://purl.org/stefan.evert/PUB/Evert2004phd.pdf}. |
| } |
| \seealso{ |
| Other collocation analysis functions: |
| \code{\link{collocationAnalysis,KorAPConnection-method}}, |
| \code{\link{collocationScoreQuery,KorAPConnection-method}}, |
| \code{\link{synsemanticStopwords}()} |
| } |
| \concept{association-score-functions} |
| \concept{collocation analysis functions} |