Blame - man/collocationScores.Rd - IDS-Mannheim/rderekovecs

blob: 4221d250432aac790c58687098a633daf63455d1 [file] [log] [blame]

Marc Kupietz	bb4f54c	2023-10-19 21:22:44 +0200	[diff] [blame]	1	% Generated by roxygen2: do not edit by hand
				2	% Please edit documentation in R/derekovecs.R
				3	\name{collocationScores}
				4	\alias{collocationScores}
Marc Kupietz	c6a66ee	2023-10-23 13:18:48 +0200	[diff] [blame^]	5	\title{Get collocation scores}
Marc Kupietz	bb4f54c	2023-10-19 21:22:44 +0200	[diff] [blame]	6	\usage{
				7	collocationScores(w, c, ...)
				8	}
				9	\arguments{
				10	\item{w}{The target word/node.}
				11
				12	\item{c}{The collocate.}
				13
				14	\item{...}{Additional parameters to pass to the API.}
				15	}
				16	\value{
				17	A one-row data frame with the collocate and its association scores.
Marc Kupietz	c6a66ee	2023-10-23 13:18:48 +0200	[diff] [blame^]	18	\describe{
				19	\item{word}{collocate}
				20	\item{f2}{abs. frequency of collocate}
				21	\item{f}{abs. frequency of collocation}
				22	\item{npmi}{normalized pmi (Bouma 2009)}
				23	\item{pmi}{pointwise mutual information}
				24	\item{dice}{dice score}
				25	\item{ld}{log-dice score (Rychlý 2008) for whole window}
				26	\item{lfmd}{log-frequency biased mutual dependency ≙ pmi³ (Daille 1994; Thanopoulos et al. 2002)}
				27	\item{llr}{log-likelihood (Dunning 1993; Evert 2004)}
				28	\item{ln_count}{frequency of collocate as left neighbour of node}
				29	\item{ln_pmi}{pmi as left neighbour}
				30	\item{md}{mutual dependency ≙ pmi² (Daille 1994; Thanopoulos et al. 2002)}
				31	\item{rn_count}{frequency of collocate as right neighbour of node}
				32	\item{rn_pmi}{pmi as right neighbour}
				33	\item{ldaf}{log-dice score for auto-focus window}
				34	\item{win}{binary encoded positions at which the collocate appears at least once, e.g.: 1023 = 2^10-1 ≙ 11111 node 11111}
				35	\item{afwin}{binary encoded auto-focus window (see Perkuhn et al. 2012: E8-15), e.g. 64 = 2^6 ≙ 00010 node 00000 (Aus gutem Grund)}
				36	}
Marc Kupietz	bb4f54c	2023-10-19 21:22:44 +0200	[diff] [blame]	37	}
				38	\description{
				39	Calculate the association scores between a node (target word) and words in a window around it.
				40	}
Marc Kupietz	c6a66ee	2023-10-23 13:18:48 +0200	[diff] [blame^]	41	\references{
				42	Daille, B. (1994): Approche mixte pour l’extraction automatique de terminologie: statistiques lexicales et filtres linguistiques. PhD thesis, Université Paris 7.
				43
				44	Dunning, T. (1993): Accurate methods for the statistics of surprise and coincidence. Comput. Linguist. 19, 1 (March 1993), 61–74.
				45
				46	Evert, Stefan (2004): The Statistics of Word Cooccurrences: Word Pairs and Collocations. PhD dissertation, IMS, University of Stuttgart. Published in 2005, URN urn:nbn:de:bsz:93-opus-23714.
				47	Free PDF available from \url{https://purl.org/stefan.evert/PUB/Evert2004phd.pdf}
				48
				49	Thanopoulos, A., Fakotakis, N., Kokkinakis, G. (2002): Comparative evaluation of collocation extraction metrics. In: Proc. of LREC 2002: 620–625.
				50	}