added features.tsv Change-Id: Ie3631a7022c7d6e2bdc53ec954ad7baa07a7cb2a

commit: 8b7732d40ec5c12189659e9202a8c479a7a6fefb [log] [tgz]
author: PeterFankhauserIDS <fankhauser@ids-mannheim.de> Mon May 17 15:17:21 2021 +0200
committer: PeterFankhauserIDS <fankhauser@ids-mannheim.de> Mon May 17 15:17:21 2021 +0200
tree: 92c34ebe9d37489982443df9274ce36644259763
parent: 54c18eed9f14388035c3e948d634731bc925a12a [diff] [blame]
diff --git a/data/features.tsv b/data/features.tsv
new file mode 100644
index 0000000..94877c7
--- /dev/null
+++ b/data/features.tsv

@@ -0,0 +1,39 @@
+innames	oldnames	newnames	explanation	getrid
+rankDiff	SY_RD	SY_R_D	rank difference: SY_W_R1-SY_C1_R	0
+af	SY_C_LDAF	SY_C1_LDAF	logdice with autofocus	0
+dice	SY_C_DICE	SY_C1_DICE	dice	1
+ld	SY_C_LD	SY_C1_LD	logdice	0
+lfmd	SY_C_MI3	SY_C1_MI3	MI^3	0
+llfmd	SY_C_LEFTN_MI	SY_C1_MI_L	MI with left neighbour	1
+llr	SY_C_LL	SY_C1_LL	loglikelihood	0
+lnpmi	SY_C_LEFTN_COUNT	SY_C1_C_L	raw count of left neighbour	1
+md	SY_C_MI2	SY_C1_MI2	MI^2	0
+npmi	SY_C_NPMI	SY_C1_NMI	normalized MI	1
+pmi	SY_C_MI	SY_C1_MI	(pointwise) mutual information, MI	0
+rlfmd	SY_C_RIGHTN_MI	SY_C1_MI_R	MI with right neighbour	1
+rnpmi	SY_C_RIGHTN_COUNT	SY_C1_C_R	raw count of right neighbour	1
+average	SY_W_AVG	SY_W_AVG	average of output activations with autofocus	0
+cprob	SY_W_CON	SY_W_CON	conorm of column normalized output activations with autofocus	0
+max	SY_W_MAX	SY_W_MAX	max of output activations	0
+overall	SY_W_NSUM1	SY_W_NSUM_AF	sum of output activations normalized by total sum over all selected columns with autofocus	0
+prob	SY_W_NSUM2	SY_W_NSUM	sum of output activations normalized by total sum over all columns	0
+w2v.rank	SY_W_RK1	SY_W_R1	rank by SY_W_CON	0
+w2v.rank1	SY_W_RK2	SY_W_R2	rank by SY_W_NSUM2	0
+classic.rank	SY_C_RK	SY_C1_R	rank by SY_C_LD	0
+NSTOPW	NSTOPW	O_NSTOPW	number of non stop words in idiom candidate	0
+CO_NUMBER	CO_NUMBER	O_C2_N	number of occurrences in pop lyrics	1
+CO_SGT	CO_SGT	O_C2_SGT	Simple Good-Turing estimate of probability in pop lyrics	0
+CO_EXPECTED	CO_EXPECTED	SY_C2_EXP	expected number of occurrences	1
+CO_PMI	CO_PMI	SY_C2_MI	(pointwise) mutual information, MI	0
+CO_MI3	CO_MI3	SY_C2_MI3	MI^3	0
+CO_LOCALMI	CO_LOCALMI	SY_C2_LMI	local mutual information	0
+CO_Z	CO_Z	SY_C2_Z	zscore	0
+CO_T	CO_T	SY_C2_T	p-value of ttest	1
+CO_LL	CO_LL	SY_C2_LL	loglikelihood	0
+CO_LOGDICE	CO_LOGDICE	SY_C2_LD	logdice	0
+CO_K	CO_K	SY_C2_K	k-value (Kita et al. 1994)	1
+CO_G	CO_G	SY_C2_G	lexical gravity (Gries/Mukherjee 2010)	1
+CO_WIN5_VEC	CO_WIN5_VEC	CO_VEC	avg. cosine similarity between words in candidate and words in +/-5 context	0
+CO_WIN5_VEC_AUTOSEM	CO_WIN5_VEC_AUTOSEM	CO_VEC_LEX	like CO_WIN5_VEC but only on lexical words	0
+CO_DEREKO	CO_DEREKO	O_DEREKO	number of words available in DeReKo	0
+CO_GRAM	CO_GRAM	O_GRAM	number of words in idiom candidate	0
\ No newline at end of file
commit	8b7732d40ec5c12189659e9202a8c479a7a6fefb	[log] [tgz]
author	PeterFankhauserIDS <fankhauser@ids-mannheim.de>	Mon May 17 15:17:21 2021 +0200
committer	PeterFankhauserIDS <fankhauser@ids-mannheim.de>	Mon May 17 15:17:21 2021 +0200
tree	92c34ebe9d37489982443df9274ce36644259763
parent	54c18eed9f14388035c3e948d634731bc925a12a [diff] [blame]