Introduce Plusampersand compounds (e.g. "2G+-Regel")
Change-Id: I838185da6defb88f79cc12ebcd28a673062fbfad
diff --git a/matrix_test.go b/matrix_test.go
index 14b2fbc..b0d2698 100644
--- a/matrix_test.go
+++ b/matrix_test.go
@@ -769,6 +769,17 @@
assert.Equal(tokens[11], ".")
assert.Equal(12, len(tokens))
+ // Plusampersand compounds: an entry such as "2G+" followed by a dash and a word stays one token
+ tokens = ttokenize(mat, w, "Die 2G+-Regel soll weitere Covid-19-Erkrankungen reduzieren.")
+ assert.Equal(tokens[0], "Die")
+ assert.Equal(tokens[1], "2G+-Regel")
+ assert.Equal(tokens[2], "soll")
+ assert.Equal(tokens[3], "weitere")
+ assert.Equal(tokens[4], "Covid-19-Erkrankungen")
+ assert.Equal(tokens[5], "reduzieren")
+ assert.Equal(tokens[6], ".")
+ assert.Equal(7, len(tokens))
+
/*
@Test
public void englishTokenizerSeparatesEnglishContractionsAndClitics () {
diff --git a/src/tokenizer.xfst b/src/tokenizer.xfst
index 6b2331e..b59ee8a 100644
--- a/src/tokenizer.xfst
+++ b/src/tokenizer.xfst
@@ -114,7 +114,7 @@
! Abbreviations and Initials
define Abbr [ @txt"txt/abbrv.txt" | Letter ] %.;
-define Plusampersand @txt"txt/plusampersand.txt";
+define Plusampersand @txt"txt/plusampersand.txt" (Dash Word);
! A solution to the "(author): problem" may be to add ) at the end of any
! string as a possible ending
diff --git a/src/txt/plusampersand.txt b/src/txt/plusampersand.txt
index 7921ca1..6cbeb97 100644
--- a/src/txt/plusampersand.txt
+++ b/src/txt/plusampersand.txt
@@ -1,10 +1,7 @@
-&
-'
->
+2G+
+3G+
&K
-<
&M
-"
&RQ
+Ale
+ALe