Introduce English tokenizer
Change-Id: I5b60d9a4de8db3c5730957335fc674adb4fccf0f
diff --git a/Makefile b/Makefile
index 75b86b1..e8eddc5 100644
--- a/Makefile
+++ b/Makefile
@@ -4,15 +4,24 @@
update:
go get -u ./... && go mod tidy
-buildfoma:
+buildfoma_de:
cd src && \
foma -e "source de/tokenizer.xfst" \
-e "save stack ../testdata/tokenizer.fst" -q -s && \
cd ..
+buildfoma_en:
+ cd src && \
+ foma -e "source en/tokenizer.xfst" \
+ -e "save stack ../testdata/tokenizer_en.fst" -q -s && \
+ cd ..
+
-buildmatok: buildfoma build
+buildmatok: buildfoma_de build
./bin/datok convert -i ./testdata/tokenizer.fst -o ./testdata/tokenizer.matok
+buildmatok_en: buildfoma_en build
+ ./bin/datok convert -i ./testdata/tokenizer_en.fst -o ./testdata/tokenizer_en.matok
+
-builddatok: buildfoma build
+builddatok: buildfoma_de build
./bin/datok convert -i ./testdata/tokenizer.fst -o ./testdata/tokenizer.datok -d