Added nnsplit
Change-Id: Iee46144b8b128773bc9e3f8cff0941ed7154236d
diff --git a/Dockerfile b/Dockerfile
index bcef4e5..ac3815e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -141,6 +141,24 @@
RUN echo "Waste\n" && cat ./example.txt | waste -N --rcfile=./Waste/waste.rc
+###################
+# Install nnsplit #
+###################
+
+COPY nnsplit_bench /euralex/nnsplit_bench/
+
+RUN apt-get install -y cargo
+
+# Build, keep only the release binary, and delete the build tree within a
+# single RUN: files removed in a later layer still occupy space in the layer
+# that created them, so a separate cleanup step would not shrink the image.
+RUN (cd ./nnsplit_bench && cargo build --release) && \
+    mkdir ./nnsplit && \
+    mv ./nnsplit_bench/target/release/nnsplit_bench ./nnsplit/nnsplit_bench && \
+    rm -r ./nnsplit_bench/target
+
+RUN echo "nnsplit\n" && ./nnsplit/nnsplit_bench example.txt
+
#################
# Install Datok #
#################
diff --git a/nnsplit_bench/Cargo.toml b/nnsplit_bench/Cargo.toml
new file mode 100644
index 0000000..96b2a05
--- /dev/null
+++ b/nnsplit_bench/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "nnsplit_bench"
+version = "0.1.0"
+edition = "2018"
+
+[dependencies]
+# NOTE(review): load_file is not referenced by src/main.rs as shown in this
+# change; confirm whether it is still needed.
+load_file = "1.0.1"
+# Sentence splitter called from src/main.rs.
+# NOTE(review): presumably "model-loader" backs NNSplit::load and
+# "tract-backend" selects the inference backend; confirm in the nnsplit docs.
+nnsplit = { version = "0.5.8", features = ["model-loader", "tract-backend"] }
\ No newline at end of file
diff --git a/nnsplit_bench/src/main.rs b/nnsplit_bench/src/main.rs
new file mode 100644
index 0000000..f057006
--- /dev/null
+++ b/nnsplit_bench/src/main.rs
@@ -0,0 +1,22 @@
+use std::env;
+use std::fs;
+
+/// Prints each sentence nnsplit finds in the file named by the first argument.
+/// Returns an error instead of panicking when the argument is missing, the file
+/// cannot be read, or the "de" model cannot be loaded.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Fail fast on a bad command line or unreadable input before loading the model.
+    let filename = env::args().nth(1).ok_or("usage: nnsplit_bench <file>")?;
+    let contents = fs::read_to_string(&filename)
+        .map_err(|err| format!("cannot read {}: {}", filename, err))?;
+
+    let splitter = nnsplit::NNSplit::load("de", nnsplit::NNSplitOptions::default())?;
+    // Exactly one text is passed in, so the first result covers the whole file.
+    let input = [contents.as_str()];
+    let results = splitter.split(&input);
+    let document = results.first().ok_or("nnsplit returned no result")?;
+    for sentence in document.iter() {
+        println!("{}", sentence.text());
+    }
+    Ok(())
+}
\ No newline at end of file