commit | 049e52606bcd1fb192789d49110607042a55e2f8 | [log] [tgz] |
---|---|---|
author | Akron <nils@diewald-online.de> | Fri Mar 18 09:59:34 2022 +0100 |
committer | Akron <nils@diewald-online.de> | Fri Mar 18 09:59:34 2022 +0100 |
tree | 797fee47ae18690d1734547d83c4bff3d2ead858 | |
parent | 54fd31434d861d66feec491b9b1eff0e661b1225 [diff] [blame] |
Add eos evaluation

Change-Id: Ia721ce1df8798fa2771059b4feb12eb56459325b
diff --git a/benchmarks/cleanup/tokenize_nn.pl b/benchmarks/cleanup/tokenize_nn.pl new file mode 100644 index 0000000..3124c6a --- /dev/null +++ b/benchmarks/cleanup/tokenize_nn.pl
#!/usr/bin/env perl
# Collapse blank-line-separated blocks of input into one line each.
#
# Reads all input (the files named on the command line, or STDIN when none
# are given), splits it into blocks at every pair of consecutive newlines,
# removes every whitespace character inside each block, and prints each
# block followed by a single newline.
use strict;
use warnings;

# Read every line from every input source at once. List-context <> walks
# all of @ARGV, whereas a single slurp-mode read ($/ = undef) would return
# only the first file.
my $text = join '', <>;

# Drop leading newlines so that split() does not yield an empty first block.
$text =~ s/\A\n+//;

# NOTE(review): a run of four or more newlines produces an empty block and
# therefore an empty output line - confirm whether that is intended.
for my $block (split /\n\n/, $text) {

    # \s already covers "\n" and "\t", so this strips all whitespace.
    $block =~ s/\s+//g;
    print $block, "\n";
}