Add UD evaluation
Change-Id: I87b50f7b46c7f1d111e5e8ad3f925ca5280d74a2
diff --git a/benchmarks/cleanup/split_conllu.pl b/benchmarks/cleanup/split_conllu.pl
new file mode 100644
index 0000000..9dfd824
--- /dev/null
+++ b/benchmarks/cleanup/split_conllu.pl
@@ -0,0 +1,47 @@
+#!/usr/bin/env perl
+#
+# Split a CoNLL-U file into two companion files written next to the input:
+#   <file>.raw    the text of every "# text = ..." line, concatenated
+#   <file>.split  the second field of every token line, one per line
+# Once the first token has been written, each further "# text" line adds a
+# single space to <file>.raw and an empty line to <file>.split beforehand.
+#
+# Usage: split_conllu.pl <file>
+use strict;
+use warnings;
+
+my $file = $ARGV[0];
+die "Usage: $0 <file>\n" unless defined $file && length $file;
+
+# Three-argument open with lexical handles: the file name can never be
+# mistaken for part of the open mode, and a failed open aborts with a
+# message instead of silently producing empty output files.
+open(my $in, '<', $file) or die "Cannot read '$file': $!\n";
+open(my $raw, '>', "$file.raw") or die "Cannot write '$file.raw': $!\n";
+open(my $split, '>', "$file.split") or die "Cannot write '$file.split': $!\n";
+
+# True once the first token has been written; from then on every new
+# "# text" line first emits a separator into both output files.
+my $seen_token = 0;
+
+while (my $line = <$in>) {
+    if ($line =~ /^# text = (.+?)$/) {
+        if ($seen_token) {
+            print {$split} "\n";
+            print {$raw} ' ';
+        }
+        print {$raw} $1;
+    }
+    # Token lines start with a plain integer ID followed by whitespace, so
+    # IDs such as "1-2" or "1.1" never match. The captured field ends at the
+    # first whitespace character.
+    elsif ($line =~ /^\d+\s+(\S+)\s/) {
+        print {$split} $1, "\n";
+        $seen_token = 1;
+    }
+}
+
+close($in);
+# Buffered write errors only surface at close, so check both outputs.
+close($raw) or die "Cannot finish writing '$file.raw': $!\n";
+close($split) or die "Cannot finish writing '$file.split': $!\n";