blob: ced1274173b272f17dd437ae98154a8d08f38ea3 [file] [log] [blame]
#!/usr/bin/env perl
use strict;
use warnings;
# This script rewrites the pipeline output
# of the stanford parser for tokenize,ssplit,mwt
our @ARGV;
if (open(my $file, '<' . $ARGV[0])) {
foreach (readline($file)) {
if (s/^\[Text\=(.+?)\s+CharacterOffsetBegin\=\d+\s+CharacterOffsetEnd=\d+\]$/$1/) {
print $_;
}
elsif (m/^Sentence\s+\#\d+\s+\(/) {
print "\n";
};
};
print "Done.";
close($file);
}
else {
warn 'Unable to open file'
};