#!/usr/bin/env perl
use strict;
use warnings;

# Rewrites the pipeline output of the Stanford parser (annotators
# tokenize,ssplit,mwt) into plain text: the Text value of every token line
# is printed on its own line, and an empty line is printed for every
# "Sentence #N (" header. All other input lines are dropped.
#
# Usage: <this script> <parser-output-file>
#
# Exit status is 0 on success and 1 when no file was given or the file
# could not be opened.

# Returns the text to emit for a single input line, or undef when the line
# produces no output.
sub _rewrite_line {
    my ($line) = @_;

    # Token line: keep only the Text value. `$` matches before a trailing
    # newline, so that newline survives the substitution and terminates the
    # emitted token.
    if ($line =~ s/^\[Text\=(.+?)\s+CharacterOffsetBegin\=\d+\s+CharacterOffsetEnd=\d+\]$/$1/) {
        return $line;
    }

    # Sentence header: emitted as an empty separator line.
    if ($line =~ m/^Sentence\s+\#\d+\s+\(/) {
        return "\n";
    }

    return;
}

# Reads the file named by the first argument line by line and prints the
# rewritten text to STDOUT. Returns the process exit status.
sub main {
    my ($path) = @_;

    if (!defined $path) {
        warn "Usage: $0 <parser-output-file>\n";
        return 1;
    }

    # Three-argument open: the filename is never interpreted as a mode or a
    # pipe, whatever characters it contains.
    my $in;
    if (!open($in, '<', $path)) {
        warn "Unable to open file '$path': $!\n";
        return 1;
    }

    # Scalar-context readline streams the input instead of loading the
    # whole file into memory first.
    while (defined(my $line = readline($in))) {
        my $out = _rewrite_line($line);
        print $out if defined $out;
    }
    close($in);

    # Kept on STDOUT without a trailing newline: it is part of the output
    # this script has always produced.
    print "Done.";

    return 0;
}

# Run only when executed as a script, not when loaded from another file.
exit main(@ARGV) unless caller;

1;