blob: ced1274173b272f17dd437ae98154a8d08f38ea3 [file] [log] [blame]
Akronb0408972022-03-07 11:36:17 +01001#!/usr/bin/env perl
2use strict;
3use warnings;
4
# This script rewrites the pipeline output
# of the Stanford parser for tokenize, ssplit, mwt
7
# Rewrite the parser dump named by the first command-line argument and
# write the result to STDOUT:
#   * a "[Text=TOKEN CharacterOffsetBegin=N CharacterOffsetEnd=M]" line is
#     reduced to TOKEN (its line ending is kept),
#   * a "Sentence #N (" header line becomes an empty line,
#   * every other line is dropped.
# On failure to open the input a warning is emitted and nothing is printed.
my $path = $ARGV[0];

# Three-argument open keeps the read mode separate from the file name, so
# characters such as '>' or '|' in the argument cannot change how the file
# is opened.
if (defined $path and open(my $file, '<', $path)) {

    # Process one line at a time instead of loading the whole file as a list.
    while (defined(my $line = readline($file))) {

        # '$' matches before the trailing newline, so the newline survives
        # the substitution and terminates the printed token.
        if ($line =~ s/^\[Text\=(.+?)\s+CharacterOffsetBegin\=\d+\s+CharacterOffsetEnd=\d+\]$/$1/) {
            print $line;
        }
        elsif ($line =~ m/^Sentence\s+\#\d+\s+\(/) {
            print "\n";
        }
    }

    # Completion marker, written to STDOUT after the rewritten tokens.
    print "Done.";
    close($file);
}
else {
    # Report which file failed and why; $! is only meaningful when open()
    # was actually attempted.
    warn defined $path
        ? "Unable to open file '$path': $!"
        : 'Unable to open file: no file name given';
}