#!/usr/bin/env perl
use strict;
use warnings;

# This script rewrites the pipeline output of the Stanford parser
# for the annotators tokenize,ssplit,mwt.
#
# Usage: pass the path of the parser output file as the first argument
# ('-' reads from STDIN).
#
# For every token line of the form
#   [Text=<token> CharacterOffsetBegin=<n> CharacterOffsetEnd=<n>]
# only <token> is printed (the line's trailing newline is kept).
# Every "Sentence #<n> (" header line is replaced by an empty line.
# All other lines are dropped. After the input is exhausted, "Done."
# is printed to STDOUT.

my $path = $ARGV[0];

my $file;
my $opened = 0;
if (defined $path) {
    # Three-argument open, so that the file name is never interpreted as
    # part of the mode. '-' is mapped to STDIN explicitly, because that is
    # what the two-argument form ('<-') used to do.
    $opened = $path eq '-'
        ? open($file, '<&', \*STDIN)
        : open($file, '<', $path);
}

if ($opened) {
    # Read line by line instead of slurping the whole parser output.
    while (defined(my $line = readline($file))) {
        if ($line =~ s/^\[Text\=(.+?)\s+CharacterOffsetBegin\=\d+\s+CharacterOffsetEnd=\d+\]$/$1/) {
            # '$' matches before the final newline, so the newline
            # survives the substitution and terminates the token.
            print $line;
        }
        elsif ($line =~ m/^Sentence\s+\#\d+\s+\(/) {
            # Sentence boundary
            print "\n";
        }
    }

    print "Done.";
    close($file);
}
elsif (!defined $path) {
    warn 'Unable to open file: no input file given';
}
else {
    warn "Unable to open file '$path': $!";
}