#!/usr/bin/env perl
# Command-line filter around KorAP::XML::TEI::Tokenizer::Aggressive.
#
# Reads lines from STDIN, splits each line into texts at the U+0004
# separator, tokenizes every text and prints the boundaries reported by
# the tokenizer as one space-separated line per text on STDOUT.
use strict;
use warnings;
use FindBin;
BEGIN {
  # Make the lib directory two levels above this script's location loadable
  unshift @INC, "$FindBin::Bin/../../lib";
};
use KorAP::XML::TEI::Tokenizer::Aggressive;

# Flush STDOUT after every print, so each result line is emitted immediately
# (presumably because the script is driven through a pipe - confirm with caller)
$| = 1;

# Init tokenizer
my $tok = KorAP::XML::TEI::Tokenizer::Aggressive->new;

# Read lines from STDIN and print boundaries.
# The read and the end-of-input test use the same handle: testing
# eof(STDIN) while reading with the bare <> operator would consult two
# different streams as soon as command-line arguments are present.
while (defined(my $line = <STDIN>)) {

  # A single input line may carry several texts, separated by U+0004
  # with an optional newline on either side of the separator
  for my $text (split(/\n?\x{04}\n?/, $line)) {
    $tok->tokenize($text);
    print join(' ', $tok->boundaries), "\n";

    # Clear the tokenizer state before the next text
    $tok->reset;
  }
};

1;