#!/usr/bin/env perl
#
# Line-oriented driver around KorAP::XML::TEI::Tokenizer::Aggressive.
#
# For every line read from STDIN, the line is handed to the tokenizer and
# the values returned by its boundaries() method are printed to STDOUT as
# a single space-separated line. One output line is produced per input line.
use strict;
use warnings;
use FindBin;
BEGIN {
  # Make the project's lib directory (two levels above this script)
  # take precedence when loading modules.
  unshift @INC, "$FindBin::Bin/../../lib";
};
use KorAP::XML::TEI::Tokenizer::Aggressive;

use open qw(:std :utf8); # assume utf-8 encoding

# Enable autoflush on the selected output handle (STDOUT), so every result
# line is written immediately instead of sitting in a buffer.
# NOTE(review): presumably needed because a parent process talks to this
# script over pipes and waits for each answer - confirm against the caller.
$| = 1;

# Init tokenizer
my $tok = KorAP::XML::TEI::Tokenizer::Aggressive->new;

# Read lines from STDIN and print the boundaries for each of them.
# The read and the end-of-input test use the same handle: the loop stops
# exactly when STDIN is exhausted, and $line is never undef in the body.
while (defined(my $line = <STDIN>)) {
  $tok->tokenize($line);
  print join(' ', $tok->boundaries), "\n";

  # Reset the tokenizer before the next line is processed.
  # NOTE(review): presumably this clears the collected boundaries - verify
  # in the tokenizer module.
  $tok->reset;
};

1;