Revert "Added regular expression for emojis that leads to agd.t failing"
This reverts commit 0c99f3b4eb5eafd4fb6f464b0fc74a30336856e6.
Change-Id: I3fe04039624d3ab6d038c1952e85f80cb98da84c
diff --git a/lib/KorAP/XML/Tokenizer.pm b/lib/KorAP/XML/Tokenizer.pm
index 171b1c1..482bcf7 100644
--- a/lib/KorAP/XML/Tokenizer.pm
+++ b/lib/KorAP/XML/Tokenizer.pm
@@ -16,7 +16,6 @@
use List::MoreUtils 'uniq';
use JSON::XS;
use Log::Any qw($log);
-use utf8;
# TODO 1:
# Bei den Autoren im Index darauf achten,
@@ -147,15 +146,8 @@
# Ignore non-word, non-number, and non-verbal tokens per default
# '9646' equals the musical pause, used in speech corpora
if ($self->non_verbal_tokens && ord($token) == 9646) {
- # Non-verbal token
- }
- # elsif ($token eq '😍') {
- #;
- #}
- elsif ($token =~ m{\p{Emoji}}){
- ;
- }
- elsif (!$self->non_word_tokens && $token !~ /[\w\d]/) {
+ # Non-verbal token
+ } elsif (!$self->non_word_tokens && $token !~ /[\w\d]/) {
# TODO: Recognize punctuations!
# if ($mtt) {
# my $term = [$token, $from, $to];