changed sentence foundry, foundry selector and textClass serialization
diff --git a/lib/KorAP/Document.pm b/lib/KorAP/Document.pm
index 200258a..6153463 100644
--- a/lib/KorAP/Document.pm
+++ b/lib/KorAP/Document.pm
@@ -181,10 +181,14 @@
};
};
- foreach ('author', 'text_class') {
+ for ('author') {
$hash{_k($_)} = join(',', @{ $self->$_ });
};
+ for ('text_class') {
+ $hash{_k($_)} = join(' ', @{ $self->$_ });
+ };
+
return \%hash;
};
diff --git a/lib/KorAP/Index/Base/Paragraphs.pm b/lib/KorAP/Index/Base/Paragraphs.pm
index 35199d4..338a748 100644
--- a/lib/KorAP/Index/Base/Paragraphs.pm
+++ b/lib/KorAP/Index/Base/Paragraphs.pm
@@ -22,7 +22,7 @@
}
) or return;
- $$self->stream->add_meta('p', '<i>' . $i);
+ $$self->stream->add_meta('paragraph', '<i>' . $i);
return 1;
};
diff --git a/lib/KorAP/Index/OpenNLP/Sentences.pm b/lib/KorAP/Index/OpenNLP/Sentences.pm
index 7868fb2..0b774f7 100644
--- a/lib/KorAP/Index/OpenNLP/Sentences.pm
+++ b/lib/KorAP/Index/OpenNLP/Sentences.pm
@@ -21,7 +21,7 @@
}
) or return;
- $$self->stream->add_meta('s', '<i>' . $i);
+ $$self->stream->add_meta('sentence', '<i>' . $i);
return 1;
};
diff --git a/lib/KorAP/Tokenizer.pm b/lib/KorAP/Tokenizer.pm
index 956b5c0..3b29424 100644
--- a/lib/KorAP/Tokenizer.pm
+++ b/lib/KorAP/Tokenizer.pm
@@ -81,7 +81,7 @@
});
# Add token count
- $mtts->add_meta('t', '<i>' . $have);
+ $mtts->add_meta('token', '<i>' . $have);
$range->gap($old, $doc->primary->data_length, $have-1) if $doc->primary->data_length >= $old;
@@ -243,9 +243,9 @@
push(@supports, $foundry);
foreach my $layer (@{$self->{support}->{$foundry}}) {
my @layers = @$layer;
- push(@supports, $foundry . '#' . $layers[0]);
+ push(@supports, $foundry . '/' . $layers[0]);
if ($layers[1]) {
- push(@supports, $foundry . '#' . join('#', @layers));
+ push(@supports, $foundry . '/' . join('/', @layers));
};
};
};
diff --git a/script/create_example.pl b/script/create_example.pl
new file mode 100755
index 0000000..275866e
--- /dev/null
+++ b/script/create_example.pl
@@ -0,0 +1,41 @@
+#!/usr/bin/env perl
+# Create the example index files: runs prepare_index.pl once per example
+# document to write FILE.json and once more to write FILE.json.gz.
+# Exits with status 1 if any of the runs failed.
+use strict;
+use warnings;
+use FindBin;
+use v5.16;
+
+# Directory containing this script
+my $dir = $FindBin::Bin;
+
+# Number of prepare_index.pl runs that failed
+my $failed = 0;
+
+# Run a command in list form, so no shell is involved and paths with
+# spaces or shell metacharacters reach the child unchanged. A failing
+# run is reported and counted, but the remaining files are still built.
+sub run {
+  my @command = @_;
+  return 1 if system(@command) == 0;
+  warn 'Failed (wait status ' . $? . '): ' . join(' ', @command) . "\n";
+  $failed++;
+  return;
+};
+
+foreach my $file (qw/00001 00002 00003 00004 00005 00006 02439/) {
+  my $input  = $dir . '/../examples/WPD/AAA/' . $file;
+  my $output = $dir . '/../' . $file . '.json';
+
+  say 'Create ' . $file . '.json';
+  run($^X, $dir . '/prepare_index.pl', '-i', $input, '-o', $output);
+
+  # Second run: same input, output name extended by ".gz" and the -z
+  # switch passed (presumably gzip output; see prepare_index.pl).
+  say 'Create ' . $file . '.json.gz';
+  run($^X, $dir . '/prepare_index.pl', '-i', $input, '-o', $output . '.gz', '-z');
+};
+
+# Non-zero exit status if at least one run failed
+exit($failed ? 1 : 0);
diff --git a/script/prepare_index.pl b/script/prepare_index.pl
index 2b98cfa..21d8dd9 100644
--- a/script/prepare_index.pl
+++ b/script/prepare_index.pl
@@ -124,7 +124,7 @@
my @layers;
# Base information
-push(@layers, ['OpenNLP', 'Sentences']);
+push(@layers, ['Base', 'Sentences']);
push(@layers, ['Base', 'Paragraphs']);
# OpenNLP