Support named entities for NKJP
Change-Id: I71bd002625849c34628d99c518571484e6083ea0
diff --git a/t/real/corpus/NKJP/NKJP/KolakowskiOco/nkjp/morpho.xml b/t/real/corpus/NKJP/NKJP/KolakowskiOco/nkjp/morpho.xml
index bdef55e..17019a5 100644
--- a/t/real/corpus/NKJP/NKJP/KolakowskiOco/nkjp/morpho.xml
+++ b/t/real/corpus/NKJP/NKJP/KolakowskiOco/nkjp/morpho.xml
@@ -2989,6 +2989,13 @@
</fs>
<fs type="alt">
<f name="lemma">to</f>
+ <f name="pos">conj</f>
+ <f name="msd">
+ <symbol value="0"/>
+ </f>
+ </fs>
+ <fs type="alt">
+ <f name="lemma">to</f>
<f name="pos">part</f>
<f name="msd">
<symbol value="0"/>
@@ -3672,6 +3679,13 @@
<symbol value="0"/>
</f>
</fs>
+ <fs type="alt">
+ <f name="lemma">to</f>
+ <f name="pos">conj</f>
+ <f name="msd">
+ <symbol value="0"/>
+ </f>
+ </fs>
<fs type="alt" n="choice">
<f name="lemma">to</f>
<f name="pos">part</f>
diff --git a/t/real/corpus/NKJP/NKJP/KolakowskiOco/nkjp/named.xml b/t/real/corpus/NKJP/NKJP/KolakowskiOco/nkjp/named.xml
index df1ee28..b40d1d6 100644
--- a/t/real/corpus/NKJP/NKJP/KolakowskiOco/nkjp/named.xml
+++ b/t/real/corpus/NKJP/NKJP/KolakowskiOco/nkjp/named.xml
@@ -8,34 +8,30 @@
<fs xmlns="http://www.tei-c.org/ns/1.0" type="ne">
<f name="ne"><!-- _Kierkegaard-->
<fs>
- <f name="complex-ent">
- <fs type="complex-ent">
- <f name="type">
- <symbol value="persName"/>
- </f>
- <f name="subtype">
- <symbol value="surname"/>
- </f>
- <f name="nkjp-named">
- <fs type="named">
- <f name="type">
- <symbol value="persName"/>
- </f>
- <f name="subtype">
- <symbol value="surname"/>
- </f>
- <f name="orth">
- <string>Kierkegaard</string>
- </f>
- <f name="base">
- <string>Kierkegaard</string>
- </f>
- <f name="certainty">
- <symbol value="high"/>
- </f>
- </fs>
- </f>
- </fs>
+ <f name="type">
+ <symbol value="persName"/>
+ </f>
+ <f name="subtype">
+ <symbol value="surname"/>
+ </f>
+ </fs>
+ </f>
+ <f name="nkjp">
+ <fs type="named">
+ <f name="type">
+ <symbol value="persName"/>
+ </f>
+ <f name="subtype">
+ <symbol value="surname"/>
+ </f>
+ <f name="orth">
+ <string>Kierkegaard</string>
+ </f>
+ <f name="base">
+ <string>Kierkegaard</string>
+ </f>
+ <f name="certainty">
+ <symbol value="high"/>
</f>
</fs>
</f>
diff --git a/t/real/nkjp.t b/t/real/nkjp.t
index cdd46e8..8652037 100644
--- a/t/real/nkjp.t
+++ b/t/real/nkjp.t
@@ -15,6 +15,7 @@
use File::Spec::Functions 'catdir';
use_ok('KorAP::XML::Krill');
+use_ok('KorAP::XML::Annotation::NKJP::NamedEntities');
my $path = catdir(dirname(__FILE__), 'corpus','NKJP','NKJP','KOT');
@@ -52,7 +53,7 @@
doc => $doc,
foundry => $token_base_foundry,
layer => $token_base_layer,
- name => 'tokens'
+ name => 'tokens',
);
ok($tokens, 'Token Object is fine');
ok($tokens->parse, 'Token parsing is fine');
@@ -65,7 +66,7 @@
## Base
ok($tokens->add('DeReKo', 'Structure', 'base_sentences_paragraphs'));
-ok($tokens->add('NKJP', 'Morpho'), 'Add Gingko');
+ok($tokens->add('NKJP', 'Morpho'), 'Add Morpho');
$output = $tokens->to_data;
@@ -128,13 +129,14 @@
## Base
ok($tokens->add('DeReKo', 'Structure', 'base_sentences_paragraphs'));
-ok($tokens->add('NKJP', 'Morpho'), 'Add Gingko');
+ok($tokens->add('NKJP', 'Morpho'), 'Add Morpho');
+ok($tokens->add('NKJP', 'NamedEntities'), 'Add NamedEntities');
$output = $tokens->to_data;
-is($output->{data}->{foundries}, 'dereko dereko/structure dereko/structure/base_sentences_paragraphs nkjp nkjp/morpho', 'Foundries');
+is($output->{data}->{foundries}, 'dereko dereko/structure dereko/structure/base_sentences_paragraphs nkjp nkjp/morpho nkjp/namedentities', 'Foundries');
-is($output->{data}->{layerInfos}, 'dereko/s=spans nkjp/l=tokens nkjp/m=tokens nkjp/p=tokens', 'layerInfos');
+is($output->{data}->{layerInfos}, 'dereko/s=spans nkjp/l=tokens nkjp/m=tokens nkjp/ne=tokens nkjp/p=tokens', 'layerInfos');
$token = join('||', @{$output->{data}->{stream}->[5]});
@@ -146,6 +148,19 @@
like($token, qr!nkjp/p:adj!);
like($token, qr!s:takie!);
+$token = join('||', @{$output->{data}->{stream}->[67]});
+
+like($token, qr!<>:dereko/s:seg\$<b>64<i>464<i>475<i>68<b>4<s>1!);
+like($token, qr!\@:dereko\/s:corresp:ann_segmentation\.xml\\#segm_2\.2-seg\$<b>17<s>1<i>68!);
+like($token, qr!\@:dereko\/s:id:morph_2\.2-seg\$<b>17<s>1<i>68!);
+like($token, qr!_67\$<i>464<i>475!);
+like($token, qr!i:kierkegaard!);
+like($token, qr!nkjp/l:Kierkegaard!);
+like($token, qr!nkjp/m:sg:nom:m1!);
+like($token, qr!nkjp/ne:persName:surname!);
+like($token, qr!nkjp/p:subst!);
+like($token, qr!s:Kierkegaard!);
+
done_testing;
__END__