Added meta tests for IDS
Change-Id: Iec1ca1dace8ae137ef03cd4f4d8be291548a9b69
diff --git a/Changes b/Changes
index 73c7bf2..7350491 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,8 @@
-0.06 2016-02-12
+0.07 2016-02-13
+ - Improved support for Schreibgebrauch meta data
+ (IDS flavour).
+
+0.06 2016-02-11
- Improved support for Schreibgebrauch meta data
(Duden flavour).
diff --git a/MANIFEST b/MANIFEST
index ca31d2c..5fff770 100755
--- a/MANIFEST
+++ b/MANIFEST
@@ -80,6 +80,7 @@
t/sgbr/lemma.t
t/sgbr/meta.t
t/sgbr/meta_duden.t
+t/sgbr/meta_ids.t
t/sgbr/pos.t
t/sgbr/token.t
t/corpus/BZK/header.xml
@@ -634,6 +635,24 @@
t/sgbr/PRO-DUD/BSP-2013-01/32/sgbr/ana.xml
t/sgbr/PRO-DUD/BSP-2013-01/32/sgbr/lemma.xml
t/sgbr/PRO-DUD/BSP-2013-01/32/struct/structure.xml
+t/sgbr/CMC-TSK/header.xml
+t/sgbr/CMC-TSK/2014-09/header.xml
+t/sgbr/CMC-TSK/2014-09/2843/header.xml
+t/sgbr/CMC-TSK/2014-09/2843/data.xml
+t/sgbr/CMC-TSK/2014-09/2843/base/sentences.xml
+t/sgbr/CMC-TSK/2014-09/2843/base/tokens_aggr.xml
+t/sgbr/CMC-TSK/2014-09/2843/base/tokens_conservative.xml
+t/sgbr/CMC-TSK/2014-09/2843/sgbr/ana.xml
+t/sgbr/CMC-TSK/2014-09/2843/sgbr/lemma.xml
+t/sgbr/CMC-TSK/2014-09/2843/struct/structure.xml
+t/sgbr/CMC-TSK/2014-09/3401/header.xml
+t/sgbr/CMC-TSK/2014-09/3401/data.xml
+t/sgbr/CMC-TSK/2014-09/3401/base/sentences.xml
+t/sgbr/CMC-TSK/2014-09/3401/base/tokens_aggr.xml
+t/sgbr/CMC-TSK/2014-09/3401/base/tokens_conservative.xml
+t/sgbr/CMC-TSK/2014-09/3401/sgbr/ana.xml
+t/sgbr/CMC-TSK/2014-09/3401/sgbr/lemma.xml
+t/sgbr/CMC-TSK/2014-09/3401/struct/structure.xml
t/corpus/BZK/D59/00001/mate/pipeline/one_token_per_line.txt
t/corpus/BZK/D59/00001/mate/pipeline/parsed.txt
t/corpus/BZK/D59/00001/mate/tokenSpans/number_tokenSpans.xml
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index e260885..404c2b2 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -17,7 +17,7 @@
# Due to the kind of processing, processed metadata may be stored in
# a multiprocess cache instead.
-our $VERSION = '0.06';
+our $VERSION = '0.07';
our @ATTR = qw/text_sigle
doc_sigle
@@ -306,17 +306,24 @@
# Date of publication
try {
my $date = $dom->at('date')->all_text;
- if ($date =~ s!^\s*(\d{4})-(\d{2})-(\d{2})!$1$2$3!) {
+ $self->store(sgbrDate => $date);
+ if ($date =~ s!^\s*(\d{4})-(\d{2})-(\d{2}).*$!$1$2$3!) {
$self->pub_date($date);
}
else {
$self->log->warn('"' . $date . '" is not a compatible pubDate');
- }
+ };
};
# Publication place
try {
- $self->pub_place($dom->at('pubPlace')->all_text);
+ my $pp = $dom->at('pubPlace');
+ if ($pp) {
+ $self->pub_place($pp->all_text) if $pp->all_text;
+ };
+ if ($pp && $pp->attr('ref')) { # $pp is undef when <pubPlace> is absent
+ $self->reference($pp->attr('ref'));
+ };
};
if ($stmt = $dom->at('titleStmt')) {
diff --git a/lib/KorAP/XML/Tokenizer.pm b/lib/KorAP/XML/Tokenizer.pm
index b1c6f20..7e589e7 100644
--- a/lib/KorAP/XML/Tokenizer.pm
+++ b/lib/KorAP/XML/Tokenizer.pm
@@ -267,7 +267,12 @@
my $self = shift;
my %param = @_;
- croak 'No token data available' unless $self->stream;
+ unless ($self->stream) {
+ $self->log->warn(
+ 'No token data available'
+ );
+ return;
+ };
$self->log->trace(
($param{skip} ? 'Skip' : 'Add').' span data '.$param{foundry}.':'.$param{layer}
@@ -311,7 +316,13 @@
my $self = shift;
my %param = @_;
- croak 'No token data available' unless $self->stream;
+ unless ($self->stream) {
+ $self->log->warn(
+ 'No token data available'
+ );
+ return;
+ };
+
$self->log->trace(
($param{skip} ? 'Skip' : 'Add').' token data '.$param{foundry}.':'.$param{layer}
diff --git a/t/sgbr/CMC-TSK/2014-09/2843/base/paragraph.xml b/t/sgbr/CMC-TSK/2014-09/2843/base/paragraph.xml
new file mode 100644
index 0000000..3dfe5e2
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/2843/base/paragraph.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.2843" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span from="24" to="29" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/sgbr/CMC-TSK/2014-09/2843/base/sentences.xml b/t/sgbr/CMC-TSK/2014-09/2843/base/sentences.xml
new file mode 100644
index 0000000..c9091e3
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/2843/base/sentences.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.2843" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span from="0" to="23" />
+ <span from="24" to="29" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/sgbr/CMC-TSK/2014-09/2843/base/tokens_aggr.xml b/t/sgbr/CMC-TSK/2014-09/2843/base/tokens_aggr.xml
new file mode 100644
index 0000000..87a2303
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/2843/base/tokens_aggr.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.2843" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="t_0" from="0" to="1" />
+ <span id="t_1" from="2" to="8" />
+ <span id="t_2" from="8" to="9" />
+ <span id="t_3" from="9" to="11" />
+ <span id="t_4" from="11" to="12" />
+ <span id="t_5" from="12" to="17" />
+ <span id="t_6" from="18" to="20" />
+ <span id="t_7" from="20" to="21" />
+ <span id="t_8" from="21" to="23" />
+ <span id="t_9" from="24" to="27" />
+ <span id="t_10" from="28" to="29" />
+ <span id="t_11" from="29" to="30" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/sgbr/CMC-TSK/2014-09/2843/base/tokens_conservative.xml b/t/sgbr/CMC-TSK/2014-09/2843/base/tokens_conservative.xml
new file mode 100644
index 0000000..aa3aa2e
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/2843/base/tokens_conservative.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.2843" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="t_0" from="0" to="1" />
+ <span id="t_1" from="2" to="17" />
+ <span id="t_2" from="18" to="23" />
+ <span id="t_3" from="24" to="27" />
+ <span id="t_4" from="28" to="29" />
+ <span id="t_5" from="29" to="30" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/sgbr/CMC-TSK/2014-09/2843/data.xml b/t/sgbr/CMC-TSK/2014-09/2843/data.xml
new file mode 100644
index 0000000..cd93058
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/2843/data.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="text.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<raw_text docid="CMC-TSK_2014-09.2843" xmlns="http://ids-mannheim.de/ns/KorAP">
+ <metadata file="metadata.xml" />
+ <text>@ Koelle_am_Rhing 10:18 100 %!</text>
+</raw_text>
\ No newline at end of file
diff --git a/t/sgbr/CMC-TSK/2014-09/2843/header.xml b/t/sgbr/CMC-TSK/2014-09/2843/header.xml
new file mode 100644
index 0000000..d8008d2
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/2843/header.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<teiHeader>
+<textSigle>CMC-TSK_2014-09.2843</textSigle>
+<item xml:id="CMC.TSK.2014.09.Texte.2843" n="45">
+<biblFull>
+<titleStmt>
+<title>@ Koelle_am_Rhing 10:18</title>
+<author ref="#CMC.TSK.2014.09.Autoren.587"/>
+</titleStmt>
+<publicationStmt>
+<publisher>tagesschau.de</publisher>
+<pubPlace ref="http://meta.tagesschau.de/node/090285#comment-1732187"/>
+<date>2014-09-30 14:33:00</date>
+</publicationStmt>
+</biblFull>
+</item>
+</teiHeader>
diff --git a/t/sgbr/CMC-TSK/2014-09/2843/sgbr/ana.xml b/t/sgbr/CMC-TSK/2014-09/2843/sgbr/ana.xml
new file mode 100644
index 0000000..0f80d17
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/2843/sgbr/ana.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.2843" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="s1" from="0" to="1">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">@</f>
+ <f name="ctag">APPR</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2" from="2" to="17">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Koelle_am_Rhing</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s3" from="18" to="23">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">10:18</f>
+ <f name="ctag">CARD</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s4" from="24" to="27">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">100</f>
+ <f name="ctag">CARD</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s5" from="28" to="29">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">%</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s6" from="29" to="30">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">!</f>
+ <f name="ctag">_ENDE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/sgbr/CMC-TSK/2014-09/2843/sgbr/lemma.xml b/t/sgbr/CMC-TSK/2014-09/2843/sgbr/lemma.xml
new file mode 100644
index 0000000..9d34627
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/2843/sgbr/lemma.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.2843" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/sgbr/CMC-TSK/2014-09/2843/struct/structure.xml b/t/sgbr/CMC-TSK/2014-09/2843/struct/structure.xml
new file mode 100644
index 0000000..ac05cb6
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/2843/struct/structure.xml
@@ -0,0 +1,108 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.2843" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="s0" from="0" to="29" l="1">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">text</f>
+ </fs>
+ </span>
+ <span id="s1" from="0" to="23" l="2">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">head</f>
+ </fs>
+ </span>
+ <span id="s2" from="0" to="23" l="3">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">s</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s3" from="0" to="1" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.APPR</f>
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s4" from="2" to="17" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.NN</f>
+ <f name="n">2</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s5" from="18" to="23" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.CARD</f>
+ <f name="n">3</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s6" from="24" to="29" l="2">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">p</f>
+ </fs>
+ </span>
+ <span id="s7" from="24" to="29" l="3">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">s</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="n">2</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s8" from="24" to="27" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.CARD</f>
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s9" from="28" to="29" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.NN</f>
+ <f name="n">2</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s10" from="29" to="29" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS._ENDE</f>
+ <f name="n">3</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/sgbr/CMC-TSK/2014-09/3401/base/paragraph.xml b/t/sgbr/CMC-TSK/2014-09/3401/base/paragraph.xml
new file mode 100644
index 0000000..6edab5f
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/3401/base/paragraph.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.3401" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span from="16" to="114" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/sgbr/CMC-TSK/2014-09/3401/base/sentences.xml b/t/sgbr/CMC-TSK/2014-09/3401/base/sentences.xml
new file mode 100644
index 0000000..f877f16
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/3401/base/sentences.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.3401" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span from="0" to="15" />
+ <span from="16" to="114" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/sgbr/CMC-TSK/2014-09/3401/base/tokens_aggr.xml b/t/sgbr/CMC-TSK/2014-09/3401/base/tokens_aggr.xml
new file mode 100644
index 0000000..e417fee
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/3401/base/tokens_aggr.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.3401" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="t_0" from="0" to="1" />
+ <span id="t_1" from="2" to="15" />
+ <span id="t_2" from="16" to="18" />
+ <span id="t_3" from="19" to="21" />
+ <span id="t_4" from="22" to="26" />
+ <span id="t_5" from="27" to="30" />
+ <span id="t_6" from="31" to="36" />
+ <span id="t_7" from="37" to="40" />
+ <span id="t_8" from="41" to="43" />
+ <span id="t_9" from="44" to="49" />
+ <span id="t_10" from="50" to="63" />
+ <span id="t_11" from="64" to="73" />
+ <span id="t_12" from="74" to="80" />
+ <span id="t_13" from="80" to="81" />
+ <span id="t_14" from="81" to="85" />
+ <span id="t_15" from="86" to="89" />
+ <span id="t_16" from="90" to="94" />
+ <span id="t_17" from="95" to="97" />
+ <span id="t_18" from="98" to="106" />
+ <span id="t_19" from="107" to="113" />
+ <span id="t_20" from="113" to="114" />
+ <span id="t_21" from="114" to="115" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/sgbr/CMC-TSK/2014-09/3401/base/tokens_conservative.xml b/t/sgbr/CMC-TSK/2014-09/3401/base/tokens_conservative.xml
new file mode 100644
index 0000000..60c0624
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/3401/base/tokens_conservative.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.3401" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="t_0" from="0" to="1" />
+ <span id="t_1" from="2" to="15" />
+ <span id="t_2" from="16" to="18" />
+ <span id="t_3" from="19" to="21" />
+ <span id="t_4" from="22" to="26" />
+ <span id="t_5" from="27" to="30" />
+ <span id="t_6" from="31" to="36" />
+ <span id="t_7" from="37" to="40" />
+ <span id="t_8" from="41" to="43" />
+ <span id="t_9" from="44" to="49" />
+ <span id="t_10" from="50" to="63" />
+ <span id="t_11" from="64" to="73" />
+ <span id="t_12" from="74" to="85" />
+ <span id="t_13" from="86" to="89" />
+ <span id="t_14" from="90" to="94" />
+ <span id="t_15" from="95" to="97" />
+ <span id="t_16" from="98" to="106" />
+ <span id="t_17" from="107" to="113" />
+ <span id="t_18" from="113" to="114" />
+ <span id="t_19" from="114" to="115" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/sgbr/CMC-TSK/2014-09/3401/data.xml b/t/sgbr/CMC-TSK/2014-09/3401/data.xml
new file mode 100644
index 0000000..545be45
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/3401/data.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="text.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<raw_text docid="CMC-TSK_2014-09.3401" xmlns="http://ids-mannheim.de/ns/KorAP">
+ <metadata file="metadata.xml" />
+ <text>@ fitnessfrosch Na ja wenn Sie Nazis nur an Deren Kennzeichnung ausmachen wollen,sind Sie aber im falschen Weiher!!</text>
+</raw_text>
\ No newline at end of file
diff --git a/t/sgbr/CMC-TSK/2014-09/3401/header.xml b/t/sgbr/CMC-TSK/2014-09/3401/header.xml
new file mode 100644
index 0000000..61d8707
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/3401/header.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<teiHeader>
+<textSigle>CMC-TSK_2014-09.3401</textSigle>
+<item xml:id="CMC.TSK.2014.09.Texte.3401" n="102">
+<biblFull>
+<titleStmt>
+<title>@fitnessfrosch</title>
+<author ref="#CMC.TSK.2014.09.Autoren.206"/>
+</titleStmt>
+<publicationStmt>
+<publisher>tagesschau.de</publisher>
+<pubPlace ref="http://meta.tagesschau.de/node/090308#comment-1732754"/>
+<date>2014-10-01 00:50:00</date>
+</publicationStmt>
+</biblFull>
+</item>
+</teiHeader>
\ No newline at end of file
diff --git a/t/sgbr/CMC-TSK/2014-09/3401/sgbr/ana.xml b/t/sgbr/CMC-TSK/2014-09/3401/sgbr/ana.xml
new file mode 100644
index 0000000..5785f17
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/3401/sgbr/ana.xml
@@ -0,0 +1,207 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.3401" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="s1" from="0" to="1">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">@</f>
+ <f name="ctag">XY</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2" from="2" to="15">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">fitnessfrosch</f>
+ <f name="ctag">ADJD</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s3" from="17" to="18">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">a</f>
+ <f name="ctag">ITJ</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s4" from="19" to="21">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ja</f>
+ <f name="ctag">PTKANT</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s5" from="22" to="26">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">wenn</f>
+ <f name="ctag">KOUS</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s6" from="27" to="30">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Sie</f>
+ <f name="ctag">PPER</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s7" from="31" to="36">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Nazis</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s8" from="37" to="40">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">nur</f>
+ <f name="ctag">ADV</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s9" from="41" to="43">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">an</f>
+ <f name="ctag">APPR</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s10" from="44" to="49">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Deren</f>
+ <f name="ctag">PDS</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s11" from="50" to="63">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Kennzeichnung</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s12" from="64" to="73">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ausmachen</f>
+ <f name="ctag">VVFIN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s13" from="74" to="85">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">wollen,sind</f>
+ <f name="ctag">VVFIN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s14" from="86" to="89">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Sie</f>
+ <f name="ctag">PPER</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s15" from="90" to="94">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">aber</f>
+ <f name="ctag">ADV</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s16" from="95" to="97">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">im</f>
+ <f name="ctag">APPRART</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s17" from="98" to="106">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">falschen</f>
+ <f name="ctag">ADJA</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s18" from="107" to="113">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Weiher</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s19" from="113" to="114">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">!</f>
+ <f name="ctag">_ENDE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s20" from="114" to="115">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">!</f>
+ <f name="ctag">_ENDE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/sgbr/CMC-TSK/2014-09/3401/sgbr/lemma.xml b/t/sgbr/CMC-TSK/2014-09/3401/sgbr/lemma.xml
new file mode 100644
index 0000000..3bb930e
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/3401/sgbr/lemma.xml
@@ -0,0 +1,153 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.3401" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="s1" from="15" to="15">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Naja</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2" from="17" to="18">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Na</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s3" from="19" to="21">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ja</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s4" from="22" to="26">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">wenn</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s5" from="27" to="30">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Sie</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s6" from="31" to="36">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Nazi</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s7" from="37" to="40">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">nur</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s8" from="41" to="43">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">an</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s9" from="44" to="49">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Deren</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s10" from="50" to="63">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Kennzeichnung</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s11" from="64" to="73">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ausmachen</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s12" from="86" to="89">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Sie</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s13" from="90" to="94">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">aber</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s14" from="95" to="97">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">im</f>
+ <f name="lemma">in</f>
+ <f name="lemma">in.</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s15" from="98" to="106">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">falsch</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s16" from="107" to="113">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Weiher</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/sgbr/CMC-TSK/2014-09/3401/struct/structure.xml b/t/sgbr/CMC-TSK/2014-09/3401/struct/structure.xml
new file mode 100644
index 0000000..e9e5151
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/3401/struct/structure.xml
@@ -0,0 +1,287 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.3401" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="s0" from="0" to="114" l="1">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">text</f>
+ </fs>
+ </span>
+ <span id="s1" from="0" to="15" l="2">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">head</f>
+ </fs>
+ </span>
+ <span id="s2" from="0" to="15" l="3">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">s</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s3" from="0" to="1" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.XY</f>
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s4" from="2" to="15" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.ADJD</f>
+ <f name="n">2</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s5" from="16" to="114" l="2">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">p</f>
+ </fs>
+ </span>
+ <span id="s6" from="16" to="114" l="3">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">s</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="n">2</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s7" from="15" to="15" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.4291</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s8" from="17" to="18" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.ITJ</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.1627</f>
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s9" from="19" to="21" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.PTKANT</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.594</f>
+ <f name="n">2</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s10" from="22" to="26" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.KOUS</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.165</f>
+ <f name="n">3</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s11" from="27" to="30" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.PPER</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.455</f>
+ <f name="n">4</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s12" from="31" to="36" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.NN</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.4470</f>
+ <f name="n">5</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s13" from="37" to="40" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.ADV</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.342</f>
+ <f name="n">6</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s14" from="41" to="43" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.APPR</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.69</f>
+ <f name="n">7</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s15" from="44" to="49" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.PDS</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.11545</f>
+ <f name="n">8</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s16" from="50" to="63" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.NN</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.20665</f>
+ <f name="n">9</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s17" from="64" to="73" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.VVFIN</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.2620</f>
+ <f name="n">10</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s18" from="74" to="85" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.VVFIN</f>
+ <f name="n">11</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s19" from="86" to="89" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.PPER</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.455</f>
+ <f name="n">12</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s20" from="90" to="94" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.ADV</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.114</f>
+ <f name="n">13</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s21" from="95" to="97" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.APPRART</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.16</f>
+ <f name="n">14</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s22" from="98" to="106" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.ADJA</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.1000</f>
+ <f name="n">15</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s23" from="107" to="113" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.NN</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.20760</f>
+ <f name="n">16</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s24" from="113" to="114" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS._ENDE</f>
+ <f name="n">17</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s25" from="114" to="114" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS._ENDE</f>
+ <f name="n">18</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/sgbr/CMC-TSK/2014-09/header.xml b/t/sgbr/CMC-TSK/2014-09/header.xml
new file mode 100644
index 0000000..659f987
--- /dev/null
+++ b/t/sgbr/CMC-TSK/2014-09/header.xml
@@ -0,0 +1,148 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<teiHeader>
+<dokumentSigle>CMC-TSK_2014-09</dokumentSigle>
+ <fileDesc>
+ <titleStmt>
+ <title level="u" type="main">Korpus zur Beobachtung des Schreibgebrauchs im Deutschen</title>
+ <title level="u" type="sub">Subkorpus Internettexte</title>
+ <title level="u" type="sub">Subkorpus Leserkommentare Tagesschau</title>
+ <title level="u" type="sub">Subkorpus September 2014</title>
+ <title level="u" type="sub">Subkorpus Beispielauszug</title>
+ <funder>
+ <orgName>Bundesministerium für Bildung und Forschung</orgName>
+ </funder>
+ <editor n="1" xml:id="CMC.TSK.2014.09.Bearbeiter.1">
+ <persName>Peter M. Fischer</persName>
+ <orgName>Institut für Deutsche Sprache, Mannheim</orgName>
+ </editor>
+ <editor n="2" xml:id="CMC.TSK.2014.09.Bearbeiter.2">
+ <persName>Jakob Prange</persName>
+ <orgName>Saarland University</orgName>
+ </editor>
+ </titleStmt>
+ <publicationStmt>
+ <p>Dieses Subkorpus wurde als solches nicht veröffentlicht.</p>
+ </publicationStmt>
+ <sourceDesc>
+ <p>Die hier erfassten Texte sind Leserkommentare auf der Nachrichtenseite der Tagesschau <ref target="http://www.tagesschau.de/"/>. Die Seite veröffentlicht Nachrichtenartikel, die eine kurze Zeit lang von Lesern kommentiert werden können. Der Leser muss hierzu vorher ein kostenloses Konto eingerichtet haben und sich dann auf der Seite anmelden.</p>
+ </sourceDesc>
+ </fileDesc>
+ <language ident="de">Deutsch</language>
+ <projectDesc>
+ <p>
+ Dieses Subkorpus wurde vom und für das Gemeinschaftsprojekt <name>Analyse und Instrumentarien zur Beobachtung des Schreibgebrauchs im Deutschen</name> (<ref target="http://www.schreibgebrauch.org/">http://www.schreibgebrauch.org/</ref>) zusammengestellt. Das Projektkonsortium besteht aus folgenden Partnern:
+ <list>
+ <item>
+ <orgName>Institut für Deutsche Sprache, Mannheim</orgName>
+ </item>
+ <item>
+ <orgName>Institut für Computerlinguistik, Universität des Saarlandes, Saarbrücken</orgName>
+ </item>
+ <item>
+ <orgName>Bibliographisches Institut GmbH (Dudenverlag), Berlin</orgName>
+ </item>
+ <item>
+ <orgName>Wahrig bei Brockhaus, Gütersloh</orgName>
+ </item>
+ </list>
+ </p>
+ </projectDesc>
+ <editorialDecl xml:id="CMC.TSK.2014.09.Metadaten.Autoren">
+ <interpretation>
+ <p>Dieses Korpus beinhaltet Metadaten zu den Autoren der Texte. Eine Liste aller Autoren, die in diesem Subkorpus kommentiert haben, wurde in der Sektion <gi corresp="#CMC.TSK.2014.09.Autoren">particDesc</gi> zusammengestellt. Jeder Autor ist dabei als <tag scheme="TEI">person</tag> kodiert und führt im Unterelement <tag scheme="TEI">persName</tag> sein oder ihr verwendetes Pseudonym. Aus Gründen der Anonymitätswahrung wurden keine weiteren personenbezogenen Metadaten in das Korpus aufgenommen. Die Zuordnung der Autoren zu ihren Texten regeln die Metadaten der Texte.</p>
+ </interpretation>
+ </editorialDecl>
+ <particDesc xml:id="CMC.TSK.2014.09.Autoren">
+ <person ana="#CMC.TSK.2014.09.Metadaten.Autoren" n="206" xml:id="CMC.TSK.2014.09.Autoren.206">
+ <persName type="pseudo">weltoffen</persName>
+ </person>
+ <person ana="#CMC.TSK.2014.09.Metadaten.Autoren" n="587" xml:id="CMC.TSK.2014.09.Autoren.587">
+ <persName type="pseudo">privat23</persName>
+ </person>
+ <person ana="#CMC.TSK.2014.09.Metadaten.Autoren" n="927" xml:id="CMC.TSK.2014.09.Autoren.927">
+ <persName type="pseudo">Koelle_am_Rhing</persName>
+ </person>
+ <person ana="#CMC.TSK.2014.09.Metadaten.Autoren" n="1043" xml:id="CMC.TSK.2014.09.Autoren.1043">
+ <persName type="pseudo">fitnessfrosch</persName>
+ </person>
+ </particDesc>
+ <interpretation xml:id="CMC.TSK.2014.09.Kodex">
+ <p>Dieses Korpus beinhaltet Metadaten zu den Kommentartexten wie auch zu den Nachrichtenartikeln, in dessen Kontext sie verfasst wurden. Die folgende Sektion <gi corresp="#CMC.TSK.2014.09.Texte">editorialDecl</gi> stellt eine strukturelle Übersicht aller Nachrichtenartikel und ihrer Kommentartexte zusammen. Jeder als <tag scheme="TEI">item</tag> realisierte Listeneintrag repräsentiert dabei einen Nachrichtenartikel, der neben einem vollständig strukturierten bibliographischen Unterblock mit Titel und Veröffentlichung auch jeweils als <tag scheme="TEI">desc</tag> kodiert einen kurzen Einleitungstext sowie Schlagwörter zum Inhalt (<tag scheme="TEI">name</tag>) und der geografischen Verortung (<tag scheme="TEI">geogName</tag>) führt. Darunter folgt eine Liste aller zu diesem Nachrichtenartikel verfasster Kommentartexte. Hier repräsentiert jeder als <tag scheme="TEI">item</tag> realisierte Listeneintrag einen Kommentartext, der ebenso einen vollständig strukturierten bibliographischen Unterblock mit Titel, Veröffentlichung und Autorenreferenz führt.</p>
+ </interpretation>
+ <segmentation xml:id="CMC.TSK.2014.09.Token">
+ <p>Die Texte in diesem Korpus wurden mit der Software <ref target="https://github.com/DFKI-MLT/JTok">jTok</ref> tokenisiert. Im Ergebnis wurden Sätze mit <tag scheme="TEI">s</tag>, Wörter mit <tag scheme="TEI">w</tag> und Satzpunktuation mit <tag scheme="TEI">c</tag> ausgezeichnet. Ferner wurden auf Textebene absatzübergreifend alle Sätze und auf Satzebene durchgängig alle Wörter und jede Satzpunktuation im jeweiligen Attribut <att scheme="TEI">n</att> und bei 1 beginnend durchgezählt.</p>
+ </segmentation>
+ <interpretation xml:id="CMC.TSK.2014.09.POS">
+ <p>Dieses Korpus verwendet ein Wortart-Tagging nach dem Stuttgart-Tübingen-Tagset (<ref target="http://www.ims.uni-stuttgart.de/forschung/ressourcen/lexika/TagSets/stts-table.html">STTS</ref>) und dessen Erweiterung für das Genre der internetbasierten Kommunikation (<ref target="http://opus.bsz-bw.de/ubhi/volltexte/2014/279/pdf/p027.pdf">STTS 2.0</ref>). Dabei wurden drei ursprüngliche Tags ("$.", "$," und "$(") in ("_ENDE", "_KOMMA" und "_SONST") umbenannt, um den Validitätsrichtlinien für XML-Identifikatoren (beschrieben in der <ref target="http://www.w3.org/TR/2006/REC-xml-20060816/">W3C-Empfehlung für XML 1.0 vom 16.08.2006</ref>) zu genügen.</p>
+ <ab>
+ <interpGrp type="annotation">
+ <interp xml:id="CMC.TSK.2014.09.POS.ADJA">attributives Adjektiv</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.ADJD">adverbiales oder prädikatives Adjektiv</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.ADR">Adressierung</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.ADV">Adverb</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.APPO">Postposition</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.APPR">Präposition; Zirkumposition links</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.APPRART">Präposition mit Artikel</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.APZR">Zirkumposition rechts</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.ART">bestimmter oder unbestimmter Artikel</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.AW">Aktionswort</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.AWIND">Aktionswort-Indikator</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.CARD">Kardinalzahl</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.EMOASC">Emoticon, als Zeichenfolge dargestellt (Typ "ASCII")</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.ERRAW">falsch abgetrennter Wortteil</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.ERRTOK">Tokenisierungsfehler</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.FM">Fremdsprachliches Material</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.HST">Hashtag</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.ITJ">Interjektion</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.KOKOM">Vergleichskonjunktion</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.KON">nebenordnende Konjunktion</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.KOUI">unterordnende Konjunktion mit "zu" und Infinitiv</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.KOUS">unterordnende Konjunktion mit Satz</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.NE">Eigennamen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.NN">normales Nomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.ONO">Onomatopoetikon</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PDAT">attribuierendes Demonstrativpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PDS">substituierendes Demonstrativpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PIAT">attribuierendes Indefinitpronomen ohne Determiner</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PIS">substituierendes Indefinitpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PPER">irreflexives Personalpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PPOSAT">attribuierendes Possessivpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PPOSS">substituierendes Possessivpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PRELAT">attribuierendes Relativpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PRELS">substituierendes Relativpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PRF">reflexives Personalpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PROAV">Pronominaladverb</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PTKA">Partikel bei Adjektiv oder Adverb</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PTKANT">Antwortpartikel</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PTKNEG">Negationspartikel</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PTKVZ">abgetrennter Verbzusatz</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PTKZU">"zu" vor Infinitiv</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PWAT">attribuierendes Interrogativpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PWAV">adverbiales Interrogativ- oder Relativpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PWS">substituierendes Interrogativpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.TRUNC">Kompositions-Erstglied</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.URL">Uniform Resource Locator</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VAFIN">finites Verb, aux</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VAIMP">Imperativ, aux</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VAINF">Infinitiv, aux</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VAPP">Partizip Perfekt, aux</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VAPPER">Verb, aux mit Personalpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VMFIN">finites Verb, modal</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VMINF">Infinitiv, modal</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VMPP">Partizip Perfekt, modal</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VVFIN">finites Verb, voll</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VVIMP">Imperativ, voll</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VVINF">Infinitiv, voll</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VVIZU">Infinitiv mit "zu", voll</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VVPP">Partizip Perfekt, voll</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VVPPER">Verb, voll mit Personalpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.XY">Nichtwort, Sonderzeichen enthaltend</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS._ENDE">Satzbeendende Interpunktion</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS._KOMMA">Komma</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS._SONST">sonstige Satzzeichen; satzintern</interp>
+ </interpGrp>
+ </ab>
+ </interpretation>
+</teiHeader>
\ No newline at end of file
diff --git a/t/sgbr/CMC-TSK/header.xml b/t/sgbr/CMC-TSK/header.xml
new file mode 100644
index 0000000..c2658d5
--- /dev/null
+++ b/t/sgbr/CMC-TSK/header.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<teiHeader>
+<korpusSigle>CMC-TSK</korpusSigle>
+</teiHeader>
diff --git a/t/sgbr/lemma.t b/t/sgbr/lemma.t
index 3971b88..87f1d83 100644
--- a/t/sgbr/lemma.t
+++ b/t/sgbr/lemma.t
@@ -56,4 +56,50 @@
is($stream->[-1]->[2], 's:Kevin', 'Last term');
is($stream->[-1]->[3], 'sgbr/l:Kevin', 'Last term');
+
+# Real data 1: text 2843 - tokenization parses, but adding the lemma layer is expected to fail
+$path = catdir(dirname(__FILE__), 'CMC-TSK', '2014-09', '2843');
+
+ok($doc = KorAP::XML::Krill->new(
+ path => $path . '/'
+), 'Create Document');
+
+ok($doc->parse, 'Parse document');
+
+ok($tokens = KorAP::XML::Tokenizer->new(
+ path => $doc->path,
+ doc => $doc,
+ foundry => 'Sgbr',
+ layer => 'Lemma',
+ name => 'tokens'
+), 'Create tokens based on lemmata');
+
+ok($tokens->parse, 'Parse tokenization based on lemmata');
+
+ok(!$tokens->add('Sgbr', 'Lemma'), 'Add Structure impossible - no token data');
+
+
+# Real data 2: text 3401 - same setup, here adding the lemma layer is expected to succeed
+$path = catdir(dirname(__FILE__), 'CMC-TSK', '2014-09', '3401');
+
+ok($doc = KorAP::XML::Krill->new(
+ path => $path . '/'
+), 'Create Document');
+
+ok($doc->parse, 'Parse document');
+
+ok($tokens = KorAP::XML::Tokenizer->new(
+ path => $doc->path,
+ doc => $doc,
+ foundry => 'Sgbr',
+ layer => 'Lemma',
+ name => 'tokens'
+), 'Create tokens based on lemmata');
+
+ok($tokens->parse, 'Parse tokenization based on lemmata');
+
+ok($tokens->add('Sgbr', 'Lemma'), 'Add Structure');
+
done_testing;
+
+__END__
diff --git a/t/sgbr/meta_duden.t b/t/sgbr/meta_duden.t
index cd82130..7cae3c5 100644
--- a/t/sgbr/meta_duden.t
+++ b/t/sgbr/meta_duden.t
@@ -29,6 +29,7 @@
is($doc->publisher, 'Dorfblatt GmbH', 'Publisher');
is($doc->pub_date, '20130126');
+is($doc->store('sgbrDate'), '2013-01-26', 'sgbrDate'); # stored value keeps the hyphenated form, unlike pub_date
is($doc->pub_place, 'Stadtingen');
is($doc->doc_title, 'Korpus zur Beobachtung des Schreibgebrauchs im Deutschen', 'Doc title');
diff --git a/t/sgbr/meta_ids.t b/t/sgbr/meta_ids.t
new file mode 100644
index 0000000..4128f5d
--- /dev/null
+++ b/t/sgbr/meta_ids.t
@@ -0,0 +1,145 @@
+use strict;
+use warnings;
+use Test::More;
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+use Data::Dumper;
+use KorAP::XML::Tokenizer;
+use KorAP::XML::Krill;
+use utf8;
+
+my $path = catdir(dirname(__FILE__), 'CMC-TSK', '2014-09', '2843'); # first sample text of the fixture corpus
+
+ok(my $doc = KorAP::XML::Krill->new(
+ path => $path . '/'
+), 'Create Document');
+
+ok($doc->parse, 'Parse document');
+
+like($doc->path, qr!\Q$path\E/!, 'Path'); # \Q..\E: match the path literally, not as a pattern
+
+# Metadata of the text, its document and its corpus
+is($doc->text_sigle, 'CMC-TSK_2014-09.2843', 'ID-text');
+
+is($doc->doc_sigle, 'CMC-TSK_2014-09', 'ID-doc');
+is($doc->corpus_sigle, 'CMC-TSK', 'ID-corpus');
+
+is($doc->title, '@ Koelle_am_Rhing 10:18', 'title');
+
+ok(!$doc->sub_title, 'no subtitle');
+
+is($doc->publisher, 'tagesschau.de', 'Publisher');
+
+is($doc->pub_date, '20140930', 'Publication date');
+
+ok(!$doc->pub_place, 'No pub place');
+
+is($doc->doc_title, 'Korpus zur Beobachtung des Schreibgebrauchs im Deutschen', 'Doc title');
+is($doc->doc_sub_title, 'Subkorpus Internettexte, Subkorpus Leserkommentare Tagesschau, Subkorpus September 2014, Subkorpus Beispielauszug', 'Doc Sub title');
+
+is($doc->store('funder'), 'Bundesministerium für Bildung und Forschung', 'Funder');
+
+is($doc->author, 'privat23', 'Author');
+ok(!$doc->store('sgbrAuthorSex'), 'No Sex');
+ok(!$doc->store('sgbrKodex'), 'No kodex');
+is($doc->reference, 'http://meta.tagesschau.de/node/090285#comment-1732187', 'Reference');
+
+is($doc->keywords_string, '', 'No keywords');
+
+is($doc->language, 'de', 'Language');
+
+ok(!$doc->editor, 'Editor');
+
+ok(!$doc->text_type, 'Text Type');
+ok(!$doc->text_type_art, 'Text Type Art');
+ok(!$doc->text_type_ref, 'Text Type Ref');
+ok(!$doc->text_column, 'Text Column');
+ok(!$doc->text_domain, 'Text Domain');
+ok(!$doc->creation_date, 'Creation Date');
+ok(!$doc->license, 'License');
+ok(!$doc->pages, 'Pages');
+ok(!$doc->file_edition_statement, 'File Edition Statement');
+ok(!$doc->bibl_edition_statement, 'Bibl Edition Statement');
+
+ok(!$doc->doc_editor, 'Doc: editor');
+ok(!$doc->doc_author, 'Doc: author');
+
+ok(!$doc->corpus_title, 'Corpus: title');
+ok(!$doc->corpus_sub_title, 'Corpus: subtitle');
+ok(!$doc->corpus_editor, 'Corpus: editor');
+ok(!$doc->corpus_author, 'Corpus: author');
+
+my $hash = $doc->to_hash; # the hash export must carry the same text title
+is($hash->{title}, '@ Koelle_am_Rhing 10:18', 'Title in hash');
+
+
+# Second document (text 3401): same checks, reusing $path, $doc and $hash from above
+
+$path = catdir(dirname(__FILE__), 'CMC-TSK', '2014-09', '3401');
+
+ok($doc = KorAP::XML::Krill->new(
+ path => $path . '/'
+), 'Create Document');
+
+ok($doc->parse, 'Parse document');
+
+like($doc->path, qr!\Q$path\E/!, 'Path'); # \Q..\E: match the path literally, not as a pattern
+
+# Metadata of the text, its document and its corpus
+is($doc->text_sigle, 'CMC-TSK_2014-09.3401', 'ID-text');
+
+is($doc->doc_sigle, 'CMC-TSK_2014-09', 'ID-doc');
+is($doc->corpus_sigle, 'CMC-TSK', 'ID-corpus');
+
+is($doc->title, '@fitnessfrosch', 'title');
+
+ok(!$doc->sub_title, 'no subtitle');
+
+is($doc->publisher, 'tagesschau.de', 'Publisher');
+
+is($doc->pub_date, '20141001', 'Publication date');
+is($doc->store('sgbrDate'), '2014-10-01 00:50:00', 'sgbrDate'); # stored value also carries a time of day
+
+ok(!$doc->pub_place, 'No pub place');
+
+is($doc->doc_title, 'Korpus zur Beobachtung des Schreibgebrauchs im Deutschen', 'Doc title');
+is($doc->doc_sub_title, 'Subkorpus Internettexte, Subkorpus Leserkommentare Tagesschau, Subkorpus September 2014, Subkorpus Beispielauszug', 'Doc Sub title');
+
+is($doc->store('funder'), 'Bundesministerium für Bildung und Forschung', 'Funder');
+
+is($doc->author, 'weltoffen', 'Author');
+ok(!$doc->store('sgbrAuthorSex'), 'No Sex');
+ok(!$doc->store('sgbrKodex'), 'No kodex');
+is($doc->reference, 'http://meta.tagesschau.de/node/090308#comment-1732754', 'Reference');
+
+is($doc->keywords_string, '', 'No keywords');
+
+is($doc->language, 'de', 'Language');
+
+ok(!$doc->editor, 'Editor');
+
+ok(!$doc->text_type, 'Text Type');
+ok(!$doc->text_type_art, 'Text Type Art');
+ok(!$doc->text_type_ref, 'Text Type Ref');
+ok(!$doc->text_column, 'Text Column');
+ok(!$doc->text_domain, 'Text Domain');
+ok(!$doc->creation_date, 'Creation Date');
+ok(!$doc->license, 'License');
+ok(!$doc->pages, 'Pages');
+ok(!$doc->file_edition_statement, 'File Edition Statement');
+ok(!$doc->bibl_edition_statement, 'Bibl Edition Statement');
+
+ok(!$doc->doc_editor, 'Doc: editor');
+ok(!$doc->doc_author, 'Doc: author');
+
+ok(!$doc->corpus_title, 'Corpus: title');
+ok(!$doc->corpus_sub_title, 'Corpus: subtitle');
+ok(!$doc->corpus_editor, 'Corpus: editor');
+ok(!$doc->corpus_author, 'Corpus: author');
+
+$hash = $doc->to_hash; # the hash export must carry the same text title
+is($hash->{title}, '@fitnessfrosch', 'Title in hash');
+
+done_testing;
+__END__
+
diff --git a/t/sgbr/pos.t b/t/sgbr/pos.t
index feb357a..e87a1d7 100644
--- a/t/sgbr/pos.t
+++ b/t/sgbr/pos.t
@@ -41,4 +41,17 @@
is($stream->[1]->[3], 'sgbr/p:PPER', 'First term POS');
is($stream->[-1]->[3], 'sgbr/p:NE', 'Last term POS');
+
+ok($tokens->add('Sgbr', 'Lemma'), 'Add Structure'); # add the lemma layer after the POS checks above
+
+$data = $tokens->to_data->{data}; # export again; the assertions below expect both layers in the stream
+$stream = $data->{stream};
+
+is($stream->[-1]->[0], '_50$<i>359<i>364', 'Token number');
+is($stream->[-1]->[1], 'i:kevin', 'Last term (i)');
+is($stream->[-1]->[2], 's:Kevin', 'Last term (s)');
+is($stream->[-1]->[3], 'sgbr/l:Kevin', 'Last term (sgbr/l)');
+is($stream->[-1]->[4], 'sgbr/p:NE', 'Last term (sgbr/p)');
+ok(!defined $stream->[-1]->[5], 'Last term has no further entry');
+
done_testing;