Restructure test suite to prepare CPAN release
Change-Id: If3033774f50d33b2e5b3344e3927fd534cef4dfb
diff --git a/t/real/sgbr/CMC-TSK/2014-09/2843/base/paragraph.xml b/t/real/sgbr/CMC-TSK/2014-09/2843/base/paragraph.xml
new file mode 100644
index 0000000..3dfe5e2
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/2843/base/paragraph.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.2843" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span from="24" to="29" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/CMC-TSK/2014-09/2843/base/sentences.xml b/t/real/sgbr/CMC-TSK/2014-09/2843/base/sentences.xml
new file mode 100644
index 0000000..c9091e3
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/2843/base/sentences.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.2843" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span from="0" to="23" />
+ <span from="24" to="29" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/CMC-TSK/2014-09/2843/base/tokens_aggr.xml b/t/real/sgbr/CMC-TSK/2014-09/2843/base/tokens_aggr.xml
new file mode 100644
index 0000000..87a2303
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/2843/base/tokens_aggr.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.2843" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="t_0" from="0" to="1" />
+ <span id="t_1" from="2" to="8" />
+ <span id="t_2" from="8" to="9" />
+ <span id="t_3" from="9" to="11" />
+ <span id="t_4" from="11" to="12" />
+ <span id="t_5" from="12" to="17" />
+ <span id="t_6" from="18" to="20" />
+ <span id="t_7" from="20" to="21" />
+ <span id="t_8" from="21" to="23" />
+ <span id="t_9" from="24" to="27" />
+ <span id="t_10" from="28" to="29" />
+ <span id="t_11" from="29" to="30" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/CMC-TSK/2014-09/2843/base/tokens_conservative.xml b/t/real/sgbr/CMC-TSK/2014-09/2843/base/tokens_conservative.xml
new file mode 100644
index 0000000..aa3aa2e
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/2843/base/tokens_conservative.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.2843" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="t_0" from="0" to="1" />
+ <span id="t_1" from="2" to="17" />
+ <span id="t_2" from="18" to="23" />
+ <span id="t_3" from="24" to="27" />
+ <span id="t_4" from="28" to="29" />
+ <span id="t_5" from="29" to="30" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/CMC-TSK/2014-09/2843/data.xml b/t/real/sgbr/CMC-TSK/2014-09/2843/data.xml
new file mode 100644
index 0000000..cd93058
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/2843/data.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="text.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<raw_text docid="CMC-TSK_2014-09.2843" xmlns="http://ids-mannheim.de/ns/KorAP">
+ <metadata file="metadata.xml" />
+ <text>@ Koelle_am_Rhing 10:18 100 %!</text>
+</raw_text>
\ No newline at end of file
diff --git a/t/real/sgbr/CMC-TSK/2014-09/2843/header.xml b/t/real/sgbr/CMC-TSK/2014-09/2843/header.xml
new file mode 100644
index 0000000..d8008d2
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/2843/header.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<teiHeader>
+<textSigle>CMC-TSK_2014-09.2843</textSigle>
+<item xml:id="CMC.TSK.2014.09.Texte.2843" n="45">
+<biblFull>
+<titleStmt>
+<title>@ Koelle_am_Rhing 10:18</title>
+<author ref="#CMC.TSK.2014.09.Autoren.587"/>
+</titleStmt>
+<publicationStmt>
+<publisher>tagesschau.de</publisher>
+<pubPlace ref="http://meta.tagesschau.de/node/090285#comment-1732187"/>
+<date>2014-09-30 14:33:00</date>
+</publicationStmt>
+</biblFull>
+</item>
+</teiHeader>
diff --git a/t/real/sgbr/CMC-TSK/2014-09/2843/sgbr/ana.xml b/t/real/sgbr/CMC-TSK/2014-09/2843/sgbr/ana.xml
new file mode 100644
index 0000000..0f80d17
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/2843/sgbr/ana.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.2843" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="s1" from="0" to="1">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">@</f>
+ <f name="ctag">APPR</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2" from="2" to="17">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Koelle_am_Rhing</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s3" from="18" to="23">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">10:18</f>
+ <f name="ctag">CARD</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s4" from="24" to="27">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">100</f>
+ <f name="ctag">CARD</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s5" from="28" to="29">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">%</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s6" from="29" to="30">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">!</f>
+ <f name="ctag">_ENDE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/CMC-TSK/2014-09/2843/sgbr/lemma.xml b/t/real/sgbr/CMC-TSK/2014-09/2843/sgbr/lemma.xml
new file mode 100644
index 0000000..9d34627
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/2843/sgbr/lemma.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.2843" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/CMC-TSK/2014-09/2843/struct/structure.xml b/t/real/sgbr/CMC-TSK/2014-09/2843/struct/structure.xml
new file mode 100644
index 0000000..ac05cb6
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/2843/struct/structure.xml
@@ -0,0 +1,108 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.2843" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="s0" from="0" to="29" l="1">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">text</f>
+ </fs>
+ </span>
+ <span id="s1" from="0" to="23" l="2">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">head</f>
+ </fs>
+ </span>
+ <span id="s2" from="0" to="23" l="3">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">s</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s3" from="0" to="1" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.APPR</f>
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s4" from="2" to="17" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.NN</f>
+ <f name="n">2</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s5" from="18" to="23" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.CARD</f>
+ <f name="n">3</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s6" from="24" to="29" l="2">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">p</f>
+ </fs>
+ </span>
+ <span id="s7" from="24" to="29" l="3">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">s</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="n">2</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s8" from="24" to="27" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.CARD</f>
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s9" from="28" to="29" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.NN</f>
+ <f name="n">2</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s10" from="29" to="29" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS._ENDE</f>
+ <f name="n">3</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/CMC-TSK/2014-09/3401/base/paragraph.xml b/t/real/sgbr/CMC-TSK/2014-09/3401/base/paragraph.xml
new file mode 100644
index 0000000..6edab5f
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/3401/base/paragraph.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.3401" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span from="16" to="114" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/CMC-TSK/2014-09/3401/base/sentences.xml b/t/real/sgbr/CMC-TSK/2014-09/3401/base/sentences.xml
new file mode 100644
index 0000000..f877f16
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/3401/base/sentences.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.3401" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span from="0" to="15" />
+ <span from="16" to="114" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/CMC-TSK/2014-09/3401/base/tokens_aggr.xml b/t/real/sgbr/CMC-TSK/2014-09/3401/base/tokens_aggr.xml
new file mode 100644
index 0000000..e417fee
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/3401/base/tokens_aggr.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.3401" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="t_0" from="0" to="1" />
+ <span id="t_1" from="2" to="15" />
+ <span id="t_2" from="16" to="18" />
+ <span id="t_3" from="19" to="21" />
+ <span id="t_4" from="22" to="26" />
+ <span id="t_5" from="27" to="30" />
+ <span id="t_6" from="31" to="36" />
+ <span id="t_7" from="37" to="40" />
+ <span id="t_8" from="41" to="43" />
+ <span id="t_9" from="44" to="49" />
+ <span id="t_10" from="50" to="63" />
+ <span id="t_11" from="64" to="73" />
+ <span id="t_12" from="74" to="80" />
+ <span id="t_13" from="80" to="81" />
+ <span id="t_14" from="81" to="85" />
+ <span id="t_15" from="86" to="89" />
+ <span id="t_16" from="90" to="94" />
+ <span id="t_17" from="95" to="97" />
+ <span id="t_18" from="98" to="106" />
+ <span id="t_19" from="107" to="113" />
+ <span id="t_20" from="113" to="114" />
+ <span id="t_21" from="114" to="115" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/CMC-TSK/2014-09/3401/base/tokens_conservative.xml b/t/real/sgbr/CMC-TSK/2014-09/3401/base/tokens_conservative.xml
new file mode 100644
index 0000000..60c0624
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/3401/base/tokens_conservative.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.3401" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="t_0" from="0" to="1" />
+ <span id="t_1" from="2" to="15" />
+ <span id="t_2" from="16" to="18" />
+ <span id="t_3" from="19" to="21" />
+ <span id="t_4" from="22" to="26" />
+ <span id="t_5" from="27" to="30" />
+ <span id="t_6" from="31" to="36" />
+ <span id="t_7" from="37" to="40" />
+ <span id="t_8" from="41" to="43" />
+ <span id="t_9" from="44" to="49" />
+ <span id="t_10" from="50" to="63" />
+ <span id="t_11" from="64" to="73" />
+ <span id="t_12" from="74" to="85" />
+ <span id="t_13" from="86" to="89" />
+ <span id="t_14" from="90" to="94" />
+ <span id="t_15" from="95" to="97" />
+ <span id="t_16" from="98" to="106" />
+ <span id="t_17" from="107" to="113" />
+ <span id="t_18" from="113" to="114" />
+ <span id="t_19" from="114" to="115" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/CMC-TSK/2014-09/3401/data.xml b/t/real/sgbr/CMC-TSK/2014-09/3401/data.xml
new file mode 100644
index 0000000..545be45
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/3401/data.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="text.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<raw_text docid="CMC-TSK_2014-09.3401" xmlns="http://ids-mannheim.de/ns/KorAP">
+ <metadata file="metadata.xml" />
+ <text>@ fitnessfrosch Na ja wenn Sie Nazis nur an Deren Kennzeichnung ausmachen wollen,sind Sie aber im falschen Weiher!!</text>
+</raw_text>
\ No newline at end of file
diff --git a/t/real/sgbr/CMC-TSK/2014-09/3401/header.xml b/t/real/sgbr/CMC-TSK/2014-09/3401/header.xml
new file mode 100644
index 0000000..61d8707
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/3401/header.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<teiHeader>
+<textSigle>CMC-TSK_2014-09.3401</textSigle>
+<item xml:id="CMC.TSK.2014.09.Texte.3401" n="102">
+<biblFull>
+<titleStmt>
+<title>@fitnessfrosch</title>
+<author ref="#CMC.TSK.2014.09.Autoren.206"/>
+</titleStmt>
+<publicationStmt>
+<publisher>tagesschau.de</publisher>
+<pubPlace ref="http://meta.tagesschau.de/node/090308#comment-1732754"/>
+<date>2014-10-01 00:50:00</date>
+</publicationStmt>
+</biblFull>
+</item>
+</teiHeader>
\ No newline at end of file
diff --git a/t/real/sgbr/CMC-TSK/2014-09/3401/sgbr/ana.xml b/t/real/sgbr/CMC-TSK/2014-09/3401/sgbr/ana.xml
new file mode 100644
index 0000000..5785f17
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/3401/sgbr/ana.xml
@@ -0,0 +1,207 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.3401" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="s1" from="0" to="1">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">@</f>
+ <f name="ctag">XY</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2" from="2" to="15">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">fitnessfrosch</f>
+ <f name="ctag">ADJD</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s3" from="17" to="18">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">a</f>
+ <f name="ctag">ITJ</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s4" from="19" to="21">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ja</f>
+ <f name="ctag">PTKANT</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s5" from="22" to="26">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">wenn</f>
+ <f name="ctag">KOUS</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s6" from="27" to="30">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Sie</f>
+ <f name="ctag">PPER</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s7" from="31" to="36">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Nazis</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s8" from="37" to="40">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">nur</f>
+ <f name="ctag">ADV</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s9" from="41" to="43">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">an</f>
+ <f name="ctag">APPR</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s10" from="44" to="49">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Deren</f>
+ <f name="ctag">PDS</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s11" from="50" to="63">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Kennzeichnung</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s12" from="64" to="73">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ausmachen</f>
+ <f name="ctag">VVFIN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s13" from="74" to="85">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">wollen,sind</f>
+ <f name="ctag">VVFIN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s14" from="86" to="89">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Sie</f>
+ <f name="ctag">PPER</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s15" from="90" to="94">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">aber</f>
+ <f name="ctag">ADV</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s16" from="95" to="97">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">im</f>
+ <f name="ctag">APPRART</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s17" from="98" to="106">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">falschen</f>
+ <f name="ctag">ADJA</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s18" from="107" to="113">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Weiher</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s19" from="113" to="114">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">!</f>
+ <f name="ctag">_ENDE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s20" from="114" to="115">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">!</f>
+ <f name="ctag">_ENDE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/CMC-TSK/2014-09/3401/sgbr/lemma.xml b/t/real/sgbr/CMC-TSK/2014-09/3401/sgbr/lemma.xml
new file mode 100644
index 0000000..3bb930e
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/3401/sgbr/lemma.xml
@@ -0,0 +1,153 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.3401" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="s1" from="15" to="15">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Naja</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2" from="17" to="18">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Na</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s3" from="19" to="21">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ja</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s4" from="22" to="26">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">wenn</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s5" from="27" to="30">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Sie</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s6" from="31" to="36">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Nazi</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s7" from="37" to="40">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">nur</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s8" from="41" to="43">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">an</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s9" from="44" to="49">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Deren</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s10" from="50" to="63">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Kennzeichnung</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s11" from="64" to="73">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ausmachen</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s12" from="86" to="89">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Sie</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s13" from="90" to="94">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">aber</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s14" from="95" to="97">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">im</f>
+ <f name="lemma">in</f>
+ <f name="lemma">in.</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s15" from="98" to="106">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">falsch</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s16" from="107" to="113">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Weiher</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/CMC-TSK/2014-09/3401/struct/structure.xml b/t/real/sgbr/CMC-TSK/2014-09/3401/struct/structure.xml
new file mode 100644
index 0000000..e9e5151
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/3401/struct/structure.xml
@@ -0,0 +1,287 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="CMC-TSK_2014-09.3401" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="s0" from="0" to="114" l="1">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">text</f>
+ </fs>
+ </span>
+ <span id="s1" from="0" to="15" l="2">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">head</f>
+ </fs>
+ </span>
+ <span id="s2" from="0" to="15" l="3">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">s</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s3" from="0" to="1" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.XY</f>
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s4" from="2" to="15" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.ADJD</f>
+ <f name="n">2</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s5" from="16" to="114" l="2">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">p</f>
+ </fs>
+ </span>
+ <span id="s6" from="16" to="114" l="3">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">s</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="n">2</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s7" from="15" to="15" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.4291</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s8" from="17" to="18" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.ITJ</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.1627</f>
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s9" from="19" to="21" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.PTKANT</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.594</f>
+ <f name="n">2</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s10" from="22" to="26" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.KOUS</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.165</f>
+ <f name="n">3</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s11" from="27" to="30" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.PPER</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.455</f>
+ <f name="n">4</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s12" from="31" to="36" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.NN</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.4470</f>
+ <f name="n">5</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s13" from="37" to="40" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.ADV</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.342</f>
+ <f name="n">6</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s14" from="41" to="43" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.APPR</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.69</f>
+ <f name="n">7</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s15" from="44" to="49" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.PDS</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.11545</f>
+ <f name="n">8</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s16" from="50" to="63" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.NN</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.20665</f>
+ <f name="n">9</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s17" from="64" to="73" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.VVFIN</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.2620</f>
+ <f name="n">10</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s18" from="74" to="85" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.VVFIN</f>
+ <f name="n">11</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s19" from="86" to="89" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.PPER</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.455</f>
+ <f name="n">12</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s20" from="90" to="94" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.ADV</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.114</f>
+ <f name="n">13</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s21" from="95" to="97" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.APPRART</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.16</f>
+ <f name="n">14</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s22" from="98" to="106" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.ADJA</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.1000</f>
+ <f name="n">15</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s23" from="107" to="113" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS.NN</f>
+ <f name="lemmaRef">#CMC.TSK.2014.09.Lemmata.20760</f>
+ <f name="n">16</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s24" from="113" to="114" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS._ENDE</f>
+ <f name="n">17</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s25" from="114" to="114" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#CMC.TSK.2014.09.POS._ENDE</f>
+ <f name="n">18</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/CMC-TSK/2014-09/header.xml b/t/real/sgbr/CMC-TSK/2014-09/header.xml
new file mode 100644
index 0000000..659f987
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/2014-09/header.xml
@@ -0,0 +1,148 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<teiHeader>
+<dokumentSigle>CMC-TSK_2014-09</dokumentSigle>
+ <fileDesc>
+ <titleStmt>
+ <title level="u" type="main">Korpus zur Beobachtung des Schreibgebrauchs im Deutschen</title>
+ <title level="u" type="sub">Subkorpus Internettexte</title>
+ <title level="u" type="sub">Subkorpus Leserkommentare Tagesschau</title>
+ <title level="u" type="sub">Subkorpus September 2014</title>
+ <title level="u" type="sub">Subkorpus Beispielauszug</title>
+ <funder>
+ <orgName>Bundesministerium für Bildung und Forschung</orgName>
+ </funder>
+ <editor n="1" xml:id="CMC.TSK.2014.09.Bearbeiter.1">
+ <persName>Peter M. Fischer</persName>
+ <orgName>Institut für Deutsche Sprache, Mannheim</orgName>
+ </editor>
+ <editor n="2" xml:id="CMC.TSK.2014.09.Bearbeiter.2">
+ <persName>Jakob Prange</persName>
+ <orgName>Saarland University</orgName>
+ </editor>
+ </titleStmt>
+ <publicationStmt>
+ <p>Dieses Subkorpus wurde als solches nicht veröffentlicht.</p>
+ </publicationStmt>
+ <sourceDesc>
+ <p>Die hier erfassten Texte sind Leserkommentare auf der Nachrichtenseite der Tagesschau <ref target="http://www.tagesschau.de/"/>. Die Seite veröffentlicht Nachrichtenartikel, die eine kurze Zeit lang von Lesern kommentiert werden können. Der Leser muss hierzu vorher ein kostenloses Konto eingerichtet haben und sich dann auf der Seite anmelden.</p>
+ </sourceDesc>
+ </fileDesc>
+ <language ident="de">Deutsch</language>
+ <projectDesc>
+ <p>
+ Dieses Subkorpus wurde vom und für das Gemeinschaftsprojekt <name>Analyse und Instrumentarien zur Beobachtung des Schreibgebrauchs im Deutschen</name> (<ref target="http://www.schreibgebrauch.org/">http://www.schreibgebrauch.org/</ref>) zusammengestellt. Das Projektkonsortium besteht aus folgenden Partnern:
+ <list>
+ <item>
+ <orgName>Institut für Deutsche Sprache, Mannheim</orgName>
+ </item>
+ <item>
+ <orgName>Institut für Computerlinguistik, Universität des Saarlandes, Saarbrücken</orgName>
+ </item>
+ <item>
+ <orgName>Bibliographisches Institut GmbH (Dudenverlag), Berlin</orgName>
+ </item>
+ <item>
+ <orgName>Wahrig bei Brockhaus, Gütersloh</orgName>
+ </item>
+ </list>
+ </p>
+ </projectDesc>
+ <editorialDecl xml:id="CMC.TSK.2014.09.Metadaten.Autoren">
+ <interpretation>
+ <p>Dieses Korpus beinhaltet Metadaten zu den Autoren der Texte. Eine Liste aller Autoren, die in diesem Subkorpus kommentiert haben, wurde in der Sektion <gi corresp="#CMC.TSK.2014.09.Autoren">particDesc</gi> zusammengestellt. Jeder Autor ist dabei als <tag scheme="TEI">person</tag> kodiert und führt im Unterelement <tag scheme="TEI">persName</tag> sein oder ihr verwendetes Pseudonym. Aus Gründen der Anonymitätswahrung wurden keine weiteren personenbezogenen Metadaten in das Korpus aufgenommen. Die Zuordnung der Autoren zu ihren Texten regeln die Metadaten der Texte.</p>
+ </interpretation>
+ </editorialDecl>
+ <particDesc xml:id="CMC.TSK.2014.09.Autoren">
+ <person ana="#CMC.TSK.2014.09.Metadaten.Autoren" n="206" xml:id="CMC.TSK.2014.09.Autoren.206">
+ <persName type="pseudo">weltoffen</persName>
+ </person>
+ <person ana="#CMC.TSK.2014.09.Metadaten.Autoren" n="587" xml:id="CMC.TSK.2014.09.Autoren.587">
+ <persName type="pseudo">privat23</persName>
+ </person>
+ <person ana="#CMC.TSK.2014.09.Metadaten.Autoren" n="927" xml:id="CMC.TSK.2014.09.Autoren.927">
+ <persName type="pseudo">Koelle_am_Rhing</persName>
+ </person>
+ <person ana="#CMC.TSK.2014.09.Metadaten.Autoren" n="1043" xml:id="CMC.TSK.2014.09.Autoren.1043">
+ <persName type="pseudo">fitnessfrosch</persName>
+ </person>
+ </particDesc>
+ <interpretation xml:id="CMC.TSK.2014.09.Kodex">
+ <p>Dieses Korpus beinhaltet Metadaten zu den Kommentartexten wie auch zu den Nachrichtenartikeln, in deren Kontext sie verfasst wurden. Die folgende Sektion <gi corresp="#CMC.TSK.2014.09.Texte">editorialDecl</gi> stellt eine strukturelle Übersicht aller Nachrichtenartikel und ihrer Kommentartexte zusammen. Jeder als <tag scheme="TEI">item</tag> realisierte Listeneintrag repräsentiert dabei einen Nachrichtenartikel, der neben einem vollständig strukturierten bibliographischen Unterblock mit Titel und Veröffentlichung auch jeweils als <tag scheme="TEI">desc</tag> kodiert einen kurzen Einleitungstext sowie Schlagwörter zum Inhalt (<tag scheme="TEI">name</tag>) und der geografischen Verortung (<tag scheme="TEI">geogName</tag>) führt. Darunter folgt eine Liste aller zu diesem Nachrichtenartikel verfassten Kommentartexte. Hier repräsentiert jeder als <tag scheme="TEI">item</tag> realisierte Listeneintrag einen Kommentartext, der ebenso einen vollständig strukturierten bibliographischen Unterblock mit Titel, Veröffentlichung und Autorenreferenz führt.</p>
+ </interpretation>
+ <segmentation xml:id="CMC.TSK.2014.09.Token">
+ <p>Die Texte in diesem Korpus wurden mit der Software <ref target="https://github.com/DFKI-MLT/JTok">jTok</ref> tokenisiert. Im Ergebnis wurden Sätze mit <tag scheme="TEI">s</tag>, Wörter mit <tag scheme="TEI">w</tag> und Satzpunktuation mit <tag scheme="TEI">c</tag> ausgezeichnet. Ferner wurden auf Textebene absatzübergreifend alle Sätze und auf Satzebene durchgängig alle Wörter und jede Satzpunktuation im jeweiligen Attribut <att scheme="TEI">n</att> und bei 1 beginnend durchgezählt.</p>
+ </segmentation>
+ <interpretation xml:id="CMC.TSK.2014.09.POS">
+ <p>Dieses Korpus verwendet ein Wortart-Tagging nach dem Stuttgart-Tübingen-Tagset (<ref target="http://www.ims.uni-stuttgart.de/forschung/ressourcen/lexika/TagSets/stts-table.html">STTS</ref>) und dessen Erweiterung für das Genre der internetbasierten Kommunikation (<ref target="http://opus.bsz-bw.de/ubhi/volltexte/2014/279/pdf/p027.pdf">STTS 2.0</ref>). Dabei wurden drei ursprüngliche Tags ("$.", "$," und "$(") in ("_ENDE", "_KOMMA" und "_SONST") umbenannt, um den Validitätsrichtlinien für XML-Identifikatoren (beschrieben in der <ref target="http://www.w3.org/TR/2006/REC-xml-20060816/">W3C-Empfehlung für XML 1.0 vom 16.08.2006</ref>) zu genügen.</p>
+ <ab>
+ <interpGrp type="annotation">
+ <interp xml:id="CMC.TSK.2014.09.POS.ADJA">attributives Adjektiv</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.ADJD">adverbiales oder prädikatives Adjektiv</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.ADR">Adressierung</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.ADV">Adverb</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.APPO">Postposition</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.APPR">Präposition; Zirkumposition links</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.APPRART">Präposition mit Artikel</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.APZR">Zirkumposition rechts</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.ART">bestimmter oder unbestimmter Artikel</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.AW">Aktionswort</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.AWIND">Aktionswort-Indikator</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.CARD">Kardinalzahl</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.EMOASC">Emoticon, als Zeichenfolge dargestellt (Typ "ASCII")</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.ERRAW">falsch abgetrennter Wortteil</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.ERRTOK">Tokenisierungsfehler</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.FM">Fremdsprachliches Material</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.HST">Hashtag</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.ITJ">Interjektion</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.KOKOM">Vergleichskonjunktion</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.KON">nebenordnende Konjunktion</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.KOUI">unterordnende Konjunktion mit "zu" und Infinitiv</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.KOUS">unterordnende Konjunktion mit Satz</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.NE">Eigennamen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.NN">normales Nomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.ONO">Onomatopoetikon</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PDAT">attribuierendes Demonstrativpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PDS">substituierendes Demonstrativpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PIAT">attribuierendes Indefinitpronomen ohne Determiner</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PIS">substituierendes Indefinitpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PPER">irreflexives Personalpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PPOSAT">attribuierendes Possessivpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PPOSS">substituierendes Possessivpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PRELAT">attribuierendes Relativpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PRELS">substituierendes Relativpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PRF">reflexives Personalpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PROAV">Pronominaladverb</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PTKA">Partikel bei Adjektiv oder Adverb</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PTKANT">Antwortpartikel</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PTKNEG">Negationspartikel</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PTKVZ">abgetrennter Verbzusatz</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PTKZU">"zu" vor Infinitiv</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PWAT">attribuierendes Interrogativpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PWAV">adverbiales Interrogativ- oder Relativpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.PWS">substituierendes Interrogativpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.TRUNC">Kompositions-Erstglied</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.URL">Uniform Resource Locator</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VAFIN">finites Verb, aux</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VAIMP">Imperativ, aux</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VAINF">Infinitiv, aux</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VAPP">Partizip Perfekt, aux</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VAPPER">Verb, aux mit Personalpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VMFIN">finites Verb, modal</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VMINF">Infinitiv, modal</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VMPP">Partizip Perfekt, modal</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VVFIN">finites Verb, voll</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VVIMP">Imperativ, voll</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VVINF">Infinitiv, voll</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VVIZU">Infinitiv mit "zu", voll</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VVPP">Partizip Perfekt, voll</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.VVPPER">Verb, voll mit Personalpronomen</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS.XY">Nichtwort, Sonderzeichen enthaltend</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS._ENDE">Satzbeendende Interpunktion</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS._KOMMA">Komma</interp>
+ <interp xml:id="CMC.TSK.2014.09.POS._SONST">sonstige Satzzeichen; satzintern</interp>
+ </interpGrp>
+ </ab>
+ </interpretation>
+</teiHeader>
\ No newline at end of file
diff --git a/t/real/sgbr/CMC-TSK/header.xml b/t/real/sgbr/CMC-TSK/header.xml
new file mode 100644
index 0000000..c2658d5
--- /dev/null
+++ b/t/real/sgbr/CMC-TSK/header.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<teiHeader>
+<korpusSigle>CMC-TSK</korpusSigle>
+</teiHeader>
diff --git a/t/real/sgbr/PRO-DUD/BSP-2013-01/32/base/sentences.xml b/t/real/sgbr/PRO-DUD/BSP-2013-01/32/base/sentences.xml
new file mode 100644
index 0000000..d3fe3b5
--- /dev/null
+++ b/t/real/sgbr/PRO-DUD/BSP-2013-01/32/base/sentences.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="PRO-DUD_BSP-2013-01.32" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span from="0" to="19" />
+ </spanList>
+</layer>
diff --git a/t/real/sgbr/PRO-DUD/BSP-2013-01/32/base/tokens_aggr.xml b/t/real/sgbr/PRO-DUD/BSP-2013-01/32/base/tokens_aggr.xml
new file mode 100644
index 0000000..aa5be2d
--- /dev/null
+++ b/t/real/sgbr/PRO-DUD/BSP-2013-01/32/base/tokens_aggr.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="PRO-DUD_BSP-2013-01.32" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="t_0" from="0" to="6" />
+ <span id="t_1" from="7" to="10" />
+ <span id="t_2" from="11" to="14" />
+ <span id="t_3" from="15" to="19" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/PRO-DUD/BSP-2013-01/32/base/tokens_conservative.xml b/t/real/sgbr/PRO-DUD/BSP-2013-01/32/base/tokens_conservative.xml
new file mode 100644
index 0000000..aa5be2d
--- /dev/null
+++ b/t/real/sgbr/PRO-DUD/BSP-2013-01/32/base/tokens_conservative.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="PRO-DUD_BSP-2013-01.32" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="t_0" from="0" to="6" />
+ <span id="t_1" from="7" to="10" />
+ <span id="t_2" from="11" to="14" />
+ <span id="t_3" from="15" to="19" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/PRO-DUD/BSP-2013-01/32/data.xml b/t/real/sgbr/PRO-DUD/BSP-2013-01/32/data.xml
new file mode 100644
index 0000000..e38121e
--- /dev/null
+++ b/t/real/sgbr/PRO-DUD/BSP-2013-01/32/data.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="text.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<raw_text docid="PRO-DUD_BSP-2013-01.32" xmlns="http://ids-mannheim.de/ns/KorAP">
+ <metadata file="metadata.xml" />
+ <text>Selbst ist der Jeck</text>
+</raw_text>
\ No newline at end of file
diff --git a/t/real/sgbr/PRO-DUD/BSP-2013-01/32/header.xml b/t/real/sgbr/PRO-DUD/BSP-2013-01/32/header.xml
new file mode 100644
index 0000000..2080865
--- /dev/null
+++ b/t/real/sgbr/PRO-DUD/BSP-2013-01/32/header.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<teiHeader>
+<textSigle>PRO-DUD_BSP-2013-01.32</textSigle>
+<item ana="#PRO.DUD.BSP.2013.01.Kodex" n="32" rend="T" xml:id="PRO.DUD.BSP.2013.01.Texte.32">
+<biblFull>
+<titleStmt>
+<title>Nur Platt, kein Deutsch</title>
+<author ref="#PRO.DUD.BSP.2013.01.Autoren.1"/>
+</titleStmt>
+<publicationStmt>
+<publisher>Dorfblatt GmbH</publisher>
+<pubPlace>Stadtingen</pubPlace>
+<date>2013-01-26</date>
+</publicationStmt>
+</biblFull>
+</item>
+</teiHeader>
diff --git a/t/real/sgbr/PRO-DUD/BSP-2013-01/32/sgbr/ana.xml b/t/real/sgbr/PRO-DUD/BSP-2013-01/32/sgbr/ana.xml
new file mode 100644
index 0000000..ad84d3f
--- /dev/null
+++ b/t/real/sgbr/PRO-DUD/BSP-2013-01/32/sgbr/ana.xml
@@ -0,0 +1,47 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="PRO-DUD_BSP-2013-01.32" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="s1" from="0" to="6">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Selbst</f>
+ <f name="ctag">NE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2" from="7" to="10">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ist</f>
+ <f name="ctag">VVFIN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s3" from="11" to="14">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">der</f>
+ <f name="ctag">ART</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s4" from="15" to="19">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Jeck</f>
+ <f name="ctag">NE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/PRO-DUD/BSP-2013-01/32/sgbr/lemma.xml b/t/real/sgbr/PRO-DUD/BSP-2013-01/32/sgbr/lemma.xml
new file mode 100644
index 0000000..e95296f
--- /dev/null
+++ b/t/real/sgbr/PRO-DUD/BSP-2013-01/32/sgbr/lemma.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="PRO-DUD_BSP-2013-01.32" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="s1" from="0" to="6">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Selbst</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2" from="7" to="10">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">sein</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s3" from="11" to="14">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">d_art</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s4" from="15" to="19">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Jeck</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/PRO-DUD/BSP-2013-01/32/struct/structure.xml b/t/real/sgbr/PRO-DUD/BSP-2013-01/32/struct/structure.xml
new file mode 100644
index 0000000..4926974
--- /dev/null
+++ b/t/real/sgbr/PRO-DUD/BSP-2013-01/32/struct/structure.xml
@@ -0,0 +1,90 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="PRO-DUD_BSP-2013-01.32" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="s0" from="0" to="18" l="1">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">text</f>
+ </fs>
+ </span>
+ <span id="s1" from="0" to="18" l="2">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">div</f>
+ </fs>
+ </span>
+ <span id="s2" from="0" to="18" l="3">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">head</f>
+ </fs>
+ </span>
+ <span id="s3" from="0" to="18" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">s</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s4" from="0" to="6" l="5">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#PRO.DUD.BSP.2013.01.POS.NE</f>
+ <f name="lemmaRef">#PRO.DUD.BSP.2013.01.Lemmata.3773</f>
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s5" from="7" to="10" l="5">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#PRO.DUD.BSP.2013.01.POS.VVFIN</f>
+ <f name="lemmaRef">#PRO.DUD.BSP.2013.01.Lemmata.2</f>
+ <f name="n">2</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s6" from="11" to="14" l="5">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#PRO.DUD.BSP.2013.01.POS.ART</f>
+ <f name="lemmaRef">#PRO.DUD.BSP.2013.01.Lemmata.3</f>
+ <f name="n">3</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s7" from="15" to="18" l="5">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#PRO.DUD.BSP.2013.01.POS.NE</f>
+ <f name="lemmaRef">#PRO.DUD.BSP.2013.01.Lemmata.227</f>
+ <f name="n">4</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s8" from="18" to="18" l="3">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">div</f>
+ </fs>
+ </span>
+ <span id="s9" from="18" to="18" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">p</f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/PRO-DUD/BSP-2013-01/header.xml b/t/real/sgbr/PRO-DUD/BSP-2013-01/header.xml
new file mode 100644
index 0000000..a0b5092
--- /dev/null
+++ b/t/real/sgbr/PRO-DUD/BSP-2013-01/header.xml
@@ -0,0 +1,139 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<teiHeader>
+<dokumentSigle>PRO-DUD_BSP-2013-01</dokumentSigle>
+ <fileDesc>
+ <titleStmt>
+ <title level="u" type="main">Korpus zur Beobachtung des Schreibgebrauchs im Deutschen</title>
+ <title level="u" type="sub">Subkorpus Ortsblatt</title>
+ <title level="u" type="sub">Jahrgang 2013</title>
+ <title level="u" type="sub">Monat Januar</title>
+ <funder>
+ <orgName>Bundesministerium für Bildung und Forschung</orgName>
+ </funder>
+ <editor n="1" xml:id="PRO.DUD.BSP.2013.01.Bearbeiter.1">
+ <persName>Thorsten Frank</persName>
+ <orgName>Bibliographisches Institut GmbH, Berlin</orgName>
+ </editor>
+ </titleStmt>
+ <publicationStmt>
+ <p>Dieses Subkorpus wurde als solches nicht veröffentlicht.</p>
+ </publicationStmt>
+ <sourceDesc>
+ <biblFull>
+ <titleStmt>
+ <title level="a">Kölner Stadt-Anzeiger</title>
+ </titleStmt>
+ <publicationStmt>
+ <publisher>Dorfmedien GmbH</publisher>
+ <pubPlace>Stadtingen</pubPlace>
+ <date>Januar 2013</date>
+ </publicationStmt>
+ </biblFull>
+ </sourceDesc>
+ </fileDesc>
+ <language ident="de">Deutsch</language>
+ <projectDesc>
+ <p>
+ Dieses Subkorpus wurde vom und für das Gemeinschaftsprojekt <name>Analyse und Instrumentarien zur Beobachtung des Schreibgebrauchs im Deutschen</name> (<ref target="http://www.schreibgebrauch.org/">http://www.schreibgebrauch.org/</ref>) zusammengestellt. Das Projektkonsortium besteht aus folgenden Partnern:
+ <list><item><orgName>Institut für Deutsche Sprache, Mannheim</orgName></item><item><orgName>Institut für Computerlinguistik, Universität des Saarlandes, Saarbrücken</orgName></item><item><orgName>Bibliographisches Institut GmbH (Dudenverlag), Berlin</orgName></item><item><orgName>Wahrig bei Brockhaus, Gütersloh</orgName></item></list>
+ </p>
+ </projectDesc>
+ <particDesc xml:id="PRO.DUD.BSP.2013.01.Autoren">
+ <person n="1" xml:id="PRO.DUD.BSP.2013.01.Autoren.1">
+ <persName>unbekannt</persName>
+ </person>
+ </particDesc>
+ <interpretation xml:id="PRO.DUD.BSP.2013.01.Kodex">
+ <p>Dieses Korpus beinhaltet Metadaten zu den Texten. Eine Liste aller Texte in diesem Korpus ist in der folgenden Sektion <gi corresp="#PRO.DUD.BSP.2013.01.Texte">editorialDecl</gi> zusammengestellt. Jeder als <tag scheme="TEI">item</tag> realisierte Listeneintrag repräsentiert dabei einen Zeitungsartikel und führt neben einem vollständig strukturierten bibliographischen Unterblock mit Titel und Autorenreferenz auch die Angabe ihrer Verschriftlichungsart im Attribut (<att scheme="TEI">rend</att>). Dieses Attribut ist innerhalb der Korpora mit Texten professioneller Schreiber wohl immer "T" für maschinell verfasste Texte, wird aber hier trotzdem aufgenommen zur besseren Suchbarkeit im Gesamtkorpus. Das Attribut ist wie folgt kodiert:</p>
+ <ab>
+ <elementSpec ident="item">
+ <attList>
+ <attDef ident="rend">
+ <valList>
+ <valItem ident="M">
+ <desc>Manuskript, handgeschriebene Texte</desc>
+ </valItem>
+ <valItem ident="T">
+ <desc>Typoskript, maschinell verfasste Texte</desc>
+ </valItem>
+ <valItem ident="H">
+ <desc>Hybrid, Texte mit sowohl handgeschriebenem als auch maschinell verfasstem Anteil</desc>
+ </valItem>
+ <valItem ident="X">
+ <desc>Texte, deren Verschriftlichungsart unbekannt ist</desc>
+ </valItem>
+ </valList>
+ </attDef>
+ </attList>
+ </elementSpec>
+ </ab>
+ </interpretation>
+ <segmentation xml:id="PRO.DUD.BSP.2013.01.Token">
+ <p>Die Texte in diesem Korpus wurden mit der Software Duden Morphological Engine (DME) tokenisiert. Im Ergebnis wurden Sätze mit <tag scheme="TEI">s</tag>, Wörter mit <tag scheme="TEI">w</tag> und Satzpunktuation mit <tag scheme="TEI">c</tag> ausgezeichnet. Ferner wurden auf Textebene absatzübergreifend alle Sätze und auf Satzebene durchgängig alle Wörter und jede Satzpunktuation im jeweiligen Attribut <att scheme="TEI">n</att> und bei 1 beginnend durchgezählt.</p>
+ </segmentation>
+ <interpretation xml:id="PRO.DUD.BSP.2013.01.POS">
+ <p>Die Texte in diesem Korpus wurden mit der Software Duden Morphological Engine (DME) annotiert. Die Wortartinformationen wurden dann von <respons locus="value" resp="#PRO.DUD.BSP.2013.01.Bearbeiter.1"><desc>Thorsten Frank</desc></respons> in ein Wortart-Tagging nach dem Stuttgart-Tübingen-Tagset (<ref target="http://www.ims.uni-stuttgart.de/forschung/ressourcen/lexika/TagSets/stts-table.html">STTS</ref>) überführt. Dabei wurden drei ursprüngliche Tags ("$.", "$," und "$(") in ("_ENDE", "_KOMMA" und "_SONST") umbenannt, um den Validitätsrichtlinien für XML-Identifikatoren (beschrieben in der <ref target="http://www.w3.org/TR/2006/REC-xml-20060816/">W3C-Empfehlung für XML 1.0 vom 16.08.2006</ref>) zu genügen.</p>
+ <ab>
+ <interpGrp type="annotation">
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.ADJA">attributives Adjektiv</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.ADJD">adverbiales oder prädikatives Adjektiv</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.ADV">Adverb</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.APPO">Postposition</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.APPR">Präposition; Zirkumposition links</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.APPRART">Präposition mit Artikel</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.APZR">Zirkumposition rechts</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.ART">bestimmter oder unbestimmter Artikel</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.CARD">Kardinalzahl</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.FM">Fremdsprachliches Material</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.ITJ">Interjektion</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.KOKOM">Vergleichskonjunktion</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.KON">nebenordnende Konjunktion</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.KOUI">unterordnende Konjunktion mit "zu" und Infinitiv</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.KOUS">unterordnende Konjunktion mit Satz</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.NE">Eigennamen</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.NN">normales Nomen</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.NNE">normales Nomen mit Eigennamen</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PAV">Pronominaladverb</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PDAT">attribuierendes Demonstrativpronomen</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PDS">substituierendes Demonstrativpronomen</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PIAT">attribuierendes Indefinitpronomen ohne Determiner</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PIDAT">attribuierendes Indefinitpronomen mit Determiner</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PIS">substituierendes Indefinitpronomen</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PPER">irreflexives Personalpronomen</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PPOSAT">attribuierendes Possessivpronomen</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PPOSS">substituierendes Possessivpronomen</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PRELAT">attribuierendes Relativpronomen</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PRELS">substituierendes Relativpronomen</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PRF">reflexives Personalpronomen</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PROAV">Pronominaladverb</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PTKA">Partikel bei Adjektiv oder Adverb</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PTKANT">Antwortpartikel</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PTKNEG">Negationspartikel</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PTKVZ">abgetrennter Verbzusatz</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PTKZU">"zu" vor Infinitiv</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PWAT">attribuierendes Interrogativpronomen</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PWAV">adverbiales Interrogativ- oder Relativpronomen</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.PWS">substituierendes Interrogativpronomen</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.TRUNC">Kompositions-Erstglied</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.VAFIN">finites Verb, aux</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.VAIMP">Imperativ, aux</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.VAINF">Infinitiv, aux</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.VAPP">Partizip Perfekt, aux</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.VMFIN">finites Verb, modal</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.VMINF">Infinitiv, modal</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.VMPP">Partizip Perfekt, modal</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.VVFIN">finites Verb, voll</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.VVIMP">Imperativ, voll</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.VVINF">Infinitiv, voll</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.VVIZU">Infinitiv mit "zu", voll</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.VVPP">Partizip Perfekt, voll</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS.XY">Nichtwort, Sonderzeichen enthaltend</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS._KOMMA">Komma</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS._SONST">sonstige Satzzeichen; satzintern</interp>
+ <interp xml:id="PRO.DUD.BSP.2013.01.POS._ENDE">Satzbeendende Interpunktion</interp>
+ </interpGrp>
+ </ab>
+ </interpretation>
+</teiHeader>
diff --git a/t/real/sgbr/PRO-DUD/header.xml b/t/real/sgbr/PRO-DUD/header.xml
new file mode 100644
index 0000000..1ceb692
--- /dev/null
+++ b/t/real/sgbr/PRO-DUD/header.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<teiHeader>
+<korpusSigle>PRO-DUD</korpusSigle>
+</teiHeader>
diff --git a/t/real/sgbr/TEST/BSP/1/base/paragraph.xml b/t/real/sgbr/TEST/BSP/1/base/paragraph.xml
new file mode 100644
index 0000000..28fa1c7
--- /dev/null
+++ b/t/real/sgbr/TEST/BSP/1/base/paragraph.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="TEST_BSP.1" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span from="19" to="364" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/TEST/BSP/1/base/sentences.xml b/t/real/sgbr/TEST/BSP/1/base/sentences.xml
new file mode 100644
index 0000000..38c4752
--- /dev/null
+++ b/t/real/sgbr/TEST/BSP/1/base/sentences.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="TEST_BSP.1" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span from="0" to="18" />
+ <span from="19" to="52" />
+ <span from="53" to="305" />
+ <span from="306" to="326" />
+ <span from="327" to="364" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/TEST/BSP/1/base/tokens_aggr.xml b/t/real/sgbr/TEST/BSP/1/base/tokens_aggr.xml
new file mode 100644
index 0000000..4a8ad39
--- /dev/null
+++ b/t/real/sgbr/TEST/BSP/1/base/tokens_aggr.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="TEST_BSP.1" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="t_0" from="0" to="18" />
+ <span id="t_1" from="19" to="21" />
+ <span id="t_2" from="22" to="25" />
+ <span id="t_3" from="26" to="32" />
+ <span id="t_4" from="33" to="36" />
+ <span id="t_5" from="37" to="44" />
+ <span id="t_6" from="45" to="51" />
+ <span id="t_7" from="51" to="52" />
+ <span id="t_8" from="53" to="55" />
+ <span id="t_9" from="56" to="62" />
+ <span id="t_10" from="63" to="70" />
+ <span id="t_11" from="71" to="74" />
+ <span id="t_12" from="75" to="77" />
+ <span id="t_13" from="78" to="81" />
+ <span id="t_14" from="82" to="86" />
+ <span id="t_15" from="87" to="96" />
+ <span id="t_16" from="96" to="97" />
+ <span id="t_17" from="98" to="101" />
+ <span id="t_18" from="102" to="106" />
+ <span id="t_19" from="107" to="115" />
+ <span id="t_20" from="115" to="116" />
+ <span id="t_21" from="117" to="120" />
+ <span id="t_22" from="121" to="138" />
+ <span id="t_23" from="139" to="143" />
+ <span id="t_24" from="143" to="144" />
+ <span id="t_25" from="145" to="148" />
+ <span id="t_26" from="149" to="161" />
+ <span id="t_27" from="162" to="167" />
+ <span id="t_28" from="168" to="171" />
+ <span id="t_29" from="172" to="175" />
+ <span id="t_30" from="176" to="182" />
+ <span id="t_31" from="183" to="190" />
+ <span id="t_32" from="191" to="200" />
+ <span id="t_33" from="201" to="205" />
+ <span id="t_34" from="206" to="209" />
+ <span id="t_35" from="210" to="215" />
+ <span id="t_36" from="215" to="216" />
+ <span id="t_37" from="217" to="220" />
+ <span id="t_38" from="221" to="226" />
+ <span id="t_39" from="227" to="230" />
+ <span id="t_40" from="231" to="234" />
+ <span id="t_41" from="235" to="239" />
+ <span id="t_42" from="239" to="240" />
+ <span id="t_43" from="241" to="244" />
+ <span id="t_44" from="245" to="252" />
+ <span id="t_45" from="253" to="260" />
+ <span id="t_46" from="260" to="261" />
+ <span id="t_47" from="262" to="267" />
+ <span id="t_48" from="268" to="270" />
+ <span id="t_49" from="271" to="280" />
+ <span id="t_50" from="280" to="281" />
+ <span id="t_51" from="281" to="284" />
+ <span id="t_52" from="285" to="297" />
+ <span id="t_53" from="298" to="304" />
+ <span id="t_54" from="304" to="305" />
+ <span id="t_55" from="306" to="308" />
+ <span id="t_56" from="309" to="312" />
+ <span id="t_57" from="313" to="325" />
+ <span id="t_58" from="325" to="326" />
+ <span id="t_59" from="327" to="328" />
+ <span id="t_60" from="328" to="333" />
+ <span id="t_61" from="334" to="337" />
+ <span id="t_62" from="338" to="348" />
+ <span id="t_63" from="348" to="349" />
+ <span id="t_64" from="349" to="350" />
+ <span id="t_65" from="351" to="358" />
+ <span id="t_66" from="359" to="364" />
+ <span id="t_67" from="364" to="365" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/TEST/BSP/1/base/tokens_conservative.xml b/t/real/sgbr/TEST/BSP/1/base/tokens_conservative.xml
new file mode 100644
index 0000000..9038686
--- /dev/null
+++ b/t/real/sgbr/TEST/BSP/1/base/tokens_conservative.xml
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="TEST_BSP.1" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="t_0" from="0" to="18" />
+ <span id="t_1" from="19" to="21" />
+ <span id="t_2" from="22" to="25" />
+ <span id="t_3" from="26" to="32" />
+ <span id="t_4" from="33" to="36" />
+ <span id="t_5" from="37" to="44" />
+ <span id="t_6" from="45" to="51" />
+ <span id="t_7" from="51" to="52" />
+ <span id="t_8" from="53" to="55" />
+ <span id="t_9" from="56" to="62" />
+ <span id="t_10" from="63" to="70" />
+ <span id="t_11" from="71" to="74" />
+ <span id="t_12" from="75" to="77" />
+ <span id="t_13" from="78" to="81" />
+ <span id="t_14" from="82" to="86" />
+ <span id="t_15" from="87" to="96" />
+ <span id="t_16" from="96" to="97" />
+ <span id="t_17" from="98" to="101" />
+ <span id="t_18" from="102" to="106" />
+ <span id="t_19" from="107" to="115" />
+ <span id="t_20" from="115" to="116" />
+ <span id="t_21" from="117" to="120" />
+ <span id="t_22" from="121" to="138" />
+ <span id="t_23" from="139" to="143" />
+ <span id="t_24" from="143" to="144" />
+ <span id="t_25" from="145" to="148" />
+ <span id="t_26" from="149" to="161" />
+ <span id="t_27" from="162" to="167" />
+ <span id="t_28" from="168" to="171" />
+ <span id="t_29" from="172" to="175" />
+ <span id="t_30" from="176" to="182" />
+ <span id="t_31" from="183" to="190" />
+ <span id="t_32" from="191" to="200" />
+ <span id="t_33" from="201" to="205" />
+ <span id="t_34" from="206" to="209" />
+ <span id="t_35" from="210" to="215" />
+ <span id="t_36" from="215" to="216" />
+ <span id="t_37" from="217" to="220" />
+ <span id="t_38" from="221" to="226" />
+ <span id="t_39" from="227" to="230" />
+ <span id="t_40" from="231" to="234" />
+ <span id="t_41" from="235" to="239" />
+ <span id="t_42" from="239" to="240" />
+ <span id="t_43" from="241" to="244" />
+ <span id="t_44" from="245" to="252" />
+ <span id="t_45" from="253" to="260" />
+ <span id="t_46" from="260" to="261" />
+ <span id="t_47" from="262" to="267" />
+ <span id="t_48" from="268" to="270" />
+ <span id="t_49" from="271" to="284" />
+ <span id="t_50" from="285" to="297" />
+ <span id="t_51" from="298" to="304" />
+ <span id="t_52" from="304" to="305" />
+ <span id="t_53" from="306" to="308" />
+ <span id="t_54" from="309" to="312" />
+ <span id="t_55" from="313" to="325" />
+ <span id="t_56" from="325" to="326" />
+ <span id="t_57" from="328" to="333" />
+ <span id="t_58" from="334" to="337" />
+ <span id="t_59" from="338" to="348" />
+ <span id="t_60" from="348" to="349" />
+ <span id="t_61" from="349" to="350" />
+ <span id="t_62" from="351" to="358" />
+ <span id="t_63" from="359" to="364" />
+ <span id="t_64" from="364" to="365" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/TEST/BSP/1/data.xml b/t/real/sgbr/TEST/BSP/1/data.xml
new file mode 100644
index 0000000..d7b31a0
--- /dev/null
+++ b/t/real/sgbr/TEST/BSP/1/data.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="text.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<raw_text docid="TEST_BSP.1" xmlns="http://ids-mannheim.de/ns/KorAP">
+ <metadata file="metadata.xml" />
+ <text>Sommerüberraschung Es war einmal die Familie Patzig. In dieser Familie gab es die Mama Elisabeth, den Papa Guenther, die fuenfzehnjaehrige Lena, den neunjaerigen Kevin und die beiden kleinen Zwillinge Lion und Tayla, Oma Luise und ihr Mann, Opa Hermann Mueller, nicht zu vergessen:ihr Familienhund Barker. Es ist Sommeranfang! "Heute ist Wochenende", schreit Kevin.</text>
+</raw_text>
\ No newline at end of file
diff --git a/t/real/sgbr/TEST/BSP/1/header.xml b/t/real/sgbr/TEST/BSP/1/header.xml
new file mode 100644
index 0000000..7d82c8d
--- /dev/null
+++ b/t/real/sgbr/TEST/BSP/1/header.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<teiHeader>
+<textSigle>TEST_BSP.1</textSigle>
+<item ana="#TEST.BSP.Kodex" n="1" rend="M" xml:id="TEST.BSP.Texte.1">
+<biblFull>
+<titleStmt>
+<title>Sommerüberraschung</title>
+<author ref="#TEST.BSP.Autoren.1"/>
+</titleStmt>
+<publicationStmt>
+<p>Dieser Text ist in Wirklichkeit nicht vorhanden.</p>
+</publicationStmt>
+</biblFull>
+</item>
+</teiHeader>
\ No newline at end of file
diff --git a/t/real/sgbr/TEST/BSP/1/sgbr/ana.xml b/t/real/sgbr/TEST/BSP/1/sgbr/ana.xml
new file mode 100644
index 0000000..9a2e798
--- /dev/null
+++ b/t/real/sgbr/TEST/BSP/1/sgbr/ana.xml
@@ -0,0 +1,687 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="TEST_BSP.1" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="s1" from="0" to="18">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Sommerüberraschung</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2" from="19" to="21">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Es</f>
+ <f name="ctag">PPER</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s3" from="22" to="25">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">war</f>
+ <f name="ctag">VAFIN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s4" from="26" to="32">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">einmal</f>
+ <f name="ctag">ADV</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s5" from="33" to="36">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">die</f>
+ <f name="ctag">ART</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s6" from="37" to="44">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Familie</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s7" from="45" to="51">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Patzig</f>
+ <f name="ctag">NE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s8" from="51" to="52">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">.</f>
+ <f name="ctag">_ENDE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s9" from="53" to="55">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">In</f>
+ <f name="ctag">APPR</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s10" from="56" to="62">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">dieser</f>
+ <f name="ctag">PDAT</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s11" from="63" to="70">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Familie</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s12" from="71" to="74">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">gab</f>
+ <f name="ctag">VVFIN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s13" from="75" to="77">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">es</f>
+ <f name="ctag">PPER</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s14" from="78" to="81">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">die</f>
+ <f name="ctag">ART</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s15" from="82" to="86">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Mama</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s16" from="87" to="96">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Elisabeth</f>
+ <f name="ctag">NE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s17" from="96" to="97">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">,</f>
+ <f name="ctag">_KOMMA</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s18" from="98" to="101">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">den</f>
+ <f name="ctag">ART</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s19" from="102" to="106">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Papa</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s20" from="107" to="115">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Guenther</f>
+ <f name="ctag">NE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s21" from="115" to="116">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">,</f>
+ <f name="ctag">_KOMMA</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s22" from="117" to="120">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">die</f>
+ <f name="ctag">ART</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s23" from="121" to="138">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">fuenfzehnjaehrige</f>
+ <f name="ctag">ADJA</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s24" from="139" to="143">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Lena</f>
+ <f name="ctag">NE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s25" from="143" to="144">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">,</f>
+ <f name="ctag">_KOMMA</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s26" from="145" to="148">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">den</f>
+ <f name="ctag">ART</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s27" from="149" to="161">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">neunjaerigen</f>
+ <f name="ctag">ADJA</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s28" from="162" to="167">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Kevin</f>
+ <f name="ctag">NE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s29" from="168" to="171">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">und</f>
+ <f name="ctag">KON</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s30" from="172" to="175">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">die</f>
+ <f name="ctag">ART</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s31" from="176" to="182">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">beiden</f>
+ <f name="ctag">PIAT</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s32" from="183" to="190">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">kleinen</f>
+ <f name="ctag">ADJA</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s33" from="191" to="200">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Zwillinge</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s34" from="201" to="205">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Lion</f>
+ <f name="ctag">NE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s35" from="206" to="209">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">und</f>
+ <f name="ctag">KON</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s36" from="210" to="215">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Tayla</f>
+ <f name="ctag">NE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s37" from="215" to="216">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">,</f>
+ <f name="ctag">_KOMMA</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s38" from="217" to="220">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Oma</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s39" from="221" to="226">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Luise</f>
+ <f name="ctag">NE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s40" from="227" to="230">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">und</f>
+ <f name="ctag">KON</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s41" from="231" to="234">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ihr</f>
+ <f name="ctag">PPOSAT</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s42" from="235" to="239">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Mann</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s43" from="239" to="240">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">,</f>
+ <f name="ctag">_KOMMA</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s44" from="241" to="244">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Opa</f>
+ <f name="ctag">NE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s45" from="245" to="252">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Hermann</f>
+ <f name="ctag">NE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s46" from="253" to="260">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Mueller</f>
+ <f name="ctag">NE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s47" from="260" to="261">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">,</f>
+ <f name="ctag">_KOMMA</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s48" from="262" to="267">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">nicht</f>
+ <f name="ctag">PTKNEG</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s49" from="268" to="270">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">zu</f>
+ <f name="ctag">PTKZU</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s50" from="271" to="280">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">vergessen</f>
+ <f name="ctag">VVINF</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s51" from="280" to="281">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">:</f>
+ <f name="ctag">_ENDE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s52" from="281" to="284">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ihr</f>
+ <f name="ctag">PPOSAT</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s53" from="285" to="297">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Familienhund</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s54" from="298" to="304">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Barker</f>
+ <f name="ctag">NE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s55" from="304" to="305">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">.</f>
+ <f name="ctag">_ENDE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s56" from="306" to="308">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Es</f>
+ <f name="ctag">PPER</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s57" from="309" to="312">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ist</f>
+ <f name="ctag">VAFIN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s58" from="313" to="325">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Sommeranfang</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s59" from="325" to="326">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">!</f>
+ <f name="ctag">_ENDE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s60" from="327" to="328">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">"</f>
+ <f name="ctag">_SONST</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s61" from="328" to="333">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Heute</f>
+ <f name="ctag">ADV</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s62" from="334" to="337">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ist</f>
+ <f name="ctag">VAFIN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s63" from="338" to="348">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Wochenende</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s64" from="348" to="349">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">"</f>
+ <f name="ctag">_SONST</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s65" from="349" to="350">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">,</f>
+ <f name="ctag">_KOMMA</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s66" from="351" to="358">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">schreit</f>
+ <f name="ctag">VVFIN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s67" from="359" to="364">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Kevin</f>
+ <f name="ctag">NE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s68" from="364" to="365">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">.</f>
+ <f name="ctag">_ENDE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/TEST/BSP/1/sgbr/lemma.xml b/t/real/sgbr/TEST/BSP/1/sgbr/lemma.xml
new file mode 100644
index 0000000..fbb28d0
--- /dev/null
+++ b/t/real/sgbr/TEST/BSP/1/sgbr/lemma.xml
@@ -0,0 +1,494 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="TEST_BSP.1" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="s1" from="0" to="18">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Sommerüberraschung</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2" from="19" to="21">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">es</f>
+ <f name="lemma">er</f>
+ <f name="lemma">sie</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s3" from="22" to="25">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">sein</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s4" from="26" to="32">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">einmal</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s5" from="33" to="36">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">die</f>
+ <f name="lemma">der</f>
+ <f name="lemma">das</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s6" from="37" to="44">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Familie</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s7" from="53" to="55">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">in</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s8" from="56" to="62">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">diese</f>
+ <f name="lemma">dies</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s9" from="63" to="70">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Familie</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s10" from="71" to="74">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">geben</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s11" from="75" to="77">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">es</f>
+ <f name="lemma">er</f>
+ <f name="lemma">sie</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s12" from="78" to="81">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">die</f>
+ <f name="lemma">der</f>
+ <f name="lemma">das</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s13" from="82" to="86">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Mama</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s14" from="87" to="96">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Elisabeth</f>
+ <f name="lemma">Elisabet</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s15" from="98" to="101">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">der</f>
+ <f name="lemma">die</f>
+ <f name="lemma">das</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s16" from="102" to="106">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Papa</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s17" from="107" to="115">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Günther</f>
+ <f name="lemma">Günter</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s18" from="117" to="120">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">die</f>
+ <f name="lemma">der</f>
+ <f name="lemma">das</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s19" from="139" to="143">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Lena</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s20" from="145" to="148">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">der</f>
+ <f name="lemma">die</f>
+ <f name="lemma">das</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s21" from="162" to="167">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Kevin</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s22" from="168" to="171">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">und</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s23" from="172" to="175">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">die</f>
+ <f name="lemma">der</f>
+ <f name="lemma">das</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s24" from="176" to="182">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">beide</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s25" from="183" to="190">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">klein</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s26" from="191" to="200">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Zwilling</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s27" from="201" to="205">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Lion</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s28" from="206" to="209">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">und</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s29" from="210" to="215">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Tayla</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s30" from="217" to="220">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Oma</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s31" from="221" to="226">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Luise</f>
+ <f name="lemma">Luisa</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s32" from="227" to="230">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">und</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s33" from="231" to="234">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ihr</f>
+ <f name="lemma">sein</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s34" from="235" to="239">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Mann</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s35" from="241" to="244">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Opa</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s36" from="245" to="252">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Hermann</f>
+ <f name="lemma">Herman</f>
+ <f name="lemma">Herrman</f>
+ <f name="lemma">Herrmann</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s37" from="253" to="260">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Müller</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s38" from="262" to="267">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">nicht</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s39" from="268" to="270">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">zu</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s40" from="271" to="280">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">vergessen</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s41" from="281" to="284">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ihr</f>
+ <f name="lemma">sein</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s42" from="285" to="297">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Familienhund</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s43" from="298" to="304">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Barker</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s44" from="306" to="308">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">es</f>
+ <f name="lemma">er</f>
+ <f name="lemma">sie</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s45" from="309" to="312">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">sein</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s46" from="313" to="325">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Sommeranfang</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s47" from="328" to="333">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">heute</f>
+ <f name="lemma">heut</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s48" from="334" to="337">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">sein</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s49" from="338" to="348">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Wochenende</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s50" from="351" to="358">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">schreien</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s51" from="359" to="364">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Kevin</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/TEST/BSP/1/struct/structure.xml b/t/real/sgbr/TEST/BSP/1/struct/structure.xml
new file mode 100644
index 0000000..6f540c8
--- /dev/null
+++ b/t/real/sgbr/TEST/BSP/1/struct/structure.xml
@@ -0,0 +1,871 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer docid="TEST_BSP.1" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+ <spanList>
+ <span id="s0" from="0" to="364" l="1">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">text</f>
+ </fs>
+ </span>
+ <span id="s1" from="0" to="18" l="2">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">head</f>
+ </fs>
+ </span>
+ <span id="s2" from="0" to="18" l="3">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">s</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s3" from="0" to="18" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NN</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.1</f>
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s4" from="19" to="364" l="2">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">p</f>
+ </fs>
+ </span>
+ <span id="s5" from="19" to="52" l="3">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">s</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="n">2</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s6" from="19" to="21" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.PPER</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.2</f>
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s7" from="22" to="25" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.VAFIN</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.3</f>
+ <f name="n">2</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s8" from="26" to="32" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.ADV</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.4</f>
+ <f name="n">3</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s9" from="33" to="36" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.ART</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.5</f>
+ <f name="n">4</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s10" from="37" to="44" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NN</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.6</f>
+ <f name="n">5</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s11" from="45" to="51" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NE</f>
+ <f name="n">6</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s12" from="51" to="52" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS._ENDE</f>
+ <f name="n">7</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s13" from="53" to="305" l="3">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">s</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="n">3</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s14" from="53" to="55" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.APPR</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.7</f>
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s15" from="56" to="62" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.PDAT</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.8</f>
+ <f name="n">2</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s16" from="63" to="70" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NN</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.6</f>
+ <f name="n">3</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s17" from="71" to="74" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.VVFIN</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.9</f>
+ <f name="n">4</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s18" from="75" to="77" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.PPER</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.2</f>
+ <f name="n">5</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s19" from="78" to="81" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.ART</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.5</f>
+ <f name="n">6</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s20" from="82" to="86" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NN</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.10</f>
+ <f name="n">7</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s21" from="87" to="96" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NE</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.11</f>
+ <f name="n">8</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s22" from="96" to="97" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS._KOMMA</f>
+ <f name="n">9</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s23" from="98" to="101" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.ART</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.12</f>
+ <f name="n">10</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s24" from="102" to="106" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NN</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.13</f>
+ <f name="n">11</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s25" from="107" to="115" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NE</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.14</f>
+ <f name="n">12</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s26" from="115" to="116" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS._KOMMA</f>
+ <f name="n">13</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s27" from="117" to="120" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.ART</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.5</f>
+ <f name="n">14</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s28" from="121" to="138" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.ADJA</f>
+ <f name="n">15</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s29" from="139" to="143" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NE</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.15</f>
+ <f name="n">16</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s30" from="143" to="144" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS._KOMMA</f>
+ <f name="n">17</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s31" from="145" to="148" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.ART</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.12</f>
+ <f name="n">18</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s32" from="149" to="161" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.ADJA</f>
+ <f name="n">19</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s33" from="162" to="167" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NE</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.16</f>
+ <f name="n">20</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s34" from="168" to="171" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.KON</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.17</f>
+ <f name="n">21</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s35" from="172" to="175" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.ART</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.5</f>
+ <f name="n">22</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s36" from="176" to="182" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.PIAT</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.18</f>
+ <f name="n">23</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s37" from="183" to="190" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.ADJA</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.19</f>
+ <f name="n">24</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s38" from="191" to="200" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NN</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.20</f>
+ <f name="n">25</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s39" from="201" to="205" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NE</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.21</f>
+ <f name="n">26</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s40" from="206" to="209" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.KON</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.17</f>
+ <f name="n">27</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s41" from="210" to="215" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NE</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.22</f>
+ <f name="n">28</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s42" from="215" to="216" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS._KOMMA</f>
+ <f name="n">29</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s43" from="217" to="220" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NN</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.23</f>
+ <f name="n">30</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s44" from="221" to="226" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NE</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.24</f>
+ <f name="n">31</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s45" from="227" to="230" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.KON</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.17</f>
+ <f name="n">32</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s46" from="231" to="234" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.PPOSAT</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.25</f>
+ <f name="n">33</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s47" from="235" to="239" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NN</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.26</f>
+ <f name="n">34</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s48" from="239" to="240" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS._KOMMA</f>
+ <f name="n">35</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s49" from="241" to="244" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NE</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.27</f>
+ <f name="n">36</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s50" from="245" to="252" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NE</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.28</f>
+ <f name="n">37</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s51" from="253" to="260" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NE</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.29</f>
+ <f name="n">38</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s52" from="260" to="261" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS._KOMMA</f>
+ <f name="n">39</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s53" from="262" to="267" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.PTKNEG</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.30</f>
+ <f name="n">40</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s54" from="268" to="270" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.PTKZU</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.31</f>
+ <f name="n">41</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s55" from="271" to="280" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.VVINF</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.32</f>
+ <f name="n">42</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s56" from="280" to="281" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS._ENDE</f>
+ <f name="n">43</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s57" from="281" to="284" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.PPOSAT</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.25</f>
+ <f name="n">44</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s58" from="285" to="297" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NN</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.33</f>
+ <f name="n">45</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s59" from="298" to="304" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NE</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.34</f>
+ <f name="n">46</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s60" from="304" to="305" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS._ENDE</f>
+ <f name="n">47</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s61" from="306" to="326" l="3">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">s</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="n">4</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s62" from="306" to="308" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.PPER</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.2</f>
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s63" from="309" to="312" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.VAFIN</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.3</f>
+ <f name="n">2</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s64" from="313" to="325" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NN</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.35</f>
+ <f name="n">3</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s65" from="325" to="326" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS._ENDE</f>
+ <f name="n">4</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s66" from="327" to="364" l="3">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">s</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="n">5</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s67" from="327" to="328" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS._SONST</f>
+ <f name="n">1</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s68" from="328" to="333" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.ADV</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.36</f>
+ <f name="n">2</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s69" from="334" to="337" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.VAFIN</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.3</f>
+ <f name="n">3</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s70" from="338" to="348" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NN</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.37</f>
+ <f name="n">4</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s71" from="348" to="349" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS._SONST</f>
+ <f name="n">5</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s72" from="349" to="350" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS._KOMMA</f>
+ <f name="n">6</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s73" from="351" to="358" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.VVFIN</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.38</f>
+ <f name="n">7</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s74" from="359" to="364" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">w</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS.NE</f>
+ <f name="lemmaRef">#TEST.BSP.Lemmata.16</f>
+ <f name="n">8</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s75" from="364" to="364" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">c</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="ana">#TEST.BSP.POS._ENDE</f>
+ <f name="n">9</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/sgbr/TEST/BSP/header.xml b/t/real/sgbr/TEST/BSP/header.xml
new file mode 100644
index 0000000..0af4d67
--- /dev/null
+++ b/t/real/sgbr/TEST/BSP/header.xml
@@ -0,0 +1,157 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<teiHeader>
+<dokumentSigle>TEST_BSP</dokumentSigle>
+ <fileDesc>
+ <titleStmt>
+ <title level="u" type="main">Beispielkorpus</title>
+ <title level="u" type="sub">Subkorpus Beispieltext</title>
+ </titleStmt>
+ <publicationStmt>
+ <p>Dieses Korpus wird als Teil der OpenSource-Testsuite einer Konvertierungssoftware veröffentlicht.</p>
+ </publicationStmt>
+ <sourceDesc>
+ <p>Der hier erfasste Text entstammt einem größeren Forschungskorpus.</p>
+ </sourceDesc>
+ </fileDesc>
+ <language ident="de">Deutsch</language>
+ <editorialDecl xml:id="TEST.BSP.Metadaten.Autoren">
+ <interpretation>
+ <p>Dieses Korpus beinhaltet Metadaten zum Autor des Textes und weiteren, fiktiven Autoren. Eine Liste aller Autoren ist in der Sektion <gi corresp="#TEST.BSP.Autoren">particDesc</gi> zusammengestellt. Jeder Autor ist dabei als <tag scheme="TEI">person</tag> kodiert und führt Attribute mit Angaben zu seiner Altersklasse (<att scheme="TEI">age</att>) und seinem Geschlecht (<att scheme="TEI">sex</att>). Letzteres ist gemäß <ref target="http://microformats.org/wiki/gender-formats">vCard-Richtlinien</ref> kodiert. Die Zuordnung der Autoren zu ihren Texten regeln die Metadaten der Texte.</p>
+ <ab>
+ <elementSpec ident="person">
+ <attList>
+ <attDef ident="age">
+ <valList>
+ <valItem ident="A">
+ <desc>Kinder im Alter bis zu 6 Jahren</desc>
+ </valItem>
+ <valItem ident="B">
+ <desc>Kinder im Alter zwischen 7 und 10 Jahren (entspricht etwa den Schulklassen 1 bis 4)</desc>
+ </valItem>
+ <valItem ident="C">
+ <desc>Kinder im Alter zwischen 11 und 12 Jahren (entspricht etwa den Schulklassen 5 bis 6)</desc>
+ </valItem>
+ <valItem ident="D">
+ <desc>Kinder im Alter zwischen 13 und 14 Jahren (entspricht etwa den Schulklassen 7 bis 8)</desc>
+ </valItem>
+ <valItem ident="X">
+ <desc>Kinder, deren Alter nicht dokumentiert ist</desc>
+ </valItem>
+ </valList>
+ </attDef>
+ </attList>
+ </elementSpec>
+ </ab>
+ </interpretation>
+ </editorialDecl>
+ <particDesc xml:id="TEST.BSP.Autoren">
+ <person age="X" ana="#TEST.BSP.Metadaten.Autoren" n="1" sex="M" xml:id="TEST.BSP.Autoren.1"/>
+ <person age="A" ana="#TEST.BSP.Metadaten.Autoren" n="2" sex="M" xml:id="TEST.BSP.Autoren.2"/>
+ <person age="A" ana="#TEST.BSP.Metadaten.Autoren" n="3" sex="F" xml:id="TEST.BSP.Autoren.3"/>
+ <person age="B" ana="#TEST.BSP.Metadaten.Autoren" n="4" sex="M" xml:id="TEST.BSP.Autoren.4a"/>
+ <person age="C" ana="#TEST.BSP.Metadaten.Autoren" n="5" sex="F" xml:id="TEST.BSP.Autoren.4b"/>
+ <person age="A" ana="#TEST.BSP.Metadaten.Autoren" n="6" sex="F" xml:id="TEST.BSP.Autoren.5a"/>
+ <person age="B" ana="#TEST.BSP.Metadaten.Autoren" n="7" sex="F" xml:id="TEST.BSP.Autoren.5b"/>
+ <person age="B" ana="#TEST.BSP.Metadaten.Autoren" n="8" sex="F" xml:id="TEST.BSP.Autoren.5c"/>
+ <person age="C" ana="#TEST.BSP.Metadaten.Autoren" n="9" sex="M" xml:id="TEST.BSP.Autoren.6"/>
+ <person age="C" ana="#TEST.BSP.Metadaten.Autoren" n="10" sex="M" xml:id="TEST.BSP.Autoren.7"/>
+ <person age="A" ana="#TEST.BSP.Metadaten.Autoren" n="11" sex="F" xml:id="TEST.BSP.Autoren.8a"/>
+ <person age="A" ana="#TEST.BSP.Metadaten.Autoren" n="12" sex="F" xml:id="TEST.BSP.Autoren.8b"/>
+ <person age="C" ana="#TEST.BSP.Metadaten.Autoren" n="13" sex="M" xml:id="TEST.BSP.Autoren.9a"/>
+ <person age="C" ana="#TEST.BSP.Metadaten.Autoren" n="14" sex="M" xml:id="TEST.BSP.Autoren.9b"/>
+ <person age="A" ana="#TEST.BSP.Metadaten.Autoren" n="15" sex="F" xml:id="TEST.BSP.Autoren.10"/>
+ </particDesc>
+ <interpretation xml:id="TEST.BSP.Kodex">
+ <p>Dieses Korpus beinhaltet Metadaten zu den Texten. Eine Liste aller Texte ist in der folgenden Sektion <gi corresp="#TEST.BSP.Texte">editorialDecl</gi> zusammengestellt. Jeder als <tag scheme="TEI">item</tag> realisierte Listeneintrag führt neben einem vollständig strukturierten bibliographischen Unterblock mit Titel und Autorenreferenz auch die Angabe ihrer Verschriftlichungsart im Attribut (<att scheme="TEI">rendition</att>). Letzteres ist gemäß folgender Interpretation kodiert:</p>
+ <ab>
+ <elementSpec ident="item">
+ <attList>
+ <attDef ident="rend">
+ <valList>
+ <valItem ident="M">
+ <desc>Manuskript, handgeschriebene Texte</desc>
+ </valItem>
+ <valItem ident="T">
+ <desc>Typoskript, maschinell verfasste Texte</desc>
+ </valItem>
+ <valItem ident="H">
+ <desc>Hybrid, Texte mit sowohl handgeschriebenem als auch maschinell verfasstem Anteil</desc>
+ </valItem>
+ <valItem ident="X">
+ <desc>Texte, deren Verschriftlichungsart unbekannt ist</desc>
+ </valItem>
+ </valList>
+ </attDef>
+ </attList>
+ </elementSpec>
+ </ab>
+ </interpretation>
+ <segmentation xml:id="TEST.BSP.Token">
+ <p>Der Text in diesem Korpus wurde mit der Software <ref target="https://github.com/DFKI-MLT/JTok">jTok</ref> tokenisiert. Im Ergebnis wurden Sätze mit <tag scheme="TEI">s</tag>, Wörter mit <tag scheme="TEI">w</tag> und Satzpunktuation mit <tag scheme="TEI">c</tag> ausgezeichnet. Ferner wurden auf Textebene absatzübergreifend alle Sätze und auf Satzebene durchgängig alle Wörter und jede Satzpunktuation im jeweiligen Attribut <att scheme="TEI">n</att> und bei 1 beginnend durchgezählt.</p>
+ </segmentation>
+ <interpretation xml:id="TEST.BSP.POS">
+ <p>Dieses Korpus verwendet ein Wortart-Tagging nach dem Stuttgart-Tübingen-Tagset (<ref target="http://www.ims.uni-stuttgart.de/forschung/ressourcen/lexika/TagSets/stts-table.html">STTS</ref>). Dabei wurden drei ursprüngliche Tags ("$.", "$," und "$(") in ("_ENDE", "_KOMMA" und "_SONST") umbenannt, um den Validitätsrichtlinien für XML-Identifikatoren (beschrieben in der <ref target="http://www.w3.org/TR/2006/REC-xml-20060816/">W3C-Empfehlung für XML 1.0 vom 16.08.2006</ref>) zu genügen.</p>
+ <ab>
+ <interpGrp type="annotation">
+ <interp xml:id="TEST.BSP.POS.ADJA">attributives Adjektiv</interp>
+ <interp xml:id="TEST.BSP.POS.ADJD">adverbiales oder prädikatives Adjektiv</interp>
+ <interp xml:id="TEST.BSP.POS.ADV">Adverb</interp>
+ <interp xml:id="TEST.BSP.POS.APPO">Postposition</interp>
+ <interp xml:id="TEST.BSP.POS.APPR">Präposition; Zirkumposition links</interp>
+ <interp xml:id="TEST.BSP.POS.APPRART">Präposition mit Artikel</interp>
+ <interp xml:id="TEST.BSP.POS.APZR">Zirkumposition rechts</interp>
+ <interp xml:id="TEST.BSP.POS.ART">bestimmter oder unbestimmter Artikel</interp>
+ <interp xml:id="TEST.BSP.POS.CARD">Kardinalzahl</interp>
+ <interp xml:id="TEST.BSP.POS.FM">Fremdsprachliches Material</interp>
+ <interp xml:id="TEST.BSP.POS.ITJ">Interjektion</interp>
+ <interp xml:id="TEST.BSP.POS.KOKOM">Vergleichskonjunktion</interp>
+ <interp xml:id="TEST.BSP.POS.KON">nebenordnende Konjunktion</interp>
+ <interp xml:id="TEST.BSP.POS.KOUI">unterordnende Konjunktion mit "zu" und Infinitiv</interp>
+ <interp xml:id="TEST.BSP.POS.KOUS">unterordnende Konjunktion mit Satz</interp>
+ <interp xml:id="TEST.BSP.POS.NE">Eigennamen</interp>
+ <interp xml:id="TEST.BSP.POS.NN">normales Nomen</interp>
+ <interp xml:id="TEST.BSP.POS.NNE">normales Nomen mit Eigennamen</interp>
+ <interp xml:id="TEST.BSP.POS.PAV">Pronominaladverb</interp>
+ <interp xml:id="TEST.BSP.POS.PDAT">attribuierendes Demonstrativpronomen</interp>
+ <interp xml:id="TEST.BSP.POS.PDS">substituierendes Demonstrativpronomen</interp>
+ <interp xml:id="TEST.BSP.POS.PIAT">attribuierendes Indefinitpronomen ohne Determiner</interp>
+ <interp xml:id="TEST.BSP.POS.PIDAT">attribuierendes Indefinitpronomen mit Determiner</interp>
+ <interp xml:id="TEST.BSP.POS.PIS">substituierendes Indefinitpronomen</interp>
+ <interp xml:id="TEST.BSP.POS.PPER">irreflexives Personalpronomen</interp>
+ <interp xml:id="TEST.BSP.POS.PPOSAT">attribuierendes Possessivpronomen</interp>
+ <interp xml:id="TEST.BSP.POS.PPOSS">substituierendes Possessivpronomen</interp>
+ <interp xml:id="TEST.BSP.POS.PRELAT">attribuierendes Relativpronomen</interp>
+ <interp xml:id="TEST.BSP.POS.PRELS">substituierendes Relativpronomen</interp>
+ <interp xml:id="TEST.BSP.POS.PRF">reflexives Personalpronomen</interp>
+ <interp xml:id="TEST.BSP.POS.PROAV">Pronominaladverb</interp>
+ <interp xml:id="TEST.BSP.POS.PTKA">Partikel bei Adjektiv oder Adverb</interp>
+ <interp xml:id="TEST.BSP.POS.PTKANT">Antwortpartikel</interp>
+ <interp xml:id="TEST.BSP.POS.PTKNEG">Negationspartikel</interp>
+ <interp xml:id="TEST.BSP.POS.PTKVZ">abgetrennter Verbzusatz</interp>
+ <interp xml:id="TEST.BSP.POS.PTKZU">"zu" vor Infinitiv</interp>
+ <interp xml:id="TEST.BSP.POS.PWAT">attribuierendes Interrogativpronomen</interp>
+ <interp xml:id="TEST.BSP.POS.PWAV">adverbiales Interrogativ- oder Relativpronomen</interp>
+ <interp xml:id="TEST.BSP.POS.PWS">substituierendes Interrogativpronomen</interp>
+ <interp xml:id="TEST.BSP.POS.TRUNC">Kompositions-Erstglied</interp>
+ <interp xml:id="TEST.BSP.POS.VAFIN">finites Verb, aux</interp>
+ <interp xml:id="TEST.BSP.POS.VAIMP">Imperativ, aux</interp>
+ <interp xml:id="TEST.BSP.POS.VAINF">Infinitiv, aux</interp>
+ <interp xml:id="TEST.BSP.POS.VAPP">Partizip Perfekt, aux</interp>
+ <interp xml:id="TEST.BSP.POS.VMFIN">finites Verb, modal</interp>
+ <interp xml:id="TEST.BSP.POS.VMINF">Infinitiv, modal</interp>
+ <interp xml:id="TEST.BSP.POS.VMPP">Partizip Perfekt, modal</interp>
+ <interp xml:id="TEST.BSP.POS.VVFIN">finites Verb, voll</interp>
+ <interp xml:id="TEST.BSP.POS.VVIMP">Imperativ, voll</interp>
+ <interp xml:id="TEST.BSP.POS.VVINF">Infinitiv, voll</interp>
+ <interp xml:id="TEST.BSP.POS.VVIZU">Infinitiv mit "zu", voll</interp>
+ <interp xml:id="TEST.BSP.POS.VVPP">Partizip Perfekt, voll</interp>
+ <interp xml:id="TEST.BSP.POS.XY">Nichtwort, Sonderzeichen enthaltend</interp>
+ <interp xml:id="TEST.BSP.POS._KOMMA">Komma</interp>
+ <interp xml:id="TEST.BSP.POS._SONST">sonstige Satzzeichen; satzintern</interp>
+ <interp xml:id="TEST.BSP.POS._ENDE">Satzbeendende Interpunktion</interp>
+ </interpGrp>
+ </ab>
+ </interpretation>
+</teiHeader>
\ No newline at end of file
diff --git a/t/real/sgbr/TEST/header.xml b/t/real/sgbr/TEST/header.xml
new file mode 100644
index 0000000..87a0b99
--- /dev/null
+++ b/t/real/sgbr/TEST/header.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<teiHeader>
+<korpusSigle>TEST</korpusSigle>
+</teiHeader>
diff --git a/t/real/sgbr/base.t b/t/real/sgbr/base.t
new file mode 100644
index 0000000..194c49c
--- /dev/null
+++ b/t/real/sgbr/base.t
@@ -0,0 +1,43 @@
+use strict;
+use warnings;
+use Test::More;
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+use Data::Dumper;
+use KorAP::XML::Tokenizer;
+use KorAP::XML::Krill;
+use utf8;
+
+if ($ENV{SKIP_REAL}) {
+ plan skip_all => 'Skip real tests';
+};
+# Fixture: the CMC-TSK/2014-09/3401 directory that sits next to this test file.
+my $path = catdir(dirname(__FILE__), 'CMC-TSK', '2014-09', '3401');
+
+ok(my $doc = KorAP::XML::Krill->new(
+ path => $path . '/'
+), 'Create Document');
+
+ok($doc->parse('Sgbr'), 'Parse document');
+
+ok(my $tokens = KorAP::XML::Tokenizer->new(
+ path => $doc->path,
+ doc => $doc,
+ foundry => 'Sgbr',
+ layer => 'Lemma',
+ name => 'tokens'
+), 'Create tokens based on lemmata');
+
+ok($tokens->parse, 'Parse tokenization based on lemmata');
+
+ok($tokens->add('Base', 'Sentences'), 'Add Sentences');
+# All checks below look at the entries stored at the first stream position.
+my $stream = $tokens->to_data->{data}->{stream};
+
+is($stream->[0]->[0], '-:base/sentences$<i>1', 'Sentence number');
+is($stream->[0]->[1], '-:tokens$<i>15', 'Token number');
+is($stream->[0]->[2], '<>:base/s:t$<b>64<i>0<i>115<i>15<b>0', 'Text boundary');
+is($stream->[0]->[3], '<>:base/s:s$<b>64<i>16<i>114<i>15<b>2', 'Sentence span');
+is($stream->[0]->[4], '_0$<i>17<i>18', 'Position');
+
+done_testing;
diff --git a/t/real/sgbr/lemma.t b/t/real/sgbr/lemma.t
new file mode 100644
index 0000000..54f94e4
--- /dev/null
+++ b/t/real/sgbr/lemma.t
@@ -0,0 +1,110 @@
+use strict;
+use warnings;
+use Test::More;
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+use Data::Dumper;
+use KorAP::XML::Tokenizer;
+use KorAP::XML::Krill;
+use utf8;
+
+# Setting SKIP_REAL opts out of every test in this file.
+plan(skip_all => 'Skip real tests')
+ if $ENV{SKIP_REAL};
+
+my $path = catdir(dirname(__FILE__), qw(TEST BSP 1));
+
+# Example text TEST/BSP/1
+my $doc = KorAP::XML::Krill->new(path => "$path/");
+ok($doc, 'Create Document');
+
+ok($doc->parse, 'Parse document');
+
+# The tokenization is read from the Sgbr lemma layer.
+my $tokens = KorAP::XML::Tokenizer->new(
+ path => $doc->path,
+ doc => $doc,
+ foundry => 'Sgbr', layer => 'Lemma', name => 'tokens',
+);
+ok($tokens, 'Create tokens based on lemmata');
+
+ok($tokens->parse, 'Parse tokenization based on lemmata');
+
+ok($tokens->add('Sgbr', 'Lemma'), 'Add Structure');
+
+# The remaining checks of this section inspect single stream entries.
+my $stream = $tokens->to_data->{data}{stream};
+
+is($stream->[0][0], '-:tokens$<i>51', 'Token number');
+is($stream->[0][1], '<>:base/s:t$<b>64<i>0<i>365<i>51<b>0', 'Text Boundary');
+is($stream->[0][2], '_0$<i>0<i>18', 'Position');
+is($stream->[0][3], 'i:sommerüberraschung', 'First term');
+is($stream->[0][4], 's:Sommerüberraschung', 'First term');
+is($stream->[0][5], 'sgbr/l:Sommerüberraschung', 'First term');
+ok(!defined($stream->[0][6]), 'First term');
+
+is($stream->[1][0], '_1$<i>19<i>21', 'Position');
+is($stream->[1][1], 'i:es', 'Second term');
+is($stream->[1][2], 's:Es', 'Second term');
+is($stream->[1][3], 'sgbr/l:es', 'Second term');
+is($stream->[1][4], 'sgbr/lv:er', 'Second term');
+is($stream->[1][5], 'sgbr/lv:sie', 'Second term');
+
+is($stream->[16][0], '_16$<i>107<i>115', 'Position');
+is($stream->[16][1], 'i:guenther', '16th term');
+is($stream->[16][2], 's:Guenther', '16th term');
+is($stream->[16][3], 'sgbr/l:Günther', '16th term');
+is($stream->[16][4], 'sgbr/lv:Günter', '16th term');
+
+is($stream->[-1][0], '_50$<i>359<i>364', 'Position');
+is($stream->[-1][1], 'i:kevin', 'Last term');
+is($stream->[-1][2], 's:Kevin', 'Last term');
+is($stream->[-1][3], 'sgbr/l:Kevin', 'Last term');
+
+
+# Real data 1
+$path = catdir(dirname(__FILE__), qw(CMC-TSK 2014-09 2843));
+
+$doc = KorAP::XML::Krill->new(path => "$path/");
+ok($doc, 'Create Document');
+
+ok($doc->parse, 'Parse document');
+
+# Same tokenizer configuration as for the example text.
+$tokens = KorAP::XML::Tokenizer->new(
+ path => $doc->path,
+ doc => $doc,
+ foundry => 'Sgbr', layer => 'Lemma', name => 'tokens',
+);
+ok($tokens, 'Create tokens based on lemmata');
+
+ok($tokens->parse, 'Parse tokenization based on lemmata');
+
+# For this text, adding the lemma layer is expected to fail.
+ok(!$tokens->add('Sgbr', 'Lemma'), 'Add Structure impossible - no token data');
+
+
+# Real data 2
+$path = catdir(dirname(__FILE__), qw(CMC-TSK 2014-09 3401));
+
+$doc = KorAP::XML::Krill->new(path => "$path/");
+ok($doc, 'Create Document');
+
+ok($doc->parse, 'Parse document');
+
+# Same tokenizer configuration once more.
+$tokens = KorAP::XML::Tokenizer->new(
+ path => $doc->path,
+ doc => $doc,
+ foundry => 'Sgbr', layer => 'Lemma', name => 'tokens',
+);
+ok($tokens, 'Create tokens based on lemmata');
+
+ok($tokens->parse, 'Parse tokenization based on lemmata');
+
+# For this text, adding the lemma layer is expected to succeed.
+ok($tokens->add('Sgbr', 'Lemma'), 'Add Structure');
+
+done_testing;
+
+__END__
diff --git a/t/real/sgbr/meta.t b/t/real/sgbr/meta.t
new file mode 100644
index 0000000..dcaf6cf
--- /dev/null
+++ b/t/real/sgbr/meta.t
@@ -0,0 +1,78 @@
+use strict;
+use warnings;
+use Test::More;
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+use Data::Dumper;
+use KorAP::XML::Tokenizer;
+use KorAP::XML::Krill;
+use utf8;
+
+if ($ENV{SKIP_REAL}) {
+ plan skip_all => 'Skip real tests';
+};
+
+my $path = catdir(dirname(__FILE__), 'TEST', 'BSP', 1);
+
+ok(my $doc = KorAP::XML::Krill->new(
+ path => $path . '/',
+ meta_type => 'Sgbr'
+), 'Create Document');
+
+ok($doc->parse, 'Parse document');
+
+like($doc->path, qr!\Q$path\E/!, 'Path');
+
+# Metadata: sigles of the text, its document and its corpus
+is($doc->text_sigle, 'TEST/BSP/1', 'ID-text');
+is($doc->doc_sigle, 'TEST/BSP', 'ID-doc');
+is($doc->corpus_sigle, 'TEST', 'ID-corpus');
+
+my $meta = $doc->meta;
+
+is($meta->{T_title}, 'Sommerüberraschung', 'title');
+is($meta->{T_author}, 'TEST.BSP.Autoren.1', 'Author');
+is($meta->{'S_sgbr_author_age_class'}, 'X', 'AgeClass');
+
+is($meta->{'S_sgbr_author_sex'}, 'M', 'Sex');
+is($meta->{'S_sgbr_kodex'}, 'M', 'Kodex');
+
+is($meta->{T_doc_title}, 'Beispielkorpus', 'Doc: title');
+is($meta->{T_doc_sub_title}, 'Subkorpus Beispieltext', 'Doc: subtitle');
+
+is($meta->{S_language}, 'de', 'Language');
+# Fields that are expected to be absent for this text
+ok(!$meta->{A_publisher}, 'Publisher');
+ok(!$meta->{A_editor}, 'Editor');
+ok(!$meta->{S_text_type}, 'Text Type');
+ok(!$meta->{S_text_type_art}, 'Text Type Art');
+ok(!$meta->{S_text_type_ref}, 'Text Type Ref');
+ok(!$meta->{S_text_column}, 'Text Column');
+ok(!$meta->{S_text_domain}, 'Text Domain');
+ok(!$meta->{D_creation_date}, 'Creation Date');
+ok(!$meta->{license}, 'License'); # NOTE(review): unprefixed key; sibling tests use A_license / S_license - confirm the intended key
+ok(!$meta->{A_pages}, 'Pages');
+ok(!$meta->{A_file_edition_statement}, 'File Edition Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Edition Statement');
+ok(!$meta->{A_reference}, 'Reference');
+
+ok(!$meta->{A_doc_editor}, 'Doc: editor');
+ok(!$meta->{T_doc_author}, 'Doc: author');
+
+ok(!$meta->{T_corpus_title}, 'Corpus: title');
+ok(!$meta->{T_corpus_sub_title}, 'Corpus: subtitle');
+ok(!$meta->{A_corpus_editor}, 'Corpus: editor');
+ok(!$meta->{T_corpus_author}, 'Corpus: author');
+
+my $hash = $doc->to_hash;
+is($hash->{title}, 'Sommerüberraschung', 'Corpus title');
+is($hash->{sgbrAuthorSex}, 'M', 'additional');
+
+# Sgbr specific keywords
+is($meta->keywords('K_keywords'), 'sgbrAuthorAgeClass:X sgbrAuthorSex:M sgbrKodex:M', 'Keywords');
+
+
+done_testing;
+
+
+__END__
diff --git a/t/real/sgbr/meta_duden.t b/t/real/sgbr/meta_duden.t
new file mode 100644
index 0000000..16f8f8d
--- /dev/null
+++ b/t/real/sgbr/meta_duden.t
@@ -0,0 +1,84 @@
+use strict;
+use warnings;
+use Test::More;
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+use Data::Dumper;
+use KorAP::XML::Tokenizer;
+use KorAP::XML::Krill;
+use utf8;
+
+if ($ENV{SKIP_REAL}) {
+ plan skip_all => 'Skip real tests';
+};
+
+my $path = catdir(dirname(__FILE__), 'PRO-DUD', 'BSP-2013-01', 32);
+
+ok(my $doc = KorAP::XML::Krill->new(
+ path => $path . '/',
+ meta_type => 'Sgbr'
+), 'Create Document');
+
+ok($doc->parse, 'Parse document');
+
+like($doc->path, qr!\Q$path\E/!, 'Path');
+
+# Metadata: sigles of the text, its document and its corpus
+is($doc->text_sigle, 'PRO-DUD/BSP-2013-01/32', 'ID-text');
+is($doc->doc_sigle, 'PRO-DUD/BSP-2013-01', 'ID-doc');
+is($doc->corpus_sigle, 'PRO-DUD', 'ID-corpus');
+
+my $meta = $doc->meta;
+is($meta->{T_title}, 'Nur Platt, kein Deutsch', 'title');
+ok(!$meta->{T_sub_title}, 'no subtitle');
+
+is($meta->{A_publisher}, 'Dorfblatt GmbH', 'Publisher');
+is($meta->{D_pub_date}, '20130126', 'Publication date');
+is($meta->{D_sgbr_date}, '2013-01-26', 'Sgbr date');
+is($meta->{S_pub_place}, 'Stadtingen', 'Publication place');
+
+is($meta->{T_doc_title}, 'Korpus zur Beobachtung des Schreibgebrauchs im Deutschen', 'Doc title');
+is($meta->{T_doc_sub_title}, 'Subkorpus Ortsblatt, Jahrgang 2013, Monat Januar', 'Doc Sub title');
+
+is($meta->{'A_funder'}, 'Bundesministerium für Bildung und Forschung', 'Funder');
+
+is($meta->{T_author}, 'unbekannt', 'Author');
+ok(!$meta->{'S_sgbr_author_sex'}, 'No Sex');
+is($meta->{'S_sgbr_kodex'}, 'T', 'Kodex');
+
+is($meta->keywords('K_keywords'), 'sgbrKodex:T', 'Keywords');
+
+is($meta->{S_language}, 'de', 'Language');
+# Fields that are expected to be absent for this text
+ok(!$meta->{A_editor}, 'Editor');
+
+ok(!$meta->{S_text_type}, 'Text Type');
+ok(!$meta->{S_text_type_art}, 'Text Type Art');
+ok(!$meta->{S_text_type_ref}, 'Text Type Ref');
+ok(!$meta->{S_text_column}, 'Text Column');
+ok(!$meta->{S_text_domain}, 'Text Domain');
+ok(!$meta->{D_creation_date}, 'Creation Date');
+ok(!$meta->{A_license}, 'License'); # NOTE(review): meta_ids.t checks S_license instead - confirm the intended key
+ok(!$meta->{A_pages}, 'Pages');
+ok(!$meta->{A_file_edition_statement}, 'File Edition Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Edition Statement');
+ok(!$meta->{A_reference}, 'Reference');
+
+
+ok(!$meta->{A_doc_editor}, 'Doc: editor');
+ok(!$meta->{T_doc_author}, 'Doc: author');
+
+ok(!$meta->{T_corpus_title}, 'Corpus: title');
+ok(!$meta->{T_corpus_sub_title}, 'Corpus: subtitle');
+ok(!$meta->{A_corpus_editor}, 'Corpus: editor');
+ok(!$meta->{T_corpus_author}, 'Corpus: author');
+
+my $hash = $doc->to_hash;
+is($hash->{title}, 'Nur Platt, kein Deutsch', 'Corpus title');
+is($hash->{sgbrKodex}, 'T', 'store');
+
+
+done_testing;
+
+
+__END__
diff --git a/t/real/sgbr/meta_ids.t b/t/real/sgbr/meta_ids.t
new file mode 100644
index 0000000..d81a494
--- /dev/null
+++ b/t/real/sgbr/meta_ids.t
@@ -0,0 +1,153 @@
+use strict;
+use warnings;
+use Test::More;
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+use Data::Dumper;
+use KorAP::XML::Tokenizer;
+use KorAP::XML::Krill;
+use utf8;
+
+if ($ENV{SKIP_REAL}) {
+ plan skip_all => 'Skip real tests';
+};
+
+my $path = catdir(dirname(__FILE__), 'CMC-TSK', '2014-09', '2843');
+
+ok(my $doc = KorAP::XML::Krill->new(
+ path => $path . '/',
+ meta_type => 'Sgbr'
+), 'Create Document');
+
+ok($doc->parse, 'Parse document');
+
+like($doc->path, qr!\Q$path\E/!, 'Path');
+
+# Metadata of the first document (CMC-TSK/2014-09/2843)
+is($doc->text_sigle, 'CMC-TSK/2014-09/2843', 'ID-text');
+
+is($doc->doc_sigle, 'CMC-TSK/2014-09', 'ID-doc');
+is($doc->corpus_sigle, 'CMC-TSK', 'ID-corpus');
+
+my $meta = $doc->meta;
+
+is($meta->{T_title}, '@ Koelle_am_Rhing 10:18', 'title');
+
+ok(!$meta->{T_sub_title}, 'no subtitle');
+
+is($meta->{A_publisher}, 'tagesschau.de', 'Publisher');
+
+is($meta->{D_pub_date}, '20140930', 'Publication date');
+
+ok(!$meta->{S_pub_place}, 'No pub place');
+
+is($meta->{T_doc_title}, 'Korpus zur Beobachtung des Schreibgebrauchs im Deutschen', 'Doc title');
+is($meta->{T_doc_sub_title}, 'Subkorpus Internettexte, Subkorpus Leserkommentare Tagesschau, Subkorpus September 2014, Subkorpus Beispielauszug', 'Doc Sub title');
+
+is($meta->{'A_funder'}, 'Bundesministerium für Bildung und Forschung', 'Funder');
+
+is($meta->{T_author}, 'privat23', 'Author');
+ok(!$meta->{'S_sgbr_author_sex'}, 'No Sex');
+ok(!$meta->{'S_sgbr_kodex'}, 'No kodex');
+is($meta->{A_reference}, 'http://meta.tagesschau.de/node/090285#comment-1732187', 'Publace ref');
+
+is($meta->keywords('K_keywords'), '', 'No keywords');
+
+is($meta->{S_language}, 'de', 'Language');
+
+ok(!$meta->{A_editor}, 'Editor');
+
+ok(!$meta->{S_text_type}, 'Text Type');
+ok(!$meta->{S_text_type_art}, 'Text Type Art');
+ok(!$meta->{S_text_type_ref}, 'Text Type Ref');
+ok(!$meta->{S_text_column}, 'Text Column');
+ok(!$meta->{S_text_domain}, 'Text Domain');
+ok(!$meta->{D_creation_date}, 'Creation Date');
+ok(!$meta->{S_license}, 'License'); # NOTE(review): meta_duden.t checks A_license instead - confirm the intended key
+ok(!$meta->{A_pages}, 'Pages');
+ok(!$meta->{A_file_edition_statement}, 'File Edition Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Edition Statement');
+
+ok(!$meta->{A_doc_editor}, 'Doc: editor');
+ok(!$meta->{T_doc_author}, 'Doc: author');
+
+ok(!$meta->{T_corpus_title}, 'Corpus: title');
+ok(!$meta->{T_corpus_sub_title}, 'Corpus: subtitle');
+ok(!$meta->{A_corpus_editor}, 'Corpus: editor');
+ok(!$meta->{T_corpus_author}, 'Corpus: author');
+
+my $hash = $doc->to_hash;
+is($hash->{title}, '@ Koelle_am_Rhing 10:18', 'Corpus title');
+
+# Second document (CMC-TSK/2014-09/3401)
+$path = catdir(dirname(__FILE__), 'CMC-TSK', '2014-09', '3401');
+
+ok($doc = KorAP::XML::Krill->new(
+ path => $path . '/',
+ meta_type => 'Sgbr'
+), 'Create Document');
+
+ok($doc->parse, 'Parse document');
+
+like($doc->path, qr!\Q$path\E/!, 'Path');
+
+# Metadata of the second document
+is($doc->text_sigle, 'CMC-TSK/2014-09/3401', 'ID-text');
+
+is($doc->doc_sigle, 'CMC-TSK/2014-09', 'ID-doc');
+is($doc->corpus_sigle, 'CMC-TSK', 'ID-corpus');
+
+
+$meta = $doc->meta;
+is($meta->{T_title}, '@fitnessfrosch', 'title');
+
+ok(!$meta->{T_sub_title}, 'no subtitle');
+
+is($meta->{A_publisher}, 'tagesschau.de', 'Publisher');
+
+is($meta->{D_pub_date}, '20141001', 'Publication date');
+is($meta->{'D_sgbr_date'}, '2014-10-01 00:50:00', 'Sgbr date');
+
+ok(!$meta->{S_pub_place}, 'No pub place');
+
+is($meta->{T_doc_title}, 'Korpus zur Beobachtung des Schreibgebrauchs im Deutschen', 'Doc title');
+is($meta->{T_doc_sub_title}, 'Subkorpus Internettexte, Subkorpus Leserkommentare Tagesschau, Subkorpus September 2014, Subkorpus Beispielauszug', 'Doc Sub title');
+
+is($meta->{'A_funder'}, 'Bundesministerium für Bildung und Forschung', 'Funder');
+
+is($meta->{T_author}, 'weltoffen', 'Author');
+ok(!$meta->{'S_sgbr_author_sex'}, 'No Sex');
+ok(!$meta->{'S_sgbr_kodex'}, 'No kodex');
+is($meta->{A_reference}, 'http://meta.tagesschau.de/node/090308#comment-1732754', 'Publace ref');
+
+is($meta->keywords('K_keywords'), '', 'No keywords');
+
+is($meta->{S_language}, 'de', 'Language');
+
+ok(!$meta->{A_editor}, 'Editor');
+
+ok(!$meta->{S_text_type}, 'Text Type');
+ok(!$meta->{S_text_type_art}, 'Text Type Art');
+ok(!$meta->{S_text_type_ref}, 'Text Type Ref');
+ok(!$meta->{S_text_column}, 'Text Column');
+ok(!$meta->{S_text_domain}, 'Text Domain');
+ok(!$meta->{D_creation_date}, 'Creation Date');
+ok(!$meta->{S_license}, 'License'); # NOTE(review): meta_duden.t checks A_license instead - confirm the intended key
+ok(!$meta->{A_pages}, 'Pages');
+ok(!$meta->{A_file_edition_statement}, 'File Edition Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Edition Statement');
+
+ok(!$meta->{A_doc_editor}, 'Doc: editor');
+ok(!$meta->{T_doc_author}, 'Doc: author');
+
+ok(!$meta->{T_corpus_title}, 'Corpus: title');
+ok(!$meta->{T_corpus_sub_title}, 'Corpus: subtitle');
+ok(!$meta->{A_corpus_editor}, 'Corpus: editor');
+ok(!$meta->{T_corpus_author}, 'Corpus: author');
+
+$hash = $doc->to_hash;
+is($hash->{title}, '@fitnessfrosch', 'Corpus title');
+
+done_testing;
+__END__
+
diff --git a/t/real/sgbr/pos.t b/t/real/sgbr/pos.t
new file mode 100644
index 0000000..33fbc90
--- /dev/null
+++ b/t/real/sgbr/pos.t
@@ -0,0 +1,62 @@
+use strict;
+use warnings;
+use Test::More;
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+use Data::Dumper;
+use KorAP::XML::Tokenizer;
+use KorAP::XML::Krill;
+use utf8;
+
+if ($ENV{SKIP_REAL}) {
+ plan skip_all => 'Skip real tests';
+};
+
+my $path = catdir(dirname(__FILE__), 'TEST', 'BSP', 1);
+
+ok(my $doc = KorAP::XML::Krill->new(
+ path => $path . '/'
+), 'Create Document');
+
+ok($doc->parse, 'Parse document');
+
+ok(my $tokens = KorAP::XML::Tokenizer->new(
+ path => $doc->path,
+ doc => $doc,
+ foundry => 'Sgbr',
+ layer => 'Lemma',
+ name => 'tokens'
+), 'Create tokens based on lemmata');
+
+ok($tokens->parse, 'Parse tokenization based on lemmata');
+
+ok($tokens->add('Sgbr', 'Morpho'), 'Add Structure');
+
+my $data = $tokens->to_data->{data};
+
+my $stream = $data->{stream};
+
+is($stream->[0]->[0], '-:tokens$<i>51', 'Token number');
+is($stream->[0]->[1], '<>:base/s:t$<b>64<i>0<i>365<i>51<b>0', 'Text boundary');
+is($stream->[0]->[2], '_0$<i>0<i>18', 'Position');
+is($stream->[0]->[3], 'i:sommerüberraschung', 'First term');
+is($stream->[0]->[4], 's:Sommerüberraschung', 'First term');
+is($stream->[0]->[5], 'sgbr/p:NN', 'First term POS');
+
+is($stream->[1]->[3], 'sgbr/p:PPER', 'Second term POS');
+is($stream->[-1]->[3], 'sgbr/p:NE', 'Last term POS');
+
+# Add the lemma layer as well and check the entries of the last token again
+ok($tokens->add('Sgbr', 'Lemma'), 'Add Structure');
+
+$data = $tokens->to_data->{data};
+$stream = $data->{stream};
+
+is($stream->[-1]->[0], '_50$<i>359<i>364', 'Last position');
+is($stream->[-1]->[1], 'i:kevin', 'Last term');
+is($stream->[-1]->[2], 's:Kevin', 'Last term');
+is($stream->[-1]->[3], 'sgbr/l:Kevin', 'Last term');
+is($stream->[-1]->[4], 'sgbr/p:NE', 'Last term');
+ok(!defined $stream->[-1]->[5], 'Last term');
+
+done_testing;
diff --git a/t/real/sgbr/script_single.t b/t/real/sgbr/script_single.t
new file mode 100644
index 0000000..934ce8a
--- /dev/null
+++ b/t/real/sgbr/script_single.t
@@ -0,0 +1,69 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use File::Basename 'dirname';
+use File::Spec::Functions qw/catdir catfile/;
+use File::Temp qw/:POSIX/;
+use Mojo::File;
+use Mojo::JSON qw/decode_json/;
+use IO::Uncompress::Gunzip;
+use Test::More;
+use Test::Output;
+use Data::Dumper;
+use utf8;
+
+if ($ENV{SKIP_SCRIPT} || $ENV{SKIP_REAL}) {
+ plan skip_all => 'Skip script/real tests';
+};
+
+my $f = dirname(__FILE__);
+# NOTE(review): two levels up from t/real/sgbr is t/, so three levels up is tried first and the old path kept as fallback - confirm layout.
+my $script = catfile($f, '..', '..', '..', 'script', 'korapxml2krill');
+$script = catfile($f, '..', '..', 'script', 'korapxml2krill') unless -f $script;
+my $input = catdir($f, '..', 'sgbr', 'PRO-DUD', 'BSP-2013-01', '32');
+
+my $output = tmpnam();
+my $cache = tmpnam();
+
+# Convert one text with Base#Tokens_aggr as token source (-t); -m is presumably the meta type and -l the log level
+my $call = join(
+ ' ',
+ 'perl', $script,
+ '--input' => $input,
+ '--output' => $output,
+ '--cache' => $cache,
+ '-m' => 'Sgbr',
+ '-t' => 'Base#Tokens_aggr',
+ '-l' => 'INFO'
+);
+
+stderr_like(
+ sub {
+ system($call);
+ },
+ qr!The code took!,
+ $call
+);
+
+ok(-f $output, 'Output does exist');
+ok((my $file = Mojo::File->new($output)->slurp), 'Slurp data');
+ok((my $json = decode_json $file), 'decode json');
+
+is($json->{data}->{text}, 'Selbst ist der Jeck', 'Text');
+is($json->{data}->{tokenSource}, 'base#tokens_aggr', 'TokenSource');
+is($json->{pubPlace}, 'Stadtingen', 'pubPlace');
+is($json->{textSigle}, 'PRO-DUD/BSP-2013-01/32', 'textSigle');
+is($json->{docSigle}, 'PRO-DUD/BSP-2013-01', 'docSigle');
+is($json->{corpusSigle}, 'PRO-DUD', 'corpusSigle');
+is($json->{sgbrKodex}, 'T', 'sgbrKodex');
+is($json->{author}, 'unbekannt', 'Author');
+is($json->{language}, 'de', 'Language');
+is($json->{docTitle}, 'Korpus zur Beobachtung des Schreibgebrauchs im Deutschen', 'docTitle');
+is($json->{funder}, 'Bundesministerium für Bildung und Forschung', 'funder');
+is($json->{title}, 'Nur Platt, kein Deutsch', 'title');
+is($json->{pubDate}, '20130126', 'pubDate');
+is($json->{docSubTitle}, 'Subkorpus Ortsblatt, Jahrgang 2013, Monat Januar', 'docSubTitle');
+is($json->{keywords}, 'sgbrKodex:T', 'keywords');
+is($json->{publisher}, 'Dorfblatt GmbH', 'publisher');
+
+done_testing;
diff --git a/t/real/sgbr/token.t b/t/real/sgbr/token.t
new file mode 100644
index 0000000..1c95001
--- /dev/null
+++ b/t/real/sgbr/token.t
@@ -0,0 +1,45 @@
+use strict;
+use warnings;
+use Test::More;
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+use Data::Dumper;
+use KorAP::XML::Tokenizer;
+use KorAP::XML::Krill;
+use utf8;
+
+# Setting SKIP_REAL opts out of every test in this file.
+plan(skip_all => 'Skip real tests')
+ if $ENV{SKIP_REAL};
+
+my $path = catdir(dirname(__FILE__), qw(TEST BSP 1));
+
+# Example text TEST/BSP/1
+my $doc = KorAP::XML::Krill->new(path => "$path/");
+ok($doc, 'Create Document');
+
+ok($doc->parse, 'Parse document');
+
+# The tokenization is read from the Sgbr lemma layer.
+my $tokens = KorAP::XML::Tokenizer->new(
+ path => $doc->path,
+ doc => $doc,
+ foundry => 'Sgbr', layer => 'Lemma', name => 'tokens',
+);
+ok($tokens, 'Create tokens based on lemmata');
+
+ok($tokens->parse, 'Parse tokenization based on lemmata');
+
+# No annotation layer is added in this file; only entries of the
+# freshly parsed token stream are checked.
+my $stream = $tokens->to_data->{data}{stream};
+
+is($stream->[0][0], '-:tokens$<i>51', 'Token number');
+is($stream->[0][2], '_0$<i>0<i>18', 'Position');
+is($stream->[0][3], 'i:sommerüberraschung', 'First term');
+is($stream->[0][4], 's:Sommerüberraschung', 'First term');
+is($stream->[-1][0], '_50$<i>359<i>364', 'Last position');
+is($stream->[-1][1], 'i:kevin', 'Last term');
+is($stream->[-1][2], 's:Kevin', 'Last term');
+
+done_testing;