Added caching test
Change-Id: I37425aa7f5397b88212c234fb1c668404f6d0b70
diff --git a/script/korapxml2krill b/script/korapxml2krill
index 7e729e0..5e9cc38 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -339,35 +339,38 @@
# Extract XML files
elsif ($cmd eq 'extract') {
- my $input = $input[0];
-
pod2usage(%ERROR_HASH) unless $output;
- # TODO: Support sigles and full archives
-
if ($output && (!-e $output || !-d $output)) {
print "Directory '$output' does not exist.\n\n";
exit(0);
};
-#TODOOOOOO
+ # TODO: Support sigles and full archives
- if (-f($input) && (my $archive = KorAP::XML::Archive->new($input))) {
+ if (-f($input[0]) && (my $archive = KorAP::XML::Archive->new($input[0]))) {
unless ($archive->test_unzip) {
print "Unzip is not installed or incompatible.\n\n";
exit(1);
};
+ # Add further annotation archives (the primary archive is already loaded)
+ $archive->attach($_) foreach @input[1..$#input];
+
# Iterate over all given sigles and extract
foreach (@sigle) {
print "$_ ";
- print '' . ($archive->extract('./'. $_, $output) ? '' : 'not ');
+# print '' . ($archive->extract('./'. $_, $output) ? '' : 'not ');
+ print '' . ($archive->extract('./' . $_, $output) ? '' : 'not ');
print "extracted.\n";
};
print "\n";
exit(1);
+ }
+ else {
+ $log->error('Unable to extract from primary archive ' . $input[0]);
};
}
@@ -701,6 +704,7 @@
Extract the given text sigles.
Can be set multiple times.
I<Currently only supported on C<extract>.>
+Sigles have the structure C<Corpus>/C<Document>/C<Text>.
=item B<--log|-l>
diff --git a/t/corpus/REI/BNG/header.xml b/t/corpus/REI/BNG/header.xml
index 1dbe5fc..6587170 100644
--- a/t/corpus/REI/BNG/header.xml
+++ b/t/corpus/REI/BNG/header.xml
@@ -2,24 +2,24 @@
<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
<idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
- <fileDesc>
- <titleStmt>
- <dokumentSigle>REI/BNG</dokumentSigle>
- <d.title>Reden der Bundestagsfraktion Bündnis 90/DIE GRÜNEN, (2002-2006)</d.title>
- </titleStmt>
- <publicationStmt>
- <distributor/>
- <pubAddress/>
- <availability region="world">CC-BY-SA</availability>
- <pubDate/>
- </publicationStmt>
- <sourceDesc>
- <biblStruct>
- <monogr>
- <h.title type="main"/>
- <imprint/>
- </monogr>
- </biblStruct>
- </sourceDesc>
- </fileDesc>
- </idsHeader>
\ No newline at end of file
+ <fileDesc>
+ <titleStmt>
+ <dokumentSigle>REI/BNG</dokumentSigle>
+ <d.title>Reden der Bundestagsfraktion Bündnis 90/DIE GRÜNEN, (2002-2006)</d.title>
+ </titleStmt>
+ <publicationStmt>
+ <distributor/>
+ <pubAddress/>
+ <availability region="world">CC-BY-SA</availability>
+ <pubDate/>
+ </publicationStmt>
+ <sourceDesc>
+ <biblStruct>
+ <monogr>
+ <h.title type="main"/>
+ <imprint/>
+ </monogr>
+ </biblStruct>
+ </sourceDesc>
+ </fileDesc>
+ </idsHeader>
\ No newline at end of file
diff --git a/t/corpus/REI/header.xml b/t/corpus/REI/header.xml
index 8acf7cc..54ebf4a 100644
--- a/t/corpus/REI/header.xml
+++ b/t/corpus/REI/header.xml
@@ -2,32 +2,32 @@
<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
<idsHeader type="corpus" pattern="allesaußerZtg/Zschr" status="new" version="1.1" TEIform="teiHeader">
- <fileDesc>
- <titleStmt>
- <korpusSigle>REI</korpusSigle>
- <c.title>Reden und Interviews</c.title>
- </titleStmt>
- <publicationStmt>
- <distributor> Institut für Deutsche Sprache </distributor>
- <pubAddress> Postfach 10 16 21, D-68016 Mannheim </pubAddress>
- <telephone> +49 (0)621 1581 0 </telephone>
- <availability region="world">CC-BY-SA</availability>
- <pubDate/>
- </publicationStmt>
- <sourceDesc>
- <biblStruct>
- <monogr>
- <h.title type="main"/>
- <imprint/>
- </monogr>
- </biblStruct>
- </sourceDesc>
- </fileDesc>
- <encodingDesc>
- <projectDesc/>
- <samplingDecl/>
- <editorialDecl>
- <transduction> TraDuCES - Korpus-Transformationscompiler, Version 3.6.3,
+ <fileDesc>
+ <titleStmt>
+ <korpusSigle>REI</korpusSigle>
+ <c.title>Reden und Interviews</c.title>
+ </titleStmt>
+ <publicationStmt>
+ <distributor> Institut für Deutsche Sprache </distributor>
+ <pubAddress> Postfach 10 16 21, D-68016 Mannheim </pubAddress>
+ <telephone> +49 (0)621 1581 0 </telephone>
+ <availability region="world">CC-BY-SA</availability>
+ <pubDate/>
+ </publicationStmt>
+ <sourceDesc>
+ <biblStruct>
+ <monogr>
+ <h.title type="main"/>
+ <imprint/>
+ </monogr>
+ </biblStruct>
+ </sourceDesc>
+ </fileDesc>
+ <encodingDesc>
+ <projectDesc/>
+ <samplingDecl/>
+ <editorialDecl>
+ <transduction> TraDuCES - Korpus-Transformationscompiler, Version 3.6.3,
Eric Seubert, IDS Mannheim, 5. Mai 2010
Optionen bei der Konvertierung:
- Dubletten-Modus:
@@ -40,215 +40,215 @@
Entfernung aller Deklarationen für Dubletten.
Entfernung von Texten mit Sperrvermerken.
Entfernung von Texten mit minimalem Inhalt.</transduction>
- <transduction>$Id: fixPreXCES.l 1194 2008-04-30 14:16:53Z kupietz $</transduction>
- <transduction>$Id: idsces2idsxces 1843 2011-09-13 14:52:21Z kupietz $</transduction>
- <pagination type="no"/>
- </editorialDecl>
- <classDecl>
- <taxonomy id="topic">
- <h.bibl>Thementaxonomie (siehe http://www.ids-mannheim.de/kl/projekte/methoden/te.html)</h.bibl>
- <category id="topic.fiktion">
- <catDesc>Fiktion</catDesc>
- <category id="topic.fiktion.vermischtes">
- <catDesc>Fiktion:Vermischtes</catDesc>
- </category>
- </category>
- <category id="topic.freizeit-unterhaltung">
- <catDesc>Freizeit_Unterhaltung</catDesc>
- <category id="topic.freizeit-unterhaltung.reisen">
- <catDesc>Freizeit_Unterhaltung:Reisen</catDesc>
- </category>
- <category id="topic.freizeit-unterhaltung.rundfunk">
- <catDesc>Freizeit_Unterhaltung:Rundfunk</catDesc>
- </category>
- <category id="topic.freizeit-unterhaltung.vereine-veranstaltungen">
- <catDesc>Freizeit_Unterhaltung:Vereine_Veranstaltungen</catDesc>
- </category>
- </category>
- <category id="topic.gesundheit-ernaehrung">
- <catDesc>Gesundheit_Ernaehrung</catDesc>
- <category id="topic.gesundheit-ernaehrung.ernaehrung">
- <catDesc>Gesundheit_Ernaehrung:Ernaehrung</catDesc>
- </category>
- <category id="topic.gesundheit-ernaehrung.gesundheit">
- <catDesc>Gesundheit_Ernaehrung:Gesundheit</catDesc>
- </category>
- </category>
- <category id="topic.kultur">
- <catDesc>Kultur</catDesc>
- <category id="topic.kultur.bildende-kunst">
- <catDesc>Kultur:Bildende Kunst</catDesc>
- </category>
- <category id="topic.kultur.darstellende-kunst">
- <catDesc>Kultur:Darstellende Kunst</catDesc>
- </category>
- <category id="topic.kultur.film">
- <catDesc>Kultur:Film</catDesc>
- </category>
- <category id="topic.kultur.literatur">
- <catDesc>Kultur:Literatur</catDesc>
- </category>
- <category id="topic.kultur.mode">
- <catDesc>Kultur:Mode</catDesc>
- </category>
- <category id="topic.kultur.musik">
- <catDesc>Kultur:Musik</catDesc>
- </category>
- </category>
- <category id="topic.natur-umwelt">
- <catDesc>Natur_Umwelt</catDesc>
- <category id="topic.natur-umwelt.garten">
- <catDesc>Natur_Umwelt:Garten</catDesc>
- </category>
- <category id="topic.natur-umwelt.tiere">
- <catDesc>Natur_Umwelt:Tiere</catDesc>
- </category>
- <category id="topic.natur-umwelt.wetter-klima">
- <catDesc>Natur_Umwelt:Wetter_Klima</catDesc>
- </category>
- </category>
- <category id="topic.politik">
- <catDesc>Politik</catDesc>
- <category id="topic.politik.ausland">
- <catDesc>Politik:Ausland</catDesc>
- </category>
- <category id="topic.politik.inland">
- <catDesc>Politik:Inland</catDesc>
- </category>
- <category id="topic.politik.kommunalpolitik">
- <catDesc>Politik:Kommunalpolitik</catDesc>
- </category>
- </category>
- <category id="topic.rest">
- <catDesc>Rest</catDesc>
- <category id="topic.rest.boersenkurse">
- <catDesc>Rest:boersenkurse</catDesc>
- </category>
- <category id="topic.rest.geburt-tod-heirat">
- <catDesc>Rest:geburt_tod_heirat</catDesc>
- </category>
- <category id="topic.rest.impressum">
- <catDesc>Rest:impressum</catDesc>
- </category>
- <category id="topic.rest.inhaltsverzeichnisse">
- <catDesc>Rest:inhaltsverzeichnisse</catDesc>
- </category>
- <category id="topic.rest.ligatabellen">
- <catDesc>Rest:ligatabellen</catDesc>
- </category>
- <category id="topic.rest.tabellen">
- <catDesc>Rest:tabellen</catDesc>
- </category>
- <category id="topic.rest.veranstaltungshinweise">
- <catDesc>Rest:veranstaltungshinweise</catDesc>
- </category>
- </category>
- <category id="topic.sport">
- <catDesc>Sport</catDesc>
- <category id="topic.sport.ballsport">
- <catDesc>Sport:Ballsport</catDesc>
- </category>
- <category id="topic.sport.fussball">
- <catDesc>Sport:Fussball</catDesc>
- </category>
- <category id="topic.sport.motorsport">
- <catDesc>Sport:Motorsport</catDesc>
- </category>
- <category id="topic.sport.radsport">
- <catDesc>Sport:Radsport</catDesc>
- </category>
- <category id="topic.sport.tennis">
- <catDesc>Sport:Tennis</catDesc>
- </category>
- <category id="topic.sport.vermischtes">
- <catDesc>Sport:Vermischtes</catDesc>
- </category>
- <category id="topic.sport.wintersport">
- <catDesc>Sport:Wintersport</catDesc>
- </category>
- </category>
- <category id="topic.staat-gesellschaft">
- <catDesc>Staat_Gesellschaft</catDesc>
- <category id="topic.staat-gesellschaft.arbeit-und-beruf">
- <catDesc>Staat_Gesellschaft:Arbeit_und_Beruf</catDesc>
- </category>
- <category id="topic.staat-gesellschaft.bildung">
- <catDesc>Staat_Gesellschaft:Bildung</catDesc>
- </category>
- <category id="topic.staat-gesellschaft.biographien-interviews">
- <catDesc>Staat_Gesellschaft:Biographien_Interviews</catDesc>
- </category>
- <category id="topic.staat-gesellschaft.drittes-reich-rechtsextremismus">
- <catDesc>Staat_Gesellschaft:Drittes_Reich_Rechtsextremismus</catDesc>
- </category>
- <category id="topic.staat-gesellschaft.familie-geschlecht">
- <catDesc>Staat_Gesellschaft:Familie_Geschlecht</catDesc>
- </category>
- <category id="topic.staat-gesellschaft.kirche">
- <catDesc>Staat_Gesellschaft:Kirche</catDesc>
- </category>
- <category id="topic.staat-gesellschaft.recht">
- <catDesc>Staat_Gesellschaft:Recht</catDesc>
- </category>
- <category id="topic.staat-gesellschaft.tod">
- <catDesc>Staat_Gesellschaft:Tod</catDesc>
- </category>
- <category id="topic.staat-gesellschaft.verbrechen">
- <catDesc>Staat_Gesellschaft:Verbrechen</catDesc>
- </category>
- </category>
- <category id="topic.technik-industrie">
- <catDesc>Technik_Industrie</catDesc>
- <category id="topic.technik-industrie.edv-elektronik">
- <catDesc>Technik_Industrie:EDV_Elektronik</catDesc>
- </category>
- <category id="topic.technik-industrie.kfz">
- <catDesc>Technik_Industrie:Kfz</catDesc>
- </category>
- <category id="topic.technik-industrie.transport-verkehr">
- <catDesc>Technik_Industrie:Transport_Verkehr</catDesc>
- </category>
- <category id="topic.technik-industrie.umweltschutz">
- <catDesc>Technik_Industrie:Umweltschutz</catDesc>
- </category>
- <category id="topic.technik-industrie.unfaelle">
- <catDesc>Technik_Industrie:Unfaelle</catDesc>
- </category>
- </category>
- <category id="topic.wirtschaft-finanzen">
- <catDesc>Wirtschaft_Finanzen</catDesc>
- <category id="topic.wirtschaft-finanzen.banken">
- <catDesc>Wirtschaft_Finanzen:Banken</catDesc>
- </category>
- <category id="topic.wirtschaft-finanzen.bilanzen">
- <catDesc>Wirtschaft_Finanzen:Bilanzen</catDesc>
- </category>
- <category id="topic.wirtschaft-finanzen.oeffentliche-finanzen">
- <catDesc>Wirtschaft_Finanzen:Oeffentliche_Finanzen</catDesc>
- </category>
- <category id="topic.wirtschaft-finanzen.sozialprodukt">
- <catDesc>Wirtschaft_Finanzen:Sozialprodukt</catDesc>
- </category>
- <category id="topic.wirtschaft-finanzen.waehrung">
- <catDesc>Wirtschaft_Finanzen:Waehrung</catDesc>
- </category>
- </category>
- <category id="topic.wissenschaft">
- <catDesc>Wissenschaft</catDesc>
- <category id="topic.wissenschaft.populaerwissenschaft">
- <catDesc>Wissenschaft:Populaerwissenschaft</catDesc>
- </category>
- </category>
- <category id="topic.unklassifizierbar">
- <catDesc>Text ist thematisch nicht klassifizierbar.</catDesc>
- </category>
- </taxonomy>
- </classDecl>
- </encodingDesc>
- <profileDesc>
- <langUsage>
- <language id="de" usage="100">Deutsch</language>
- </langUsage>
- <textDesc/>
- </profileDesc>
- </idsHeader>
\ No newline at end of file
+ <transduction>$Id: fixPreXCES.l 1194 2008-04-30 14:16:53Z kupietz $</transduction>
+ <transduction>$Id: idsces2idsxces 1843 2011-09-13 14:52:21Z kupietz $</transduction>
+ <pagination type="no"/>
+ </editorialDecl>
+ <classDecl>
+ <taxonomy id="topic">
+ <h.bibl>Thementaxonomie (siehe http://www.ids-mannheim.de/kl/projekte/methoden/te.html)</h.bibl>
+ <category id="topic.fiktion">
+ <catDesc>Fiktion</catDesc>
+ <category id="topic.fiktion.vermischtes">
+ <catDesc>Fiktion:Vermischtes</catDesc>
+ </category>
+ </category>
+ <category id="topic.freizeit-unterhaltung">
+ <catDesc>Freizeit_Unterhaltung</catDesc>
+ <category id="topic.freizeit-unterhaltung.reisen">
+ <catDesc>Freizeit_Unterhaltung:Reisen</catDesc>
+ </category>
+ <category id="topic.freizeit-unterhaltung.rundfunk">
+ <catDesc>Freizeit_Unterhaltung:Rundfunk</catDesc>
+ </category>
+ <category id="topic.freizeit-unterhaltung.vereine-veranstaltungen">
+ <catDesc>Freizeit_Unterhaltung:Vereine_Veranstaltungen</catDesc>
+ </category>
+ </category>
+ <category id="topic.gesundheit-ernaehrung">
+ <catDesc>Gesundheit_Ernaehrung</catDesc>
+ <category id="topic.gesundheit-ernaehrung.ernaehrung">
+ <catDesc>Gesundheit_Ernaehrung:Ernaehrung</catDesc>
+ </category>
+ <category id="topic.gesundheit-ernaehrung.gesundheit">
+ <catDesc>Gesundheit_Ernaehrung:Gesundheit</catDesc>
+ </category>
+ </category>
+ <category id="topic.kultur">
+ <catDesc>Kultur</catDesc>
+ <category id="topic.kultur.bildende-kunst">
+ <catDesc>Kultur:Bildende Kunst</catDesc>
+ </category>
+ <category id="topic.kultur.darstellende-kunst">
+ <catDesc>Kultur:Darstellende Kunst</catDesc>
+ </category>
+ <category id="topic.kultur.film">
+ <catDesc>Kultur:Film</catDesc>
+ </category>
+ <category id="topic.kultur.literatur">
+ <catDesc>Kultur:Literatur</catDesc>
+ </category>
+ <category id="topic.kultur.mode">
+ <catDesc>Kultur:Mode</catDesc>
+ </category>
+ <category id="topic.kultur.musik">
+ <catDesc>Kultur:Musik</catDesc>
+ </category>
+ </category>
+ <category id="topic.natur-umwelt">
+ <catDesc>Natur_Umwelt</catDesc>
+ <category id="topic.natur-umwelt.garten">
+ <catDesc>Natur_Umwelt:Garten</catDesc>
+ </category>
+ <category id="topic.natur-umwelt.tiere">
+ <catDesc>Natur_Umwelt:Tiere</catDesc>
+ </category>
+ <category id="topic.natur-umwelt.wetter-klima">
+ <catDesc>Natur_Umwelt:Wetter_Klima</catDesc>
+ </category>
+ </category>
+ <category id="topic.politik">
+ <catDesc>Politik</catDesc>
+ <category id="topic.politik.ausland">
+ <catDesc>Politik:Ausland</catDesc>
+ </category>
+ <category id="topic.politik.inland">
+ <catDesc>Politik:Inland</catDesc>
+ </category>
+ <category id="topic.politik.kommunalpolitik">
+ <catDesc>Politik:Kommunalpolitik</catDesc>
+ </category>
+ </category>
+ <category id="topic.rest">
+ <catDesc>Rest</catDesc>
+ <category id="topic.rest.boersenkurse">
+ <catDesc>Rest:boersenkurse</catDesc>
+ </category>
+ <category id="topic.rest.geburt-tod-heirat">
+ <catDesc>Rest:geburt_tod_heirat</catDesc>
+ </category>
+ <category id="topic.rest.impressum">
+ <catDesc>Rest:impressum</catDesc>
+ </category>
+ <category id="topic.rest.inhaltsverzeichnisse">
+ <catDesc>Rest:inhaltsverzeichnisse</catDesc>
+ </category>
+ <category id="topic.rest.ligatabellen">
+ <catDesc>Rest:ligatabellen</catDesc>
+ </category>
+ <category id="topic.rest.tabellen">
+ <catDesc>Rest:tabellen</catDesc>
+ </category>
+ <category id="topic.rest.veranstaltungshinweise">
+ <catDesc>Rest:veranstaltungshinweise</catDesc>
+ </category>
+ </category>
+ <category id="topic.sport">
+ <catDesc>Sport</catDesc>
+ <category id="topic.sport.ballsport">
+ <catDesc>Sport:Ballsport</catDesc>
+ </category>
+ <category id="topic.sport.fussball">
+ <catDesc>Sport:Fussball</catDesc>
+ </category>
+ <category id="topic.sport.motorsport">
+ <catDesc>Sport:Motorsport</catDesc>
+ </category>
+ <category id="topic.sport.radsport">
+ <catDesc>Sport:Radsport</catDesc>
+ </category>
+ <category id="topic.sport.tennis">
+ <catDesc>Sport:Tennis</catDesc>
+ </category>
+ <category id="topic.sport.vermischtes">
+ <catDesc>Sport:Vermischtes</catDesc>
+ </category>
+ <category id="topic.sport.wintersport">
+ <catDesc>Sport:Wintersport</catDesc>
+ </category>
+ </category>
+ <category id="topic.staat-gesellschaft">
+ <catDesc>Staat_Gesellschaft</catDesc>
+ <category id="topic.staat-gesellschaft.arbeit-und-beruf">
+ <catDesc>Staat_Gesellschaft:Arbeit_und_Beruf</catDesc>
+ </category>
+ <category id="topic.staat-gesellschaft.bildung">
+ <catDesc>Staat_Gesellschaft:Bildung</catDesc>
+ </category>
+ <category id="topic.staat-gesellschaft.biographien-interviews">
+ <catDesc>Staat_Gesellschaft:Biographien_Interviews</catDesc>
+ </category>
+ <category id="topic.staat-gesellschaft.drittes-reich-rechtsextremismus">
+ <catDesc>Staat_Gesellschaft:Drittes_Reich_Rechtsextremismus</catDesc>
+ </category>
+ <category id="topic.staat-gesellschaft.familie-geschlecht">
+ <catDesc>Staat_Gesellschaft:Familie_Geschlecht</catDesc>
+ </category>
+ <category id="topic.staat-gesellschaft.kirche">
+ <catDesc>Staat_Gesellschaft:Kirche</catDesc>
+ </category>
+ <category id="topic.staat-gesellschaft.recht">
+ <catDesc>Staat_Gesellschaft:Recht</catDesc>
+ </category>
+ <category id="topic.staat-gesellschaft.tod">
+ <catDesc>Staat_Gesellschaft:Tod</catDesc>
+ </category>
+ <category id="topic.staat-gesellschaft.verbrechen">
+ <catDesc>Staat_Gesellschaft:Verbrechen</catDesc>
+ </category>
+ </category>
+ <category id="topic.technik-industrie">
+ <catDesc>Technik_Industrie</catDesc>
+ <category id="topic.technik-industrie.edv-elektronik">
+ <catDesc>Technik_Industrie:EDV_Elektronik</catDesc>
+ </category>
+ <category id="topic.technik-industrie.kfz">
+ <catDesc>Technik_Industrie:Kfz</catDesc>
+ </category>
+ <category id="topic.technik-industrie.transport-verkehr">
+ <catDesc>Technik_Industrie:Transport_Verkehr</catDesc>
+ </category>
+ <category id="topic.technik-industrie.umweltschutz">
+ <catDesc>Technik_Industrie:Umweltschutz</catDesc>
+ </category>
+ <category id="topic.technik-industrie.unfaelle">
+ <catDesc>Technik_Industrie:Unfaelle</catDesc>
+ </category>
+ </category>
+ <category id="topic.wirtschaft-finanzen">
+ <catDesc>Wirtschaft_Finanzen</catDesc>
+ <category id="topic.wirtschaft-finanzen.banken">
+ <catDesc>Wirtschaft_Finanzen:Banken</catDesc>
+ </category>
+ <category id="topic.wirtschaft-finanzen.bilanzen">
+ <catDesc>Wirtschaft_Finanzen:Bilanzen</catDesc>
+ </category>
+ <category id="topic.wirtschaft-finanzen.oeffentliche-finanzen">
+ <catDesc>Wirtschaft_Finanzen:Oeffentliche_Finanzen</catDesc>
+ </category>
+ <category id="topic.wirtschaft-finanzen.sozialprodukt">
+ <catDesc>Wirtschaft_Finanzen:Sozialprodukt</catDesc>
+ </category>
+ <category id="topic.wirtschaft-finanzen.waehrung">
+ <catDesc>Wirtschaft_Finanzen:Waehrung</catDesc>
+ </category>
+ </category>
+ <category id="topic.wissenschaft">
+ <catDesc>Wissenschaft</catDesc>
+ <category id="topic.wissenschaft.populaerwissenschaft">
+ <catDesc>Wissenschaft:Populaerwissenschaft</catDesc>
+ </category>
+ </category>
+ <category id="topic.unklassifizierbar">
+ <catDesc>Text ist thematisch nicht klassifizierbar.</catDesc>
+ </category>
+ </taxonomy>
+ </classDecl>
+ </encodingDesc>
+ <profileDesc>
+ <langUsage>
+ <language id="de" usage="100">Deutsch</language>
+ </langUsage>
+ <textDesc/>
+ </profileDesc>
+ </idsHeader>
\ No newline at end of file
diff --git a/t/meta_caching.t b/t/meta_caching.t
new file mode 100644
index 0000000..4ee4cb2
--- /dev/null
+++ b/t/meta_caching.t
@@ -0,0 +1,59 @@
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Mojo::Cache;
+use Cache::FastMmap;
+use lib 'lib', '../lib';
+use Data::Dumper;
+
+use File::Temp qw/tmpnam/;
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+# Check that parsing a document stores corpus and document
+# metadata in the cache passed to the constructor.
+use_ok('KorAP::XML::Krill');
+
+# tmpnam() supplies the file name for the cache's share file
+my $file = tmpnam();
+
+my $cache = Cache::FastMmap->new(
+ share_file => $file,
+ cache_size => '10m'
+);
+
+my $path = catdir(dirname(__FILE__), qw/corpus REI BNG 00128/);
+ok(my $doc = KorAP::XML::Krill->new(
+ path => $path,
+ meta_type => 'I5',
+ cache => $cache
+), 'Get doc');
+
+# Quote the path so it is matched literally
+like($doc->path, qr!\Q$path\E/!, 'Path');
+
+# Nothing is cached before the document is parsed
+ok(!$cache->get('REI'), 'No REI set');
+ok(!$cache->get('REI/BNG'), 'No REI/BNG set');
+ok($doc->parse, 'Parse doc');
+ok($cache->get('REI'), 'REI set');
+ok($cache->get('REI/BNG'), 'REI/BNG set');
+
+
+# REI
+my $rei = $cache->get('REI');
+is($rei->{availability}, 'CC-BY-SA', 'REI availability');
+is($rei->{language}, 'de', 'REI language');
+is($rei->{corpus_title}, 'Reden und Interviews', 'REI corpus title');
+
+# REI/BNG
+my $rei_bng = $cache->get('REI/BNG');
+
+is($rei_bng->{availability}, 'CC-BY-SA', 'REI/BNG availability');
+is($rei_bng->{language}, 'de', 'REI/BNG language');
+is($rei_bng->{corpus_title}, 'Reden und Interviews', 'REI/BNG corpus title');
+is($rei_bng->{doc_title}, 'Reden der Bundestagsfraktion Bündnis 90/DIE GRÜNEN, (2002-2006)', 'REI/BNG doc title');
+
+done_testing;
+__END__