Fixed Windows support

Change-Id: I00796d75841ce2728f5bc9bd0524275bfb4f7ef2
diff --git a/.gitignore b/.gitignore
index 2aee81a..827ad4c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,7 @@
 t/corpus/BRZ13
 nytprof.out
 nytprof
+\#*#
 *.tar.gz
 *~
 *.sqlite
diff --git a/Changes b/Changes
index 140240b..f3f575a 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+0.21 2016-10-24
+        - Improved Windows support
+
 0.20 2016-10-15
         - Fixed treatment of temporary folders in script
 
diff --git a/lib/KorAP/XML/Annotation/Glemm/Morpho.pm b/lib/KorAP/XML/Annotation/Glemm/Morpho.pm
index f96aee5..c4ccea5 100644
--- a/lib/KorAP/XML/Annotation/Glemm/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/Glemm/Morpho.pm
@@ -1,5 +1,7 @@
 package KorAP::XML::Annotation::Glemm::Morpho;
 use KorAP::XML::Annotation::Base;
+use strict;
+use warnings;
 
 sub parse {
   my $self = shift;
@@ -9,15 +11,16 @@
     layer => 'morpho',
     cb => sub {
       my ($stream, $token) = @_;
+
       my $mtt = $stream->pos($token->pos);
 
-      my $content = $token->hash->{fs}->{f} or return;
+      my $content = $token->hash->{'fs'}->{'f'} or return;
 
       # All interpretations
       foreach (ref $content eq 'ARRAY' ? @$content : $content) {
 
 	# All features
-	$content = $_->{fs}->{f};
+	$content = $_->{'fs'}->{'f'};
 
 	my $lemma;
 	my ($composition, $derivation) = (0,0);
diff --git a/lib/KorAP/XML/Batch/File.pm b/lib/KorAP/XML/Batch/File.pm
index 76a4a46..0f56b3c 100644
--- a/lib/KorAP/XML/Batch/File.pm
+++ b/lib/KorAP/XML/Batch/File.pm
@@ -77,7 +77,7 @@
       $file = IO::Compress::Gzip->new($output, TextFlag => 1, Minimal => 1);
     }
     else {
-      $file = IO::File->new($output, "w");
+      $file = IO::File->new($output, "w"); # NOTE(review): an '>:encoding(UTF-8)' open mode was tried here and left disabled - confirm
     };
 
     # Write to output
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index ba99cec..f94c07b 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -15,7 +15,7 @@
 use Data::Dumper;
 use File::Spec::Functions qw/catdir catfile catpath splitdir splitpath rel2abs/;
 
-our $VERSION = '0.20';
+our $VERSION = '0.21';
 
 has 'path';
 has [qw/text_sigle doc_sigle corpus_sigle/];
@@ -66,7 +66,6 @@
   }
 
   else {
-
     # Load file
     $file = b($data_xml)->slurp;
     try {
@@ -109,7 +108,7 @@
     $self->log->warn($unable . ': No primary data found');
     return;
   };
-
+  
   # Associate primary data
   $self->{pd} = KorAP::XML::Document::Primary->new($pd);
 
@@ -119,7 +118,8 @@
   # Parse the corpus file, the doc file,
   # and the text file for meta information
   foreach (0..2) {
-    unshift @header, '/' . catfile(@path, 'header.xml');
+    # Build the header path without a leading '/' (part of the Windows support fix)
+    unshift @header, catfile(@path, 'header.xml');
     pop @path;
   };
 
diff --git a/lib/KorAP/XML/Tokenizer.pm b/lib/KorAP/XML/Tokenizer.pm
index 5b9ee6f..27fcd24 100644
--- a/lib/KorAP/XML/Tokenizer.pm
+++ b/lib/KorAP/XML/Tokenizer.pm
@@ -336,7 +336,6 @@
     return;
   };
 
-
   $self->log->trace(
     ($param{skip} ? 'Skip' : 'Add').' token data '.$param{foundry}.':'.$param{layer}
   );
@@ -397,8 +396,9 @@
   my $mod = 'KorAP::XML::Annotation::' . $foundry . '::' . $layer;
 
   if ($mod->can('new') || eval("require $mod; 1;")) {
-
-    if (my $retval = $mod->new($self)->parse(@_)) {
+      my $obj = $mod->new($self);
+      
+      if (my $retval = $obj->parse(@_)) {
 
       # This layer is supported
       $self->support($foundry => $layer, @_);
diff --git a/lib/KorAP/XML/Tokenizer/Tokens.pm b/lib/KorAP/XML/Tokenizer/Tokens.pm
index 50d62fa..49ca7b6 100644
--- a/lib/KorAP/XML/Tokenizer/Tokens.pm
+++ b/lib/KorAP/XML/Tokenizer/Tokens.pm
@@ -3,6 +3,7 @@
 use Mojo::ByteStream 'b';
 use KorAP::XML::Tokenizer::Token;
 use Carp qw/croak carp/;
+use File::Spec::Functions qw/catdir catfile/;
 use XML::Fast;
 use Try::Tiny;
 
@@ -13,7 +14,8 @@
 sub parse {
   my $self = shift;
 
-  my $path = $self->path . $self->foundry . '/' . $self->layer . '.xml';
+  # my $path = $self->path . $self->foundry . '/' . $self->layer . '.xml';
+  my $path = catfile($self->path, $self->foundry, $self->layer . '.xml');
 
   # Legacy data support
   unless (-e $path) {
@@ -22,7 +24,7 @@
       return unless -e $path;
     }
     elsif ($self->layer eq 'morpho' && $self->foundry eq 'glemm') {
-      $path = $self->path . $self->foundry . '/glemm.xml';
+      $path = catfile($self->path, $self->foundry, 'glemm.xml');
       return unless -e $path;
     }
     else {
@@ -34,8 +36,8 @@
 
   # Bug workaround
   if ($self->foundry eq 'glemm') {
-    if (index($file, "</span\n") > 0) {
-      $file =~ s!</span$!</span>!gm
+    if (index($file, "</span\n") > 0 || index($file, "</span\r") > 0) {
+	$file =~ s!</span[\n\r]!</span>\n!g;
     };
   };
 
diff --git a/script/korapxml2krill b/script/korapxml2krill
index 97a6918..72ba265 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -65,11 +65,14 @@
 # - Added MDParser#Dependency
 #
 # 2016/10/15
-# - Fixed temporary path issue in script.
+# - Fixed temporary path issue in script
+#
+# 2016/10/24
+# - Improved Windows support
 #
 # ----------------------------------------------------------
 
-our $LAST_CHANGE = '2016/10/15';
+our $LAST_CHANGE = '2016/10/24';
 our $LOCAL = $FindBin::Bin;
 our $VERSION_MSG = <<"VERSION";
 Version $KorAP::XML::Krill::VERSION - diewald\@ids-mannheim.de - $LAST_CHANGE
diff --git a/t/annotation/connexor_sentences.t b/t/annotation/connexor_sentences.t
index f03cec0..0c2891c 100644
--- a/t/annotation/connexor_sentences.t
+++ b/t/annotation/connexor_sentences.t
@@ -17,7 +17,7 @@
   path => $path . '/'
 ), 'Load Korap::Document');
 
-like($doc->path, qr!$path/$!, 'Path');
+like($doc->path, qr!\Q$path\E/$!, 'Path');
 ok($doc->parse, 'Parse document');
 
 ok($doc->primary->data, 'Primary data in existence');
diff --git a/t/annotation/mdp_dependency.t b/t/annotation/mdp_dependency.t
index 5b6858c..30a8708 100644
--- a/t/annotation/mdp_dependency.t
+++ b/t/annotation/mdp_dependency.t
@@ -20,16 +20,22 @@
 use lib 't/annotation';
 use File::Temp qw/tempdir/;
 
-use_ok('KorAP::XML::Annotation::MDParser::Dependency');
-use_ok('KorAP::XML::Archive');
-use_ok('KorAP::XML::Krill');
-use_ok('KorAP::XML::Tokenizer');
+use KorAP::XML::Archive;
 
 my $name = 'wpd15-single';
 my @path = (dirname(__FILE__), '..', 'corpus','archives');
 
 my $file = catfile(@path, $name . '.zip');
-ok(my $archive = KorAP::XML::Archive->new($file), 'Create archive');
+my $archive = KorAP::XML::Archive->new($file);
+
+unless ($archive->test_unzip) {
+  plan skip_all => 'unzip not found';
+};
+
+use_ok('KorAP::XML::Annotation::MDParser::Dependency');
+use_ok('KorAP::XML::Krill');
+use_ok('KorAP::XML::Tokenizer');
+
 
 ok($archive->attach('#' . catfile(@path, $name . '.mdparser.zip')), 'Attach mdparser archive');
 
diff --git a/t/annotation/meta.t b/t/annotation/meta.t
index f641ccf..d4773d7 100644
--- a/t/annotation/meta.t
+++ b/t/annotation/meta.t
@@ -16,7 +16,7 @@
 
 ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
 ok($doc->parse, 'Parse document');
-like($doc->path, qr!$path/!, 'Path');
+like($doc->path, qr!\Q$path\E/!, 'Path');
 
 # Metdata
 is($doc->text_sigle, 'Corpus/Doc/0001', 'ID-text');
diff --git a/t/annotation/primary.t b/t/annotation/primary.t
index 32431d3..1bc23a0 100644
--- a/t/annotation/primary.t
+++ b/t/annotation/primary.t
@@ -15,7 +15,7 @@
 
 ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::XML::Krill');
 ok($doc->parse, 'Parse document');
-like($doc->path, qr!$path/!, 'Path');
+like($doc->path, qr!\Q$path\E/!, 'Path');
 
 is($doc->primary->data,
    'Zum letzten kulturellen Anlass lädt die Leitung des Schulheimes Hofbergli ein, '.
diff --git a/t/archive.t b/t/archive.t
index 9d2ce68..3d40549 100644
--- a/t/archive.t
+++ b/t/archive.t
@@ -6,7 +6,7 @@
 use File::Spec::Functions qw/catfile catdir/;
 use File::Temp qw/tempdir/;
 
-use_ok('KorAP::XML::Archive');
+use KorAP::XML::Archive;
 
 my $file = catfile(dirname(__FILE__), 'corpus','archive.zip');
 my $archive = KorAP::XML::Archive->new($file);
diff --git a/t/batch_file.t b/t/batch_file.t
index 947e1ef..50b59a8 100644
--- a/t/batch_file.t
+++ b/t/batch_file.t
@@ -112,7 +112,7 @@
 ok($bf->process($path => $output), 'Process file');
 ok(-f $output, 'File exists');
 ok($file = slurp $output, 'Slurp data');
-like($file, qr/^\{\n\s+"/, 'No pretty printing');
+like($file, qr/^\{[\n\s]+"/, 'No pretty printing');
 
 # Check overwriting
 $bf->{overwrite} = 0;
diff --git a/t/meta.t b/t/meta.t
index 78be93c..a775cbd 100644
--- a/t/meta.t
+++ b/t/meta.t
@@ -16,10 +16,10 @@
 # WPD/00001
 my $path = catdir(dirname(__FILE__), 'corpus/WPD/00001');
 ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
-like($doc->path, qr!$path/!, 'Path');
+like($doc->path, qr!\Q$path\E/!, 'Path');
 
 ok($doc = KorAP::XML::Krill->new( path => $path ), 'Load Korap::Document');
-like($doc->path, qr!$path/$!, 'Path');
+like($doc->path, qr!\Q$path\E/$!, 'Path');
 
 ok($doc->parse, 'Parse document');
 
@@ -28,6 +28,7 @@
 
 my $meta = $doc->meta;
 is($meta->{title}, 'A', 'title');
+
 ok(!$meta->{sub_title}, 'subTitle');
 is($doc->corpus_sigle, 'WPD', 'corpusID');
 is($meta->{pub_date}, '20050328', 'pubDate');
@@ -39,6 +40,7 @@
 ok(!$meta->{text_class}->[4], 'TextClass');
 is($meta->{author}, 'Ruru; Jens.Ol; Aglarech; u.a.', 'author');
 
+
 #is($doc->author->[0], 'Ruru', 'author');
 #is($doc->author->[1], 'Jens.Ol', 'author');
 #is($doc->author->[2], 'Aglarech', 'author');
@@ -286,10 +288,10 @@
 # Multipath headers
 $path = catdir(dirname(__FILE__), 'corpus/VDI/JAN/00001');
 ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
-like($doc->path, qr!$path/!, 'Path');
+like($doc->path, qr!\Q$path\E/!, 'Path');
 
 ok($doc = KorAP::XML::Krill->new( path => $path ), 'Load Korap::Document');
-like($doc->path, qr!$path/$!, 'Path');
+like($doc->path, qr!\Q$path\E/$!, 'Path');
 
 ok($doc->parse, 'Parse document');
 $meta = $doc->meta;
@@ -339,7 +341,7 @@
 # WDD
 $path = catdir(dirname(__FILE__), 'corpus/WDD/G27/38989');
 ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
-like($doc->path, qr!$path/!, 'Path');
+like($doc->path, qr!\Q$path\E/!, 'Path');
 ok($doc->parse, 'Parse document');
 $meta = $doc->meta;
 
diff --git a/t/meta_caching.t b/t/meta_caching.t
index 4ee4cb2..a0787fa 100644
--- a/t/meta_caching.t
+++ b/t/meta_caching.t
@@ -26,7 +26,7 @@
   cache => $cache
 ), 'Get doc');
 
-like($doc->path, qr!$path/!, 'Path');
+like($doc->path, qr!\Q$path\E/!, 'Path');
 
 ok(!$cache->get('REI'), 'No REI set');
 ok(!$cache->get('REI/BNG'), 'No REI/BNG set');
diff --git a/t/multiple_archives.t b/t/multiple_archives.t
index fbe14e3..7865101 100644
--- a/t/multiple_archives.t
+++ b/t/multiple_archives.t
@@ -6,7 +6,7 @@
 use File::Spec::Functions qw/catfile catdir/;
 use File::Temp qw/tempdir/;
 
-use_ok('KorAP::XML::Archive');
+use KorAP::XML::Archive;
 
 my $name = 'wpd15-single';
 my @path = (dirname(__FILE__), 'corpus','archives');
@@ -15,7 +15,7 @@
 my $archive = KorAP::XML::Archive->new($file);
 
 unless ($archive->test_unzip) {
-  plan skip_all => 'unzip not found';
+    plan skip_all => 'unzip not found';
 };
 
 ok($archive->test, 'Test archive');
diff --git a/t/real/bzk.t b/t/real/bzk.t
index 76d01ca..90a0c58 100644
--- a/t/real/bzk.t
+++ b/t/real/bzk.t
@@ -3,6 +3,7 @@
 use Test::More;
 use Data::Dumper;
 use JSON::XS;
+use Log::Log4perl;
 
 use Benchmark qw/:hireswallclock/;
 
@@ -11,12 +12,19 @@
 use utf8;
 use lib 'lib', '../lib';
 
+#Log::Log4perl->init({
+#  'log4perl.rootLogger' => 'DEBUG, STDERR',
+#  'log4perl.appender.STDERR' => 'Log::Log4perl::Appender::ScreenColoredLevels',
+#  'log4perl.appender.STDERR.layout' => 'PatternLayout',
+#  'log4perl.appender.STDERR.layout.ConversionPattern' => '[%r] %F %L %c - %m%n'
+#});
+
 use File::Basename 'dirname';
 use File::Spec::Functions 'catdir';
 
 use_ok('KorAP::XML::Krill');
 
-my $path = catdir(dirname(__FILE__), '../corpus/BZK/D59/00001');
+my $path = catdir(dirname(__FILE__), '..', 'corpus', 'BZK', 'D59', '00001');
 
 ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
 ok($doc->parse, 'Parse document');
diff --git a/t/script/archive.t b/t/script/archive.t
index 308414a..d797c20 100644
--- a/t/script/archive.t
+++ b/t/script/archive.t
@@ -10,6 +10,7 @@
 use Test::More;
 use Test::Output qw/:stdout :stderr :functions/;
 use Data::Dumper;
+use KorAP::XML::Archive;
 use utf8;
 
 my $f = dirname(__FILE__);
@@ -21,6 +22,10 @@
   'archive'
 );
 
+unless (KorAP::XML::Archive::test_unzip) {
+  plan skip_all => 'unzip not found';
+};
+
 # Test without parameters
 stdout_like(
   sub {
diff --git a/t/script/extract.t b/t/script/extract.t
index 2ea1e13..8e7cdfd 100644
--- a/t/script/extract.t
+++ b/t/script/extract.t
@@ -10,6 +10,7 @@
 use Test::More;
 use Test::Output;
 use Data::Dumper;
+use KorAP::XML::Archive;
 use utf8;
 
 my $f = dirname(__FILE__);
@@ -21,6 +22,10 @@
   'extract'
 );
 
+unless (KorAP::XML::Archive::test_unzip) {
+  plan skip_all => 'unzip not found';
+};
+
 # Test without parameters
 stdout_like(
   sub {
diff --git a/t/script/single.t b/t/script/single.t
index 9d8d28f..40b7940 100644
--- a/t/script/single.t
+++ b/t/script/single.t
@@ -72,6 +72,7 @@
 while ($gz->read($buffer)) {
   $file .= $buffer;
 };
+$gz->close;
 
 ok($json = decode_json($file), 'decode json');
 
@@ -83,8 +84,8 @@
 is($json->{data}->{stream}->[0]->[0], '-:base/paragraphs$<i>1', 'Paragraphs');
 
 # Delete output
-unlink $output;
-ok(!-f $output, 'Output does not exist');
+is(unlink($output), 1, 'Unlink successful');
+ok(!-e $output, 'Output does not exist');
 
 # Use a different token source and skip all annotations,
 # except for DeReKo#Structure and Mate#Dependency
diff --git a/t/sgbr/meta.t b/t/sgbr/meta.t
index 91b0d8d..20b5ad8 100644
--- a/t/sgbr/meta.t
+++ b/t/sgbr/meta.t
@@ -17,7 +17,7 @@
 
 ok($doc->parse, 'Parse document');
 
-like($doc->path, qr!$path/!, 'Path');
+like($doc->path, qr!\Q$path\E/!, 'Path');
 
 # Metdata
 is($doc->text_sigle, 'TEST/BSP/1', 'ID-text');
diff --git a/t/sgbr/meta_duden.t b/t/sgbr/meta_duden.t
index a375307..14f9746 100644
--- a/t/sgbr/meta_duden.t
+++ b/t/sgbr/meta_duden.t
@@ -17,7 +17,7 @@
 
 ok($doc->parse, 'Parse document');
 
-like($doc->path, qr!$path/!, 'Path');
+like($doc->path, qr!\Q$path\E/!, 'Path');
 
 # Metdata
 is($doc->text_sigle, 'PRO-DUD/BSP-2013-01/32', 'ID-text');
diff --git a/t/sgbr/meta_ids.t b/t/sgbr/meta_ids.t
index 5c9c628..8a4cc4e 100644
--- a/t/sgbr/meta_ids.t
+++ b/t/sgbr/meta_ids.t
@@ -17,7 +17,7 @@
 
 ok($doc->parse, 'Parse document');
 
-like($doc->path, qr!$path/!, 'Path');
+like($doc->path, qr!\Q$path\E/!, 'Path');
 
 # Metdata
 is($doc->text_sigle, 'CMC-TSK/2014-09/2843', 'ID-text');
@@ -85,7 +85,7 @@
 
 ok($doc->parse, 'Parse document');
 
-like($doc->path, qr!$path/!, 'Path');
+like($doc->path, qr!\Q$path\E/!, 'Path');
 
 # Metdata
 is($doc->text_sigle, 'CMC-TSK/2014-09/3401', 'ID-text');
diff --git a/t/transform.t b/t/transform.t
index 522b267..798e6c2 100644
--- a/t/transform.t
+++ b/t/transform.t
@@ -70,10 +70,10 @@
 
 my $path = catdir(dirname(__FILE__), 'corpus/WPD/00001');
 ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
-like($doc->path, qr!$path/$!, 'Path');
+like($doc->path, qr!\Q$path\E/$!, 'Path');
 
 ok($doc = KorAP::XML::Krill->new( path => $path ), 'Load Korap::Document');
-like($doc->path, qr!$path/$!, 'Path');
+like($doc->path, qr!\Q$path\E/$!, 'Path');
 
 ok($doc->parse, 'Parse document');
 
@@ -106,7 +106,7 @@
 ), 'New Tokenizer');
 ok($tokens->parse, 'Parse');
 
-like($tokens->path, qr!$path/$!, 'Path');
+like($tokens->path, qr!\Q$path\E/$!, 'Path');
 is($tokens->foundry, 'OpenNLP', 'Foundry');
 is($tokens->doc->text_sigle, 'WPD/AAA/00001', 'Doc id');
 is($tokens->should, 1068, 'Should');