Fixed Windows support
Change-Id: I042ab736bb0fc6e7dce17c330b3bc663be60cc79
diff --git a/lib/KorAP/XML/Annotation/Glemm/Morpho.pm b/lib/KorAP/XML/Annotation/Glemm/Morpho.pm
index f96aee5..c4ccea5 100644
--- a/lib/KorAP/XML/Annotation/Glemm/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/Glemm/Morpho.pm
@@ -1,5 +1,7 @@
package KorAP::XML::Annotation::Glemm::Morpho;
use KorAP::XML::Annotation::Base;
+use strict;
+use warnings;
sub parse {
my $self = shift;
@@ -9,15 +11,16 @@
layer => 'morpho',
cb => sub {
my ($stream, $token) = @_;
+
my $mtt = $stream->pos($token->pos);
- my $content = $token->hash->{fs}->{f} or return;
+ my $content = $token->hash->{'fs'}->{'f'} or return;
# All interpretations
foreach (ref $content eq 'ARRAY' ? @$content : $content) {
# All features
- $content = $_->{fs}->{f};
+ $content = $_->{'fs'}->{'f'};
my $lemma;
my ($composition, $derivation) = (0,0);
diff --git a/lib/KorAP/XML/Batch/File.pm b/lib/KorAP/XML/Batch/File.pm
index 76a4a46..0f56b3c 100644
--- a/lib/KorAP/XML/Batch/File.pm
+++ b/lib/KorAP/XML/Batch/File.pm
@@ -77,7 +77,7 @@
$file = IO::Compress::Gzip->new($output, TextFlag => 1, Minimal => 1);
}
else {
- $file = IO::File->new($output, "w");
+ $file = IO::File->new($output, "w"); # NOTE(review): leftover alternative mode '>:encoding(UTF-8)' - confirm whether an encoding layer is needed
};
# Write to output
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index ba99cec..f94c07b 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -15,7 +15,7 @@
use Data::Dumper;
use File::Spec::Functions qw/catdir catfile catpath splitdir splitpath rel2abs/;
-our $VERSION = '0.20';
+our $VERSION = '0.21';
has 'path';
has [qw/text_sigle doc_sigle corpus_sigle/];
@@ -66,7 +66,6 @@
}
else {
-
# Load file
$file = b($data_xml)->slurp;
try {
@@ -109,7 +108,7 @@
$self->log->warn($unable . ': No primary data found');
return;
};
-
+
# Associate primary data
$self->{pd} = KorAP::XML::Document::Primary->new($pd);
@@ -119,7 +118,8 @@
# Parse the corpus file, the doc file,
# and the text file for meta information
foreach (0..2) {
- unshift @header, '/' . catfile(@path, 'header.xml');
+ # No leading '/' is prepended to the header path any more
+ unshift @header, catfile(@path, 'header.xml');
pop @path;
};
diff --git a/lib/KorAP/XML/Tokenizer.pm b/lib/KorAP/XML/Tokenizer.pm
index 5b9ee6f..27fcd24 100644
--- a/lib/KorAP/XML/Tokenizer.pm
+++ b/lib/KorAP/XML/Tokenizer.pm
@@ -336,7 +336,6 @@
return;
};
-
$self->log->trace(
($param{skip} ? 'Skip' : 'Add').' token data '.$param{foundry}.':'.$param{layer}
);
@@ -397,8 +396,9 @@
my $mod = 'KorAP::XML::Annotation::' . $foundry . '::' . $layer;
if ($mod->can('new') || eval("require $mod; 1;")) {
-
- if (my $retval = $mod->new($self)->parse(@_)) {
+ my $obj = $mod->new($self);
+
+ if (my $retval = $obj->parse(@_)) {
# This layer is supported
$self->support($foundry => $layer, @_);
diff --git a/lib/KorAP/XML/Tokenizer/Tokens.pm b/lib/KorAP/XML/Tokenizer/Tokens.pm
index 50d62fa..49ca7b6 100644
--- a/lib/KorAP/XML/Tokenizer/Tokens.pm
+++ b/lib/KorAP/XML/Tokenizer/Tokens.pm
@@ -3,6 +3,7 @@
use Mojo::ByteStream 'b';
use KorAP::XML::Tokenizer::Token;
use Carp qw/croak carp/;
+use File::Spec::Functions qw/catdir catfile/;
use XML::Fast;
use Try::Tiny;
@@ -13,7 +14,8 @@
sub parse {
my $self = shift;
- my $path = $self->path . $self->foundry . '/' . $self->layer . '.xml';
+ # Formerly: $self->path . $self->foundry . '/' . $self->layer . '.xml' (hard-coded '/' separator)
+ my $path = catfile($self->path, $self->foundry, $self->layer . '.xml');
# Legacy data support
unless (-e $path) {
@@ -22,7 +24,7 @@
return unless -e $path;
}
elsif ($self->layer eq 'morpho' && $self->foundry eq 'glemm') {
- $path = $self->path . $self->foundry . '/glemm.xml';
+ $path = catfile($self->path, $self->foundry, 'glemm.xml');
return unless -e $path;
}
else {
@@ -34,8 +36,8 @@
# Bug workaround
if ($self->foundry eq 'glemm') {
- if (index($file, "</span\n") > 0) {
- $file =~ s!</span$!</span>!gm
+ if (index($file, "</span\n") > 0 || index($file, "</span\r") > 0) {
+ $file =~ s!</span[\n\r]!</span>\n!g;
};
};