Move get_file_name() as a function to Krill.pm
Change-Id: Ia61423c62d227333d4bde59bb902aba8a1f2fd1b
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index 2dfa32e..58d8c05 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -15,7 +15,7 @@
use File::Spec::Functions qw/catdir catfile catpath splitdir splitpath rel2abs/;
use Exporter 'import';
-our @EXPORT_OK = qw(get_file_name_from_glob);
+our @EXPORT_OK = qw(get_file_name get_file_name_from_glob);
our $VERSION = '0.41';
@@ -316,6 +316,28 @@
};
+# Derive a flat file name from path information.
+#
+# Parameters:
+#   $i    - base input path; if it is an existing directory, its trailing
+#           non-slash characters (everything after the last slash) are
+#           dropped before it is used as the prefix to strip
+#   $file - path of the text to derive the name for
+#
+# Returns $file with a leading temp dir fragment and the base path
+# (optionally preceded by a slash) removed, all slashes turned into
+# dashes and leading dashes dropped. If the remainder still has at
+# least four dash-separated parts, the first part is cut off.
+sub get_file_name ($$) {
+  my $i = shift;
+
+  # Check if the base dir is a directory
+  if (-d $i) {
+
+    # Drop everything following the last slash
+    $i =~ s![^\/]+$!!;
+  };
+  my $file = shift;
+
+  # Remove temp dir fragments
+  $file =~ s!^/?tmp/[^/]+!!;
+
+  # Remove the base path. \Q...\E makes the path match literally, so
+  # regex metacharacters in directory names (e.g. '+', '(' or '.')
+  # neither break the pattern nor match unintended text.
+  $file =~ s/^?\/?\Q$i\E//;
+  $file =~ tr/\//-/;
+  $file =~ s{^-+}{};
+  $file =~ s/^.*?-(.+?-.+?-.+?)$/$1/; # shorten
+  return $file;
+};
+
+
1;
diff --git a/script/korapxml2krill b/script/korapxml2krill
index 0138423..7d246ec 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -12,7 +12,7 @@
use Pod::Usage;
use Cache::FastMmap;
use Directory::Iterator;
-use KorAP::XML::Krill qw!get_file_name_from_glob!;
+use KorAP::XML::Krill qw!get_file_name get_file_name_from_glob!;
use KorAP::XML::Archive;
use KorAP::XML::Tokenizer;
use KorAP::XML::Batch::File;
@@ -162,7 +162,7 @@
VERSION
# Prototypes
-sub get_file_name($);
+sub get_file_name($$);
# Parse comand
my $cmd;
@@ -614,24 +614,6 @@
non_verbal_tokens => $non_verbal_tokens
);
-# Get file name based on path information
-sub get_file_name ($) {
- my $i = $input[0];
- if (-d $i) {
- $i =~ s![^\/]+$!!;
- };
- my $file = shift;
-
- # Remove temp dir fragments
- $file =~ s!^/?tmp/[^/]+!!;
- $file =~ s/^?\/?$i//;
- $file =~ tr/\//-/;
- $file =~ s{^-+}{};
- $file =~ s/^.*?-(.+?-.+?-.+?)$/$1/;
- return $file;
-};
-
-
# Convert sigle to path construct
s!^\s*([^_]+?)_([^\.]+?)\.(.+?)\s*$!$1/$2/$3! foreach @sigle;
@@ -897,7 +879,7 @@
my $filename = catfile(
$output_dir,
- get_file_name($dirs[$i]) . '.json' . ($gzip ? '.gz' : '')
+ get_file_name($input[0], $dirs[$i]) . '.json' . ($gzip ? '.gz' : '')
);
# Get the next fork
@@ -947,6 +929,7 @@
my $filename = catfile(
$output_dir,
get_file_name(
+ $input[0],
catfile($corpus, $doc, $text)
. '.json' . ($gzip ? '.gz' : '')
)