Support regex definition for virtual corpora
Change-Id: Iecf55d050f02b019c2591f100cd4d45cb90488a7
diff --git a/t/regex2vc.t b/t/regex2vc.t
new file mode 100644
index 0000000..fbdb721
--- /dev/null
+++ b/t/regex2vc.t
@@ -0,0 +1,59 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Test::More;
+use KorAP::DefList;
+use Mojo::JSON 'decode_json';
+
+# Helper: run KorAP::DefList::from_regex on the given arguments and return
+# the "collection" node of the JSON document the result stringifies to.
+sub _collection {
+  return decode_json(KorAP::DefList::from_regex(@_)->to_string)->{collection};
+}
+
+my $doc = _collection("x",'Name','Beschreibung');    # bare sigle plus name and description
+is($doc->{key}, 'corpusSigle', 'bare sigle: matched against corpusSigle');
+is($doc->{value}, 'x', 'bare sigle: value is kept as given');
+is($doc->{type}, 'type:string', 'bare sigle: compared as a string');
+is($doc->{comment}, 'name:"Name",desc:"Beschreibung"', 'bare sigle: name and description end up in the comment');
+
+$doc = _collection("x/");    # the same sigle, written with a trailing slash
+is($doc->{key}, 'corpusSigle', 'trailing slash: still matched against corpusSigle');
+is($doc->{value}, 'x', 'trailing slash: slash is not part of the value');
+is($doc->{type}, 'type:string', 'trailing slash: still compared as a string');
+
+$doc = _collection("x[0-3]",'Na"me','Besch"re\'ibung');    # character class, quotes in the metadata
+is($doc->{key}, 'corpusSigle', 'character class: matched against corpusSigle');
+is($doc->{value}, 'x[0-3]', 'character class: pattern is kept as given');
+is($doc->{type}, 'type:regex', 'character class: compared as a regex');
+is($doc->{comment}, 'name:"Na\"me",desc:"Besch\"re\'ibung"', 'character class: double quotes in the comment are backslash-escaped');
+
+$doc = _collection('x[0-3]/');    # character class, written with a trailing slash
+is($doc->{key}, 'corpusSigle', 'regex with trailing slash: still matched against corpusSigle');
+is($doc->{value}, 'x[0-3]', 'regex with trailing slash: slash is not part of the value');
+is($doc->{type}, 'type:regex', 'regex with trailing slash: still compared as a regex');
+
+$doc = _collection('BIO/(BKA|LTI|TK1|TK2|TK3|TK4|TK5|TK6)');    # corpus sigle followed by document alternatives
+is($doc->{key}, 'docSigle', 'corpus/document pattern: matched against docSigle');
+is($doc->{value}, 'BIO/(BKA|LTI|TK1|TK2|TK3|TK4|TK5|TK6)', 'corpus/document pattern: pattern is kept as given');
+is($doc->{type}, 'type:regex', 'corpus/document pattern: compared as a regex');
+
+$doc = _collection('(GOE/(AGD|AGM|AGN|AGV|AGW))|(MK1/(LBC|LBT|LFH|LGB|LJA|LMB|LSO|MHE|TJM|TPM))|(MK2/TRI)');    # several corpus/document patterns
+is($doc->{key}, 'docSigle', 'pattern alternation: matched against docSigle');
+is($doc->{value}, '(GOE/(AGD|AGM|AGN|AGV|AGW))|(MK1/(LBC|LBT|LFH|LGB|LJA|LMB|LSO|MHE|TJM|TPM))|(MK2/TRI)', 'pattern alternation: pattern is kept as given');
+is($doc->{type}, 'type:regex', 'pattern alternation: compared as a regex');
+
+# A "~" in the definition: the part before it is expected as an eq operand, the part after it as an ne operand.
+$doc = _collection('FSP/~FSP/(ANG|ANR|EIN|GEB|KAR|REI|SCH|TYP|VER|VID)','fsp-pub','Fachsprachenkorpus');
+is($doc->{'@type'}, 'koral:docGroup', 'exclusion: result is a document group');
+is($doc->{'operation'}, 'operation:and', 'exclusion: operands are combined with and');
+is($doc->{operands}->[0]->{type}, 'type:string', 'exclusion: first operand is compared as a string');
+is($doc->{operands}->[0]->{match}, 'match:eq', 'exclusion: first operand has to match');
+is($doc->{operands}->[0]->{key}, 'corpusSigle', 'exclusion: first operand is matched against corpusSigle');
+is($doc->{operands}->[0]->{value}, 'FSP', 'exclusion: first operand is the corpus before the tilde');
+is($doc->{operands}->[1]->{type}, 'type:regex', 'exclusion: second operand is compared as a regex');
+is($doc->{operands}->[1]->{match}, 'match:ne', 'exclusion: second operand must not match');
+is($doc->{operands}->[1]->{key}, 'docSigle', 'exclusion: second operand is matched against docSigle');
+is($doc->{operands}->[1]->{value}, 'FSP/(ANG|ANR|EIN|GEB|KAR|REI|SCH|TYP|VER|VID)', 'exclusion: second operand is the pattern after the tilde');
+
+done_testing;