Introduce object mechanism to VCs in vc conversion tool

Change-Id: If5d06a0b9b08b9c39700a0eb471a0edfb0d5daf5
diff --git a/list2vc.pl b/list2vc.pl
index 35a205a..3e31e50 100755
--- a/list2vc.pl
+++ b/list2vc.pl
@@ -1,251 +1,13 @@
 #!/usr/bin/env perl
-package KorAP::VirtualCorpus;
 use strict;
 use warnings;
-
-
-# Get or set name of the VC
-sub name {
-  my $self = shift;
-  unless (@_) {
-    return $self->{name};
-  };
-  $self->{name} = shift;
-  return $self;
-};
-
-
-# Comment
-sub comment {
-  my $self = shift;
-  unless (@_) {
-    return $self->{comment};
-  };
-  $self->{comment} //= [];
-
-  push @{$self->{comment}}, shift;
-  return $self;
-};
-
-
-# Quote utility function
-sub quote {
-  shift;
-  my $str = shift;
-  $str =~ s/(["\\])/\\$1/g;
-  return qq{"$str"};
-};
-
-
-# Escaped quote utility function
-sub equote {
-  shift;
-  my $str = shift;
-  $str =~ s/(["\\])/\\$1/g;
-  $str =~ s/(["\\])/\\$1/g;
-  return '\\"' . $str . '\\"';
-};
-
-
-sub _commentparam_to_string {
-  my $self = shift;
-  my $comment = $self->_comment_to_string;
-  if ($comment) {
-    return qq!,"comment":"$comment"!;
-  };
-  return '';
-};
-
-
-sub _comment_to_string {
-  my $self = shift;
-  if (!$self->name && !$self->comment) {
-    return '';
-  };
-
-  my $json = '';
-  $json .= 'name:' . $self->equote($self->name) if $self->name;
-  if ($self->name && $self->comment) {
-    $json .= ','
-  };
-  $json .= join(',', @{$self->{comment}}) if $self->{comment};
-
-  return $json;
-};
-
-
-# Stringify globally
-sub to_string {
-  my $self = shift;
-  ## Create collection object
-
-  my $json = '{';
-  $json .= '"@context":"http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",';
-  $json .= '"collection":';
-  $json .= $self->_to_fragment;
-  # Set at the end, when all comments are done
-  $json .= $self->_commentparam_to_string;
-  return $json .= '}';
-};
-
-
-package KorAP::VirtualCorpus::Group;
-use strict;
-use warnings;
-use base 'KorAP::VirtualCorpus';
-
-
-# Construct a new VC group
-sub new {
-  my $class = shift;
-  bless {
-    with => [],
-    with_fields => {},
-    without => [],
-    without_fields => {},
-  }, $class;
-};
-
-
-# Define an operand to be "or"ed
-sub with {
-  my $self = shift;
-  push @{$self->{with}}, shift;
-};
-
-
-# Define a field that should be "or"ed
-sub with_field {
-  my $self = shift;
-  my $field = shift;
-  push @{$self->{with_fields}->{$field}}, shift;
-};
-
-# Define an operand to be "and"ed
-sub without {
-  my $self = shift;
-  push @{$self->{without}}, shift;
-};
-
-
-# Define a field that should be "and"ed
-sub without_field {
-  my $self = shift;
-  my $field = shift;
-  push @{$self->{without_fields}->{$field}}, shift;
-};
-
-
-# VC contains only with fields
-sub only_with_fields {
-  my $self = shift;
-
-  if (keys %{$self->{without_fields}} || @{$self->{with}} || @{$self->{without}}) {
-    return 0;
-  };
-
-  return 1;
-};
-
-
-# Create a document vector field
-sub _doc_vec {
-  my $field = shift;
-  my $vec = shift;
-  my $json = '{';
-  $json .= '"@type":"koral:doc",';
-  $json .= '"key":"' . $field . '",';
-  $json .= '"match":"match:eq",';
-  $json .= '"value":[';
-  $json .= join ',', map { '"' . $_ . '"' } @$vec;
-  $json .=  ']';
-  $json .= '},';
-  return $json;
-}
-
-
-# Stringify fragment
-sub _to_fragment {
-  my $self = shift;
-
-  my $json = '{';
-  $json .= '"@type":"koral:docGroup",';
-
-  # Make the outer group "and"
-  if (keys %{$self->{without_fields}}) {
-    $json .= '"operation":"operation:and",';
-    $json .= '"operands":[';
-
-    foreach my $field (sort keys %{$self->{without_fields}}) {
-      unless (@{$self->{without_fields}->{$field}}) {
-        next;
-      };
-      $json .= _doc_vec($field, $self->{without_fields}->{$field});
-    };
-
-    # Remove the last comma
-    chop $json;
-
-    $json .= ']';
-  }
-
-  elsif (keys %{$self->{with_fields}} || @{$self->{with}}) {
-    $json .= '"operation":"operation:or",';
-
-    $json .= '"operands":[';
-
-    # Flatten embedded "or"-VCs
-    foreach my $op (@{$self->{with}}) {
-
-      # The embedded VC has only extending fields
-      if ($op->only_with_fields) {
-
-        $self->comment('embed:[' . $op->_comment_to_string . ']');
-
-        foreach my $k (keys %{$op->{with_fields}}) {
-          foreach my $v (@{$op->{with_fields}->{$k}}) {
-            $self->with_field($k, $v);
-          };
-        };
-      }
-
-      # Embed complex VC
-      else {
-        $json .= $op->_to_fragment . ',';
-      };
-    };
-
-    foreach my $field (sort keys %{$self->{with_fields}}) {
-      unless (@{$self->{with_fields}->{$field}}) {
-        next;
-      };
-      $json .= _doc_vec($field, $self->{with_fields}->{$field});
-    };
-
-    # Remove the last comma
-    chop $json;
-
-    $json .= ']';
-  }
-
-  # No operands in the group
-  else {
-    # Remove the last comma after the comment
-    chop $json;
-  };
-
-  # Set at the end, when all comments are done
-  $json .= $self->_commentparam_to_string;
-  return $json . '}';
-};
-
-
-package main;
-use strict;
-use warnings;
+use lib 'lib';
+use KorAP::VirtualCorpus::Group;
 
 # 2020-05-20
 #   Preliminary support for C2 def-files.
+# 2020-05-29
+#   Introduce optimizable object system.
 
 our @ARGV;
 
@@ -352,24 +114,24 @@
 
     # Convert C2 sigle to KorAP form
     $value =~ s!^([^/]+?/[^\.]+?)\.(.+?)$!$1\/$2!;
-    ${$vc}->with_field(textSigle => $value);
+    ${$vc}->union_field(textSigle => $value);
   }
 
   # Add doc field
   elsif ($key eq 'doc') {
-    ${$vc}->with_field(docSigle => $value);
+    ${$vc}->union_field(docSigle => $value);
   }
 
   # Add corpus field
   elsif ($key eq 'corpus') {
-    ${$vc}->with_field(corpusSigle => $value);
+    ${$vc}->union_field(corpusSigle => $value);
   }
 
   # Add corpus field
   elsif ($key eq 'cn') {
     # Korpussigle, z.B. 'F97 Frankfurter Allgemeine 1997'
     if ($value =~ m!^([^\/\s]+)(?:\s.+?)?$!) {
-      ${$vc}->with_field(corpusSigle => $1);
+      ${$vc}->union_field(corpusSigle => $1);
     };
   }
 
@@ -430,19 +192,108 @@
       next;
     };
 
-    $$vc->with($all_vcs{$value});
+    $$vc->union($all_vcs{$value}->clone->to_koral);
   }
 
+  # AND definition
+  elsif ($key eq 'and') {
+    unless (defined $all_vcs{$value}) {
+      #       warn 'VC ' . $value . ' not defined';
+      # exit(1);
+      next;
+    };
+
+    $$vc->joint($all_vcs{$value}->clone->to_koral);
+  }
+
+  # Source of the corpus
+  elsif ($key eq 'ql') {
+    # Quellenname, z.B. "Neue Zürcher Zeitung"
+    $$vc->union_field(corpusTitle => $value);
+  }
+
+  elsif ($key eq 'sub') {
+    # "Sub" is the difference - it is the "and not" operation.
+    warn $key . ' is not yet supported';
+  }
+
+  elsif ($key eq 'co') {
+    # Country,	z.B. DE für Text in Deutschland erschienen
+    warn $key . ' is not yet supported';
+  }
+
+  elsif ($key eq 'tl') {
+    # Textlength, Bereich von Texten der angegebenen Länge [in Anz. Wörtern]
+    warn $key . ' is not yet supported';
+  }
+
+  elsif ($key eq 'ts') {
+    # Textsorte, 	z.B. "Bericht"
+    warn $key . ' is not yet supported';
+  }
+
+  elsif ($key eq 'th') {
+    # Thema, z.B. "Sport - Fußball"
+    warn $key . ' is not yet supported';
+  }
+
+  elsif ($key eq 'red') {
+    # Reduktionsfaktor
+    # Wert zw. 1-99%: virt. Korpus wird auf diesen Wert
+    # reduziert. Modus: feste Reduzierung, nicht variabel.
+    warn $key . ' is not yet supported';
+  }
+
+  elsif ($key eq 'thprob') {
+    # ThemaProbability
+    # Wert, der für <th>Thema verwendet wird um zu bestimmen, ab welchem
+    # Zuverläßigkeitswert ein Thema übernommen wird
+  }
+
+
   # Add reduction value as a comment
   elsif ($key eq 'redabs') {
     # "red. Anz. Texte
     # absoluter Wert der durch Reduktion zu erzielende Anzahl Texte"
     $$vc->comment('redabs:' . $value);
+    warn $key . ' is not yet supported';
+  }
+
+  # Add reduction value as a comment
+  elsif ($key eq 'date') {
+    # Supports two pattern schemes:
+    # m1=Year1/Month1 bis Year2/Month2
+    #   Datumsbereich Schema 1: z.B. "2000/01 bis 2010/12"
+
+    # Schema 1
+    if ($value =~ m!^(?:m1\s*=\s*)?\s*(\d+)\/(\d+) bis (\d+)\/(\d+)\s*$!s) {
+      my ($y1, $m1, $y2, $m2) = ($1, $2, $3, $4);
+      if ($m1 < 10) {
+        $m1 = '0' . (0+$m1);
+      };
+      if ($m2 < 10) {
+        $m2 = '0' . (0+$m2);
+      };
+      $$vc->from($y1, $m1);
+      $$vc->to($y2, $m2);
+    }
+
+    # Scheme 2
+    elsif ($value =~ m!^\s*\d{4}-\d{4}\s+und\s+\d{1,2}-\d{1,2}\s*$!) {
+      # m2=Year1-Year2 und Month1-Month2
+      #   Datumsbereich Schema 2: z.B. "1990-2000 und 06-06"
+
+      warn 'Second date scheme not yet supported!'
+    }
+
+    else {
+      warn 'Unknown date scheme ' . $value;
+    };
   }
 
   # Unknown
   else {
-    # warn $key . ' is an unknown field';
+    warn $key . ' is an unknown field';
   };
 };