Cleanup and additions regarding requirements of COSMAS II
Change-Id: Id55b9861838aed2978f095d18d83284b4ab1af08
diff --git a/lib/Krawfish/Meta/Cluster/EnrichGroup/Calc.pm b/lib/Krawfish/Meta/Cluster/EnrichGroup/Calc.pm
new file mode 100644
index 0000000..4f66a05
--- /dev/null
+++ b/lib/Krawfish/Meta/Cluster/EnrichGroup/Calc.pm
@@ -0,0 +1,5 @@
+# Take fields and create a new field based on calculations, like:
+# take the hit frequency and calculate the relation to the number
+# of tokens in the text.
+# This is necessary for the calculation of e.g. "Relative Häufigkeit"
+# see http://www.ids-mannheim.de/cosmas2/server/themen/haeufigkeitsmasse/#Zusammenfassung
diff --git a/lib/Krawfish/Meta/Segment/Aggregate/Length.pm b/lib/Krawfish/Meta/Segment/Aggregate/Length.pm
index 5b7fb6d..ef92dec 100644
--- a/lib/Krawfish/Meta/Segment/Aggregate/Length.pm
+++ b/lib/Krawfish/Meta/Segment/Aggregate/Length.pm
@@ -4,7 +4,7 @@
use strict;
use warnings;
-# This will check the segments length -
+# This will check the hit length in subtokens -
# currently other word lengths are not supported
# See https://en.wikipedia.org/wiki/Selection_algorithm
diff --git a/lib/Krawfish/Meta/Segment/Aggregate/TermExistence.pm b/lib/Krawfish/Meta/Segment/Aggregate/TermExistence.pm
index f1e8594..fe9ed04 100644
--- a/lib/Krawfish/Meta/Segment/Aggregate/TermExistence.pm
+++ b/lib/Krawfish/Meta/Segment/Aggregate/TermExistence.pm
@@ -3,4 +3,6 @@
# This probably requires a single list of term queries, that can be
# closed, once a match occurs.
+# Probably better suited in Group
+
__END__
diff --git a/lib/Krawfish/Meta/Segment/Aggregate/Values.pm b/lib/Krawfish/Meta/Segment/Aggregate/Values.pm
index 0f60e6c..9523887 100644
--- a/lib/Krawfish/Meta/Segment/Aggregate/Values.pm
+++ b/lib/Krawfish/Meta/Segment/Aggregate/Values.pm
@@ -11,6 +11,10 @@
# TODO:
# Support corpus classes
+# TODO:
+# This is rather a group query or better:
+# An aggregation on groups!
+
use constant {
DEBUG => 1
};
diff --git a/lib/Krawfish/Meta/Segment/Enrich/Terms.pm b/lib/Krawfish/Meta/Segment/Enrich/Terms.pm
index 527bb16..0076566 100644
--- a/lib/Krawfish/Meta/Segment/Enrich/Terms.pm
+++ b/lib/Krawfish/Meta/Segment/Enrich/Terms.pm
@@ -6,7 +6,7 @@
use warnings;
# TODO:
-# Potentially rename to ::Terms!
+# Potentially rename to ::Terms! or ::Classes!
# Enrich each match with all term ids for a specific region and
# for a specific class
diff --git a/lib/Krawfish/Meta/Segment/EnrichGroup/Values.pm b/lib/Krawfish/Meta/Segment/EnrichGroup/Values.pm
new file mode 100644
index 0000000..f94461c
--- /dev/null
+++ b/lib/Krawfish/Meta/Segment/EnrichGroup/Values.pm
@@ -0,0 +1,4 @@
+# Add per-group values from fields. For example,
+# in a group on documents, add the min and max values
+# of a field (e.g. the date span) or the total number
+# of sentences in a corpus.
diff --git a/lib/Krawfish/Meta/Group/AnnotationClasses.pm b/lib/Krawfish/Meta/Segment/Group/AnnotationClasses.pm
similarity index 100%
rename from lib/Krawfish/Meta/Group/AnnotationClasses.pm
rename to lib/Krawfish/Meta/Segment/Group/AnnotationClasses.pm
diff --git a/lib/Krawfish/Meta/Group/Characters.pm b/lib/Krawfish/Meta/Segment/Group/Characters.pm
similarity index 79%
rename from lib/Krawfish/Meta/Group/Characters.pm
rename to lib/Krawfish/Meta/Segment/Group/Characters.pm
index 9b0ed91..0594f17 100644
--- a/lib/Krawfish/Meta/Group/Characters.pm
+++ b/lib/Krawfish/Meta/Segment/Group/Characters.pm
@@ -1,8 +1,16 @@
-package Krawfish::Meta::Group::Character;
+package Krawfish::Meta::Segment::Group::Character;
use Krawfish::Log;
use strict;
use warnings;
+
+# This groups on prefixes or suffixes of subterms.
+# Necessary to support "Ansicht nach Wortendungen" for example.
+# It's possible to first group on terms and then, per term,
+# request the term surface in the dictionary and group by
+# the result.
+
+
use constant DEBUG => 0;
sub new {
diff --git a/lib/Krawfish/Meta/Group/Spans.pm b/lib/Krawfish/Meta/Segment/Group/Spans.pm
similarity index 82%
rename from lib/Krawfish/Meta/Group/Spans.pm
rename to lib/Krawfish/Meta/Segment/Group/Spans.pm
index b1deda5..debaa69 100644
--- a/lib/Krawfish/Meta/Group/Spans.pm
+++ b/lib/Krawfish/Meta/Segment/Group/Spans.pm
@@ -1,10 +1,10 @@
-package Krawfish::Meta::Group::Spans;
+package Krawfish::Meta::Segment::Group::Spans;
use parent 'Krawfish::Meta';
use Krawfish::Log;
use strict;
use warnings;
-# This may be genralizable, but for the moment
+# This may be generalizable, but for the moment
# It should make it possible to group the span positions
# of a query based on a nesting query.
#
@@ -18,14 +18,14 @@
# 100: 2
#
# can be returned, where each class 1 is sliced in
-# 100 pieces and for each pieces there is a dot, in case
+# 100 pieces and for each piece there is a dot, in case
# class 2 occurs in that slice.
#
# By doing that it's easy to visualize the position of expressions
# in sentences or documents etc.
#
-# For example to answer questions like (where in documents does
-# the phrase "Herzlichen Dank" occur.
+# For example to answer questions like 'where in documents does
+# the phrase "Herzlichen Dank" occur?'
#
# If the span spans more than 1 slice, the result can be
#
diff --git a/lib/Krawfish/Meta/Group/Segment/TermExistence.pm b/lib/Krawfish/Meta/Segment/Group/TermExistence.pm
similarity index 96%
rename from lib/Krawfish/Meta/Group/Segment/TermExistence.pm
rename to lib/Krawfish/Meta/Segment/Group/TermExistence.pm
index ab9dbbc..3570166 100644
--- a/lib/Krawfish/Meta/Group/Segment/TermExistence.pm
+++ b/lib/Krawfish/Meta/Segment/Group/TermExistence.pm
@@ -1,5 +1,5 @@
-package Krawfish::Meta::Group::Segment::TermExistence;
-use parent 'Krawfish::Query';
+package Krawfish::Meta::Segment::Group::TermExistence;
+use parent 'Krawfish::Meta';
use strict;
use warnings;