Cleanup and additions regarding requirements of COSMAS II Change-Id: Id55b9861838aed2978f095d18d83284b4ab1af08

commit: 31e088b2db9300f4132e02bd4a774a004199a3d5 [log] [tgz]
author: Akron <nils@diewald-online.de> Fri Sep 29 14:48:49 2017 +0200
committer: Akron <nils@diewald-online.de> Fri Sep 29 14:48:49 2017 +0200
tree: 8cf94a18ea590815aa49ea797c6492c1e82bdb90
parent: bc287e5fc3c59ee42a61df60246bb9606b6faf83 [diff]
diff --git a/lib/Krawfish/Meta/Cluster/EnrichGroup/Calc.pm b/lib/Krawfish/Meta/Cluster/EnrichGroup/Calc.pm
new file mode 100644
index 0000000..4f66a05
--- /dev/null
+++ b/lib/Krawfish/Meta/Cluster/EnrichGroup/Calc.pm

@@ -0,0 +1,5 @@
+# Take fields and create a new field based on calculations, e.g.
+# take the hit frequency and calculate its ratio to the number
+# of tokens in the text.
+# This is necessary to calculate e.g. the "Relative Häufigkeit" (relative frequency),
+# see http://www.ids-mannheim.de/cosmas2/server/themen/haeufigkeitsmasse/#Zusammenfassung

diff --git a/lib/Krawfish/Meta/Segment/Aggregate/Length.pm b/lib/Krawfish/Meta/Segment/Aggregate/Length.pm
index 5b7fb6d..ef92dec 100644
--- a/lib/Krawfish/Meta/Segment/Aggregate/Length.pm
+++ b/lib/Krawfish/Meta/Segment/Aggregate/Length.pm

@@ -4,7 +4,7 @@
 use strict;
 use warnings;
 
-# This will check the segments length -
+# This will check the hit's length in subtokens -
 # currently other word lengths are not supported
 
 # See https://en.wikipedia.org/wiki/Selection_algorithm

diff --git a/lib/Krawfish/Meta/Segment/Aggregate/TermExistence.pm b/lib/Krawfish/Meta/Segment/Aggregate/TermExistence.pm
index f1e8594..fe9ed04 100644
--- a/lib/Krawfish/Meta/Segment/Aggregate/TermExistence.pm
+++ b/lib/Krawfish/Meta/Segment/Aggregate/TermExistence.pm

@@ -3,4 +3,6 @@
 # This probably requires a single list of term queries, that can be
 # closed, once a match occurs.
 
+# Probably better suited in Group
+
 __END__

diff --git a/lib/Krawfish/Meta/Segment/Aggregate/Values.pm b/lib/Krawfish/Meta/Segment/Aggregate/Values.pm
index 0f60e6c..9523887 100644
--- a/lib/Krawfish/Meta/Segment/Aggregate/Values.pm
+++ b/lib/Krawfish/Meta/Segment/Aggregate/Values.pm

@@ -11,6 +11,10 @@
 # TODO:
 #   Support corpus classes
 
+# TODO:
+#   This is rather a group query or better:
+#   An aggregation on groups!
+
 use constant {
   DEBUG          => 1
 };

diff --git a/lib/Krawfish/Meta/Segment/Enrich/Terms.pm b/lib/Krawfish/Meta/Segment/Enrich/Terms.pm
index 527bb16..0076566 100644
--- a/lib/Krawfish/Meta/Segment/Enrich/Terms.pm
+++ b/lib/Krawfish/Meta/Segment/Enrich/Terms.pm

@@ -6,7 +6,7 @@
 use warnings;
 
 # TODO:
-#   Potentially rename to ::Terms!
+#   Potentially rename to ::Terms! or ::Classes!
 
 # Enrich each match with all term ids for a specific region and
 # for a specific class

diff --git a/lib/Krawfish/Meta/Segment/EnrichGroup/Values.pm b/lib/Krawfish/Meta/Segment/EnrichGroup/Values.pm
new file mode 100644
index 0000000..f94461c
--- /dev/null
+++ b/lib/Krawfish/Meta/Segment/EnrichGroup/Values.pm

@@ -0,0 +1,4 @@
+# Add per-group values from fields,
+# e.g. in a group on documents, add the min and max values
+# of a field (such as the date span), or the total number
+# of sentences in a corpus.

diff --git a/lib/Krawfish/Meta/Group/AnnotationClasses.pm b/lib/Krawfish/Meta/Segment/Group/AnnotationClasses.pm
similarity index 100%
rename from lib/Krawfish/Meta/Group/AnnotationClasses.pm
rename to lib/Krawfish/Meta/Segment/Group/AnnotationClasses.pm


diff --git a/lib/Krawfish/Meta/Group/Characters.pm b/lib/Krawfish/Meta/Segment/Group/Characters.pm
similarity index 79%
rename from lib/Krawfish/Meta/Group/Characters.pm
rename to lib/Krawfish/Meta/Segment/Group/Characters.pm
index 9b0ed91..0594f17 100644
--- a/lib/Krawfish/Meta/Group/Characters.pm
+++ b/lib/Krawfish/Meta/Segment/Group/Characters.pm

@@ -1,8 +1,16 @@
-package Krawfish::Meta::Group::Character;
+package Krawfish::Meta::Segment::Group::Character;
 use Krawfish::Log;
 use strict;
 use warnings;
 
+
+# This groups on prefixes or suffixes of subterms.
+# Necessary to support "Ansicht nach Wortendungen" (view by word endings), for example.
+# It's possible to first group on terms and then, per term,
+# request the term surface in the dictionary and group by
+# the result.
+
+
 use constant DEBUG => 0;
 
 sub new {

diff --git a/lib/Krawfish/Meta/Group/Spans.pm b/lib/Krawfish/Meta/Segment/Group/Spans.pm
similarity index 82%
rename from lib/Krawfish/Meta/Group/Spans.pm
rename to lib/Krawfish/Meta/Segment/Group/Spans.pm
index b1deda5..debaa69 100644
--- a/lib/Krawfish/Meta/Group/Spans.pm
+++ b/lib/Krawfish/Meta/Segment/Group/Spans.pm

@@ -1,10 +1,10 @@
-package Krawfish::Meta::Group::Spans;
+package Krawfish::Meta::Segment::Group::Spans;
 use parent 'Krawfish::Meta';
 use Krawfish::Log;
 use strict;
 use warnings;
 
-# This may be genralizable, but for the moment
+# This may be generalizable, but for the moment
 # It should make it possible to group the span positions
 # of a query based on a nesting query.
 #
@@ -18,14 +18,14 @@
 #   100: 2
 #
 # can be returned, where each class 1 is sliced in
-# 100 pieces and for each pieces there is a dot, in case
+# 100 pieces and for each piece there is a dot, in case
 # class 2 occurs in that slice.
 #
 # By doing that it's easy to visualize the position of expressions
 # in sentences or documents etc.
 #
-# For example to answer questions like (where in documents does
-# the phrase "Herzlichen Dank" occur.
+# For example to answer questions like 'where in documents does
+# the phrase "Herzlichen Dank" occur?'
 #
 # If the span spans more than 1 slice, the result can be
 #

diff --git a/lib/Krawfish/Meta/Group/Segment/TermExistence.pm b/lib/Krawfish/Meta/Segment/Group/TermExistence.pm
similarity index 96%
rename from lib/Krawfish/Meta/Group/Segment/TermExistence.pm
rename to lib/Krawfish/Meta/Segment/Group/TermExistence.pm
index ab9dbbc..3570166 100644
--- a/lib/Krawfish/Meta/Group/Segment/TermExistence.pm
+++ b/lib/Krawfish/Meta/Segment/Group/TermExistence.pm

@@ -1,5 +1,5 @@
-package Krawfish::Meta::Group::Segment::TermExistence;
-use parent 'Krawfish::Query';
+package Krawfish::Meta::Segment::Group::TermExistence;
+use parent 'Krawfish::Meta';
 use strict;
 use warnings;
commit	31e088b2db9300f4132e02bd4a774a004199a3d5	[log] [tgz]
author	Akron <nils@diewald-online.de>	Fri Sep 29 14:48:49 2017 +0200
committer	Akron <nils@diewald-online.de>	Fri Sep 29 14:48:49 2017 +0200
tree	8cf94a18ea590815aa49ea797c6492c1e82bdb90
parent	bc287e5fc3c59ee42a61df60246bb9606b6faf83 [diff]