blob: 3ff7978e81af50175ebfa22f77a1c2a9ad17b3c9 [file] [log] [blame]
Akronb5809f42017-05-03 01:26:08 +02001package Krawfish::Controller::Corpus;
2use Mojo::Base 'Mojolicious::Controller';
3use Mojo::ByteStream 'b';
4
5use Krawfish::Koral::Corpus::Builder;
6use Krawfish::Koral::Meta;
7
8use strict;
9use warnings;
10
# Run a Koral query restricted to a single corpus and render the
# result as JSON.
#
# Stash values read:
#   corpus_id - identifier handed to the corpus builder
#
# Optional request parameters:
#   fields - comma separated list of field names passed to the meta builder
#   count  - passed to the meta builder as items per page
#   page   - passed to the meta builder as start index (see TODO below)
#   sortBy - accepted, but not evaluated yet
sub corpus {
  my $c = shift;

  # All parameters are optional and accepted without further format checks
  my $v = $c->validation;
  $v->optional($_) for qw/fields count page sortBy/;

  my $corpus_id = $c->stash('corpus_id');

  my $koral = Krawfish::Koral->new;

  # Set corpus
  $koral->corpus(
    $koral->corpus_builder->string('corpus_id' => $corpus_id)
  );

  my $meta = $koral->meta_builder;
  $meta->items_per_page($v->param('count'));

  # TODO: 'page' is handed over unchanged as the start index
  $meta->start_index($v->param('page'));

  # TODO: Evaluate 'sortBy', e.g. by means of a field sort on $meta

  # Split the requested field names. The parameter defaults to an empty
  # string, as constructing a byte stream from undef triggers an
  # "uninitialized value" warning. Empty names (e.g. from ",a" or "a,,b")
  # are dropped, as byte stream objects are always true in boolean context
  # and an element test would not filter them.
  my $fields = b($v->param('fields') // '')
    ->split(',')
    ->grep(sub { $_->size })
    ->uniq
    ->to_array;
  $meta->fields($fields) if @$fields;

  # Set meta
  $koral->meta($meta);

  # Get segment index
  my $index = $c->index->segment;

  # Run the query on the index and respond
  $c->render(json => $koral->to_result($index));
}
50
Akron61e8bce2017-05-24 15:55:27 +020051
# Get information per text.
#
# Stash values read:
#   corpus_id, doc_id, text_id - joined with '/' to form the text sigle
#
# Optional request parameters:
#   fields - comma separated list of field names passed to the meta builder
#
# The query is limited to a single match and the result is rendered as JSON.
sub text {

  # The controller has to be available as $c, as the whole body
  # refers to it by that name
  my $c = shift;

  my $v = $c->validation;
  $v->optional('fields');

  my $koral = Krawfish::Koral->new;
  my $meta = $koral->meta_builder;

  # Get the text sigle from the stash
  my $corpus_id = $c->stash('corpus_id');
  my $doc_id    = $c->stash('doc_id');
  my $text_id   = $c->stash('text_id');

  my $sigle = join('/', $corpus_id, $doc_id, $text_id);

  # Set corpus - query for the sigle built above
  $koral->corpus(
    $koral->corpus_builder->string('text_sigle' => $sigle)
  );

  # Get the field information. The parameter defaults to an empty string,
  # as constructing a byte stream from undef triggers an "uninitialized
  # value" warning. Empty names (e.g. from ",a" or "a,,b") are dropped, as
  # byte stream objects are always true in boolean context and an element
  # test would not filter them.
  my $fields = b($v->param('fields') // '')
    ->split(',')
    ->grep(sub { $_->size })
    ->uniq
    ->to_array;
  $meta->fields($fields) if @$fields;

  # Limit to a single match
  $meta->limit(1);

  # Set meta
  $koral->meta($meta);

  # Get segment index
  my $index = $c->index->segment;

  # Run the query on the index and respond
  $c->render(json => $koral->to_result($index));
}
93
94
# Takes a virtual corpus and a list of terms and returns the
# frequency of each term in that virtual corpus
# (potentially per corpus class).
#
# NOTE: Not implemented yet - the action only fetches the controller.
sub frequencies {
  my $c = shift @_;

  # Design notes:
  #
  # - This endpoint is very important: It serves statistics on a
  #   virtual corpus (e.g. the number of sentences in a corpus),
  #   co-occurrence search and potentially systems like glemm.
  #
  # - Besides terms, it also supports frequency counts of tokens
  #   for certain foundries.
  #
  # - It uses Result::Aggregate::TermFreq and
  #   Result::Aggregate::TokenFreq.
  #
  # - Sorting the terms in advance may be beneficial, as this allows
  #   using the potentially faster collection() API in dict.
  #   In that case a flag may need to be provided that marks the
  #   parameter list as "sorted".
}
115
Akronb5809f42017-05-03 01:26:08 +02001161;