blob: a41550bd464fc51428f8f42418e13ae353994461 [file] [log] [blame]
Nils Diewald7148c6f2015-05-04 15:07:53 +00001package Kalamar::API;
Nils Diewald996aa552014-12-02 03:26:44 +00002use Mojo::Base 'Mojolicious::Plugin';
Nils Diewald791b5902014-12-04 04:47:24 +00003use Scalar::Util qw/blessed weaken/;
Nils Diewald996aa552014-12-02 03:26:44 +00004use strict;
5use warnings;
6
7# KorAP Search engine for Mojolicious::Plugin::Search
8
Nils Diewald7148c6f2015-05-04 15:07:53 +00009# TODO: Add fixtures
10# TODO: Support search in corpus and virtualcollection
11# TODO: Support caching everywhere!
12# TODO: Correct use of stash info everywhere!
Nils Diewald996aa552014-12-02 03:26:44 +000013
14# Register the plugin
# Plugin entry point: installs the KorAP specific attributes on the
# index class handed over by the caller.
#
#   $index_class - class that receives the attributes through attr()
#   $param       - optional hash reference; its 'api' value becomes the
#                  default value of the 'api' attribute
sub register {
  my (undef, undef, $index_class, $param) = @_;
  $param = {} unless $param;

  # API address, defaulting to the configured value
  $index_class->attr(api => $param->{api});

  # Attributes without a default value
  my @plain_attributes = qw/
    cutoff
    query_language
    time_exceeded
    api_request
    _api_cache
    api_response
    benchmark
    query_jsonld
    collection_jsonld
  /;
  $index_class->attr(\@plain_attributes);

  # Caching is switched on unless explicitly disabled
  $index_class->attr(no_cache => 0);
};
32
33
34# Search the index
# Sends the query stored in the index to the 'search' endpoint of the API.
#
# Arguments after the index object are key/value parameters ('context' is
# read here; 'cutoff', 'query_language' and 'no_cache' are read by
# _query_url), optionally followed by a callback.
#
# With a callback the request is sent non-blocking and the callback is
# invoked with the index once the response was processed. Without a
# callback the request blocks and the index is returned.
# If the index has no query, no request is sent at all.
sub search {
  my $self  = shift;
  my $index = shift;

  # Get controller
  my $c = $index->controller;

  # A trailing code reference switches to non-blocking mode.
  # (A ternary is used on purpose: "my $x = ... if ..." has undefined
  # behaviour in Perl.)
  my $cb = (@_ && ref($_[-1]) eq 'CODE') ? pop : undef;

  # No query defined
  unless ($index->query) {
    return $cb->($index) if $cb;
    return;
  };

  # Get query url
  my $url = _query_url($index, @_);

  # The cache key for the total results is based on the URL
  # before context and path are added
  $index->_api_cache('total-' . $url->to_string);
  my %param = @_;

  # Set context based on parameter
  $url->query({ context => $param{'context'} // 'paragraph' });

  # Set path to search
  $url->path('search');

  # Check cache for total results
  my $total_results;

  if (!$index->no_cache &&
        defined ($total_results = $c->chi->get($index->_api_cache))) {

    # Set total results from cache
    $index->total_results($total_results);
    $c->app->log->debug('Get total result from cache');

    # The total is already known, so ask the API to cut off,
    # unless a cutoff value is already defined on the index
    $url->query({cutoff => 'true'}) unless defined $index->cutoff;
  };

  # Set api request for debugging
  $index->api_request($url->to_string);

  # Use the user agent of the controller with a timeout of 2 minutes
  my $ua = $c->ua;
  $ua->inactivity_timeout(120);

  # Debugging
  $c->app->log->debug('Search for ' . $index->api_request);

  # Search non-blocking
  if ($cb) {
    $ua->get(
      $url => sub {
        my $tx = pop;
        $self->_process_response('matches', $index, $tx);

        # NOTE(review): unlike trace/match/resource, the index is only
        # weakened after the response arrived - kept as is
        weaken $index;
        return $cb->($index);
      });
  }

  # Search blocking
  else {
    my $tx = $ua->get($url);
    $self->_process_response('matches', $index, $tx);
    return $index;
  };
};
107
108
Nils Diewald8f4b5da2014-12-03 22:13:39 +0000109# Trace query serialization
# Requests the serialization of a query by sending a TRACE request to
# the 'search' endpoint of the API.
#
# Arguments after the index object are key/value parameters ('query' is
# set on the index, the remaining ones are handled by _query_url),
# optionally followed by a callback.
#
# With a callback the request is sent non-blocking and the callback is
# invoked with the index. Without a callback the request blocks and the
# return value of _process_response is returned.
# If no query is given, no request is sent at all.
sub trace {
  my $self  = shift;
  my $index = shift;

  # Get controller
  my $c = $index->controller;

  # A trailing code reference switches to non-blocking mode.
  # (A ternary is used on purpose: "my $x = ... if ..." has undefined
  # behaviour in Perl.)
  my $cb = (@_ && ref($_[-1]) eq 'CODE') ? pop : undef;

  my %param = @_;

  # No query defined
  unless ($index->query(delete $param{query})) {
    return $cb->($index) if $cb;
    return;
  };

  # Get query url
  my $url = _query_url($index, @_);

  $url->path('search');

  # Use the user agent of the controller with a timeout of 30 seconds
  my $ua = $c->ua;
  $ua->inactivity_timeout(30);

  # Build transaction
  my $tx = $ua->build_tx(TRACE => $url);

  # Trace non-blocking
  if ($cb) {
    weaken $index;

    $ua->start(
      $tx => sub {
        $self->_process_response('trace', $index, pop);
        return $cb->($index);
      });
  }

  # Trace blocking
  else {
    # Start the prepared TRACE transaction (start() expects a
    # transaction object, not a URL)
    $tx = $ua->start($tx);
    return $self->_process_response('trace', $index, $tx);
  };
};
157
158
159# Get match info
# Requests information on a single match from the
# 'corpus/<corpus_id>/<doc_id>/<match_id>/matchInfo' endpoint of the API.
#
# Arguments after the index object are key/value parameters (corpus_id,
# doc_id, text_id, match_id, foundry, layer and spans are read here),
# optionally followed by a callback.
#
# With a callback the request is sent non-blocking and the callback is
# invoked with the index. Without a callback the request blocks and the
# return value of _process_response is returned.
sub match {
  my $self  = shift;
  my $index = shift;

  # Get controller
  my $c = $index->controller;

  # A trailing code reference switches to non-blocking mode.
  # (A ternary is used on purpose: "my $x = ... if ..." has undefined
  # behaviour in Perl.)
  my $cb = (@_ && ref($_[-1]) eq 'CODE') ? pop : undef;

  my %param = @_;

  my $url = Mojo::URL->new($index->api);

  # Legacy: In old versions, doc_id contained text_id
  $param{doc_id} .= '.' . $param{text_id} if $param{text_id};

  # Use hash slice to create path
  $url->path(
    join('/', 'corpus', @param{qw/corpus_id doc_id match_id/}, 'matchInfo')
  );

  # Build the query part; layer is only passed if defined,
  # spans is always passed as 'true' or 'false'
  my %query;
  $query{foundry} = $param{foundry};
  $query{layer}   = $param{layer} if defined $param{layer};
  $query{spans}   = $param{spans} ? 'true' : 'false';

  # Add query
  $url->query(\%query);

  $c->app->log->debug('Match info: ' . $url);

  # Use the user agent of the controller with a timeout of 30 seconds
  my $ua = $c->ua;
  $ua->inactivity_timeout(30);

  # Match info non-blocking
  if ($cb) {
    weaken $index;
    $ua->get(
      $url => sub {
        my $tx = pop;
        $self->_process_response('match', $index, $tx);
        return $cb->($index);
      });
  }

  # Match info blocking
  else {
    my $tx = $ua->get($url);
    return $self->_process_response('match', $index, $tx);
  };
};
214
215
Nils Diewald87507832015-05-01 23:36:41 +0000216# Get resource information
# Requests information on a resource type from the API.
#
# Arguments after the index object are key/value parameters ('type' is
# read here and defaults to 'collection', which is requested from the
# 'virtualcollection' endpoint), optionally followed by a callback.
#
# Information found in the cache is stashed as 'search.resource' and
# returned (or passed on by invoking the callback with the index)
# without a request. Otherwise the request is sent non-blocking if a
# callback is given and blocking if not.
sub resource {
  my $self  = shift;
  my $index = shift;

  # Get controller
  my $c = $index->controller;

  # A trailing code reference switches to non-blocking mode.
  # (A ternary is used on purpose: "my $x = ... if ..." has undefined
  # behaviour in Perl.)
  my $cb = (@_ && ref($_[-1]) eq 'CODE') ? pop : undef;

  my %param = @_;

  # Rename info endpoints regarding resource
  my $type = $param{type} // 'collection';
  $type = 'virtualcollection' if $type eq 'collection';

  # Create resource URL
  my $url = Mojo::URL->new($index->api)->path($type);

  # Debugging
  $c->app->log->debug('Get resource info on '. $url);

  # Check for cached information
  if (my $json = $c->chi->get($url->to_string)) {

    # TODO: That's unfortunate, as it prohibits caching of multiple resources
    $c->app->log->debug('Get resource info from cache');
    $c->stash('search.resource' => $json);
    return $cb->($index) if $cb;
    return $json;
  };

  # Remember the cache key for _process_response_resource
  $c->stash('search._resource_cache' => $url->to_string);

  # Use the user agent of the controller with a timeout of 30 seconds
  my $ua = $c->ua;
  $ua->inactivity_timeout(30);

  # Get resource information non-blocking
  if ($cb) {
    weaken $index;
    $ua->get(
      $url => sub {
        $self->_process_response('resource', $index, pop);
        return $cb->($index);
      })
  }

  # Get resource information blocking
  else {
    my $tx = $ua->get($url);
    $self->_process_response('resource', $index, $tx);
  };
};
271
272
273# Process response - especially error messages etc.
# Evaluates the transaction of an API request.
#
#   $type  - kind of request: 'matches', 'trace', 'match' or 'resource'
#   $index - index object that receives the response data
#   $tx    - finished transaction
#
# Returns nothing if the transaction reports an error or the response
# body can't be decoded as JSON (the user is notified in both cases),
# and 1 otherwise.
sub _process_response {
  my ($self, $type, $index, $tx) = @_;
  my $c = $index->controller;

  # An error has occurred
  my $error = $tx->error;
  if ($error) {
    my $prefix = $error->{code} ? $error->{code} . ': ' : '';
    $c->notify(error => $prefix . $error->{message} . ' (remote)');
    return;
  };

  my $res = $tx->success;

  # Request failed
  unless ($res) {
    $self->_notify_on_error($c, 1, $tx->res);
    return 1;
  };

  # Set api response for debugging
  $index->api_response($res->body) if $c->kalamar_test_port;

  # Json failure
  my $json = $res->json;
  unless ($json) {
    $c->notify(error => 'JSON response is invalid');
    return;
  };

  # Hand the decoded response over to the handler of the request type
  my %handler_for = (
    matches  => '_process_response_matches',
    trace    => '_process_response_trace',
    match    => '_process_response_match',
    resource => '_process_response_resource'
  );
  if (my $handler = $handler_for{$type}) {
    $self->$handler($index, $json);
  };

  # Warnings and errors can only be part of JSON objects
  return 1 if ref $json ne 'HASH';

  # Add warnings (Legacy)
  if ($json->{warning}) {
    $json->{warning} =~ s/;\s+null$//;
    $c->notify(warn => $json->{warning});
  };

  $self->_notify_on_error($c, 0, $json);
  return 1;
};
332
333
Nils Diewald87507832015-05-01 23:36:41 +0000334# Handle match results
# Transfers the decoded response of a search request to the index:
# benchmark, time exceeded flag, items per page, the bounced query and
# collection serializations, the matches and - if not yet known - the
# total number of results (which is cached as well).
#
#   $index - index object that receives the values
#   $json  - decoded JSON response (hash reference)
sub _process_response_matches {
  my ($self, $index, $json) = @_;

  # Reformat benchmark counter, e.g. '0.12345 ms' becomes '0.12ms'
  my $benchmark = $json->{benchmark};
  if ($benchmark && $benchmark =~ s/\s+(m)?s$//) {
    $benchmark = sprintf("%.2f", $benchmark) . ($1 ? $1 : '') . 's';
  };

  # Set benchmark
  $index->benchmark($benchmark);

  # Set time exceeded
  if ($json->{timeExceeded} && $json->{timeExceeded} eq Mojo::JSON::true()) {
    $index->time_exceeded(1);
  };

  # Set result values
  $index->items_per_page($json->{itemsPerPage});

  # Bouncing query
  if ($json->{query}) {
    $index->query_jsonld($json->{query});
  }

  # Legacy
  elsif ($json->{request}->{query}) {
    $index->query_jsonld($json->{request}->{query});
  };

  # Bouncing collection query
  if ($json->{collection}) {
    $index->collection_jsonld($json->{collection});
  }

  # Legacy
  elsif ($json->{request}->{collection}) {
    $index->collection_jsonld($json->{request}->{collection});
  };

  $index->results(_map_matches($json->{matches}));

  # Total results not set by stash
  if ($index->total_results == -1) {

    if ($json->{totalResults} && $json->{totalResults} > -1) {
      my $c = $index->controller;

      $c->app->log->debug('Cache total result');
      $c->chi->set($index->_api_cache => $json->{totalResults}, '120min');
      $index->total_results($json->{totalResults});
    };
  };
};
444
445
# Process match info response
# Stores the cleaned up match of a match info response
# as the results of the index.
sub _process_response_match {
  my $self = shift;
  my ($index, $json) = @_;

  my @cleaned = _map_match($json);
  return $index->results(@cleaned);
};
451
452
Nils Diewald87507832015-05-01 23:36:41 +0000453# Process trace response
# Stores the decoded response of a trace request
# as the query serialization of the index.
sub _process_response_trace {
  my $self = shift;
  my ($index, $serialization) = @_;
  return $index->query_jsonld($serialization);
};
458
Nils Diewald87507832015-05-01 23:36:41 +0000459
460# Process resource response
# Stashes the decoded response of a resource request as
# 'search.resource' and caches it for 24 hours, using the key
# stashed as 'search._resource_cache'.
sub _process_response_resource {
  my ($self, $index, $json) = @_;
  my $controller = $index->controller;

  # TODO: That's unfortunate, as it prohibits multiple resources
  $controller->stash('search.resource' => $json);

  $controller->app->log->debug('Cache resource info');

  my $cache_key = $controller->stash('search._resource_cache');
  $controller->chi->set($cache_key => $json, '24 hours');
};
470
471
Nils Diewald87507832015-05-01 23:36:41 +0000472# Parse error messages and forward them to the user
# Looks for error messages in a response and forwards them to the user.
#
#   $c       - controller used for notifications
#   $failure - true, if the request failed; in that case a generic
#              error is reported if no specific message is found
#   $res     - decoded JSON data or an object providing a json() method
#
# At most one notification is sent. Returns nothing.
sub _notify_on_error {
  my ($self, $c, $failure, $res) = @_;
  my $json = $res;

  # Objects need to be decoded first, unless they are
  # (legacy) Mojo::JSON objects
  if (blessed $res) {
    $json = $res->json if blessed $res ne 'Mojo::JSON';
  };

  # Check json response error message.
  # Only hash based data can contain the error fields - anything else
  # (e.g. a JSON array) would die on dereferencing.
  if ($json && (Scalar::Util::reftype($json) // '') eq 'HASH') {
    if ($json->{error}) {
      # Temp
      $json->{error} =~ s/;\s+null$//;
      $c->notify(error => $json->{error});
      return;
    }

    # New error messages
    elsif ($json->{errstr}) {
      # Temp
      $json->{errstr} =~ s/;\s+null$//;
      $c->notify(error => $json->{errstr});
      return;
    }

    # policy service error messages
    elsif ($json->{status}) {
      $c->notify(error => 'Middleware error ' . $json->{status});
      return;
    };
  };

  # Doesn't matter what - there is a failure!
  if ($failure) {
    $c->notify(error => (
      ($res->{code} ? $res->{code} . ': ' : '') .
        ($res->{message} ? $res->{message} : 'Unknown error') .
        ' (remote)'
      ));
  };

  return;
};
517
518
Nils Diewald8f4b5da2014-12-03 22:13:39 +0000519# Cleanup array of matches
# Cleans up every match of the given array reference with _map_match
# and returns the cleaned matches as a list (empty, if nothing was
# passed).
sub _map_matches {
  my $matches = $_[0];
  return () unless $matches;
  return map { _map_match($_) } @$matches;
};
524
525
526# Cleanup single match
# Cleans up the identifiers of a single match in place.
#
# The 'match-<corpus>!<corpus>_<doc>-' prefix is removed from ID and the
# '<corpus>_' prefix is removed from docID. If no textID is given, docID
# is split at dots into docID and textID.
# Fields that are not defined are left untouched.
#
# Returns the (modified) match, or nothing if no match was passed.
sub _map_match {
  my $match = shift or return;

  $match->{ID} =~ s/^match\-[^!]+![^-]+-// if defined $match->{ID};

  if (defined $match->{docID}) {
    $match->{docID} =~ s/^[^_]+_//;

    # Legacy: In old versions the text_id was part of the doc_id
    unless ($match->{textID}) {
      ($match->{docID}, $match->{textID}) = split /\./, $match->{docID};
    };
  };

  return $match;
};
538
539
Nils Diewald87507832015-05-01 23:36:41 +0000540# Build query url
# Creates the URL for a query request.
#
# The parameters 'cutoff', 'query_language' (defaults to 'poliqarp') and
# 'no_cache' are transferred to the index first. The query part of the
# returned URL is then built from the index (q, ql, and - if set -
# page, count and cutoff) on top of the API address of the index.
sub _query_url {
  my $index   = shift;
  my %options = @_;

  # Transfer parameters to the index
  $index->cutoff(delete $options{cutoff});
  $index->query_language(delete $options{query_language} // 'poliqarp');

  # Should results be cached? Defaults to "yes"
  $index->no_cache(1) if $options{no_cache};

  # Query and query language are always passed
  my %query;
  $query{q}  = $index->query;
  $query{ql} = $index->query_language;

  # Paging information is only passed if set
  foreach my $pair ([page => 'start_page'], [count => 'items_per_page']) {
    my ($key, $accessor) = @$pair;
    my $value = $index->$accessor;
    $query{$key} = $value if $value;
  };

  $query{cutoff} = 'true' if $index->cutoff;

  # Todo: support corpus and collection
  # Create query url
  my $url = Mojo::URL->new($index->api);
  $url->query(\%query);
  return $url;
};
567
568
5691;
570
Nils Diewald8f4b5da2014-12-03 22:13:39 +0000571
Nils Diewald996aa552014-12-02 03:26:44 +0000572__END__
573
574=pod
575
Nils Diewald9dfe0102015-05-19 16:14:06 +0000576=encoding utf8
Nils Diewald996aa552014-12-02 03:26:44 +0000577
Nils Diewald9dfe0102015-05-19 16:14:06 +0000578=head1 NAME
579
580Kalamar::API
581
582=head1 DESCRIPTION
583
584L<Kalamar::API> is a search engine class for L<Mojolicious::Plugin::Search>
585that uses the KorAP Web API.
586
587B<The Web API as well as L<Mojolicious::Plugin::Search> are not stable yet,
588so this class is expected to change in the near future. Do not rely on its API!>
589
590
591=head1 METHODS
592
593L<Kalamar::API> inherits all methods from L<Mojolicious::Plugin> and
594implements the following new ones.
595
596
597=head2 register
598
599See L<Mojolicious::Plugin::Search> for registering search engines.
600In addition to the mentioned query parameters, the following parameters are supported:
601
602
603=over 2
604
605=item B<query_language>
606
607One of the supported query languages, like C<poliqarp> or C<annis>.
608
609
610=item B<cutoff>
611
612Cut off results following the current page (i.e. don't count the number of results).
613
614
615=item B<no_cache>
616
617Do not cache search results. Defaults to C<0>.
618
619
620=back
621
622In addition to the mentioned index attributes, the following attributes are supported:
623
Akron456abd92015-06-02 15:07:21 +0200624=over 2
Nils Diewald9dfe0102015-05-19 16:14:06 +0000625
626=item B<api>
627
628The API address.
629
630
631=item B<time_exceeded>
632
Report on timeouts, which may mean that not all results were retrieved.
634
635
636=item B<api_request>
637
638Report the whole API request.
639
640
641=item B<api_response>
642
643Report the whole API response (a KoralQuery object).
644
645
=item B<benchmark>
647
648Report on processing time for benchmarking.
649
650
651=item B<query_jsonld>
652
653The KoralQuery realization of the C<query> object.
654
655=back
656
657=head2 search
658
659Search the index.
660
661=head2 trace
662
663Trace query serializations.
664
665=head2 match
666
667Get match information.
668
669=head2 resource
670
671Get resource information.
672
673
674=head1 COPYRIGHT AND LICENSE
675
676Copyright (C) 2015, L<IDS Mannheim|http://www.ids-mannheim.de/>
677Author: L<Nils Diewald|http://nils-diewald.de/>
678
679Kalamar is developed as part of the L<KorAP|http://korap.ids-mannheim.de/>
680Corpus Analysis Platform at the
681L<Institute for the German Language (IDS)|http://ids-mannheim.de/>,
682member of the
683L<Leibniz-Gemeinschaft|http://www.leibniz-gemeinschaft.de/en/about-us/leibniz-competition/projekte-2011/2011-funding-line-2/>
684and supported by the L<KobRA|http://www.kobra.tu-dortmund.de> project,
685funded by the
686L<Federal Ministry of Education and Research (BMBF)|http://www.bmbf.de/en/>.
687
688Kalamar is free software published under the
Akron456abd92015-06-02 15:07:21 +0200689L<BSD-2 License|https://raw.githubusercontent.com/KorAP/Kalamar/master/LICENSE>.
Nils Diewald9dfe0102015-05-19 16:14:06 +0000690
691=cut