blob: 9392f4da6b830ed254e3665f08aea36028118baf [file] [log] [blame]
Nils Diewald996aa552014-12-02 03:26:44 +00001package Korap::API;
2use Mojo::Base 'Mojolicious::Plugin';
Nils Diewald791b5902014-12-04 04:47:24 +00003use Scalar::Util qw/blessed weaken/;
Nils Diewald996aa552014-12-02 03:26:44 +00004use strict;
5use warnings;
6
# KorAP Search engine for Mojolicious::Plugin::Search

# Todo: Add fixtures
# Todo: Support search in corpus and virtualcollection
# Todo: Support caching everywhere!
# Todo: Correct use of stash info everywhere!
Nils Diewald996aa552014-12-02 03:26:44 +000013
# Register the plugin.
#
# Installs the KorAP specific accessors on the index class that is
# handed over on registration.
#
# Parameters:
#   $plugin      - the plugin object (not used here)
#   $mojo        - the application object (not used here)
#   $index_class - anything providing an attr() method
#   $param       - optional hash reference; its "api" entry becomes the
#                  default value of the "api" attribute
sub register {
  my (undef, undef, $index_class, $config) = @_;
  $config = {} unless $config;

  # The API endpoint is the only attribute configured from outside
  $index_class->attr(api => $config->{api});

  # Bookkeeping attributes without a default value
  my @plain_attributes = qw/
    cutoff
    query_language
    time_exceeded
    api_request
    _api_cache
    api_response
    benchmark
    query_jsonld
  /;
  $index_class->attr(\@plain_attributes);

  # Caching is switched on unless requested otherwise
  $index_class->attr(no_cache => 0);
}
31
32
# Search the index.
#
# Sends the query stored in the index object to the "search" endpoint of
# the API and hands the response over to _process_response().
#
# Parameters:
#   $self  - the engine object
#   $index - the index object (provides controller, query, api, ...)
#   @_     - key/value parameters: "context" (defaults to "paragraph")
#            plus everything understood by _query_url(); an optional
#            trailing code reference switches to non-blocking mode
#
# Returns:
#   blocking     - the index object, or nothing if no query is defined
#   non-blocking - the return value of the user agent's get() call; the
#                  callback is invoked with the index object after the
#                  response was processed (or immediately, if no query
#                  is defined)
sub search {
  my $self  = shift;
  my $index = shift;

  # Get controller
  my $c = $index->controller;

  # If the last argument is a callback, do async.
  # The variable is declared unconditionally, as the behaviour of
  # "my $cb = pop if ..." is undefined according to perlsyn.
  my $cb = (@_ && ref $_[-1] eq 'CODE') ? pop : undef;

  # No query defined
  unless ($index->query) {
    return $cb->($index) if $cb;
    return;
  }

  # Get query url
  my $url = _query_url($index, @_);

  # Cache based on URL (before context and path are added)
  $index->_api_cache('total-' . $url->to_string);
  my %param = @_;

  # Set context based on parameter
  $url->query({ context => $param{'context'} // 'paragraph' });

  # Set path to search
  $url->path('search');

  # Check cache for total results
  my $total_results;

  if (!$index->no_cache &&
        defined ($total_results = $c->chi->get($index->_api_cache))) {

    # Set total results from cache
    $index->total_results($total_results);
    $c->app->log->debug('Get total result from cache');

    # The total is already known, so the backend does not need to
    # count again - unless the cutoff was set explicitly
    $url->query({cutoff => 'true'}) unless defined $index->cutoff;
  }

  # Set api request for debugging
  $index->api_request($url->to_string);

  # Use the controller's user agent and set the inactivity timeout
  # to 2 minutes
  my $ua = $c->ua;
  $ua->inactivity_timeout(120);

  # Debugging
  $c->app->log->debug('Search for ' . $index->api_request);

  # Search non-blocking
  if ($cb) {
    return $ua->get(
      $url => sub {
        my $tx = pop;
        $self->_process_response('matches', $index, $tx);

        # NOTE(review): if this closure holds the last strong reference,
        # weakening here passes undef to the callback - confirm intent
        weaken $index;
        return $cb->($index);
      });
  }

  # Search blocking
  my $tx = $ua->get($url);
  $self->_process_response('matches', $index, $tx);
  return $index;
}
105
106
# Trace query serialization.
#
# Sends a TRACE request for the given query to the "search" endpoint and
# hands the response over to _process_response().
#
# Parameters:
#   $self  - the engine object
#   $index - the index object
#   @_     - key/value parameters: "query" is set on the index object,
#            everything else is passed to _query_url(); an optional
#            trailing code reference switches to non-blocking mode
#
# Returns:
#   blocking     - the return value of _process_response(), or nothing
#                  if no query is defined
#   non-blocking - the return value of the user agent's start() call;
#                  the callback is invoked with the index object
sub trace {
  my $self  = shift;
  my $index = shift;

  # Get controller
  my $c = $index->controller;

  # If the last argument is a callback, do async.
  # The variable is declared unconditionally, as the behaviour of
  # "my $cb = pop if ..." is undefined according to perlsyn.
  my $cb = (@_ && ref $_[-1] eq 'CODE') ? pop : undef;

  my %param = @_;

  # No query defined
  # NOTE(review): relies on the query accessor returning a false value
  # when called as a setter without a query - confirm
  unless ($index->query(delete $param{query})) {
    return $cb->($index) if $cb;
    return;
  }

  # Get query url (the query itself is already stored in the index)
  my $url = _query_url($index, %param);

  $url->path('search');

  # Use the controller's user agent and set the inactivity timeout
  # to 30 seconds
  my $ua = $c->ua;
  $ua->inactivity_timeout(30);

  # Build transaction
  my $tx = $ua->build_tx(TRACE => $url);

  # Trace non-blocking
  if ($cb) {
    weaken $index;

    return $ua->start(
      $tx => sub {
        $self->_process_response('trace', $index, pop);
        return $cb->($index);
      });
  }

  # Trace blocking - start the prepared TRACE transaction
  # (start() expects a transaction object, not a URL)
  return $self->_process_response('trace', $index, $ua->start($tx));
}
155
156
# Get match info.
#
# Requests the "matchInfo" resource of a single match and hands the
# response over to _process_response().
#
# Parameters:
#   $self  - the engine object
#   $index - the index object
#   @_     - key/value parameters: corpus_id, doc_id and match_id build
#            the path; foundry, layer and spans build the query string;
#            an optional trailing code reference switches to
#            non-blocking mode
#
# Returns:
#   blocking     - the return value of _process_response()
#   non-blocking - the return value of the user agent's get() call; the
#                  callback is invoked with the index object
sub match {
  my $self  = shift;
  my $index = shift;

  # Get controller
  my $c = $index->controller;

  # If the last argument is a callback, do async.
  # The variable is declared unconditionally, as the behaviour of
  # "my $cb = pop if ..." is undefined according to perlsyn.
  my $cb = (@_ && ref $_[-1] eq 'CODE') ? pop : undef;

  my %param = @_;

  my $url = Mojo::URL->new($index->api);

  # Use hash slice to create path
  $url->path(join('/', 'corpus', @param{qw/corpus_id doc_id match_id/}, 'matchInfo'));

  # Build match id
  # $match = 'match-' . $corpus . '!' . $corpus . '_' . $doc . '-' . $match;

  # NOTE(review): foundry is sent even if undefined, unlike layer -
  # confirm whether the backend requires the parameter
  my %query;
  $query{foundry} = $param{foundry};
  $query{layer}   = $param{layer} if defined $param{layer};
  $query{spans}   = $param{spans} ? 'true' : 'false';

  # Add query
  $url->query(\%query);

  $c->app->log->debug('Match info: ' . $url);

  # Use the controller's user agent and set the inactivity timeout
  # to 30 seconds
  my $ua = $c->ua;
  $ua->inactivity_timeout(30);

  # Match info non-blocking
  if ($cb) {
    weaken $index;
    return $ua->get(
      $url => sub {
        my $tx = pop;
        $self->_process_response('match', $index, $tx);
        return $cb->($index);
      });
  }

  # Match info blocking
  my $tx = $ua->get($url);
  return $self->_process_response('match', $index, $tx);
}
209
210
# Get resource info.
#
# Requests information on a resource type and stores it in the stash
# under "search.resource". Cached information is used if available.
#
# Parameters:
#   $self  - the engine object
#   $index - the index object
#   @_     - key/value parameters: "type" (defaults to "collection",
#            which is requested as "virtualcollection"); an optional
#            trailing code reference switches to non-blocking mode
#
# Returns:
#   cached       - the cached data (blocking) or the callback's return
#                  value (non-blocking)
#   blocking     - the return value of _process_response()
#   non-blocking - the return value of the user agent's get() call; the
#                  callback is invoked with the index object
sub resource {
  my $self  = shift;
  my $index = shift;

  # Get controller
  my $c = $index->controller;

  # If the last argument is a callback, do async.
  # The variable is declared unconditionally, as the behaviour of
  # "my $cb = pop if ..." is undefined according to perlsyn.
  my $cb = (@_ && ref $_[-1] eq 'CODE') ? pop : undef;

  my %param = @_;

  # Rename info endpoints regarding resource
  my $type = $param{type} // 'collection';
  $type = 'virtualcollection' if $type eq 'collection';

  my $url = Mojo::URL->new($index->api)->path($type);

  $c->app->log->debug('Get resource info on '. $url);

  # Check for cached information
  if (my $json = $c->chi->get($url->to_string)) {

    # TODO: That's unfortunate, as it prohibits multiple resources
    $c->app->log->debug('Get resource info from cache');
    $c->stash('search.resource' => $json);
    return $cb->($index) if $cb;
    return $json;
  }

  # Remember the cache key for the response handler
  $c->stash('search._resource_cache' => $url->to_string);

  # Use the controller's user agent and set the inactivity timeout
  # to 30 seconds
  my $ua = $c->ua;
  $ua->inactivity_timeout(30);

  # Resource info non-blocking
  if ($cb) {
    weaken $index;
    return $ua->get(
      $url => sub {
        $self->_process_response('resource', $index, pop);
        return $cb->($index);
      });
  }

  # Resource info blocking
  my $tx = $ua->get($url);
  return $self->_process_response('resource', $index, $tx);
}
261
262
# Process response - especially error messages etc.
#
# Parameters:
#   $self  - the engine object
#   $type  - kind of response: "matches", "trace", "match" or "resource"
#   $index - the index object
#   $tx    - the finished transaction
#
# Returns nothing if the transaction reports an error or the JSON body
# is unusable (the controller is notified in both cases), otherwise 1.
sub _process_response {
  my ($self, $type, $index, $tx) = @_;
  my $c = $index->controller;

  # An error has occurred
  if (my $e = $tx->error) {
    $c->notify(
      error =>
        ($e->{code} ? $e->{code} . ': ' : '') .
        $e->{message} . ' (remote)'
    );
    return;
  }

  # Response was fine
  if (my $res = $tx->success) {

    # Set api response for debugging
    $index->api_response($res->body) if $c->korap_test_port;

    # Json failure
    my $json = $res->json;
    unless ($json) {
      $c->notify(error => 'JSON response is invalid');
      return;
    }

    # Match responses are read as JSON objects by their handlers -
    # reject everything else here instead of dying on dereference
    if (($type eq 'matches' || $type eq 'match') && ref $json ne 'HASH') {
      $c->notify(error => 'JSON response is invalid');
      return;
    }

    # Dispatch to the handler for the expected response type;
    # unknown types are not handled
    my %handler_for = (
      matches  => '_process_response_matches',
      trace    => '_process_response_trace',
      match    => '_process_response_match',
      resource => '_process_response_resource'
    );
    if (my $handler = $handler_for{$type}) {
      $self->$handler($index, $json);
    }

    # Warnings and errors can only be read from JSON objects
    return 1 if ref $json ne 'HASH';

    # Add warnings (Legacy)
    if ($json->{warning}) {
      $json->{warning} =~ s/;\s+null$//;
      $c->notify(warn => $json->{warning});
    }

    $self->_notify_on_error($c, 0, $json);
  }

  # Request failed
  # NOTE(review): presumably unreachable if success is false exactly
  # when error is set - confirm against the Mojolicious version in use
  else {
    $self->_notify_on_error($c, 1, $tx->res);
  }

  return 1;
}
322
323
# Process a search response.
#
# Copies benchmark, time exceeded flag, items per page, serialized query
# and matches from the JSON response to the index object. The total
# result count is set (and cached) only if the index object does not
# know it yet.
#
# Parameters:
#   $self  - the engine object
#   $index - the index object
#   $json  - the decoded JSON response
sub _process_response_matches {
  my ($self, $index, $json) = @_;

  # All fields are read from a JSON object - ignore anything else
  # instead of dying on dereference
  return unless ref $json eq 'HASH';

  # Reformat benchmark counter, e.g. "12.3456 ms" to "12.35ms"
  my $benchmark = $json->{benchmark};
  if ($benchmark && $benchmark =~ s/\s+(m)?s$//) {
    $benchmark = sprintf("%.2f", $benchmark) . ($1 ? $1 : '') . 's';
  }

  # Set benchmark
  $index->benchmark($benchmark);

  # Set time exceeded
  if ($json->{timeExceeded} && $json->{timeExceeded} eq Mojo::JSON::true()) {
    $index->time_exceeded(1);
  }

  # Set result values
  $index->items_per_page($json->{itemsPerPage});
  $index->query_jsonld($json->{request}->{query});
  $index->results(_map_matches($json->{matches}));

  # Total results not set by stash
  if ($index->total_results == -1) {

    if ($json->{totalResults} && $json->{totalResults} > -1) {
      my $c = $index->controller;

      $c->app->log->debug('Cache total result');
      $c->chi->set($index->_api_cache => $json->{totalResults}, '120min');
      $index->total_results($json->{totalResults});
    }
  }

  return;
}
358
359
# Process match info response.
#
# Cleans up the match in $json and stores it as the result of $index.
sub _process_response_match {
  my (undef, $index, $json) = @_;

  # List context is kept on purpose: without a match the results
  # accessor is called without arguments
  my @cleaned = _map_match($json);
  $index->results(@cleaned);
}
365
366
# Process query serialization response.
#
# Stores the decoded JSON response as the serialized query of $index.
sub _process_response_trace {
  my (undef, $index, $serialization) = @_;
  $index->query_jsonld($serialization);
}
372
# Process resource info response.
#
# Puts the decoded JSON response into the stash as "search.resource" and
# caches it for 24 hours, using the key stored in the stash as
# "search._resource_cache".
sub _process_response_resource {
  my (undef, $index, $resource) = @_;
  my $controller = $index->controller;

  # TODO: That's unfortunate, as it prohibits multiple resources
  $controller->stash('search.resource' => $resource);

  my $log = $controller->app->log;
  $log->debug('Cache resource info');

  my $cache = $controller->chi;
  $cache->set(
    $controller->stash('search._resource_cache') => $resource,
    '24 hours'
  );
}
382
383
# Parse the error messages.
#
# Looks for error information in a response and notifies the controller
# about the first one found.
#
# Parameters:
#   $self    - the engine object
#   $c       - the controller (provides notify())
#   $failure - true, if the request is known to have failed
#   $res     - either the decoded JSON or a response object providing
#              a json() method
#
# Returns nothing.
sub _notify_on_error {
  my ($self, $c, $failure, $res) = @_;
  my $json = $res;

  # True for plain and blessed hash references only
  my $is_hash = sub {
    return (Scalar::Util::reftype($_[0]) // '') eq 'HASH';
  };

  # Check if the response is already json
  if (blessed $res) {
    $json = $res->json if blessed $res ne 'Mojo::JSON';
  }

  # Check json response error message - only JSON objects can carry
  # one, everything else is skipped instead of dying on dereference
  if ($is_hash->($json)) {

    # Legacy error messages
    if ($json->{error}) {
      # Temp
      $json->{error} =~ s/;\s+null$//;
      $c->notify(error => $json->{error});
      return;
    }

    # New error messages
    elsif ($json->{errstr}) {
      # Temp
      $json->{errstr} =~ s/;\s+null$//;
      $c->notify(error => $json->{errstr});
      return;
    }

    # policy service error messages
    elsif ($json->{status}) {
      $c->notify(error => 'Middleware error ' . $json->{status});
      return;
    }
  }

  # No error message found and no failure known
  return unless $failure;

  # Doesn't matter what - there is a failure!
  # Prefer the accessors of a response object to its internals
  my ($code, $message);
  if (blessed $res && $res->can('code') && $res->can('message')) {
    $code    = $res->code;
    $message = $res->message;
  }
  elsif ($is_hash->($res)) {
    ($code, $message) = @{$res}{qw/code message/};
  }

  $c->notify(error => (
    ($code ? $code . ': ' : '') .
      ($message ? $message : 'Unknown error') .
      ' (remote)'
    ));
  return;
}
429
430
# Cleanup array of matches.
#
# Expects a reference to an array of matches and returns the list of
# cleaned up matches, or an empty list if nothing was passed.
sub _map_matches {
  my $matches = shift;
  return () unless $matches;
  return map { _map_match($_) } @$matches;
}
436
437
# Cleanup single match.
#
# Strips the "match-<corpus>!<document>-" prefix from the ID and the
# "<corpus>_" prefix from the docID of a match. The match is modified
# in place.
#
# Returns the match, or nothing if no match was passed. Values that are
# not hash references are returned unchanged.
sub _map_match {
  my $match = shift or return;

  # Only JSON objects have identifiers to clean up
  return $match unless ref $match eq 'HASH';

  # Missing identifiers are left alone - a substitution would warn
  # and create the key
  $match->{ID}    =~ s/^match\-[^!]+![^-]+-// if defined $match->{ID};
  $match->{docID} =~ s/^[^_]+_//              if defined $match->{docID};

  return $match;
}
445
446
# Build the query URL.
#
# Transfers the parameters "cutoff", "query_language" (defaults to
# "poliqarp") and "no_cache" to the index object and creates a URL
# based on the API endpoint of the index, with a query string built from
# query, query language, start page, items per page and cutoff.
#
# Parameters:
#   $index - the index object
#   %param - key/value parameters
#
# Returns the URL object.
sub _query_url {
  my $index   = shift;
  my %options = @_;

  # Set cutoff from param (resets it, if not given)
  $index->cutoff(delete $options{cutoff});

  # Set query language
  my $language = delete $options{query_language};
  $index->query_language(defined $language ? $language : 'poliqarp');

  # Should results be cached? Defaults to "yes"
  if ($options{no_cache}) {
    $index->no_cache(1);
  }

  # Init the query with stuff coming from the index
  my %query;
  $query{q}  = $index->query;
  $query{ql} = $index->query_language;

  # Optional values are only added if they are set
  if (my $page = $index->start_page) {
    $query{page} = $page;
  }
  if (my $count = $index->items_per_page) {
    $query{count} = $count;
  }
  if ($index->cutoff) {
    $query{cutoff} = 'true';
  }

  # Todo: support corpus and collection
  # Create query url
  my $url = Mojo::URL->new($index->api);
  $url->query(\%query);
  return $url;
}
473
474
4751;
476
Nils Diewald8f4b5da2014-12-03 22:13:39 +0000477
Nils Diewald996aa552014-12-02 03:26:44 +0000478__END__
479
=pod

Additionally supported query parameters:
- query_language
- cutoff
- no_cache

Additional index attributes:
- api
- time_exceeded
- api_request
- api_response
- benchmark
- query_jsonld