blob: 01bda48c01573898f79a889533d954d78120b0bd [file] [log] [blame]
Nils Diewald996aa552014-12-02 03:26:44 +00001package Korap::API;
2use Mojo::Base 'Mojolicious::Plugin';
Nils Diewald791b5902014-12-04 04:47:24 +00003use Scalar::Util qw/blessed weaken/;
Nils Diewald996aa552014-12-02 03:26:44 +00004use strict;
5use warnings;
6
7# KorAP Search engine for Mojolicious::Plugin::Search
8
9# Todo: Add fixtures
10# Todo: Support search in corpus and virtualcollection
Nils Diewald8f4b5da2014-12-03 22:13:39 +000011# Todo: Support caching everywhere!
12# Todo: Correct use of stash info everywhere!
Nils Diewald996aa552014-12-02 03:26:44 +000013
14# Register the plugin
# Plugin registration hook.
# Installs the accessors used by this search engine on the index class
# handed over as the third argument. The optional fourth argument is a
# hash reference whose 'api' value is passed along with the api attribute.
sub register {
  my $index_class = $_[2];
  my $config      = $_[3] || {};

  # Endpoint of the backend, taken from the plugin parameters
  $index_class->attr(api => $config->{api});

  # Attributes declared without a value
  my @plain = qw/cutoff
                 query_language
                 time_exceeded
                 api_request
                 _api_cache
                 api_response
                 benchmark
                 query_jsonld/;
  $index_class->attr(\@plain);

  # Caching flag, declared with the value 0
  $index_class->attr(no_cache => 0);
};
31
32
33# Search the index
# Search the backend for the query of the index.
#
# Expects the index object, followed by optional key/value parameters
# (context, cutoff, query_language, no_cache) and an optional callback
# as the final argument.
# With a callback the request is non-blocking and the callback is invoked
# with the index once the response was processed. Without a callback the
# request blocks and the result of the response processing is returned.
# No request is sent if the index has no query.
sub search {
  my $self  = shift;
  my $index = shift;

  # Get controller
  my $c = $index->controller;

  # If there is a callback, do async.
  # Declaration and condition are kept apart, because the behaviour of
  # "my $x = ... if ..." is undefined in Perl.
  my $cb;
  $cb = pop if @_ && ref $_[-1] eq 'CODE';

  # No query defined
  unless ($index->query) {
    return $cb->($index) if $cb;
    return;
  };

  # Get query url
  my $url = _query_url($index, @_);

  # Cache based on URL (before context and path are added)
  $index->_api_cache('total-' . $url->to_string);
  my %param = @_;

  # Set context based on parameter
  $url->query({ context => $param{'context'} // 'paragraph' });

  # Set path to search
  $url->path('search');

  # Check cache for total results
  my $total_results;

  if (!$index->no_cache &&
        defined ($total_results = $c->chi->get($index->_api_cache))) {

    # Set total results from cache
    $index->total_results($total_results);
    $c->app->log->debug('Get total result from cache');

    # Set cutoff unless already set
    $url->query({cutoff => 'true'}) unless defined $index->cutoff;
  };

  # Set api request for debugging
  $index->api_request($url->to_string);

  # Use the user agent of the controller and set timeout to 2 minutes
  my $ua = $c->ua;
  $ua->inactivity_timeout(120);

  # Debugging
  $c->app->log->debug('Search for ' . $index->api_request);

  # Search non-blocking
  if ($cb) {
    return $ua->get(
      $url => sub {
        my $tx = pop;
        $self->_process_response('matches', $index, $tx);

        # Weaken the captured index reference before handing it over
        weaken $index;
        return $cb->($index);
      });
  };

  # Search blocking
  my $tx = $ua->get($url);
  return $self->_process_response('matches', $index, $tx);
};
104
105
Nils Diewald8f4b5da2014-12-03 22:13:39 +0000106# Trace query serialization
# Request the serialization of a query from the backend.
#
# Expects the index object, followed by key/value parameters (the query
# is passed as 'query') and an optional callback as the final argument.
# The request is sent with the TRACE method to the search path.
# With a callback the request is non-blocking and the callback is invoked
# with the index; otherwise the result of the response processing is
# returned.
sub trace {
  my $self  = shift;
  my $index = shift;

  # Get controller
  my $c = $index->controller;

  # If there is a callback, do async.
  # Declaration and condition are kept apart, because the behaviour of
  # "my $x = ... if ..." is undefined in Perl.
  my $cb;
  $cb = pop if @_ && ref $_[-1] eq 'CODE';

  my %param = @_;

  # No query defined
  # NOTE(review): this relies on the query accessor returning the query
  # (and not the index) when called with a value - confirm.
  unless ($index->query(delete $param{query})) {
    return $cb->($index) if $cb;
    return;
  };

  # Get query url
  my $url = _query_url($index, @_);

  $url->path('search');

  # Use the user agent of the controller and set timeout to 30 seconds
  my $ua = $c->ua;
  $ua->inactivity_timeout(30);

  # Build transaction
  my $tx = $ua->build_tx(TRACE => $url);

  # Trace non-blocking
  if ($cb) {
    weaken $index;

    return $ua->start(
      $tx => sub {
        $self->_process_response('trace', $index, pop);
        return $cb->($index);
      });
  };

  # Trace blocking - start the prepared TRACE transaction
  # (the user agent starts transactions, not plain URLs)
  $tx = $ua->start($tx);
  return $self->_process_response('trace', $index, $tx);
};
154
155
156# Get match info
# Retrieve information on a single match.
#
# Expects the index object, followed by key/value parameters
# (corpus_id, doc_id, match_id, foundry, layer, spans) and an optional
# callback as the final argument.
# With a callback the request is non-blocking and the callback is invoked
# with the index; otherwise the result of the response processing is
# returned.
sub match {
  my $self  = shift;
  my $index = shift;

  # Get controller
  my $c = $index->controller;

  # If there is a callback, do async.
  # Declaration and condition are kept apart, because the behaviour of
  # "my $x = ... if ..." is undefined in Perl.
  my $cb;
  $cb = pop if @_ && ref $_[-1] eq 'CODE';

  my %param = @_;

  my $url = Mojo::URL->new($index->api);

  # Use hash slice to create path
  $url->path(join('/', 'corpus', @param{qw/corpus_id doc_id match_id/}, 'matchInfo'));

  # Build match id
  # $match = 'match-' . $corpus . '!' . $corpus . '_' . $doc . '-' . $match;

  my %query;
  $query{foundry} = $param{foundry};
  $query{layer}   = $param{layer} if defined $param{layer};
  $query{spans}   = $param{spans} ? 'true' : 'false';

  # Add query
  $url->query(\%query);

  $c->app->log->debug('Match info: ' . $url);

  # Use the user agent of the controller and set timeout to 30 seconds
  my $ua = $c->ua;
  $ua->inactivity_timeout(30);

  # Match info non-blocking
  if ($cb) {
    weaken $index;
    return $ua->get(
      $url => sub {
        my $tx = pop;
        $self->_process_response('match', $index, $tx);
        return $cb->($index);
      });
  };

  # Match info blocking
  my $tx = $ua->get($url);
  return $self->_process_response('match', $index, $tx);
};
208
209
# Get information on a resource (e.g. a collection)
# Retrieve information on a resource type from the backend.
#
# Expects the index object, followed by key/value parameters (type,
# defaulting to 'collection', which is requested as 'virtualcollection')
# and an optional callback as the final argument.
# Cached information is stashed as 'search.resource' and returned (or
# passed on by invoking the callback with the index) without a request.
# Otherwise the backend is requested, non-blocking if a callback is given.
sub resource {
  my $self  = shift;
  my $index = shift;

  # Get controller
  my $c = $index->controller;

  # If there is a callback, do async.
  # Declaration and condition are kept apart, because the behaviour of
  # "my $x = ... if ..." is undefined in Perl.
  my $cb;
  $cb = pop if @_ && ref $_[-1] eq 'CODE';

  my %param = @_;

  # Rename info endpoints regarding resource
  my $type = $param{type} // 'collection';
  $type = 'virtualcollection' if $type eq 'collection';

  my $url = Mojo::URL->new($index->api)->path($type);

  $c->app->log->debug('Get resource info on '. $url);

  # Check for cached information
  if (my $json = $c->chi->get($url->to_string)) {

    # TODO: That's unfortunate, as it prohibits multiple resources
    $c->app->log->debug('Get resource info from cache');
    $c->stash('search.resource' => $json);
    return $cb->($index) if $cb;
    return $json;
  };

  # Remember the cache key for the response handler
  $c->stash('search._resource_cache' => $url->to_string);

  # Use the user agent of the controller and set timeout to 30 seconds
  my $ua = $c->ua;
  $ua->inactivity_timeout(30);

  # Resource info non-blocking
  if ($cb) {
    weaken $index;
    return $ua->get(
      $url => sub {
        $self->_process_response('resource', $index, pop);
        return $cb->($index);
      });
  };

  # Resource info blocking
  # Note: this returns the result of the response processing,
  # while the cached branch above returns the resource information.
  my $tx = $ua->get($url);
  return $self->_process_response('resource', $index, $tx);
};
260
261
262# Process response - especially error messages etc.
# Evaluate the transaction of a backend request.
#
# $type selects the handler for the decoded JSON ('matches', 'trace',
# 'match' or 'resource'), $index is the index object and $tx the finished
# transaction. Errors and warnings are reported using the notify helper
# of the controller.
# Returns nothing if the transaction reports an error or the response is
# not valid JSON, and 1 otherwise.
sub _process_response {
  my ($self, $type, $index, $tx) = @_;
  my $c = $index->controller;

  # An error has occurred
  if (my $err = $tx->error) {
    my $prefix = $err->{code} ? $err->{code} . ': ' : '';
    $c->notify(error => $prefix . $err->{message} . ' (remote)');
    return;
  };

  # Request failed
  my $res = $tx->success;
  unless ($res) {
    $self->_notify_on_error($c, 1, $tx->res);
    return 1;
  };

  # Set api response for debugging
  $index->api_response($res->body) if $c->korap_test_port;

  # Json failure
  my $json = $res->json;
  unless ($json) {
    $c->notify(error => 'JSON response is invalid');
    return;
  };

  # Hand the response over to the handler of the expected type
  my %handler_for = (
    matches  => '_process_response_matches',
    trace    => '_process_response_trace',
    match    => '_process_response_match',
    resource => '_process_response_resource'
  );
  if (my $handler = $handler_for{$type}) {
    $self->$handler($index, $json);
  };

  # Only objects can contain warnings and error messages
  return 1 unless ref $json eq 'HASH';

  # Add warnings (Legacy)
  if ($json->{warning}) {
    $json->{warning} =~ s/;\s+null$//;
    $c->notify(warn => $json->{warning});
  };

  $self->_notify_on_error($c, 0, $json);
  return 1;
};
321
322
# Process the response of a search request.
#
# Copies benchmark, time exceeded flag, items per page, the serialized
# query and the matches from the decoded JSON to the index. If the index
# has no total result count yet (-1), the count of the response is set
# and cached for 120 minutes using the cache key of the index.
sub _process_response_matches {
  my ($self, $index, $json) = @_;

  # Reformat benchmark counter
  my $benchmark = $json->{benchmark};
  if ($benchmark && $benchmark =~ s/\s+(m)?s$//) {
    $benchmark = sprintf("%.2f", $benchmark) . ($1 ? $1 : '') . 's';
  };

  # Set benchmark
  $index->benchmark($benchmark);

  # Set time exceeded
  # (explicit call syntax, so the check does not depend on the function
  # being known at compile time)
  if ($json->{timeExceeded} && $json->{timeExceeded} eq Mojo::JSON::true()) {
    $index->time_exceeded(1);
  };

  # Set result values
  $index->items_per_page($json->{itemsPerPage});
  $index->query_jsonld($json->{request}->{query});
  $index->results(_map_matches($json->{matches}));

  # Total results not set by stash
  if ($index->total_results == -1) {
    my $total = $json->{totalResults};

    # Check for definedness instead of truth, so a total of 0 results
    # is set and cached as well
    if (defined $total && $total > -1) {
      my $c = $index->controller;

      $c->app->log->debug('Cache total result');
      $c->chi->set($index->_api_cache => $total, '120min');
      $index->total_results($total);
    };
  };
};
357
358
# Process match info response
# Store the cleaned up match of a match info response
# as the result of the index.
sub _process_response_match {
  my (undef, $index, $json) = @_;

  # Keep list context - nothing is passed on for an empty response
  my @cleaned = _map_match($json);
  return $index->results(@cleaned);
};
364
365
366# Process query serialization response
# Store the decoded response of a trace request
# as the serialized query of the index.
sub _process_response_trace {
  my (undef, $index, $serialization) = @_;
  return $index->query_jsonld($serialization);
};
371
# Stash the decoded resource information as 'search.resource' and cache
# it for 24 hours, using the cache key stashed as 'search._resource_cache'.
sub _process_response_resource {
  my (undef, $index, $info) = @_;
  my $controller = $index->controller;

  # TODO: That's unfortunate, as it prohibits multiple resources
  $controller->stash('search.resource' => $info);

  $controller->app->log->debug('Cache resource info');

  return $controller->chi->set(
    $controller->stash('search._resource_cache') => $info,
    '24 hours'
  );
};
381
382
383# Parse the error messages
# Report error messages of a response using the notify helper.
#
# $c is the controller, $failure is true if the request itself failed
# and $res is either decoded JSON or an object with a json method.
# The first of the fields error, errstr and status found in a JSON
# object is reported. If there is none and $failure is true, a message
# based on the code and message fields of $res is reported instead.
sub _notify_on_error {
  my ($self, $c, $failure, $res) = @_;
  my $json = $res;

  # Check if the response is already json
  if (blessed $res) {
    $json = $res->json if blessed $res ne 'Mojo::JSON';
  };

  # Check json response error message.
  # Only hash based data can contain error fields - arrays and plain
  # scalars would otherwise raise an exception on dereferencing.
  if ($json && (Scalar::Util::reftype($json) // '') eq 'HASH') {
    if ($json->{error}) {
      # Temp
      $json->{error} =~ s/;\s+null$//;
      $c->notify(error => $json->{error});
      return;
    }

    # New error messages
    elsif ($json->{errstr}) {
      # Temp
      $json->{errstr} =~ s/;\s+null$//;
      $c->notify(error => $json->{errstr});
      return;
    }

    # policy service error messages
    elsif ($json->{status}) {
      $c->notify(error => 'Middleware error ' . $json->{status});
      return;
    };
  };

  # Doesn't matter what - there is a failure!
  if ($failure) {
    $c->notify(error => (
      ($res->{code} ? $res->{code} . ': ' : '') .
        ($res->{message} ? $res->{message} : 'Unknown error') .
        ' (remote)'
      ));
  };
};
428
429
Nils Diewald8f4b5da2014-12-03 22:13:39 +0000430# Cleanup array of matches
# Clean up every match of an array reference of matches.
# Returns an empty list if no matches are given.
sub _map_matches {
  my $matches = shift;
  return () unless $matches;
  return map { _map_match($_) } @$matches;
};
435
436
437# Cleanup single match
# Clean up a single match in place and return it.
#
# Strips the leading 'match-<corpus>!<document>-' part from the ID field
# and everything up to the first underscore from the docID field.
# Returns nothing if no match is given. Fields that are not defined are
# left untouched, so they neither raise warnings nor get created.
sub _map_match {
  my $x = shift or return;
  $x->{ID}    =~ s/^match\-[^!]+![^-]+-// if defined $x->{ID};
  $x->{docID} =~ s/^[^_]+_//             if defined $x->{docID};
  return $x;
};
444
445
# Build the URL for a query request.
#
# Expects the index object, followed by key/value parameters. The
# parameters cutoff, query_language (defaulting to 'poliqarp') and
# no_cache are transferred to the index. The query parameters q, ql,
# page, count and cutoff are then taken from the index and set on a
# URL created from the api attribute of the index.
sub _query_url {
  my $index = shift;
  my %opt   = @_;

  # Set cutoff from param
  $index->cutoff(delete $opt{cutoff});

  # Set query language
  my $language = delete $opt{query_language};
  $index->query_language($language // 'poliqarp');

  # Should results be cached? Defaults to "yes"
  if ($opt{no_cache}) {
    $index->no_cache(1);
  };

  # Init the query with stuff coming from the index
  my %query;
  $query{q}  = $index->query;
  $query{ql} = $index->query_language;

  if ($index->start_page) {
    $query{page} = $index->start_page;
  };

  if ($index->items_per_page) {
    $query{count} = $index->items_per_page;
  };

  if ($index->cutoff) {
    $query{cutoff} = 'true';
  };

  # Todo: support corpus and collection
  # Create query url
  my $url = Mojo::URL->new($index->api);
  $url->query(\%query);
  return $url;
};
472
473
4741;
475
Nils Diewald8f4b5da2014-12-03 22:13:39 +0000476
Nils Diewald996aa552014-12-02 03:26:44 +0000477__END__
478
479=pod
480
481Additionally supported query parameters:
482- query_language
483- cutoff
484- no_cache
485
486Additional index attributes:
487- api
488- time_exceeded
489- api_request
490- api_response
491- benchmark
492- query_jsonld