blob: 70ec277a3625685521a84fe87fac151f4f00a4f3 [file] [log] [blame]
Nils Diewald996aa552014-12-02 03:26:44 +00001package Korap::API;
2use Mojo::Base 'Mojolicious::Plugin';
3use Scalar::Util 'blessed';
4use strict;
5use warnings;
6
# KorAP Search engine for Mojolicious::Plugin::Search

# Todo: Add fixtures
# Todo: Support search in corpus and virtualcollection
# Todo: Support caching everywhere!
# Todo: Correct use of stash info everywhere!
Nils Diewald996aa552014-12-02 03:26:44 +000013
# Plugin entry point: installs the API related attributes on the index class.
#
# Parameters:
#   $plugin      - the plugin object (unused)
#   $mojo        - the application object (unused)
#   $index_class - class that receives the attributes via its attr() method
#   $param       - optional hash reference; 'api' is used as the default
#                  value of the 'api' attribute
sub register {
  my ($plugin, $mojo, $index_class, $param) = @_;
  $param = {} unless $param;

  # Attributes that are declared without a default value
  my @plain_attributes = qw/cutoff
                            query_language
                            time_exceeded
                            api_request
                            _api_cache
                            api_response
                            benchmark
                            query_jsonld/;

  # The API location defaults to the configured plugin parameter
  $index_class->attr(api => $param->{api});
  $index_class->attr(\@plain_attributes);

  # Caching is active unless it is switched off explicitly
  $index_class->attr(no_cache => 0);
};
31
32
# Search the index
#
# Parameters:
#   $index - index object holding the query and receiving the results
#   %param - optional key/value pairs; 'context' is evaluated here
#            (defaults to 'paragraph'), all pairs are handed to _query_url()
#   $cb    - optional trailing code reference; if given, the request is
#            sent non-blocking and the callback is invoked with the index
#
# Without a query no request is sent; the callback (if any) is invoked
# immediately. In blocking mode the result of _process_response() is
# returned.
sub search {
  my $self  = shift;
  my $index = shift;

  # Get controller
  my $c = $index->controller;

  # If there is a callback, do async.
  # The declaration is unconditional on purpose: "my ... if ..." has
  # undefined behaviour according to perlsyn.
  my $cb = (@_ && ref($_[-1]) eq 'CODE') ? pop @_ : undef;

  # No query defined
  unless ($index->query) {
    return $cb->($index) if $cb;
    return;
  };

  # Get query url
  my $url = _query_url($index, @_);

  # Cache based on URL - the key is built before context and path are
  # added, so these do not influence the cached total
  $index->_api_cache('total-' . $url->to_string);
  my %param = @_;

  # Set context based on parameter
  $url->query({ context => $param{'context'} // 'paragraph' });

  # Set path to search
  $url->path('search');

  # Check cache for total results
  my $total_results;

  if (!$index->no_cache &&
        defined ($total_results = $c->chi->get($index->_api_cache))) {

    # Set total results from cache
    $index->total_results($total_results);
    $c->app->log->debug('Get total result from cache');

    # The total is already known, so request a cutoff
    # unless cutoff was set explicitly
    $url->query({cutoff => 'true'}) unless defined $index->cutoff;
  };

  # Set api request for debugging
  $index->api_request($url->to_string);

  # Use the controller's user agent and set timeout to 2 minutes
  my $ua = $c->ua; # Mojo::UserAgent->new;
  $ua->inactivity_timeout(120);

  # Debugging
  $c->app->log->debug('Search for ' . $index->api_request);

  # Search non-blocking
  if ($cb) {

    $ua->get(
      $url => sub {
        my $tx = pop;
        $self->_process_response('matches', $index, $tx);
        return $cb->($index);
      });
  }

  # Search blocking
  else {
    my $tx = $ua->get($url);
    return $self->_process_response('matches', $index, $tx);
  };
};
103
104
# Trace query serialization
#
# Sends the query as a TRACE request to the 'search' path of the API and
# hands the response to _process_response() with the type 'trace'.
#
# Parameters:
#   $index - index object receiving the serialization
#   %param - key/value pairs; 'query' is set as the query of the index,
#            all pairs are handed to _query_url()
#   $cb    - optional trailing code reference; if given, the request is
#            sent non-blocking and the callback is invoked with the index
#
# In blocking mode the result of _process_response() is returned.
sub trace {
  my $self  = shift;
  my $index = shift;

  # Get controller
  my $c = $index->controller;

  # If there is a callback, do async.
  # The declaration is unconditional on purpose: "my ... if ..." has
  # undefined behaviour according to perlsyn.
  my $cb = (@_ && ref($_[-1]) eq 'CODE') ? pop @_ : undef;

  my %param = @_;

  # No query defined
  # NOTE(review): this relies on the return value of the query() setter;
  # if the setter is chained (returns the index) the check never
  # triggers - confirm against the index class
  unless ($index->query(delete $param{query})) {
    return $cb->($index) if $cb;
    return;
  };

  # Get query url
  my $url = _query_url($index, @_);

  $url->path('search');

  # Use the controller's user agent and set timeout to 30 seconds
  my $ua = $c->ua; # Mojo::UserAgent->new;
  $ua->inactivity_timeout(30);

  # Build transaction
  my $tx = $ua->build_tx(TRACE => $url);

  # non-blocking
  if ($cb) {

    # Trace non-blocking
    $ua->start(
      $tx => sub {
        $self->_process_response('trace', $index, pop);
        return $cb->($index);
      });
  }

  # Trace blocking - start the prepared TRACE transaction
  # (start() expects a transaction, not a URL)
  else {
    return $self->_process_response('trace', $index, $ua->start($tx));
  };
};
152
153
# Get match info
#
# Requests 'corpus/<corpus_id>/<doc_id>/<match_id>/matchInfo' from the API
# and hands the response to _process_response() with the type 'match'.
#
# Parameters:
#   $index - index object receiving the result
#   %param - key/value pairs: corpus_id, doc_id and match_id build the
#            path; foundry, layer (optional) and spans (boolean) build
#            the query string
#   $cb    - optional trailing code reference; if given, the request is
#            sent non-blocking and the callback is invoked with the index
#
# In blocking mode the result of _process_response() is returned.
sub match {
  my $self  = shift;
  my $index = shift;

  # Get controller
  my $c = $index->controller;

  # If there is a callback, do async.
  # The declaration is unconditional on purpose: "my ... if ..." has
  # undefined behaviour according to perlsyn.
  my $cb = (@_ && ref($_[-1]) eq 'CODE') ? pop @_ : undef;

  my %param = @_;

  my $url = Mojo::URL->new($index->api);

  # Use hash slice to create path
  $url->path(join('/', 'corpus', @param{qw/corpus_id doc_id match_id/}, 'matchInfo'));

  # Build match id
  # $match = 'match-' . $corpus . '!' . $corpus . '_' . $doc . '-' . $match;

  my %query;
  $query{foundry} = $param{foundry};
  $query{layer}   = $param{layer} if defined $param{layer};
  $query{spans}   = $param{spans} ? 'true' : 'false';

  # Add query
  $url->query(\%query);

  $c->app->log->debug('Match info: ' . $url);

  # Use the controller's user agent and set timeout to 30 seconds
  my $ua = $c->ua; # Mojo::UserAgent->new;
  $ua->inactivity_timeout(30);

  # non-blocking
  if ($cb) {
    $ua->get(
      $url => sub {
        my $tx = pop;
        $self->_process_response('match', $index, $tx);
        return $cb->($index);
      });
  }

  # Match info blocking
  else {
    my $tx = $ua->get($url);
    return $self->_process_response('match', $index, $tx);
  };
};
205
206
# Get resource information
#
# Requests information on a resource type from the API. The response is
# looked up in the cache first; the key is the request URL.
#
# Parameters:
#   $index - index object
#   %param - key/value pairs; 'type' names the resource and defaults to
#            'collection', which is requested as 'virtualcollection'
#   $cb    - optional trailing code reference; if given, the request is
#            sent non-blocking and the callback is invoked with the index
#
# In blocking mode the cached JSON is returned on a cache hit, otherwise
# the result of _process_response().
sub resource {
  my $self  = shift;
  my $index = shift;

  # Get controller
  my $c = $index->controller;

  # If there is a callback, do async.
  # The declaration is unconditional on purpose: "my ... if ..." has
  # undefined behaviour according to perlsyn.
  my $cb = (@_ && ref($_[-1]) eq 'CODE') ? pop @_ : undef;

  my %param = @_;

  # Rename info endpoints regarding resource
  my $type = $param{type} // 'collection';
  $type = 'virtualcollection' if $type eq 'collection';

  my $url = Mojo::URL->new($index->api)->path($type);

  $c->app->log->debug('Get resource info on '. $url);

  # Check for cached information
  if (my $json = $c->chi->get($url->to_string)) {

    # TODO: That's unfortunate, as it prohibits multiple resources
    $c->app->log->debug('Get resource info from cache');
    $c->stash('search.resource' => $json);
    return $cb->($index) if $cb;
    return $json;
  };

  # Remember the cache key for _process_response_resource()
  $c->stash('search._resource_cache' => $url->to_string);

  # Use the controller's user agent and set timeout to 30 seconds
  my $ua = $c->ua; # Mojo::UserAgent->new;
  $ua->inactivity_timeout(30);

  # Resource info non-blocking
  if ($cb) {
    $ua->get(
      $url => sub {
        $self->_process_response('resource', $index, pop);
        return $cb->($index);
      })
  }

  # Resource info blocking
  else {
    my $tx = $ua->get($url);
    return $self->_process_response('resource', $index, $tx);
  };
};
256
257
# Evaluate the transaction of an API request - especially error messages.
#
# Parameters:
#   $type  - kind of request: 'matches', 'trace', 'match' or 'resource'
#   $index - index object receiving the results
#   $tx    - the finished transaction
#
# Returns nothing if the transaction has an error or the response body is
# not valid JSON, otherwise 1.
sub _process_response {
  my ($self, $type, $index, $tx) = @_;
  my $c = $index->controller;

  # An error has occurred - notify and stop
  if (my $error = $tx->error) {
    my $prefix = $error->{code} ? $error->{code} . ': ' : '';
    $c->notify(error => $prefix . $error->{message} . ' (remote)');
    return;
  };

  my $res = $tx->success;

  # Request failed
  unless ($res) {
    $self->_notify_on_error($c, 1, $tx->res);
    return 1;
  };

  # Response was fine - set api response for debugging
  $index->api_response($res->body) if $c->korap_test_port;

  # Json failure
  my $json = $res->json;
  unless ($json) {
    $c->notify(error => 'JSON response is invalid');
    return;
  };

  # Dispatch to the handler expected for the request type;
  # unknown types are ignored
  my %handler_for = (
    matches  => '_process_response_matches',
    trace    => '_process_response_trace',
    match    => '_process_response_match',
    resource => '_process_response_resource'
  );

  if (my $handler = $handler_for{$type}) {
    $self->$handler($index, $json);
  };

  # Warnings and errors can only be part of an object
  return 1 if ref $json ne 'HASH';

  # Add warnings (Legacy)
  if ($json->{warning}) {
    $json->{warning} =~ s/;\s+null$//;
    $c->notify(warn => $json->{warning});
  };

  $self->_notify_on_error($c, 0, $json);

  return 1;
};
317
318
# Copy the values of a search response to the index object.
#
# Sets benchmark, time_exceeded, items_per_page, query_jsonld and results,
# and - if the index has no total yet (-1) - stores and caches the total
# number of results.
sub _process_response_matches {
  my ($self, $index, $json) = @_;

  # Reformat benchmark counter: strip the unit, round the value to
  # two decimal places and append the unit again
  my $elapsed = $json->{benchmark};
  if ($elapsed && $elapsed =~ s/\s+(m)?s$//) {
    my $unit = $1 ? $1 . 's' : 's';
    $elapsed = sprintf('%.2f', $elapsed) . $unit;
  };

  # Set benchmark
  $index->benchmark($elapsed);

  # Set time exceeded
  my $exceeded = $json->{timeExceeded};
  $index->time_exceeded(1) if $exceeded && $exceeded eq Mojo::JSON::true;

  # Set result values
  $index->items_per_page($json->{itemsPerPage});
  $index->query_jsonld($json->{request}->{query});

  my @matches = _map_matches($json->{matches});
  $index->results(@matches);

  # Total results not set by stash and delivered by the response
  # NOTE(review): a total of 0 is false and therefore neither stored
  # nor cached - confirm this is intended
  my $total = $json->{totalResults};
  if ($index->total_results == -1 && $total && $total > -1) {
    my $c = $index->controller;

    $c->app->log->debug('Cache total result');
    $c->chi->set($index->_api_cache => $total, '120min');
    $index->total_results($total);
  };
};
353
354
# Process match info response: the cleaned up match
# becomes the result of the index
sub _process_response_match {
  my (undef, $index, $match_info) = @_;
  my @cleaned = _map_match($match_info);
  return $index->results(@cleaned);
};
360
361
# Process query serialization response: the decoded JSON
# is stored as the JSON-LD query of the index
sub _process_response_trace {
  my (undef, $index, $serialization) = @_;
  return $index->query_jsonld($serialization);
};
367
# Process resource info response: the decoded JSON is stashed and cached
# under the key stashed as 'search._resource_cache'
sub _process_response_resource {
  my (undef, $index, $resource) = @_;
  my $controller = $index->controller;

  # TODO: That's unfortunate, as it prohibits multiple resources
  $controller->stash('search.resource' => $resource);
  $controller->app->log->debug('Cache resource info');

  my $cache_key = $controller->stash('search._resource_cache');
  $controller->chi->set($cache_key => $resource, '24 hours');
};
377
378
# Parse the error messages of a response and notify the user.
#
# Parameters:
#   $c       - controller, used for notify()
#   $failure - true if the request failed, so an error is reported even
#              if the response contains no known error message
#   $res     - either a decoded JSON structure or a response object
#              providing json()
#
# At most one error notification is sent. Returns nothing.
sub _notify_on_error {
  my ($self, $c, $failure, $res) = @_;
  my $json = $res;

  # Check if the response is already json
  if (blessed($res)) {
    $json = $res->json if blessed($res) ne 'Mojo::JSON';
  };

  # Check json response error message - only hash based structures can
  # carry one; arrays and scalars must not be dereferenced as a hash
  if ($json && (Scalar::Util::reftype($json) // '') eq 'HASH') {
    if ($json->{error}) {
      # Temp
      $json->{error} =~ s/;\s+null$//;
      $c->notify(error => $json->{error});
      return;
    }

    # New error messages
    elsif ($json->{errstr}) {
      # Temp
      $json->{errstr} =~ s/;\s+null$//;
      $c->notify(error => $json->{errstr});
      return;
    }

    # policy service error messages
    elsif ($json->{status}) {
      $c->notify(error => 'Middleware error ' . $json->{status});
      return;
    };
  };

  # Doesn't matter what - there is a failure!
  if ($failure) {
    my ($code, $message);

    # Prefer the accessors of a response object over its internals
    if (blessed($res) && $res->can('code') && $res->can('message')) {
      ($code, $message) = ($res->code, $res->message);
    }
    elsif ((Scalar::Util::reftype($res) // '') eq 'HASH') {
      ($code, $message) = @{$res}{qw/code message/};
    };

    $c->notify(error => (
      ($code ? $code . ': ' : '') .
        ($message ? $message : 'Unknown error') .
        ' (remote)'
      ));
  };

  return;
};
424
425
# Cleanup array of matches: returns the list of cleaned up matches
# or an empty list if there is no array reference
sub _map_matches {
  my $matches = shift;
  return () unless $matches;
  return map { _map_match($_) } @$matches;
};
431
432
# Cleanup single match
#
# Strips the 'match-<corpus>!<document>-' prefix from the ID and the
# '<corpus>_' prefix from the docID of the given hash reference. The hash
# is modified in place and returned; nothing is returned for a false
# argument. Missing fields are left untouched, so no warnings are raised
# and no keys are created.
sub _map_match {
  my $x = shift or return;
  $x->{ID}    =~ s/^match\-[^!]+![^-]+-// if defined $x->{ID};
  $x->{docID} =~ s/^[^_]+_//              if defined $x->{docID};
  return $x;
};
440
441
# Build the URL for a query request.
#
# Transfers the parameters 'cutoff', 'query_language' (defaults to
# 'poliqarp') and 'no_cache' to the index and returns a Mojo::URL object
# for the API location with the query parameters q, ql, page, count and
# cutoff.
sub _query_url {
  my ($index, %param) = @_;

  # Set cutoff and query language from param
  $index->cutoff(delete $param{cutoff});
  $index->query_language(delete $param{query_language} // 'poliqarp');

  # Should results be cached? Defaults to "yes"
  if ($param{no_cache}) {
    $index->no_cache(1);
  };

  # Init the query with stuff coming from the index
  my %query = (
    q  => $index->query,
    ql => $index->query_language
  );

  # Optional values are only added if they are set
  my $page  = $index->start_page;
  my $count = $index->items_per_page;
  $query{page}   = $page  if $page;
  $query{count}  = $count if $count;
  $query{cutoff} = 'true' if $index->cutoff;

  # Todo: support corpus and collection
  # Create query url
  my $endpoint = Mojo::URL->new($index->api);
  $endpoint->query(\%query);
  return $endpoint;
};
468
469
4701;
471
Nils Diewald8f4b5da2014-12-03 22:13:39 +0000472
Nils Diewald996aa552014-12-02 03:26:44 +0000473__END__
474
=pod

Additionally supported query parameters:
- query_language
- cutoff
- no_cache

Additional index attributes:
- api
- time_exceeded
- api_request
- api_response
- benchmark
- query_jsonld