Exclude search results from robot indices (closes #120)
Change-Id: If6387bb7585afbba80743cad9cfcf8779517c6b3
diff --git a/Changes b/Changes
index c1db812..974abb5 100755
--- a/Changes
+++ b/Changes
@@ -1,4 +1,4 @@
-0.39 2020-08-28
+0.39 2020-09-07
- Add information on secret file to Readme.
- Change default API endpoint to korap.ids-mannheim.de.
- Fix label for toggle plugins.
@@ -7,6 +7,7 @@
- Introduce base css file for plugins.
- Refactoring of sass files.
- Added query form API to plugin server.
+ - Exclude search results from robot indices.
WARNING: If you relied on the former default API endpoint
being http://localhost:9999/, this will break your
diff --git a/Gruntfile.js b/Gruntfile.js
index d30b19e..8fa7819 100644
--- a/Gruntfile.js
+++ b/Gruntfile.js
@@ -122,20 +122,21 @@
{
expand: true,
cwd: 'dev/img/',
- src: 'favicon.ico',
- dest: 'public/',
- filter: 'isFile',
- nonull:true,
- timestamp:true
- },
- {
- expand: true,
- cwd: 'dev/img/',
src: '*.svg',
dest: 'public/img/',
filter: 'isFile',
nonull:true,
timestamp:true
+ },
+ {
+ src: 'dev/img/favicon.ico',
+ dest: 'public/favicon.ico',
+ timestamp:true
+ },
+ {
+ src: 'dev/robots.txt',
+ dest: 'public/robots.txt',
+ timestamp: true
}
]
}
diff --git a/dev/robots.txt b/dev/robots.txt
new file mode 100644
index 0000000..88e4d02
--- /dev/null
+++ b/dev/robots.txt
@@ -0,0 +1,10 @@
+# Crawlers only honor this robots.txt when KorAP is served
+# from the root of its domain (i.e. at /robots.txt).
+
+User-agent: *
+Disallow: /api/
+Disallow: /css/
+Disallow: /font/
+Disallow: /js/
+Disallow: /settings/
+Disallow: /corpus/
diff --git a/lib/Kalamar.pm b/lib/Kalamar.pm
index 12ed96c..35c6637 100644
--- a/lib/Kalamar.pm
+++ b/lib/Kalamar.pm
@@ -276,7 +276,9 @@
if ($self->navi->exists('settings')) {
$r->get('/settings')->to(
cb => sub {
- return shift->render('settings')
+ my $c = shift;
+ $c->res->headers->header('X-Robots-Tag' => 'noindex');
+ return $c->render('settings');
}
)->name('settings_start');
$r->get('/settings/:scope/:page')->to(
diff --git a/lib/Kalamar/Controller/Documentation.pm b/lib/Kalamar/Controller/Documentation.pm
index c73677e..e70d8f3 100644
--- a/lib/Kalamar/Controller/Documentation.pm
+++ b/lib/Kalamar/Controller/Documentation.pm
@@ -30,6 +30,7 @@
$c->stash(sidebar_active => 1);
$c->stash(main_class => 'page tutorial');
$c->stash(documentation => 1);
+ $c->stash('robots' => 'index,follow');
return $c->render_maybe(
template => $c->loc('Template_' . join('_', @path), join('/', @path))
@@ -42,6 +43,9 @@
# Contact us
sub contact {
my $c = shift;
+
+ $c->res->headers->header('X-Robots-Tag', 'noindex');
+
$c->render(
template => $c->loc('contact', 'doc/contact')
);
diff --git a/lib/Kalamar/Controller/Proxy.pm b/lib/Kalamar/Controller/Proxy.pm
index 2078d85..c84b5b9 100644
--- a/lib/Kalamar/Controller/Proxy.pm
+++ b/lib/Kalamar/Controller/Proxy.pm
@@ -58,6 +58,7 @@
body => sub {
my $headers = $c->res->headers;
$headers->header('X-Proxy' => 'Kalamar');
+ $headers->header('X-Robots-Tag' => 'noindex');
# Response is a redirect
if ($c->res->is_redirect) {
diff --git a/lib/Kalamar/Controller/Search.pm b/lib/Kalamar/Controller/Search.pm
index 5508786..6f4d431 100644
--- a/lib/Kalamar/Controller/Search.pm
+++ b/lib/Kalamar/Controller/Search.pm
@@ -57,9 +57,14 @@
# No query (Check ignoring validation)
unless ($c->param('q')) {
- return $c->render($c->loc('Template_intro', 'intro'));
+ return $c->render(
+ $c->loc('Template_intro', 'intro'),
+ robots => 'index,follow'
+ );
};
+ $c->res->headers->header('X-Robots-Tag', 'noindex');
+
my %query = ();
$query{q} = $v->param('q') // '';
$query{ql} = $v->param('ql') // 'poliqarp';
@@ -317,6 +322,8 @@
# Async
$c->render_later;
+ $c->res->headers->header('X-Robots-Tag', 'noindex');
+
# Request koral, maybe cached
$c->cached_koral_p('get', $url)
@@ -408,6 +415,8 @@
sub match_info {
my $c = shift;
+ $c->res->headers->header('X-Robots-Tag', 'noindex');
+
# Validate user input
my $v = $c->validation;
$v->optional('foundry');
diff --git a/t/meta-robots.t b/t/meta-robots.t
new file mode 100644
index 0000000..75273b9
--- /dev/null
+++ b/t/meta-robots.t
@@ -0,0 +1,88 @@
+package Kalamar::Plugin::Test;
+use Mojo::Base 'Mojolicious::Plugin';
+
+
+sub register {
+ my ($plugin, $app, $param) = @_;
+
+ # Add entry to settings navigation
+ $app->navi->add(settings => (
+ 'OAuth Token Management', 'oauth'
+ ));
+
+ $app->routes->get('/settings/oauth')->to(
+ cb => sub {
+ my $c = shift;
+ $c->res->headers->header('X-Robots-Tag' => 'noindex');
+ $c->content_with(settings => '<p id="abc">My Settings</p>');
+ return $c->render('settings');
+ }
+ );
+};
+
+package main;
+use Mojo::Base -strict;
+use Test::More;
+use Test::Mojo;
+use Mojo::File qw/path/;
+
+
+#####################
+# Start Fake server #
+#####################
+my $mount_point = '/realapi/';
+$ENV{KALAMAR_API} = $mount_point;
+
+my $t = Test::Mojo->new('Kalamar' => {
+ Kalamar => {
+ plugins => ['Test']
+ }
+});
+
+# Mount fake backend
+# Get the fixture path
+my $fixtures_path = path(Mojo::File->new(__FILE__)->dirname, 'server');
+my $fake_backend = $t->app->plugin(
+ Mount => {
+ $mount_point =>
+ $fixtures_path->child('mock.pl')
+ }
+);
+# Configure fake backend
+$fake_backend->pattern->defaults->{app}->log($t->app->log);
+
+# Test robots meta tag and the X-Robots-Tag response header
+
+$t->get_ok('/')
+ ->attr_is('meta[name=robots]', 'content', 'index,follow')
+ ->header_isnt('X-Robots-Tag', 'noindex')
+ ;
+
+$t->get_ok('/doc/ql/poliqarp-plus')
+ ->attr_is('meta[name=robots]', 'content', 'index,follow')
+ ->header_isnt('X-Robots-Tag', 'noindex')
+ ;
+
+$t->get_ok('/corpus')
+ ->status_is(200)
+ ->header_is('X-Robots-Tag', 'noindex')
+ ;
+
+$t->get_ok('/corpus/WPD15/232/39681/p2133-2134?spans=false&foundry=*&format=json')
+ ->status_is(200)
+ ->header_is('X-Robots-Tag', 'noindex')
+ ;
+
+$t->get_ok('/settings')
+ ->attr_is('meta[name=robots]', 'content', 'noindex')
+ ->header_is('X-Robots-Tag', 'noindex')
+ ;
+
+$t->get_ok('/?q=baum')
+ ->status_is(200)
+ ->text_is('#total-results', 51)
+ ->attr_is('meta[name=robots]', 'content', 'noindex')
+ ->header_is('X-Robots-Tag', 'noindex')
+ ;
+
+done_testing;
diff --git a/templates/layouts/main.html.ep b/templates/layouts/main.html.ep
index ee29e60..e23c589 100644
--- a/templates/layouts/main.html.ep
+++ b/templates/layouts/main.html.ep
@@ -13,7 +13,7 @@
<meta http-equiv="Content-Script-Type" content="text/javascript" />
<meta name="msapplication-TileImage" content="<%= url_for '/img/windows-tile.png' %>" />
<meta name="msapplication-TileColor" content="#9bad0b" />
- <meta name="robots" content="index,follow" />
+ <meta name="robots" content="<%= stash('robots') || 'noindex' %>" />
<meta name="apple-mobile-web-app-status-bar-style" content="default" />
<meta name="description" content="<%= $desc %>" />
<meta name="language" content="<%= $lang %>" />