Exclude search results from robot indices (closes #120)

Change-Id: If6387bb7585afbba80743cad9cfcf8779517c6b3
diff --git a/Changes b/Changes
index c1db812..974abb5 100755
--- a/Changes
+++ b/Changes
@@ -1,4 +1,4 @@
-0.39 2020-08-28
+0.39 2020-09-07
         - Add information on secret file to Readme.
         - Change default API endpoint to korap.ids-mannheim.de.
         - Fix label for toggle plugins.
@@ -7,6 +7,7 @@
         - Introduce base css file for plugins.
         - Refactoring of sass files.
         - Added query form API to plugin server.
+        - Exclude search results from robot indices.
 
         WARNING: If you relied on the former default API endpoint
           being http://localhost:9999/, this will break your
diff --git a/Gruntfile.js b/Gruntfile.js
index d30b19e..8fa7819 100644
--- a/Gruntfile.js
+++ b/Gruntfile.js
@@ -122,20 +122,21 @@
 	        {
 	          expand: true,
 	          cwd: 'dev/img/',
-	          src: 'favicon.ico',
-	          dest: 'public/',
-	          filter: 'isFile',
-	          nonull:true,
-	          timestamp:true
-	        },
-	        {
-	          expand: true,
-	          cwd: 'dev/img/',
 	          src: '*.svg',
 	          dest: 'public/img/',
 	          filter: 'isFile',
 	          nonull:true,
 	          timestamp:true
+	        },
+	        {
+	          src: 'dev/img/favicon.ico',
+	          dest: 'public/favicon.ico',
+	          timestamp:true
+	        },
+          {
+	          src: 'dev/robots.txt',
+	          dest: 'public/robots.txt',
+	          timestamp: true
 	        }
 	      ]
       }
diff --git a/dev/robots.txt b/dev/robots.txt
new file mode 100644
index 0000000..88e4d02
--- /dev/null
+++ b/dev/robots.txt
@@ -0,0 +1,10 @@
+# robots.txt only applies when KorAP is accessible
+# from the domain root.
+
+User-agent: *
+Disallow: /api/
+Disallow: /css/
+Disallow: /font/
+Disallow: /js/
+Disallow: /settings/
+Disallow: /corpus/
diff --git a/lib/Kalamar.pm b/lib/Kalamar.pm
index 12ed96c..35c6637 100644
--- a/lib/Kalamar.pm
+++ b/lib/Kalamar.pm
@@ -276,7 +276,9 @@
   if ($self->navi->exists('settings')) {
     $r->get('/settings')->to(
       cb => sub {
-        return shift->render('settings')
+        my $c = shift;
+        $c->res->headers->header('X-Robots-Tag' => 'noindex');
+        return $c->render('settings');
       }
     )->name('settings_start');
     $r->get('/settings/:scope/:page')->to(
diff --git a/lib/Kalamar/Controller/Documentation.pm b/lib/Kalamar/Controller/Documentation.pm
index c73677e..e70d8f3 100644
--- a/lib/Kalamar/Controller/Documentation.pm
+++ b/lib/Kalamar/Controller/Documentation.pm
@@ -30,6 +30,7 @@
   $c->stash(sidebar_active => 1);
   $c->stash(main_class => 'page tutorial');
   $c->stash(documentation => 1);
+  $c->stash('robots' => 'index,follow');
 
   return $c->render_maybe(
     template => $c->loc('Template_' . join('_', @path), join('/', @path))
@@ -42,6 +43,9 @@
 # Contact us
 sub contact {
   my $c = shift;
+  # Ask crawlers not to index the contact page
+  $c->res->headers->header('X-Robots-Tag', 'noindex');
+
   $c->render(
     template => $c->loc('contact', 'doc/contact')
   );
diff --git a/lib/Kalamar/Controller/Proxy.pm b/lib/Kalamar/Controller/Proxy.pm
index 2078d85..c84b5b9 100644
--- a/lib/Kalamar/Controller/Proxy.pm
+++ b/lib/Kalamar/Controller/Proxy.pm
@@ -58,6 +58,7 @@
     body => sub {
       my $headers = $c->res->headers;
       $headers->header('X-Proxy' => 'Kalamar');
+      $headers->header('X-Robots-Tag' => 'noindex');
 
       # Response is a redirect
       if ($c->res->is_redirect) {
diff --git a/lib/Kalamar/Controller/Search.pm b/lib/Kalamar/Controller/Search.pm
index 5508786..6f4d431 100644
--- a/lib/Kalamar/Controller/Search.pm
+++ b/lib/Kalamar/Controller/Search.pm
@@ -57,9 +57,14 @@
 
   # No query (Check ignoring validation)
   unless ($c->param('q')) {
-    return $c->render($c->loc('Template_intro', 'intro'));
+    return $c->render(
+      $c->loc('Template_intro', 'intro'),
+      robots => 'index,follow'
+    );
   };
 
+  $c->res->headers->header('X-Robots-Tag', 'noindex');
+
   my %query = ();
   $query{q}  = $v->param('q')  // '';
   $query{ql} = $v->param('ql') // 'poliqarp';
@@ -317,6 +322,8 @@
   # Async
   $c->render_later;
 
+  $c->res->headers->header('X-Robots-Tag', 'noindex');
+
   # Request koral, maybe cached
   $c->cached_koral_p('get', $url)
 
@@ -408,6 +415,8 @@
 sub match_info {
   my $c = shift;
 
+  $c->res->headers->header('X-Robots-Tag', 'noindex');
+
   # Validate user input
   my $v = $c->validation;
   $v->optional('foundry');
diff --git a/t/meta-robots.t b/t/meta-robots.t
new file mode 100644
index 0000000..75273b9
--- /dev/null
+++ b/t/meta-robots.t
@@ -0,0 +1,88 @@
+package Kalamar::Plugin::Test;
+use Mojo::Base 'Mojolicious::Plugin';
+
+# Test plugin: adds a settings navigation entry and a matching route
+sub register {
+  my ($plugin, $app, $param) = @_;
+
+  # Add entry to settings navigation
+  $app->navi->add(settings => (
+    'OAuth Token Management', 'oauth'
+  ));
+
+  $app->routes->get('/settings/oauth')->to(
+    cb => sub {
+      my $c = shift;
+      $c->res->headers->header('X-Robots-Tag' => 'noindex');
+      $c->content_with(settings => '<p id="abc">My Settings</p>');
+      return $c->render('settings');
+    }
+  );
+};
+
+package main;
+use Mojo::Base -strict;
+use Test::More;
+use Test::Mojo;
+use Mojo::File qw/path/;
+
+
+#####################
+# Start Fake server #
+#####################
+my $mount_point = '/realapi/';
+$ENV{KALAMAR_API} = $mount_point;
+
+my $t = Test::Mojo->new('Kalamar' => {
+  Kalamar => {
+    plugins => ['Test']
+  }
+});
+
+# Mount fake backend
+# Get the fixture path
+my $fixtures_path = path(Mojo::File->new(__FILE__)->dirname, 'server');
+my $fake_backend = $t->app->plugin(
+  Mount => {
+    $mount_point =>
+      $fixtures_path->child('mock.pl')
+  }
+);
+# Configure fake backend
+$fake_backend->pattern->defaults->{app}->log($t->app->log);
+
+# Test robots meta tag and X-Robots-Tag response header
+
+$t->get_ok('/')
+  ->attr_is('meta[name=robots]', 'content', 'index,follow')
+  ->header_isnt('X-Robots-Tag', 'noindex')
+  ;
+
+$t->get_ok('/doc/ql/poliqarp-plus')
+  ->attr_is('meta[name=robots]', 'content', 'index,follow')
+  ->header_isnt('X-Robots-Tag', 'noindex')
+  ;
+
+$t->get_ok('/corpus')
+  ->status_is(200)
+  ->header_is('X-Robots-Tag', 'noindex')
+  ;
+
+$t->get_ok('/corpus/WPD15/232/39681/p2133-2134?spans=false&foundry=*&format=json')
+  ->status_is(200)
+  ->header_is('X-Robots-Tag', 'noindex')
+  ;
+
+$t->get_ok('/settings')
+  ->attr_is('meta[name=robots]', 'content', 'noindex')
+  ->header_is('X-Robots-Tag', 'noindex')
+  ;
+
+$t->get_ok('/?q=baum')
+  ->status_is(200)
+  ->text_is('#total-results', 51)
+  ->attr_is('meta[name=robots]', 'content', 'noindex')
+  ->header_is('X-Robots-Tag', 'noindex')
+  ;
+
+done_testing;
diff --git a/templates/layouts/main.html.ep b/templates/layouts/main.html.ep
index ee29e60..e23c589 100644
--- a/templates/layouts/main.html.ep
+++ b/templates/layouts/main.html.ep
@@ -13,7 +13,7 @@
     <meta http-equiv="Content-Script-Type" content="text/javascript" />
     <meta name="msapplication-TileImage" content="<%= url_for '/img/windows-tile.png' %>" />
     <meta name="msapplication-TileColor" content="#9bad0b" />
-    <meta name="robots" content="index,follow" />
+    <meta name="robots" content="<%= stash('robots') || 'noindex' %>" />
     <meta name="apple-mobile-web-app-status-bar-style" content="default" />
     <meta name="description" content="<%= $desc %>" />
     <meta name="language" content="<%= $lang %>" />