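Replace recursion and non-essential regexes with index/substr
(as a very minor performance improvement) and fix a bug
where the regex capturing the text after the closing '-->' of a
multi-line comment (written without the /s modifier) dropped the
trailing newline of that line.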

Change-Id: I573b50b85b7dd2732c2cad3f50e22b0e9e33e2ea
diff --git a/lib/KorAP/XML/TEI.pm b/lib/KorAP/XML/TEI.pm
index 7a2174d..ac63ba8 100644
--- a/lib/KorAP/XML/TEI.pm
+++ b/lib/KorAP/XML/TEI.pm
@@ -9,31 +9,42 @@
   # sometimes is not desirable (e.g.: '...<!-- comment -->\n<w>token</w>...' would lead to '... <w>token</w>...' in $buf_in).
   # removing comments before processing the line, prevents this situation.
 
-  my ( $pfx, $sfx ) = ('','');
+  my $pfx = '';
+  my $i = 0;
 
  CHECK:
 
-  while ( $html =~ s/<!--.*?-->//g ){}; # remove all comments in actual line
+  $html =~ s/<!--.*?-->//g; # remove all comments in actual line
 
-  if ( $html =~ /^(.*)<!--/ && $html !~ /-->/ ){ # remove comment spanning over several lines
+  # Remove comment spanning over several lines
+  # No closing comment found
+  if ( index($html, '-->') == -1) {
 
-    $pfx = $1;
+    # Opening comment found
+    $i = index($html, '<!--');
+    if ($i != -1) {
+      $pfx = substr($html, 0, $i);
 
-    while ( $html = <$fh> ){
+      # Consume all lines until the closing comment is found
+      while ( $html = <$fh> ){
 
-      if ( $html =~ /-->(.*)$/ ){
-        $sfx = $1; last
+        $i = index($html, '-->');
+        if ($i != -1){
+          $html = substr($html, $i + 3);
+          last;
+        }
+
       }
 
+      $html = $pfx . ($html // '');
+      goto CHECK;
     }
-
-    $html = "$pfx$sfx";
-    goto CHECK;
   }
 
-  if ( $html =~ s/^\s*$// ){ # get next line and feed it also to this sub, if actual line is empty or only contains whitespace
+  if ( $html =~ /^\s*$/ ){ # get next line and feed it also to this sub, if actual line is empty or only contains whitespace
 
-    $html = <$fh>; delHTMLcom ( $fh, $html );
+    $html = <$fh>;
+    goto CHECK;
   }
 
   return $html
diff --git a/t/tei.t b/t/tei.t
index fcfd32e..5022478 100644
--- a/t/tei.t
+++ b/t/tei.t
@@ -38,7 +38,7 @@
 
 seek($fh, 0, 0);
 
-is(KorAP::XML::TEI::delHTMLcom($fh, 'Dies <!--'), "Dies ist  ein Test");
+is(KorAP::XML::TEI::delHTMLcom($fh, 'Dies <!--'), "Dies ist  ein Test\n");
 
 close($fh);