Broken commit: Prepending in TextPrependTokenizer

Change-Id: Id84d258a433eb536239262682d7d2f4000caf077
diff --git a/pom.xml b/pom.xml
index 1094693..5b72473 100644
--- a/pom.xml
+++ b/pom.xml
@@ -196,19 +196,23 @@
 			annotation <dependency> <groupId>KorapAnnotationProcessor</groupId> <artifactId>KorapAnnotationProcessor</artifactId> 
 			<version>0.0.1-SNAPSHOT</version> <scope>compile</scope> </dependency> -->
 
-		<!-- Some language extensions like StringUtil -->
+		<!-- Some language extensions like StringUtils, IOUtils ... -->
 		<dependency>
 			<groupId>commons-lang</groupId>
 			<artifactId>commons-lang</artifactId>
 			<version>2.3</version>
 		</dependency>
-
 		<dependency>
 			<groupId>commons-cli</groupId>
 			<artifactId>commons-cli</artifactId>
 			<version>1.3.1</version>
 		</dependency>
-	</dependencies>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-io</artifactId>
+      <version>1.3.2</version>
+    </dependency>
+  </dependencies>
 
 	<build>
 		<sourceDirectory>${basedir}/src/main/java</sourceDirectory>
diff --git a/src/main/java/de/ids_mannheim/korap/index/TextAnalyzer.java b/src/main/java/de/ids_mannheim/korap/index/TextAnalyzer.java
index 7ea159c..5875c3e 100644
--- a/src/main/java/de/ids_mannheim/korap/index/TextAnalyzer.java
+++ b/src/main/java/de/ids_mannheim/korap/index/TextAnalyzer.java
@@ -23,16 +23,8 @@
     protected TokenStreamComponents createComponents (final String fieldName) {
 		final Tokenizer source = new TextPrependTokenizer();
         TokenStream sink = new LowerCaseFilter(source);
-		// sink = new TextTokenFilter(sink);
+		sink = new TextTokenFilter(sink);
 		// source.setVerbatim(this.verbatim);
 		return new TokenStreamComponents(source, sink);
     };
-
-
-	// Set verbatim
-	/*
-	public void setVerbatim (String value) {
-		this.verbatim = value;
-	}
-	*/
 };
diff --git a/src/main/java/de/ids_mannheim/korap/index/TextPrependTokenizer.java b/src/main/java/de/ids_mannheim/korap/index/TextPrependTokenizer.java
index 6747584..0613d2c 100644
--- a/src/main/java/de/ids_mannheim/korap/index/TextPrependTokenizer.java
+++ b/src/main/java/de/ids_mannheim/korap/index/TextPrependTokenizer.java
@@ -5,14 +5,16 @@
 // but prepends a verbatim string to the TokenStream
 
 import java.io.IOException;
+import java.io.Reader;
 
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-//  de.ids_mannheim.korap.index.VerbatimAttr
 import org.apache.lucene.analysis.standard.StandardTokenizerImpl;
-
 import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.commons.io.IOUtils;
+import java.io.StringReader;
 
 
 public final class TextPrependTokenizer extends Tokenizer {
@@ -37,6 +39,14 @@
 	}
 
 	private void init() {
+		/*try {
+			System.err.println(IOUtils.toString(reader));
+		}
+		catch (IOException io) {
+			System.err.println("Exception: " + io);
+		};
+		System.err.println(input.reset());
+		*/
 		this.scanner = new StandardTokenizerImpl(input);
 		this.init = true;
 	}
diff --git a/src/main/java/de/ids_mannheim/korap/index/TextTokenFilter.java b/src/main/java/de/ids_mannheim/korap/index/TextTokenFilter.java
index 77f8d86..c86b13f 100644
--- a/src/main/java/de/ids_mannheim/korap/index/TextTokenFilter.java
+++ b/src/main/java/de/ids_mannheim/korap/index/TextTokenFilter.java
@@ -25,13 +25,13 @@
   
 	@Override
 	public final boolean incrementToken() throws IOException {
+
 		// Prepend verbatim string
-		if (this.initTerm && this.verbatim != null) {
+		if (this.initTerm) {
 			clearAttributes();
-			termAtt.append(this.verbatim);
+			termAtt.append("[PREPEND2]");
             posIncrAtt.setPositionIncrement(10000);
 			this.initTerm = false;
-			this.verbatim = null;
 			return true;
 		};
 
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
index a72033e..b7fba53 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
@@ -312,6 +312,8 @@
         ts.reset();
 
         ts.incrementToken();
+        assertEquals("[PREPEND2]", charTermAttribute.toString());
+        ts.incrementToken();
         assertEquals("[prepend]", charTermAttribute.toString());
         ts.incrementToken();
         assertEquals("der", charTermAttribute.toString());
@@ -844,7 +846,7 @@
         fd.addString("author", "Frank");
         fd.addKeyword("textClass", "Nachricht Kultur Reisen");
         fd.addInt("pubDate", 20051210);
-        fd.addText("text", "Der alte Mann ging über die Straße");
+        fd.addText("text", "Der alte  Mann ging über die Straße");
         return fd;
     };