Rename tokenizer class KorAPTokenizerImpl to KorAPDFATokenizer
Change-Id: Icad8af7582689f599eda27258cc397e6155aeefe
diff --git a/src/main/java/de/ids_mannheim/korap/tokenizer/KorAPTokenizer.java b/src/main/java/de/ids_mannheim/korap/tokenizer/KorAPTokenizer.java
index f5fa9cb..c754917 100644
--- a/src/main/java/de/ids_mannheim/korap/tokenizer/KorAPTokenizer.java
+++ b/src/main/java/de/ids_mannheim/korap/tokenizer/KorAPTokenizer.java
@@ -57,12 +57,12 @@
}
for (int i = 0; i < inputFiles.size() || (i == 0 && inputFiles.size() == 0); i++) {
- KorAPTokenizerImpl scanner = null;
+ KorAPDFATokenizer scanner = null;
String fn = (inputFiles.size() > 0 ? inputFiles.get(i) : "-");
try {
BufferedReader br = "-".equals(fn) ? new BufferedReader(new InputStreamReader(System.in)) :
new BufferedReader(new FileReader(fn));
- scanner = new KorAPTokenizerImpl(br, output_stream, true, tokens, sentencize, positions, ktt, normalize);
+ scanner = new KorAPDFATokenizer(br, output_stream, true, tokens, sentencize, positions, ktt, normalize);
scanner.scanThrough();
} catch (FileNotFoundException e) {
System.err.println("File not found : \"" + fn + "\"");
diff --git a/src/main/jflex/de/ids_mannheim/korap/tokenizer/KorAPTokenizerImpl.jflex b/src/main/jflex/de/ids_mannheim/korap/tokenizer/KorAPDFATokenizer.jflex
similarity index 99%
rename from src/main/jflex/de/ids_mannheim/korap/tokenizer/KorAPTokenizerImpl.jflex
rename to src/main/jflex/de/ids_mannheim/korap/tokenizer/KorAPDFATokenizer.jflex
index 85bcadc..a79d44b 100644
--- a/src/main/jflex/de/ids_mannheim/korap/tokenizer/KorAPTokenizerImpl.jflex
+++ b/src/main/jflex/de/ids_mannheim/korap/tokenizer/KorAPDFATokenizer.jflex
@@ -61,7 +61,7 @@
* ... which is ...
* Based on Lucene's StandardTokenizerImpl, but heavily modified.
*/
-%class KorAPTokenizerImpl
+%class KorAPDFATokenizer
%unicode
%public
%implements opennlp.tools.tokenize.Tokenizer, opennlp.tools.sentdetect.SentenceDetector
@@ -84,11 +84,11 @@
private boolean tokens = false;
private PrintStream outputStream = System.out;
- public KorAPTokenizerImpl() {
+ public KorAPDFATokenizer() {
this.zzReader = null;
}
- public KorAPTokenizerImpl(java.io.Reader in, PrintStream outputStream, boolean echo, boolean tokens, boolean sentencize, boolean positions, boolean xmlEcho, boolean normalize) {
+ public KorAPDFATokenizer(java.io.Reader in, PrintStream outputStream, boolean echo, boolean tokens, boolean sentencize, boolean positions, boolean xmlEcho, boolean normalize) {
this.zzReader = in;
if (outputStream != null)
this.outputStream = outputStream;
diff --git a/src/test/java/de/ids_mannheim/korap/tokenizer/SentenceSplitterTest.java b/src/test/java/de/ids_mannheim/korap/tokenizer/SentenceSplitterTest.java
index cc5d4a5..59a181a 100644
--- a/src/test/java/de/ids_mannheim/korap/tokenizer/SentenceSplitterTest.java
+++ b/src/test/java/de/ids_mannheim/korap/tokenizer/SentenceSplitterTest.java
@@ -12,21 +12,21 @@
@Test
public void testSentSplitterSimple () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] sentences = tok.sentDetect("Der alte Mann.");
assertEquals(sentences.length, 1);
}
@Test
public void testSentSplitterAbbr () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] sentences = tok.sentDetect("Der Vorsitzende der Abk. hat gewählt.");
assertEquals(sentences.length, 1);
}
@Test
public void testSentSplitterHost1 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] sentences = tok.sentDetect("Gefunden auf wikipedia.org.");
assertEquals(sentences.length, 1);
}
@@ -34,14 +34,14 @@
@Test
@Ignore
public void testSentSplitterHost2 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] sentences = tok.sentDetect("Gefunden auf www.wikipedia.org");
assertEquals(sentences.length, 1);
}
@Test
public void testSentSplitterEmail1 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] sentences = tok.sentDetect("Ich bin unter korap@ids-mannheim.de erreichbar.");
assertEquals(sentences.length, 1);
}
@@ -49,28 +49,28 @@
@Test
public void testSentSplitterWeb1 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] sentences = tok.sentDetect("Unsere Website ist https://korap.ids-mannheim.de/?q=Baum");
assertEquals(sentences.length, 1);
}
@Test
public void testSentSplitterServer () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] sentences = tok.sentDetect("Unser Server ist 10.0.10.51.");
assertEquals(sentences.length, 1);
}
@Test
public void testSentSplitterNum () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] sentences = tok.sentDetect("Zu 50.4% ist es sicher");
assertEquals(sentences.length, 1);
}
@Test
public void testSentSplitterDate () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] sentences = tok.sentDetect("Der Termin ist am 5.9.2018");
assertEquals(sentences.length, 1);
}
@@ -78,14 +78,14 @@
@Test
// Probably interpreted as HOST
public void testSentSplitterFileExtension1 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] sentences = tok.sentDetect("Ich habe die readme.txt heruntergeladen");
assertEquals(sentences.length, 1);
}
@Test
public void testSentMultiMarker () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] sentences = tok.sentDetect("Ausschalten!!! Hast Du nicht gehört???");
assertEquals("Ausschalten!!!", sentences[0]);
assertEquals("Hast Du nicht gehört???", sentences[1]);
@@ -95,7 +95,7 @@
@Test
@Ignore
public void testSentSplitterQuote () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] sentences = tok.sentDetect("\"Ausschalten!!!\", sagte er. \"Hast Du nicht gehört???\"");
assertEquals("\"Ausschalten!!!\", sagte er.", sentences[0]);
assertEquals("\"Hast Du nicht gehört???\"", sentences[1]);
diff --git a/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerCoverTest.java b/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerCoverTest.java
index b635ee5..a2da7fb 100644
--- a/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerCoverTest.java
+++ b/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerCoverTest.java
@@ -116,7 +116,7 @@
* with the gold standard and return the sum of
* levenshtein distances.
*/
- public int distanceToGoldStandard (KorAPTokenizerImpl tok, String suite, String postings) {
+ public int distanceToGoldStandard (KorAPDFATokenizer tok, String suite, String postings) {
// Load raw postings
EmpiristScanner esRaw = new EmpiristScanner(
@@ -159,7 +159,7 @@
public void testTokenizerCoverEmpiristCmc () {
// Create tokenizer object
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String test = "cmc_test_blog_comment";
int dist = distanceToGoldStandard(tok, "test_cmc", test);
@@ -191,7 +191,7 @@
public void testTokenizerCoverEmpiristWeb () {
// Create tokenizer object
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String test = "web_test_001";
int dist = distanceToGoldStandard(tok, "test_web", test);
diff --git a/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerTest.java b/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerTest.java
index fe694be..bbe8080 100644
--- a/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerTest.java
+++ b/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerTest.java
@@ -16,7 +16,7 @@
@Test
public void testTokenizerSimple () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Der alte Mann");
assertEquals(tokens[0], "Der");
assertEquals(tokens[1], "alte");
@@ -34,7 +34,7 @@
@Test
@Ignore
public void testTokenizerAbbr () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Der Vorsitzende der F.D.P. hat gewählt");
assertEquals(tokens[0], "Der");
assertEquals(tokens[1], "Vorsitzende");
@@ -47,7 +47,7 @@
@Test
public void testTokenizerHost1 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Gefunden auf wikipedia.org");
assertEquals(tokens[0], "Gefunden");
assertEquals(tokens[1], "auf");
@@ -58,7 +58,7 @@
@Test
@Ignore
public void testTokenizerHost2 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Gefunden auf www.wikipedia.org");
assertEquals(tokens[0], "Gefunden");
assertEquals(tokens[1], "auf");
@@ -68,7 +68,7 @@
@Test
public void testTokenizerDash () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Das war -- spitze");
assertEquals(tokens[0], "Das");
assertEquals(tokens[1], "war");
@@ -79,7 +79,7 @@
@Test
public void testTokenizerEmail1 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Ich bin unter korap@ids-mannheim.de erreichbar.");
assertEquals(tokens[0], "Ich");
assertEquals(tokens[1], "bin");
@@ -92,7 +92,7 @@
@Test
public void testTokenizerEmail2 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Oder unter korap[at]ids-mannheim[dot]de.");
assertEquals(tokens[0], "Oder");
assertEquals(tokens[1], "unter");
@@ -104,7 +104,7 @@
@Test
@Ignore
public void testTokenizerEmail3 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Oder unter korap(at)ids-mannheim(dot)de.");
assertEquals(tokens[0], "Oder");
assertEquals(tokens[1], "unter");
@@ -115,7 +115,7 @@
@Test
public void testTokenizerDoNotAcceptQuotedEmailNames () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("\"John Doe\"@xx.com");
assertEquals("\"", tokens[0]);
assertEquals("John", tokens[1]);
@@ -129,7 +129,7 @@
@Test
public void testTokenizerTwitter () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Folgt @korap und #korap");
assertEquals(tokens[0], "Folgt");
assertEquals(tokens[1], "@korap");
@@ -140,7 +140,7 @@
@Test
public void testTokenizerWeb1 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Unsere Website ist https://korap.ids-mannheim.de/?q=Baum");
assertEquals(tokens[0], "Unsere");
assertEquals(tokens[1], "Website");
@@ -152,7 +152,7 @@
@Test
@Ignore
public void testTokenizerWeb2 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Wir sind auch im Internet (https://korap.ids-mannheim.de/?q=Baum)");
assertEquals(tokens[0], "Wir");
assertEquals(tokens[1], "sind");
@@ -168,7 +168,7 @@
@Test
@Ignore
public void testTokenizerWeb3 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Die Adresse ist https://korap.ids-mannheim.de/?q=Baum.");
assertEquals(tokens[0], "Die");
assertEquals(tokens[1], "Adresse");
@@ -180,7 +180,7 @@
@Test
public void testTokenizerServer () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Unser Server ist 10.0.10.51.");
assertEquals(tokens[0], "Unser");
assertEquals(tokens[1], "Server");
@@ -192,7 +192,7 @@
@Test
public void testTokenizerNum () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Zu 50,4% ist es sicher");
assertEquals(tokens[0], "Zu");
assertEquals(tokens[1], "50,4");
@@ -205,7 +205,7 @@
@Test
public void testTokenizerDate () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Der Termin ist am 5.9.2018");
assertEquals(tokens[0], "Der");
assertEquals(tokens[1], "Termin");
@@ -226,7 +226,7 @@
@Test
@Ignore
public void testTokenizerDateRange () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Der Termin war vom 4.-5.9.2018");
assertEquals(tokens[0], "Der");
assertEquals(tokens[1], "Termin");
@@ -240,7 +240,7 @@
@Test
public void testTokenizerEmoji1 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Das ist toll! ;)");
assertEquals(tokens[0], "Das");
assertEquals(tokens[1], "ist");
@@ -252,7 +252,7 @@
@Test
public void testTokenizerRef1 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Kupietz und Schmidt (2018): Korpuslinguistik");
assertEquals(tokens[0], "Kupietz");
assertEquals(tokens[1], "und");
@@ -267,7 +267,7 @@
@Test
public void testTokenizerRef2 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Kupietz und Schmidt [2018]: Korpuslinguistik");
assertEquals(tokens[0], "Kupietz");
assertEquals(tokens[1], "und");
@@ -282,7 +282,7 @@
@Test
public void testTokenizerOmission1 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Er ist ein A****loch!");
assertEquals(tokens[0], "Er");
assertEquals(tokens[1], "ist");
@@ -294,7 +294,7 @@
@Test
public void testTokenizerOmission2 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("F*ck!");
assertEquals(tokens[0], "F*ck");
assertEquals(tokens[1], "!");
@@ -303,7 +303,7 @@
@Test
public void testTokenizerOmission3 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Dieses verf***** Kleid!");
assertEquals(tokens[0], "Dieses");
assertEquals(tokens[1], "verf*****");
@@ -315,7 +315,7 @@
@Test
// Probably interpreted as HOST
public void testTokenizerFileExtension1 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Ich habe die readme.txt heruntergeladen");
assertEquals(tokens[0], "Ich");
assertEquals(tokens[1], "habe");
@@ -328,7 +328,7 @@
@Test
// Probably interpreted as HOST
public void testTokenizerFileExtension2 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Nimm die README.TXT!");
assertEquals(tokens[0], "Nimm");
assertEquals(tokens[1], "die");
@@ -340,7 +340,7 @@
@Test
// Probably interpreted as HOST
public void testTokenizerFileExtension3 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Zeig mir profile.jpeg");
assertEquals(tokens[0], "Zeig");
assertEquals(tokens[1], "mir");
@@ -350,7 +350,7 @@
@Test
public void testTokenizerFile1 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Zeig mir c:\\Dokumente\\profile.docx");
assertEquals(tokens[0], "Zeig");
assertEquals(tokens[1], "mir");
@@ -360,7 +360,7 @@
@Test
public void testTokenizerFile2 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Gehe zu /Dokumente/profile.docx");
assertEquals(tokens[0], "Gehe");
assertEquals(tokens[1], "zu");
@@ -371,7 +371,7 @@
@Test
@Ignore
public void testTokenizerFile3 () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Zeig mir c:\\Dokumente\\profile.jpeg");
assertEquals(tokens[0], "Zeig");
assertEquals(tokens[1], "mir");
@@ -381,7 +381,7 @@
@Test
public void testTokenizerPunct () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Er sagte: \"Es geht mir gut!\", daraufhin ging er.");
assertEquals(tokens[0], "Er");
assertEquals(tokens[1], "sagte");
@@ -403,7 +403,7 @@
@Test
public void testTokenizerPlusAmpersand () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("&quot;Das ist von C&amp;A!&quot;");
assertEquals(tokens[0], "&quot;");
assertEquals(tokens[1], "Das");
@@ -417,7 +417,7 @@
@Test
public void testTokenizerLongEnd () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Siehst Du?!!?");
assertEquals(tokens[0], "Siehst");
assertEquals(tokens[1], "Du");
@@ -427,7 +427,7 @@
@Test
public void testTokenizerIrishO () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Peter O'Toole");
assertEquals(tokens[0], "Peter");
assertEquals(tokens[1], "O'Toole");
@@ -436,7 +436,7 @@
@Test
public void testTokenizerAbr () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Früher bzw. später ...");
assertEquals(tokens[0], "Früher");
assertEquals(tokens[1], "bzw.");
@@ -448,7 +448,7 @@
@Test
@Ignore
public void testTokenizerUppercaseRule () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Es war spät.Morgen ist es früh.");
assertEquals(tokens[0], "Es");
assertEquals(tokens[1], "war");
@@ -464,7 +464,7 @@
@Test
public void testTokenizerOrd () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Sie erreichte den 1. Platz!");
assertEquals(tokens[0], "Sie");
assertEquals(tokens[1], "erreichte");
@@ -477,7 +477,7 @@
@Test
public void testNoZipOuputArchive () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
String[] tokens = tok.tokenize("Archive: Ich bin kein zip\n");
assertEquals(tokens[0], "Archive");
assertEquals(tokens[1], ":");
@@ -490,7 +490,7 @@
@Test
public void testZipOuputArchive () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl();
+ KorAPDFATokenizer tok = new KorAPDFATokenizer();
final ByteArrayOutputStream clearOut = new ByteArrayOutputStream();
System.setOut(new PrintStream(clearOut));
String[] tokens = tok.tokenize("Archive: ich/bin/ein.zip\n");
@@ -499,7 +499,7 @@
@Test
public void testTextBreakOutputArchive () {
- KorAPTokenizerImpl tok = new KorAPTokenizerImpl(null, null, false, false, false, true, false, false);
+ KorAPDFATokenizer tok = new KorAPDFATokenizer(null, null, false, false, false, true, false, false);
Span[] tokens = tok.tokenizePos("Text1\004\nText2 Hallo\004Rumsdibums\004Das freut mich sehr.\n");
assertEquals("Text1", tokens[0].getType());
assertEquals(tokens.length, 9 );
diff --git a/target/generated-sources/jflex/de/ids_mannheim/korap/tokenizer/KorAPTokenizerImpl.java b/target/generated-sources/jflex/de/ids_mannheim/korap/tokenizer/KorAPDFATokenizer.java
similarity index 99%
rename from target/generated-sources/jflex/de/ids_mannheim/korap/tokenizer/KorAPTokenizerImpl.java
rename to target/generated-sources/jflex/de/ids_mannheim/korap/tokenizer/KorAPDFATokenizer.java
index 0c47090..7b45aa1 100644
--- a/target/generated-sources/jflex/de/ids_mannheim/korap/tokenizer/KorAPTokenizerImpl.java
+++ b/target/generated-sources/jflex/de/ids_mannheim/korap/tokenizer/KorAPDFATokenizer.java
@@ -1,6 +1,6 @@
// DO NOT EDIT
// Generated by JFlex 1.8.2 http://jflex.de/
-// source: src/main/jflex/de/ids_mannheim/korap/tokenizer/KorAPTokenizerImpl.jflex
+// source: src/main/jflex/de/ids_mannheim/korap/tokenizer/KorAPDFATokenizer.jflex
package de.ids_mannheim.korap.tokenizer;
/**
@@ -61,7 +61,7 @@
// See https://github.com/jflex-de/jflex/issues/222
@SuppressWarnings("FallThrough")
-public class KorAPTokenizerImpl implements opennlp.tools.tokenize.Tokenizer, opennlp.tools.sentdetect.SentenceDetector {
+public class KorAPDFATokenizer implements opennlp.tools.tokenize.Tokenizer, opennlp.tools.sentdetect.SentenceDetector {
/** This character denotes the end of file. */
public static final int YYEOF = -1;
@@ -34151,11 +34151,11 @@
private boolean tokens = false;
private PrintStream outputStream = System.out;
- public KorAPTokenizerImpl() {
+ public KorAPDFATokenizer() {
this.zzReader = null;
}
- public KorAPTokenizerImpl(java.io.Reader in, PrintStream outputStream, boolean echo, boolean tokens, boolean sentencize, boolean positions, boolean xmlEcho, boolean normalize) {
+ public KorAPDFATokenizer(java.io.Reader in, PrintStream outputStream, boolean echo, boolean tokens, boolean sentencize, boolean positions, boolean xmlEcho, boolean normalize) {
this.zzReader = in;
if (outputStream != null)
this.outputStream = outputStream;
@@ -34368,7 +34368,7 @@
*
* @param in the java.io.Reader to read input from.
*/
- public KorAPTokenizerImpl(java.io.Reader in) {
+ public KorAPDFATokenizer(java.io.Reader in) {
this.zzReader = in;
}