Restrict term length to never exceed Lucene's fixed term length
Change-Id: Icc3be552e95ca15967b544168e0c3be4d533d00f
diff --git a/src/test/java/de/ids_mannheim/korap/TestIndexer.java b/src/test/java/de/ids_mannheim/korap/TestIndexer.java
index c83d87d..8f931d5 100644
--- a/src/test/java/de/ids_mannheim/korap/TestIndexer.java
+++ b/src/test/java/de/ids_mannheim/korap/TestIndexer.java
@@ -24,6 +24,7 @@
private final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
private String info = "usage: Krill indexer";
private File outputDirectory = new File("test-index");
+ private File outputDirectory2 = new File("test-index2");
@Test
public void testArguments () throws IOException {
@@ -91,6 +92,17 @@
assertEquals(true, outputStream.toString().startsWith(info));
}
+ /** Indexes the fixture directory src/test/resources/bug and expects exactly one file to be reported as added. */
+ @Test
+ public void testUnicodeProblem () throws IOException {
+     // NOTE(review): presumably the "longatt" fixture holds an over-long term - confirm against the fixture.
+     Indexer.main(new String[] {
+         "-c", "src/test/resources/krill.properties",
+         "-i", "src/test/resources/bug", "-o", "test-index2" });
+     logger.info(outputStream.toString());
+     assertEquals("Added 1 file.\n", outputStream.toString()); // JUnit order: expected first, then actual
+ }
+
@Before
public void setOutputStream () {
System.setOut(new PrintStream(outputStream));
@@ -107,6 +119,11 @@
if (outputDirectory.exists()) {
logger.debug("Output directory exists");
deleteFile(outputDirectory);
+ deleteFile(outputDirectory2);
+ }
+ if (outputDirectory2.exists()) {
+ logger.debug("Output directory 2 exists");
+ deleteFile(outputDirectory2);
}
}
diff --git a/src/test/resources/bug/BSP-2013-01-32-longatt.json.gz b/src/test/resources/bug/BSP-2013-01-32-longatt.json.gz
new file mode 100644
index 0000000..9982a53
--- /dev/null
+++ b/src/test/resources/bug/BSP-2013-01-32-longatt.json.gz
Binary files differ