Restrict term length to never exceed Lucene's maximum term length

Change-Id: Icc3be552e95ca15967b544168e0c3be4d533d00f
diff --git a/src/test/java/de/ids_mannheim/korap/TestIndexer.java b/src/test/java/de/ids_mannheim/korap/TestIndexer.java
index c83d87d..8f931d5 100644
--- a/src/test/java/de/ids_mannheim/korap/TestIndexer.java
+++ b/src/test/java/de/ids_mannheim/korap/TestIndexer.java
@@ -24,6 +24,7 @@
     private final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

     private String info = "usage: Krill indexer";

     private File outputDirectory = new File("test-index");

+    private File outputDirectory2 = new File("test-index2");

 

     @Test

     public void testArguments () throws IOException {

@@ -91,6 +92,17 @@
         assertEquals(true, outputStream.toString().startsWith(info));

     }

 

+    // Indexes src/test/resources/bug into outputDirectory2; exactly one file must be reported as added.

+    @Test

+    public void testUnicodeProblem () throws IOException {

+        Indexer.main(new String[] {

+                "-c", "src/test/resources/krill.properties",

+                "-i", "src/test/resources/bug",

+                "-o", outputDirectory2.getPath() });

+        logger.info(outputStream.toString());

+        assertEquals("Added 1 file.\n", outputStream.toString()); // (expected, actual)

+    }

+

     @Before

     public void setOutputStream () {

         System.setOut(new PrintStream(outputStream));

@@ -107,6 +119,11 @@
         if (outputDirectory.exists()) {

             logger.debug("Output directory exists");

             deleteFile(outputDirectory);

+            deleteFile(outputDirectory2);

+        }

+        if (outputDirectory2.exists()) {

+            logger.debug("Output directory 2 exists");

+            deleteFile(outputDirectory2);

         }

     }

 

diff --git a/src/test/resources/bug/BSP-2013-01-32-longatt.json.gz b/src/test/resources/bug/BSP-2013-01-32-longatt.json.gz
new file mode 100644
index 0000000..9982a53
--- /dev/null
+++ b/src/test/resources/bug/BSP-2013-01-32-longatt.json.gz
Binary files differ