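Minor fix for subspan deserialization

Test-side changes contained in this patch:
- whitespace-only re-indentation of TestMultiTerm, TestMultiTermToken
  and TestBenchmarkSpans
- timing printouts commented out in the TestBenchmarkSpans benchmarks
  (checkBenchmark3 still prints)
- submatch query fixtures moved to queries/submatch/{1,2,3}.jsonld
- missing @Test annotation added to TestSpanSubspanQueryJSON.testCase3
- new subspan tests: wrapped, embedded, embedded-null and
  embedded-valid-empty (the last one asserts a placeholder string and
  is marked as a known issue)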
diff --git a/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTerm.java b/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTerm.java
index 14da689..b568fbe 100644
--- a/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTerm.java
+++ b/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTerm.java
@@ -16,107 +16,106 @@
public class TestMultiTerm {
@Test
public void multiTermSimple () {
- MultiTerm mt = new MultiTerm("test");
- assertEquals(mt.term, "test");
- assertNull(mt.payload);
- assertEquals(mt.start, 0);
- assertEquals(mt.end, 0);
+ MultiTerm mt = new MultiTerm("test");
+ assertEquals(mt.term, "test");
+ assertNull(mt.payload);
+ assertEquals(mt.start, 0);
+ assertEquals(mt.end, 0);
};
@Test
public void multiTermPayload () {
- MultiTerm mt = new MultiTerm("test$5");
- assertEquals("test", mt.term);
- assertEquals(new BytesRef("5"), mt.payload);
- assertEquals(mt.start, 0);
- assertEquals(mt.end, 0);
+ MultiTerm mt = new MultiTerm("test$5");
+ assertEquals("test", mt.term);
+ assertEquals(new BytesRef("5"), mt.payload);
+ assertEquals(mt.start, 0);
+ assertEquals(mt.end, 0);
};
@Test
public void multiTermOffset () {
- MultiTerm mt = new MultiTerm("versuch#2-34");
- assertEquals(mt.term, "versuch");
- assertNull(mt.payload);
- assertEquals(mt.start, 2);
- assertEquals(mt.end, 34);
+ MultiTerm mt = new MultiTerm("versuch#2-34");
+ assertEquals(mt.term, "versuch");
+ assertNull(mt.payload);
+ assertEquals(mt.start, 2);
+ assertEquals(mt.end, 34);
};
@Test
public void multiTermOffsetPayload () {
- MultiTerm mt = new MultiTerm("example#6-42$hihi");
- assertEquals(mt.term, "example");
- assertEquals(new BytesRef("hihi"), mt.payload);
- assertEquals(mt.start,6);
- assertEquals(mt.end, 42);
+ MultiTerm mt = new MultiTerm("example#6-42$hihi");
+ assertEquals(mt.term, "example");
+ assertEquals(new BytesRef("hihi"), mt.payload);
+ assertEquals(mt.start,6);
+ assertEquals(mt.end, 42);
};
@Test
public void multiTermString () {
- MultiTerm mt = new MultiTerm("example#6-42$hihi");
- assertEquals("example#6-42$hihi", mt.toString());
- mt.term = "spassmacher";
- assertEquals("spassmacher#6-42$hihi", mt.toString());
+ MultiTerm mt = new MultiTerm("example#6-42$hihi");
+ assertEquals("example#6-42$hihi", mt.toString());
+ mt.term = "spassmacher";
+ assertEquals("spassmacher#6-42$hihi", mt.toString());
};
@Test
public void multiTermStringPayloadType () {
- MultiTerm mt = new MultiTerm("example$<i>4000");
- assertEquals("example$<?>[0,0,f,a0]", mt.toString());
+ MultiTerm mt = new MultiTerm("example$<i>4000");
+ assertEquals("example$<?>[0,0,f,a0]", mt.toString());
- mt = new MultiTerm("example$<l>757574643438");
- assertEquals("example$<?>[0,0,0,b0,62,f7,ae,ee]", mt.toString());
+ mt = new MultiTerm("example$<l>757574643438");
+ assertEquals("example$<?>[0,0,0,b0,62,f7,ae,ee]", mt.toString());
};
-
+
@Test
public void multiTermStringPayloadType2 () {
- MultiTerm mt = new MultiTerm();
- mt.setTerm("beispiel");
- mt.setStart(40);
- assertEquals(mt.getStart(), mt.start);
- mt.setEnd(50);
- assertEquals(mt.getEnd(), mt.end);
- mt.setPayload((int) 4000);
- assertEquals("beispiel#40-50$<?>[0,0,f,a0]", mt.toString());
+ MultiTerm mt = new MultiTerm();
+ mt.setTerm("beispiel");
+ mt.setStart(40);
+ assertEquals(mt.getStart(), mt.start);
+ mt.setEnd(50);
+ assertEquals(mt.getEnd(), mt.end);
+ mt.setPayload((int) 4000);
+ assertEquals("beispiel#40-50$<?>[0,0,f,a0]", mt.toString());
};
@Test
public void multiTermStringPayloadType3 () {
- MultiTerm mt = new MultiTerm("example$<b>120");
- assertEquals("example$x", mt.toString());
+ MultiTerm mt = new MultiTerm("example$<b>120");
+ assertEquals("example$x", mt.toString());
};
@Test
public void multiTermStringPayloadType4 () {
- MultiTerm mt = new MultiTerm("example$<i>420<b>120");
- assertEquals("example$<?>[0,0,1,a4,78]", mt.toString());
+ MultiTerm mt = new MultiTerm("example$<i>420<b>120");
+ assertEquals("example$<?>[0,0,1,a4,78]", mt.toString());
};
-
@Test
public void multiTermStringPayloadType5 () {
- MultiTerm mt = new MultiTerm("example$<i>4000");
- assertEquals("example$<?>[0,0,f,a0]", mt.toString());
+ MultiTerm mt = new MultiTerm("example$<i>4000");
+ assertEquals("example$<?>[0,0,f,a0]", mt.toString());
- mt = new MultiTerm("example$<i>4000<b>120");
- assertEquals("example$<?>[0,0,f,a0,78]", mt.toString());
+ mt = new MultiTerm("example$<i>4000<b>120");
+ assertEquals("example$<?>[0,0,f,a0,78]", mt.toString());
- mt = new MultiTerm("example$<l>4000<b>120");
- assertEquals("example$<?>[0,0,0,0,0,0,f,a0,78]", mt.toString());
+ mt = new MultiTerm("example$<l>4000<b>120");
+ assertEquals("example$<?>[0,0,0,0,0,0,f,a0,78]", mt.toString());
};
@Test
public void multiTermStringFail () {
- MultiTerm mt = new MultiTerm("example#56-66");
- assertEquals(56, mt.getStart());
- assertEquals(66,mt.getEnd());
+ MultiTerm mt = new MultiTerm("example#56-66");
+ assertEquals(56, mt.getStart());
+ assertEquals(66,mt.getEnd());
- mt = new MultiTerm("example#56-66$<i>a");
- assertEquals(56, mt.getStart());
- assertEquals(66, mt.getEnd());
+ mt = new MultiTerm("example#56-66$<i>a");
+ assertEquals(56, mt.getStart());
+ assertEquals(66, mt.getEnd());
- mt = new MultiTerm("example#56$<i>a");
- assertEquals(mt.getPayload(), null);
- assertEquals(mt.getStart(), 0);
- assertEquals(mt.getEnd(), 0);
+ mt = new MultiTerm("example#56$<i>a");
+ assertEquals(mt.getPayload(), null);
+ assertEquals(mt.getStart(), 0);
+ assertEquals(mt.getEnd(), 0);
};
};
diff --git a/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermToken.java b/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermToken.java
index c3ab4c0..f88b194 100644
--- a/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermToken.java
+++ b/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermToken.java
@@ -14,27 +14,34 @@
@RunWith(JUnit4.class)
public class TestMultiTermToken {
+
@Test
public void multiTermTokenSimple () {
- MultiTermToken mtt = new MultiTermToken("t:test", "a:abbruch");
- assertEquals("[t:test|a:abbruch]", mtt.toString());
- mtt.add("b:banane");
- assertEquals("[t:test|a:abbruch|b:banane]", mtt.toString());
- mtt.add("c:chaos#21-26");
- assertEquals("[(21-26)t:test|a:abbruch|b:banane|c:chaos#21-26]", mtt.toString());
- mtt.add("d:dadaismus#21-28$vergleich");
- assertEquals("[(21-28)t:test|a:abbruch|b:banane|c:chaos#21-26|d:dadaismus#21-28$vergleich]", mtt.toString());
+ MultiTermToken mtt = new MultiTermToken("t:test", "a:abbruch");
+ assertEquals("[t:test|a:abbruch]", mtt.toString());
+ mtt.add("b:banane");
+ assertEquals("[t:test|a:abbruch|b:banane]", mtt.toString());
+ mtt.add("c:chaos#21-26");
+ assertEquals("[(21-26)t:test|a:abbruch|b:banane|c:chaos#21-26]",
+ mtt.toString());
+ mtt.add("d:dadaismus#21-28$vergleich");
+ assertEquals(
+ "[(21-28)t:test|a:abbruch|b:banane|c:chaos#21-26|" +
+ "d:dadaismus#21-28$vergleich]",
+ mtt.toString()
+ );
};
@Test
public void multiTermTokenOffsets () {
- MultiTermToken mtt = new MultiTermToken("t:test#23-27");
- assertEquals("[(23-27)t:test#23-27]", mtt.toString());
- mtt.add("b:baum#34-45");
- assertEquals("[(23-45)t:test#23-27|b:baum#34-45]", mtt.toString());
- mtt.add("c:cannonball#34-45$tatsache");
- assertEquals("[(23-45)t:test#23-27|b:baum#34-45|c:cannonball#34-45$tatsache]", mtt.toString());
- assertEquals(23, mtt.start);
- assertEquals(45, mtt.end);
+ MultiTermToken mtt = new MultiTermToken("t:test#23-27");
+ assertEquals("[(23-27)t:test#23-27]", mtt.toString());
+ mtt.add("b:baum#34-45");
+ assertEquals("[(23-45)t:test#23-27|b:baum#34-45]", mtt.toString());
+ mtt.add("c:cannonball#34-45$tatsache");
+ assertEquals("[(23-45)t:test#23-27|b:baum#34-45|" +
+ "c:cannonball#34-45$tatsache]", mtt.toString());
+ assertEquals(23, mtt.start);
+ assertEquals(45, mtt.end);
};
};
diff --git a/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermTokenStream.java b/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermTokenStream.java
index c5b15c0..ff20192 100644
--- a/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermTokenStream.java
+++ b/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermTokenStream.java
Binary files differ
diff --git a/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java b/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java
index 6e85f27..1a28f7d 100644
--- a/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java
+++ b/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java
@@ -27,357 +27,353 @@
@Test
public void checkBenchmark1 () throws IOException {
- Properties prop = new Properties();
- InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
- prop.load(fr);
+ Properties prop = new Properties();
+ InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
+ prop.load(fr);
- // Get the real index
- KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+ // Get the real index
+ KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
- // Create a container for virtual collections:
- KorapCollection kc = new KorapCollection(ki);
+ // Create a container for virtual collections:
+ KorapCollection kc = new KorapCollection(ki);
- long t1 = 0, t2 = 0;
- /// cosmas20.json!!!
- String json = getString(getClass().getResource("/queries/benchmark1.jsonld").getFile());
+ long t1 = 0, t2 = 0;
+ // NOTE: stale reference to cosmas20.json - the query actually loaded below is benchmark1.jsonld
+ String json = getString(getClass().getResource("/queries/benchmark1.jsonld").getFile());
- int rounds = 100;
+ int rounds = 100;
- KorapResult kr = new KorapResult();
+ KorapResult kr = new KorapResult();
- t1 = System.nanoTime();
- for (int i = 1; i <= rounds; i++) {
- kr = new KorapSearch(json).run(ki);
- };
- t2 = System.nanoTime();
+ t1 = System.nanoTime();
+ for (int i = 1; i <= rounds; i++) {
+ kr = new KorapSearch(json).run(ki);
+ };
+ t2 = System.nanoTime();
- // assertEquals("TotalResults", 30751, kr.getTotalResults());
- assertEquals("TotalResults", kr.getTotalResults(), 4803739);
+ // assertEquals("TotalResults", 30751, kr.getTotalResults());
+ assertEquals("TotalResults", kr.getTotalResults(), 4803739);
- // System.err.println(kr.toJSON());
-
-
- // long seconds = (long) (t2 - t1 / 1000) % 60 ;
- double seconds = (double)(t2-t1) / 1000000000.0;
+ // long seconds = (long) (t2 - t1 / 1000) % 60 ;
+ double seconds = (double)(t2-t1) / 1000000000.0;
- System.out.println("It took " + seconds + " seconds");
+ // System.out.println("It took " + seconds + " seconds");
- // 100 times:
- // 43,538 sec
- // 4.874
+ // 100 times:
+ // 43,538 sec
+ // 4.874
+
+ // 1000 times:
+ // 36.613 sec
- // 1000 times:
- // 36.613 sec
-
-
- // After refactoring
- // 100 times
- // 273.58114372 seconds
-
- // After intro of attributes
- // 100 times
- // 350.171506379 seconds
+ // After refactoring
+ // 100 times
+ // 273.58114372 seconds
+
+ // After intro of attributes
+ // 100 times
+ // 350.171506379 seconds
};
@Test
public void checkBenchmark2JSON () throws IOException {
- Properties prop = new Properties();
- InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
- prop.load(fr);
+ Properties prop = new Properties();
+ InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
+ prop.load(fr);
- // Get the real index
- KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+ // Get the real index
+ KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+
+ // Create a container for virtual collections:
+ KorapCollection kc = new KorapCollection(ki);
- // Create a container for virtual collections:
- KorapCollection kc = new KorapCollection(ki);
+ long t1 = 0, t2 = 0;
+ // NOTE: stale reference to cosmas20.json - the query actually loaded below is benchmark2.jsonld
+ String json = getString(getClass().getResource("/queries/benchmark2.jsonld").getFile());
+
+ int rounds = 10000;
+
+ KorapResult kr = new KorapResult();
+ String result = new String("");
+
+ t1 = System.nanoTime();
+ double length = 0;
+ for (int i = 1; i <= rounds; i++) {
+ kr = new KorapSearch(json).run(ki);
+ length += kr.toJsonString().length();
+ };
+ t2 = System.nanoTime();
- long t1 = 0, t2 = 0;
- /// cosmas20.json!!!
- String json = getString(getClass().getResource("/queries/benchmark2.jsonld").getFile());
+ // assertEquals("TotalResults", 30751, kr.getTotalResults());
- int rounds = 10000;
+ // System.err.println(kr.toJSON());
- KorapResult kr = new KorapResult();
- String result = new String("");
-
- t1 = System.nanoTime();
- double length = 0;
- for (int i = 1; i <= rounds; i++) {
- kr = new KorapSearch(json).run(ki);
- length += kr.toJsonString().length();
- };
- t2 = System.nanoTime();
-
- // assertEquals("TotalResults", 30751, kr.getTotalResults());
-
- // System.err.println(kr.toJSON());
-
- // long seconds = (long) (t2 - t1 / 1000) % 60 ;
- double seconds = (double)(t2-t1) / 1000000000.0;
+ // long seconds = (long) (t2 - t1 / 1000) % 60 ;
+ double seconds = (double)(t2-t1) / 1000000000.0;
- System.out.println("It took " + seconds + " seconds");
+ // System.out.println("It took " + seconds + " seconds");
- // 10000 times:
- // 77.167124985 sec
+ // 10000 times:
+ // 77.167124985 sec
};
@Test
public void checkBenchmarkSentences () throws IOException {
- Properties prop = new Properties();
- InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
- prop.load(fr);
+ Properties prop = new Properties();
+ InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
+ prop.load(fr);
- // Get the real index
- KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+ // Get the real index
+ KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
- // Create a container for virtual collections:
- KorapCollection kc = new KorapCollection(ki);
+ // Create a container for virtual collections:
+ KorapCollection kc = new KorapCollection(ki);
- long t1 = 0, t2 = 0;
- /// cosmas20.json!!!
- String json = getString(getClass().getResource("/queries/benchmark4.jsonld").getFile());
+ long t1 = 0, t2 = 0;
+ // NOTE: stale reference to cosmas20.json - the query actually loaded below is benchmark4.jsonld
+ String json = getString(getClass().getResource("/queries/benchmark4.jsonld").getFile());
- int rounds = 10;
+ int rounds = 10;
- KorapResult kr = new KorapResult();
+ KorapResult kr = new KorapResult();
- t1 = System.nanoTime();
- double length = 0;
- for (int i = 1; i <= rounds; i++) {
- kr = new KorapSearch(json).run(ki);
- };
- t2 = System.nanoTime();
+ t1 = System.nanoTime();
+ double length = 0;
+ for (int i = 1; i <= rounds; i++) {
+ kr = new KorapSearch(json).run(ki);
+ };
+ t2 = System.nanoTime();
- // System.err.println(kr.getMatch(0).toJSON());
+ // System.err.println(kr.getMatch(0).toJSON());
+
+ assertEquals("TotalResults1", kr.getTotalResults(), 4116282);
+ assertEquals("TotalResults2", kr.getTotalResults(), ki.numberOf("sentences"));
- assertEquals("TotalResults1", kr.getTotalResults(), 4116282);
- assertEquals("TotalResults2", kr.getTotalResults(), ki.numberOf("sentences"));
-
- double seconds = (double)(t2-t1) / 1000000000.0;
-
- System.out.println("It took " + seconds + " seconds");
- // 100 rounds
- // 56.253 secs
+ double seconds = (double)(t2-t1) / 1000000000.0;
+
+ // System.out.println("It took " + seconds + " seconds");
+ // 100 rounds
+ // 56.253 secs
};
@Test
public void checkBenchmarkClasses () throws IOException {
- // [orth=Der]{1:[orth=Mann]{2:[orth=und]}}
+ // [orth=Der]{1:[orth=Mann]{2:[orth=und]}}
- Properties prop = new Properties();
- InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
- prop.load(fr);
+ Properties prop = new Properties();
+ InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
+ prop.load(fr);
- // Get the real index
- KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+ // Get the real index
+ KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
- // Create a container for virtual collections:
- KorapCollection kc = new KorapCollection(ki);
+ // Create a container for virtual collections:
+ KorapCollection kc = new KorapCollection(ki);
- long t1 = 0, t2 = 0;
- // Without classes
- String json = getString(getClass().getResource("/queries/benchmark5-ohne.jsonld").getFile());
+ long t1 = 0, t2 = 0;
+ // Without classes
+ String json = getString(getClass().getResource("/queries/benchmark5-ohne.jsonld").getFile());
- int rounds = 2000;
+ int rounds = 2000;
+
+ KorapResult kr = new KorapResult();
- KorapResult kr = new KorapResult();
+ t1 = System.nanoTime();
+ for (int i = 1; i <= rounds; i++) {
+ kr = new KorapSearch(json).run(ki);
+ };
+ t2 = System.nanoTime();
- t1 = System.nanoTime();
- for (int i = 1; i <= rounds; i++) {
- kr = new KorapSearch(json).run(ki);
- };
- t2 = System.nanoTime();
+ double seconds = (double)(t2-t1) / 1000000000.0;
+
+ // System.out.println("It took " + seconds + " seconds without classes");
- double seconds = (double)(t2-t1) / 1000000000.0;
-
- System.out.println("It took " + seconds + " seconds without classes");
+ t1 = 0;
+ t2 = 0;
+ // With classes
+ json = getString(getClass().getResource("/queries/benchmark5.jsonld").getFile());
+
+ t1 = System.nanoTime();
+ for (int i = 1; i <= rounds; i++) {
+ kr = new KorapSearch(json).run(ki);
+ };
+ t2 = System.nanoTime();
- t1 = 0;
- t2 = 0;
- // With classes
- json = getString(getClass().getResource("/queries/benchmark5.jsonld").getFile());
+ seconds = (double)(t2-t1) / 1000000000.0;
+
+ // System.out.println("It took " + seconds + " seconds with classes");
- t1 = System.nanoTime();
- for (int i = 1; i <= rounds; i++) {
- kr = new KorapSearch(json).run(ki);
- };
- t2 = System.nanoTime();
+ t1 = 0;
+ t2 = 0;
+ // With submatch
+ json = getString(getClass().getResource("/queries/benchmark5-submatch.jsonld").getFile());
- seconds = (double)(t2-t1) / 1000000000.0;
-
- System.out.println("It took " + seconds + " seconds with classes");
+ t1 = System.nanoTime();
+ for (int i = 1; i <= rounds; i++) {
+ kr = new KorapSearch(json).run(ki);
+ };
+ t2 = System.nanoTime();
- t1 = 0;
- t2 = 0;
- // With submatch
- json = getString(getClass().getResource("/queries/benchmark5-submatch.jsonld").getFile());
+ seconds = (double)(t2-t1) / 1000000000.0;
+
+ // System.out.println("It took " + seconds + " seconds with submatches");
- t1 = System.nanoTime();
- for (int i = 1; i <= rounds; i++) {
- kr = new KorapSearch(json).run(ki);
- };
- t2 = System.nanoTime();
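+ // FIXME: HERE IS A BUG! (not described further; presumably concerns the submatch result dumped by the commented-out line below - confirm)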
+
+ // System.err.println(kr.toJsonString());
- seconds = (double)(t2-t1) / 1000000000.0;
-
- System.out.println("It took " + seconds + " seconds with submatches");
+ // System.err.println(kr.toJSON());
- /** HERE IS A BUG! */
-
- System.err.println(kr.toJsonString());
+ // System.err.println(kr.getMatch(3).getSnippetBrackets());
+ // 2000 rounds:
+ // It took 10.872934435 seconds without classes
+ // It took 22.581117396 seconds with classes
- // System.err.println(kr.toJSON());
+ // It took 10.703933598 seconds without classes
+ // It took 19.354674517 seconds with classes
- // System.err.println(kr.getMatch(3).getSnippetBrackets());
+ // It took 10.939948726 seconds without classes
+ // It took 16.998470662 seconds with classes
+ // It took 10.900975837 seconds without classes
+ // It took 14.902590949 seconds with classes
- // 2000 rounds:
- // It took 10.872934435 seconds without classes
- // It took 22.581117396 seconds with classes
+ // It took 10.365989238 seconds without classes
+ // It took 13.833405885 seconds with classes
- // It took 10.703933598 seconds without classes
- // It took 19.354674517 seconds with classes
-
- // It took 10.939948726 seconds without classes
- // It took 16.998470662 seconds with classes
-
- // It took 10.900975837 seconds without classes
- // It took 14.902590949 seconds with classes
-
- // It took 10.365989238 seconds without classes
- // It took 13.833405885 seconds with classes
-
- // It took 15.368675425 seconds without classes
- // It took 18.347603186 seconds with classes
- // It took 15.941057294 seconds with submatches
-
- // It took 15.241253549 seconds without classes
- // It took 17.30375624 seconds with classes
- // It took 15.367171254 seconds with submatches
+ // It took 15.368675425 seconds without classes
+ // It took 18.347603186 seconds with classes
+ // It took 15.941057294 seconds with submatches
+
+ // It took 15.241253549 seconds without classes
+ // It took 17.30375624 seconds with classes
+ // It took 15.367171254 seconds with submatches
};
-
-
-
@Test
public void checkBenchmarkIndexDocuments () throws IOException {
- long t1 = 0, t2 = 0;
+ long t1 = 0, t2 = 0;
- int rounds = 10;
+ int rounds = 10;
- ArrayList<String> docs = new ArrayList<String>(700);
+ ArrayList<String> docs = new ArrayList<String>(700);
- for (int a = 0; a < 50; a++) {
- for (String d : new String[] {"00001", "00002", "00003",
- "00004", "00005", "00006", "02439"}) {
- docs.add(d);
- };
- };
+ for (int a = 0; a < 50; a++) {
+ for (String d : new String[] {
+ "00001",
+ "00002",
+ "00003",
+ "00004",
+ "00005",
+ "00006",
+ "02439"}) {
+ docs.add(d);
+ };
+ };
- t1 = System.nanoTime();
- double length = 0;
- for (int i = 1; i <= rounds; i++) {
- // Construct index
- KorapIndex ki = new KorapIndex();
+ t1 = System.nanoTime();
+ double length = 0;
+ for (int i = 1; i <= rounds; i++) {
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+
+ // Indexing test files
+ for (String d : docs) {
+ FieldDocument fd = ki.addDocFile(
+ getClass().getResource("/wiki/" + d + ".json.gz").getFile(),
+ true
+ );
+ };
+ ki.commit();
+ };
+ t2 = System.nanoTime();
- // Indexing test files
- for (String d : docs) {
- FieldDocument fd = ki.addDocFile(
- getClass().getResource("/wiki/" + d + ".json.gz").getFile(),
- true
- );
- };
- ki.commit();
- };
- t2 = System.nanoTime();
+ double seconds = (double)(t2-t1) / 1000000000.0;
+ // System.out.println("It took " + seconds + " seconds");
- double seconds = (double)(t2-t1) / 1000000000.0;
- System.out.println("It took " + seconds + " seconds");
-
- // 10 times / 350 docs:
- // 36.26158006 seconds
- // 32.52575097 seconds
- // 31.818091536 seconds
- // 32.055321123 seconds
- // 32.32125959 seconds
- // 31.726277979 seconds
- // 31.65826188 seconds
- // 31.287057537 seconds
+ // 10 times / 350 docs:
+ // 36.26158006 seconds
+ // 32.52575097 seconds
+ // 31.818091536 seconds
+ // 32.055321123 seconds
+ // 32.32125959 seconds
+ // 31.726277979 seconds
+ // 31.65826188 seconds
+ // 31.287057537 seconds
};
@Test
public void checkBenchmark3 () throws IOException {
- Properties prop = new Properties();
- InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
- prop.load(fr);
+ Properties prop = new Properties();
+ InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
+ prop.load(fr);
- // Get the real index
- KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+ // Get the real index
+ KorapIndex ki = new KorapIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
- // Create a container for virtual collections:
- KorapCollection kc = new KorapCollection(ki);
+ // Create a container for virtual collections:
+ KorapCollection kc = new KorapCollection(ki);
- long t1 = 0, t2 = 0;
- /// cosmas20.json!!!
- String json = getString(getClass().getResource("/queries/benchmark3.jsonld").getFile());
+ long t1 = 0, t2 = 0;
+ // NOTE: stale reference to cosmas20.json - the query actually loaded below is benchmark3.jsonld
+ String json = getString(getClass().getResource("/queries/benchmark3.jsonld").getFile());
- int rounds = 500;
+ int rounds = 500;
+
+ KorapResult kr = new KorapResult();
- KorapResult kr = new KorapResult();
+ t1 = System.nanoTime();
+ for (int i = 1; i <= rounds; i++) {
+ kr = new KorapSearch(json).run(ki);
+ };
+ t2 = System.nanoTime();
- t1 = System.nanoTime();
- for (int i = 1; i <= rounds; i++) {
- kr = new KorapSearch(json).run(ki);
- };
- t2 = System.nanoTime();
+ assertEquals("TotalResults", kr.getTotalResults(), 70229);
- assertEquals("TotalResults", kr.getTotalResults(), 70229);
+ // System.err.println(kr.toJSON());
- // System.err.println(kr.toJSON());
-
- // long seconds = (long) (t2 - t1 / 1000) % 60 ;
- double seconds = (double)(t2-t1) / 1000000000.0;
+ // long seconds = (long) (t2 - t1 / 1000) % 60 ;
+ double seconds = (double)(t2-t1) / 1000000000.0;
- System.out.println("It took " + seconds + " seconds");
+ System.out.println("It took " + seconds + " seconds");
- // 500 times:
- // 71.715862716 seconds
+ // 500 times:
+ // 71.715862716 seconds
};
public static String getString (String path) {
- StringBuilder contentBuilder = new StringBuilder();
- try {
- BufferedReader in = new BufferedReader(new FileReader(path));
- String str;
- while ((str = in.readLine()) != null) {
- contentBuilder.append(str);
- };
- in.close();
- } catch (IOException e) {
- fail(e.getMessage());
- }
- return contentBuilder.toString();
- };
-
- public static SpanQueryWrapper jsonQuery (String jsonFile) {
- SpanQueryWrapper sqwi;
-
- try {
- String json = getString(jsonFile);
- sqwi = new KorapQuery("tokens").fromJson(json);
- }
- catch (QueryException e) {
- fail(e.getMessage());
- sqwi = new KorapQuery("tokens").seg("???");
- };
- return sqwi;
+ StringBuilder contentBuilder = new StringBuilder();
+ try {
+ BufferedReader in = new BufferedReader(new FileReader(path));
+ String str;
+ while ((str = in.readLine()) != null) {
+ contentBuilder.append(str);
+ };
+ in.close();
+ } catch (IOException e) {
+ fail(e.getMessage());
+ }
+ return contentBuilder.toString();
};
+ public static SpanQueryWrapper jsonQuery (String jsonFile) {
+ SpanQueryWrapper sqwi;
+
+ try {
+ String json = getString(jsonFile);
+ sqwi = new KorapQuery("tokens").fromJson(json);
+ }
+ catch (QueryException e) {
+ fail(e.getMessage());
+ sqwi = new KorapQuery("tokens").seg("???");
+ };
+ return sqwi;
+ };
};
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestSpanSubspanQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestSpanSubspanQueryJSON.java
index c9f6d56..342a90f 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestSpanSubspanQueryJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestSpanSubspanQueryJSON.java
@@ -10,14 +10,13 @@
import de.ids_mannheim.korap.util.QueryException;
/**
- * @author margaretha
- *
+ * @author margaretha, diewald
*/
public class TestSpanSubspanQueryJSON {
@Test
public void testCase1() throws QueryException {
- String filepath = getClass().getResource("/queries/submatch.jsonld")
+ String filepath = getClass().getResource("/queries/submatch/1.jsonld")
.getFile();
SpanQueryWrapper sqwi = getJSONQuery(filepath);
SpanQuery sq = sqwi.toQuery();
@@ -27,20 +26,58 @@
@Test
public void testCase2() throws QueryException {
- String filepath = getClass().getResource("/queries/submatch2.jsonld")
- .getFile();
+ String filepath = getClass().getResource("/queries/submatch/2.jsonld")
+ .getFile();
SpanQueryWrapper sqwi = getJSONQuery(filepath);
SpanQuery sq = sqwi.toQuery();
assertEquals(sq.toString(), "subspan(<tokens:s />,1,4)");
}
+ @Test
public void testCase3() throws QueryException {
- String filepath = getClass().getResource("/queries/submatch3.jsonld")
+ String filepath = getClass().getResource("/queries/submatch/3.jsonld")
.getFile();
SpanQueryWrapper sqwi = getJSONQuery(filepath);
SpanQuery sq = sqwi.toQuery();
assertEquals(sq.toString(), "subspan(<tokens:s />,1,0)");
-
}
+ @Test
+ public void testCaseWrapped() throws QueryException {
+ String filepath = getClass().getResource("/queries/submatch/wrapped.jsonld")
+ .getFile();
+ SpanQueryWrapper sqwi = getJSONQuery(filepath);
+ SpanQuery sq = sqwi.toQuery();
+ assertEquals(sq.toString(), "shrink(129: spanElementDistance({129: tokens:s:der},"+
+ " {129: subspan(<tokens:s />,0,1)}, [(s[0:0], ordered, notExcluded)]))");
+ }
+
+
+ @Test
+ public void testCaseEmbedded() throws QueryException {
+ String filepath = getClass().getResource("/queries/submatch/embedded.jsonld")
+ .getFile();
+ SpanQueryWrapper sqwi = getJSONQuery(filepath);
+ SpanQuery sq = sqwi.toQuery();
+ assertEquals(sq.toString(), "spanNext({1: tokens:s:die},"+
+ " {1: subspan(spanExpansion(tokens:s:der, []{1, 100}, right),2,3)})");
+ }
+
+ @Test
+ public void testCaseEmbeddedNull() throws QueryException {
+ String filepath = getClass().getResource("/queries/submatch/embedded-null.jsonld")
+ .getFile();
+ SpanQueryWrapper sqwi = getJSONQuery(filepath);
+ SpanQuery sq = sqwi.toQuery();
+ assertEquals(sq.toString(), "tokens:s:die");
+ }
+
+ @Test
+ public void testCaseEmbeddedValidEmpty() throws QueryException {
+ String filepath = getClass().getResource("/queries/submatch/embedded-valid-empty.jsonld")
+ .getFile();
+ SpanQueryWrapper sqwi = getJSONQuery(filepath);
+ SpanQuery sq = sqwi.toQuery();
+ assertEquals(sq.toString(), "??? (Known issue)");
+ }
}
diff --git a/src/test/resources/queries/submatch.jsonld b/src/test/resources/queries/submatch/1.jsonld
similarity index 100%
rename from src/test/resources/queries/submatch.jsonld
rename to src/test/resources/queries/submatch/1.jsonld
diff --git a/src/test/resources/queries/submatch2.jsonld b/src/test/resources/queries/submatch/2.jsonld
similarity index 100%
rename from src/test/resources/queries/submatch2.jsonld
rename to src/test/resources/queries/submatch/2.jsonld
diff --git a/src/test/resources/queries/submatch3.jsonld b/src/test/resources/queries/submatch/3.jsonld
similarity index 100%
rename from src/test/resources/queries/submatch3.jsonld
rename to src/test/resources/queries/submatch/3.jsonld
diff --git a/src/test/resources/queries/submatch/embedded-null.jsonld b/src/test/resources/queries/submatch/embedded-null.jsonld
new file mode 100644
index 0000000..4672473
--- /dev/null
+++ b/src/test/resources/queries/submatch/embedded-null.jsonld
@@ -0,0 +1,39 @@
+{
+ "@context":"http://ids-mannheim.de/ns/KorAP/json-ld/v0.2/context.jsonld",
+ "collection":null,
+ "query" : {
+ "@type" : "korap:group",
+ "operation":"operation:sequence",
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "die",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:reference",
+ "operands" : [
+ {
+ "@type" : "korap:token"
+ }
+ ],
+ "operation" : "operation:focus",
+ "spanRef" : [
+ 5,
+ 6
+ ]
+ }
+ ],
+ "operation" : "operation:class"
+ }
+ ]
+ }
+}
diff --git a/src/test/resources/queries/submatch/embedded-valid-empty.jsonld b/src/test/resources/queries/submatch/embedded-valid-empty.jsonld
new file mode 100644
index 0000000..a6f22bd
--- /dev/null
+++ b/src/test/resources/queries/submatch/embedded-valid-empty.jsonld
@@ -0,0 +1,40 @@
+{
+ "@context":"http://ids-mannheim.de/ns/KorAP/json-ld/v0.2/context.jsonld",
+ "collection":null,
+ "query" : {
+ "@type" : "korap:group",
+ "operation":"operation:sequence",
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "die",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:reference",
+ "spanRef" : [2],
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "operation" : "operation:repetition",
+ "boundary" : {
+ "@type" : "korap:boundary",
+ "max" : 5,
+ "min" : 0
+ },
+ "operands" : [
+ {
+ "@type" : "korap:token"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/test/resources/queries/submatch/embedded.jsonld b/src/test/resources/queries/submatch/embedded.jsonld
new file mode 100644
index 0000000..e09bef5
--- /dev/null
+++ b/src/test/resources/queries/submatch/embedded.jsonld
@@ -0,0 +1,71 @@
+{
+ "@context":"http://ids-mannheim.de/ns/KorAP/json-ld/v0.2/context.jsonld",
+ "collection":null,
+ "query" : {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "die",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }
+ ],
+ "operation" : "operation:class"
+ },
+ {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:reference",
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "der",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:group",
+ "boundary" : {
+ "@type" : "korap:boundary",
+ "min" : 1
+ },
+ "operands" : [
+ {
+ "@type" : "korap:token"
+ }
+ ],
+ "operation" : "operation:repetition"
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+ ],
+ "operation" : "operation:focus",
+ "spanRef" : [
+ 2,
+ 3
+ ]
+ }
+ ],
+ "operation" : "operation:class"
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+}
diff --git a/src/test/resources/queries/submatch/wrapped.jsonld b/src/test/resources/queries/submatch/wrapped.jsonld
new file mode 100644
index 0000000..8bc7b0c
--- /dev/null
+++ b/src/test/resources/queries/submatch/wrapped.jsonld
@@ -0,0 +1,73 @@
+{
+ "@context":"http://ids-mannheim.de/ns/KorAP/json-ld/v0.2/context.jsonld",
+ "collection":null,
+ "query" : {
+ "@type" : "korap:reference",
+ "classRef" : [
+ 129
+ ],
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "distances" : [
+ {
+ "@type" : "cosmas:distance",
+ "boundary" : {
+ "@type" : "korap:boundary",
+ "max" : 0,
+ "min" : 0
+ },
+ "key" : "s",
+ "max" : 0,
+ "min" : 0
+ }
+ ],
+ "inOrder" : true,
+ "operands" : [
+ {
+ "@type" : "korap:group",
+ "class" : 129,
+ "classOut" : 129,
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "der",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }
+ ],
+ "operation" : "operation:class"
+ },
+ {
+ "@type" : "korap:group",
+ "class" : 129,
+ "classOut" : 129,
+ "operands" : [
+ {
+ "@type" : "korap:reference",
+ "operands" : [
+ {
+ "@type" : "korap:span",
+ "key" : "s"
+ }
+ ],
+ "operation" : "operation:focus",
+ "spanRef" : [
+ 0,
+ 1
+ ]
+ }
+ ],
+ "operation" : "operation:class"
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+ ],
+ "operation" : "operation:focus"
+ }
+}