| Nils Diewald | efb9c9a | 2014-02-20 15:05:18 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.benchmark; |
| 2 | |
import static org.junit.Assert.*;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.*;

import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.store.MMapDirectory;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import de.ids_mannheim.korap.Krill;
import de.ids_mannheim.korap.KrillCollection;
import de.ids_mannheim.korap.KrillIndex;
import de.ids_mannheim.korap.KrillQuery;
import de.ids_mannheim.korap.index.FieldDocument;
import de.ids_mannheim.korap.query.QueryBuilder;
import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
import de.ids_mannheim.korap.response.Result;
import de.ids_mannheim.korap.util.QueryException;
| 25 | |
| Nils Diewald | efb9c9a | 2014-02-20 15:05:18 +0000 | [diff] [blame] | 26 | @RunWith(JUnit4.class) |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 27 | public class TestBenchmarkSpans { |
| Nils Diewald | efb9c9a | 2014-02-20 15:05:18 +0000 | [diff] [blame] | 28 | |
| 29 | @Test |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 30 | public void checkBenchmark1 () throws IOException { |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 31 | Properties prop = new Properties(); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 32 | InputStream fr = new FileInputStream( |
| 33 | getClass().getResource("/krill.properties").getFile()); |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 34 | prop.load(fr); |
| Nils Diewald | efb9c9a | 2014-02-20 15:05:18 +0000 | [diff] [blame] | 35 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 36 | // Get the real index |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 37 | KrillIndex ki = new KrillIndex(new MMapDirectory( |
| 38 | Paths.get(prop.getProperty("krill.indexDir")))); |
| Nils Diewald | efb9c9a | 2014-02-20 15:05:18 +0000 | [diff] [blame] | 39 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 40 | // Create a container for virtual collections: |
| Nils Diewald | 2d5f810 | 2015-02-26 21:07:54 +0000 | [diff] [blame] | 41 | KrillCollection kc = new KrillCollection(ki); |
| Nils Diewald | efb9c9a | 2014-02-20 15:05:18 +0000 | [diff] [blame] | 42 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 43 | long t1 = 0, t2 = 0; |
| 44 | /// cosmas20.json!!! |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 45 | String json = getString( |
| 46 | getClass().getResource("/queries/benchmark1.jsonld").getFile()); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 47 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 48 | int rounds = 100; |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 49 | |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 50 | Result kr = new Result(); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 51 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 52 | t1 = System.nanoTime(); |
| 53 | for (int i = 1; i <= rounds; i++) { |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 54 | kr = new Krill(json).apply(ki); |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 55 | }; |
| 56 | t2 = System.nanoTime(); |
| Nils Diewald | efb9c9a | 2014-02-20 15:05:18 +0000 | [diff] [blame] | 57 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 58 | // assertEquals("TotalResults", 30751, kr.getTotalResults()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 59 | assertEquals("TotalResults", kr.getTotalResults(), 4803739); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 60 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 61 | // long seconds = (long) (t2 - t1 / 1000) % 60 ; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 62 | double seconds = (double) (t2 - t1) / 1000000000.0; |
| 63 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 64 | // System.out.println("It took " + seconds + " seconds"); |
| Nils Diewald | efb9c9a | 2014-02-20 15:05:18 +0000 | [diff] [blame] | 65 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 66 | // 100 times: |
| 67 | // 43,538 sec |
| 68 | // 4.874 |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 69 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 70 | // 1000 times: |
| 71 | // 36.613 sec |
| Nils Diewald | efb9c9a | 2014-02-20 15:05:18 +0000 | [diff] [blame] | 72 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 73 | // After refactoring |
| 74 | // 100 times |
| 75 | // 273.58114372 seconds |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 76 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 77 | // After intro of attributes |
| 78 | // 100 times |
| 79 | // 350.171506379 seconds |
| Nils Diewald | efb9c9a | 2014-02-20 15:05:18 +0000 | [diff] [blame] | 80 | }; |
| 81 | |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 82 | |
| 83 | @Test |
| 84 | public void checkBenchmark2JSON () throws IOException { |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 85 | Properties prop = new Properties(); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 86 | InputStream fr = new FileInputStream( |
| 87 | getClass().getResource("/krill.properties").getFile()); |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 88 | prop.load(fr); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 89 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 90 | // Get the real index |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 91 | KrillIndex ki = new KrillIndex(new MMapDirectory( |
| 92 | Paths.get(prop.getProperty("krill.indexDir")))); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 93 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 94 | // Create a container for virtual collections: |
| Nils Diewald | 2d5f810 | 2015-02-26 21:07:54 +0000 | [diff] [blame] | 95 | KrillCollection kc = new KrillCollection(ki); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 96 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 97 | long t1 = 0, t2 = 0; |
| 98 | /// cosmas20.json!!! |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 99 | String json = getString( |
| 100 | getClass().getResource("/queries/benchmark2.jsonld").getFile()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 101 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 102 | int rounds = 10000; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 103 | |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 104 | Result kr = new Result(); |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 105 | String result = new String(""); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 106 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 107 | t1 = System.nanoTime(); |
| 108 | double length = 0; |
| 109 | for (int i = 1; i <= rounds; i++) { |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 110 | kr = new Krill(json).apply(ki); |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 111 | length += kr.toJsonString().length(); |
| 112 | }; |
| 113 | t2 = System.nanoTime(); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 114 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 115 | // assertEquals("TotalResults", 30751, kr.getTotalResults()); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 116 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 117 | // System.err.println(kr.toJSON()); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 118 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 119 | // long seconds = (long) (t2 - t1 / 1000) % 60 ; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 120 | double seconds = (double) (t2 - t1) / 1000000000.0; |
| 121 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 122 | // System.out.println("It took " + seconds + " seconds"); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 123 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 124 | // 10000 times: |
| 125 | // 77.167124985 sec |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 126 | }; |
| 127 | |
| Nils Diewald | 8c54343 | 2014-02-27 18:25:38 +0000 | [diff] [blame] | 128 | |
| 129 | @Test |
| 130 | public void checkBenchmarkSentences () throws IOException { |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 131 | Properties prop = new Properties(); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 132 | InputStream fr = new FileInputStream( |
| 133 | getClass().getResource("/krill.properties").getFile()); |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 134 | prop.load(fr); |
| Nils Diewald | 8c54343 | 2014-02-27 18:25:38 +0000 | [diff] [blame] | 135 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 136 | // Get the real index |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 137 | KrillIndex ki = new KrillIndex(new MMapDirectory( |
| 138 | Paths.get(prop.getProperty("krill.indexDir")))); |
| Nils Diewald | 8c54343 | 2014-02-27 18:25:38 +0000 | [diff] [blame] | 139 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 140 | // Create a container for virtual collections: |
| Nils Diewald | 2d5f810 | 2015-02-26 21:07:54 +0000 | [diff] [blame] | 141 | KrillCollection kc = new KrillCollection(ki); |
| Nils Diewald | 8c54343 | 2014-02-27 18:25:38 +0000 | [diff] [blame] | 142 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 143 | long t1 = 0, t2 = 0; |
| 144 | /// cosmas20.json!!! |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 145 | String json = getString( |
| 146 | getClass().getResource("/queries/benchmark4.jsonld").getFile()); |
| Nils Diewald | 8c54343 | 2014-02-27 18:25:38 +0000 | [diff] [blame] | 147 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 148 | int rounds = 10; |
| Nils Diewald | 8c54343 | 2014-02-27 18:25:38 +0000 | [diff] [blame] | 149 | |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 150 | Result kr = new Result(); |
| Nils Diewald | 8c54343 | 2014-02-27 18:25:38 +0000 | [diff] [blame] | 151 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 152 | t1 = System.nanoTime(); |
| 153 | double length = 0; |
| 154 | for (int i = 1; i <= rounds; i++) { |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 155 | kr = new Krill(json).apply(ki); |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 156 | }; |
| 157 | t2 = System.nanoTime(); |
| Nils Diewald | 8c54343 | 2014-02-27 18:25:38 +0000 | [diff] [blame] | 158 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 159 | // System.err.println(kr.getMatch(0).toJSON()); |
| Nils Diewald | 8c54343 | 2014-02-27 18:25:38 +0000 | [diff] [blame] | 160 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 161 | assertEquals("TotalResults1", kr.getTotalResults(), 4116282); |
| 162 | assertEquals("TotalResults2", kr.getTotalResults(), |
| 163 | ki.numberOf("sentences")); |
| 164 | |
| 165 | double seconds = (double) (t2 - t1) / 1000000000.0; |
| 166 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 167 | // System.out.println("It took " + seconds + " seconds"); |
| 168 | // 100 rounds |
| 169 | // 56.253 secs |
| Nils Diewald | 8c54343 | 2014-02-27 18:25:38 +0000 | [diff] [blame] | 170 | }; |
| 171 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 172 | |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 173 | @Test |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 174 | public void checkBenchmarkClasses () throws IOException { |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 175 | // [orth=Der]{1:[orth=Mann]{2:[orth=und]}} |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 176 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 177 | Properties prop = new Properties(); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 178 | InputStream fr = new FileInputStream( |
| 179 | getClass().getResource("/krill.properties").getFile()); |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 180 | prop.load(fr); |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 181 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 182 | // Get the real index |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 183 | KrillIndex ki = new KrillIndex(new MMapDirectory( |
| 184 | Paths.get(prop.getProperty("krill.indexDir")))); |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 185 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 186 | // Create a container for virtual collections: |
| Nils Diewald | 2d5f810 | 2015-02-26 21:07:54 +0000 | [diff] [blame] | 187 | KrillCollection kc = new KrillCollection(ki); |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 188 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 189 | long t1 = 0, t2 = 0; |
| 190 | // Without classes |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 191 | String json = getString(getClass() |
| 192 | .getResource("/queries/benchmark5-ohne.jsonld").getFile()); |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 193 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 194 | int rounds = 2000; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 195 | |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 196 | Result kr = new Result(); |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 197 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 198 | t1 = System.nanoTime(); |
| 199 | for (int i = 1; i <= rounds; i++) { |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 200 | kr = new Krill(json).apply(ki); |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 201 | }; |
| 202 | t2 = System.nanoTime(); |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 203 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 204 | double seconds = (double) (t2 - t1) / 1000000000.0; |
| 205 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 206 | // System.out.println("It took " + seconds + " seconds without classes"); |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 207 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 208 | t1 = 0; |
| 209 | t2 = 0; |
| 210 | // With classes |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 211 | json = getString( |
| 212 | getClass().getResource("/queries/benchmark5.jsonld").getFile()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 213 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 214 | t1 = System.nanoTime(); |
| 215 | for (int i = 1; i <= rounds; i++) { |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 216 | kr = new Krill(json).apply(ki); |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 217 | }; |
| 218 | t2 = System.nanoTime(); |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 219 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 220 | seconds = (double) (t2 - t1) / 1000000000.0; |
| 221 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 222 | // System.out.println("It took " + seconds + " seconds with classes"); |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 223 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 224 | t1 = 0; |
| 225 | t2 = 0; |
| 226 | // With submatch |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 227 | json = getString(getClass() |
| 228 | .getResource("/queries/benchmark5-submatch.jsonld").getFile()); |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 229 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 230 | t1 = System.nanoTime(); |
| 231 | for (int i = 1; i <= rounds; i++) { |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 232 | kr = new Krill(json).apply(ki); |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 233 | }; |
| 234 | t2 = System.nanoTime(); |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 235 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 236 | seconds = (double) (t2 - t1) / 1000000000.0; |
| 237 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 238 | // System.out.println("It took " + seconds + " seconds with submatches"); |
| Nils Diewald | 34eaa86 | 2014-06-03 10:56:27 +0000 | [diff] [blame] | 239 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 240 | /** HERE IS A BUG! */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 241 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 242 | // System.err.println(kr.toJsonString()); |
| Nils Diewald | 34eaa86 | 2014-06-03 10:56:27 +0000 | [diff] [blame] | 243 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 244 | // System.err.println(kr.toJSON()); |
| Nils Diewald | 34eaa86 | 2014-06-03 10:56:27 +0000 | [diff] [blame] | 245 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 246 | // System.err.println(kr.getMatch(3).getSnippetBrackets()); |
| Nils Diewald | 34eaa86 | 2014-06-03 10:56:27 +0000 | [diff] [blame] | 247 | |
| 248 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 249 | // 2000 rounds: |
| 250 | // It took 10.872934435 seconds without classes |
| 251 | // It took 22.581117396 seconds with classes |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 252 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 253 | // It took 10.703933598 seconds without classes |
| 254 | // It took 19.354674517 seconds with classes |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 255 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 256 | // It took 10.939948726 seconds without classes |
| 257 | // It took 16.998470662 seconds with classes |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 258 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 259 | // It took 10.900975837 seconds without classes |
| 260 | // It took 14.902590949 seconds with classes |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 261 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 262 | // It took 10.365989238 seconds without classes |
| 263 | // It took 13.833405885 seconds with classes |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 264 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 265 | // It took 15.368675425 seconds without classes |
| 266 | // It took 18.347603186 seconds with classes |
| 267 | // It took 15.941057294 seconds with submatches |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 268 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 269 | // It took 15.241253549 seconds without classes |
| 270 | // It took 17.30375624 seconds with classes |
| 271 | // It took 15.367171254 seconds with submatches |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 272 | }; |
| 273 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 274 | |
| Nils Diewald | c025a23 | 2014-02-28 19:01:14 +0000 | [diff] [blame] | 275 | @Test |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 276 | public void checkBenchmarkIndexDocuments () throws IOException { |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 277 | long t1 = 0, t2 = 0; |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 278 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 279 | int rounds = 10; |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 280 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 281 | ArrayList<String> docs = new ArrayList<String>(700); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 282 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 283 | for (int a = 0; a < 50; a++) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 284 | for (String d : new String[] { "00001", "00002", "00003", "00004", |
| 285 | "00005", "00006", "02439" }) { |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 286 | docs.add(d); |
| 287 | }; |
| 288 | }; |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 289 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 290 | t1 = System.nanoTime(); |
| 291 | double length = 0; |
| 292 | for (int i = 1; i <= rounds; i++) { |
| 293 | // Construct index |
| Nils Diewald | a14ecd6 | 2015-02-26 21:00:20 +0000 | [diff] [blame] | 294 | KrillIndex ki = new KrillIndex(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 295 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 296 | // Indexing test files |
| 297 | for (String d : docs) { |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 298 | FieldDocument fd = ki.addDoc(getClass() |
| 299 | .getResourceAsStream("/wiki/" + d + ".json.gz"), true); |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 300 | }; |
| 301 | ki.commit(); |
| 302 | }; |
| 303 | t2 = System.nanoTime(); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 304 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 305 | double seconds = (double) (t2 - t1) / 1000000000.0; |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 306 | // System.out.println("It took " + seconds + " seconds"); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 307 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 308 | // 10 times / 350 docs: |
| 309 | // 36.26158006 seconds |
| 310 | // 32.52575097 seconds |
| 311 | // 31.818091536 seconds |
| 312 | // 32.055321123 seconds |
| 313 | // 32.32125959 seconds |
| 314 | // 31.726277979 seconds |
| 315 | // 31.65826188 seconds |
| 316 | // 31.287057537 seconds |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 317 | }; |
| 318 | |
| 319 | |
| 320 | @Test |
| 321 | public void checkBenchmark3 () throws IOException { |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 322 | Properties prop = new Properties(); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 323 | InputStream fr = new FileInputStream( |
| 324 | getClass().getResource("/krill.properties").getFile()); |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 325 | prop.load(fr); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 326 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 327 | // Get the real index |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 328 | KrillIndex ki = new KrillIndex(new MMapDirectory( |
| 329 | Paths.get(prop.getProperty("krill.indexDir")))); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 330 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 331 | // Create a container for virtual collections: |
| Nils Diewald | 2d5f810 | 2015-02-26 21:07:54 +0000 | [diff] [blame] | 332 | KrillCollection kc = new KrillCollection(ki); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 333 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 334 | long t1 = 0, t2 = 0; |
| 335 | /// cosmas20.json!!! |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 336 | String json = getString( |
| 337 | getClass().getResource("/queries/benchmark3.jsonld").getFile()); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 338 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 339 | int rounds = 500; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 340 | |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 341 | Result kr = new Result(); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 342 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 343 | t1 = System.nanoTime(); |
| 344 | for (int i = 1; i <= rounds; i++) { |
| Nils Diewald | bbd39a5 | 2015-02-23 19:56:57 +0000 | [diff] [blame] | 345 | kr = new Krill(json).apply(ki); |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 346 | }; |
| 347 | t2 = System.nanoTime(); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 348 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 349 | assertEquals("TotalResults", kr.getTotalResults(), 70229); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 350 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 351 | // System.err.println(kr.toJSON()); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 352 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 353 | // long seconds = (long) (t2 - t1 / 1000) % 60 ; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 354 | double seconds = (double) (t2 - t1) / 1000000000.0; |
| 355 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 356 | System.out.println("It took " + seconds + " seconds"); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 357 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 358 | // 500 times: |
| 359 | // 71.715862716 seconds |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 360 | }; |
| 361 | |
| 362 | |
| Nils Diewald | efb9c9a | 2014-02-20 15:05:18 +0000 | [diff] [blame] | 363 | public static String getString (String path) { |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 364 | StringBuilder contentBuilder = new StringBuilder(); |
| 365 | try { |
| 366 | BufferedReader in = new BufferedReader(new FileReader(path)); |
| 367 | String str; |
| 368 | while ((str = in.readLine()) != null) { |
| 369 | contentBuilder.append(str); |
| 370 | }; |
| 371 | in.close(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 372 | } |
| 373 | catch (IOException e) { |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 374 | fail(e.getMessage()); |
| 375 | } |
| 376 | return contentBuilder.toString(); |
| Nils Diewald | efb9c9a | 2014-02-20 15:05:18 +0000 | [diff] [blame] | 377 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 378 | |
| 379 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 380 | public static SpanQueryWrapper jsonQuery (String jsonFile) { |
| 381 | SpanQueryWrapper sqwi; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 382 | |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 383 | try { |
| 384 | String json = getString(jsonFile); |
| Akron | 850b46e | 2016-06-08 10:08:55 +0200 | [diff] [blame] | 385 | sqwi = new KrillQuery("tokens").fromKoral(json); |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 386 | } |
| 387 | catch (QueryException e) { |
| 388 | fail(e.getMessage()); |
| Nils Diewald | 8904c1d | 2015-02-26 16:13:18 +0000 | [diff] [blame] | 389 | sqwi = new QueryBuilder("tokens").seg("???"); |
| Nils Diewald | 6409a92 | 2015-01-29 20:50:42 +0000 | [diff] [blame] | 390 | }; |
| 391 | return sqwi; |
| 392 | }; |
| Nils Diewald | efb9c9a | 2014-02-20 15:05:18 +0000 | [diff] [blame] | 393 | }; |