// blob: 9b5be8b95f50f5d08322da091af2d4dc259dd99b [file] [log] [blame]
package de.ids_mannheim.korap.search;
import java.util.*;
import java.io.*;
import de.ids_mannheim.korap.KorapSearch;
import de.ids_mannheim.korap.KorapQuery;
import de.ids_mannheim.korap.KorapIndex;
import de.ids_mannheim.korap.KorapFilter;
import de.ids_mannheim.korap.KorapResult;
import java.nio.file.Files;
import java.nio.file.FileSystem;
import java.nio.file.Path;
import java.nio.charset.StandardCharsets;
import java.nio.ByteBuffer;
import static org.junit.Assert.*;
import org.junit.Test;
import org.junit.Ignore;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/**
 * JUnit tests for {@link KorapSearch}.
 *
 * The tests cover the count and start-index setters, the string form of a
 * programmatically built query, and running both programmatic and JSON-LD
 * queries against a {@link KorapIndex} built from the gzipped documents
 * under the {@code /wiki/} test resources.
 */
@RunWith(JUnit4.class)
public class TestKorapSearch {

    /**
     * Document ids (resource names below {@code /wiki/}, without the
     * {@code .json.gz} extension) indexed by most tests in this class.
     */
    private static final String[] DEFAULT_DOCS = {
        "00001", "00002", "00003", "00004", "00005", "00006", "02439"
    };


    /**
     * Checks {@code setCount}/{@code getCount}: plain values are stored,
     * a negative value leaves the previous count in place, and a value of
     * 500 is reported back as {@code getCountMax()}.
     */
    @Test
    public void searchCount () {
        KorapSearch ks = new KorapSearch(
            new KorapQuery("field1").seg("a").with("b")
        );

        // Count:
        ks.setCount(30);
        assertEquals(30, ks.getCount());
        ks.setCount(20);
        assertEquals(20, ks.getCount());

        // A negative count is expected to be ignored
        ks.setCount(-50);
        assertEquals(20, ks.getCount());

        // An oversized count is expected to be capped
        ks.setCount(500);
        assertEquals(ks.getCountMax(), ks.getCount());
    }


    /**
     * Checks {@code setStartIndex}/{@code getStartIndex}: non-negative
     * values are stored as given, and -5 is reported back as 0.
     */
    @Test
    public void searchStartIndex () {
        KorapSearch ks = new KorapSearch(
            new KorapQuery("field1").seg("a").with("b")
        );

        // startIndex
        ks.setStartIndex(5);
        assertEquals(5, ks.getStartIndex());
        ks.setStartIndex(1);
        assertEquals(1, ks.getStartIndex());
        ks.setStartIndex(0);
        assertEquals(0, ks.getStartIndex());
        ks.setStartIndex(70);
        assertEquals(70, ks.getStartIndex());

        // A negative start index is expected to fall back to 0
        ks.setStartIndex(-5);
        assertEquals(0, ks.getStartIndex());
    }


    /**
     * Checks the string form of the query held by a {@link KorapSearch}
     * that was constructed from a {@link KorapQuery} segment.
     */
    @Test
    public void searchQuery () {
        KorapSearch ks = new KorapSearch(
            new KorapQuery("field1").seg("a").with("b")
        );

        // query
        assertEquals(
            "spanSegment(field1:a, field1:b)",
            ks.getQuery().toString()
        );
    }


    /**
     * Runs a programmatically built, filtered query with explicit count,
     * start index and context lengths against the default index and
     * checks the total and the first returned snippet.
     *
     * @throws IOException if building or searching the index fails
     */
    @Test
    public void searchIndex () throws IOException {
        KorapIndex ki = buildIndex(DEFAULT_DOCS);

        KorapSearch ks = new KorapSearch(
            new KorapQuery("tokens").seg("s:Buchstaben")
        );
        ks.getCollection().filter(
            new KorapFilter().and("textClass", "reisen")
        );
        ks.setCount(3);
        ks.setStartIndex(5);
        ks.leftContext.setLength(1);
        ks.rightContext.setLength(1);

        KorapResult kr = ks.run(ki);

        // NOTE(review): the other tests call getTotalResults(); left as-is
        // because the relation between the two accessors is not visible here.
        assertEquals(6, kr.totalResults());
        assertEquals(
            "... dem [Buchstaben] A ...",
            kr.getMatch(0).getSnippetBrackets()
        );
    }


    /**
     * Runs the JSON-LD query {@code metaquery3.jsonld} against the default
     * index and checks totals, paging values and the first snippet.
     *
     * @throws IOException if building or searching the index fails
     */
    @Test
    public void searchJSON () throws IOException {
        KorapIndex ki = buildIndex(DEFAULT_DOCS);

        String json = readResource("/queries/metaquery3.jsonld");
        KorapSearch ks = new KorapSearch(json);
        KorapResult kr = ks.run(ki);

        assertEquals(66, kr.getTotalResults());
        assertEquals(5, kr.getItemsPerPage());
        assertEquals(5, kr.getStartIndex());
        assertEquals(
            "... a: A ist [der klangreichste] der V ...",
            kr.getMatch(0).getSnippetBrackets()
        );
    }


    /**
     * Runs {@code metaquery4}, {@code metaquery5} and {@code metaquery6}
     * against an index that additionally contains three further documents
     * and checks the total number of results of each.
     *
     * @throws IOException if building or searching the index fails
     */
    @Test
    public void searchJSON2 () throws IOException {
        KorapIndex ki = buildIndex(
            "00001", "00002", "00003", "00004", "00005", "00006",
            "02035-substring", "02439", "05663-unbalanced", "07452-deep"
        );

        String json = readResource("/queries/metaquery4.jsonld");
        KorapSearch ks = new KorapSearch(json);
        KorapResult kr = ks.run(ki);
        assertEquals(2, kr.getTotalResults());

        json = readResource("/queries/metaquery5.jsonld");
        ks = new KorapSearch(json);
        kr = ks.run(ki);
        assertEquals(2, kr.getTotalResults());

        json = readResource("/queries/metaquery6.jsonld");
        ks = new KorapSearch(json);
        kr = ks.run(ki);
        assertEquals(1, kr.getTotalResults());
    }


    /**
     * Passes a truncated JSON string as the query and checks that the
     * result reports zero hits and carries an error.
     *
     * @throws IOException if building or searching the index fails
     */
    @Test
    public void searchJSONFailure () throws IOException {
        KorapIndex ki = buildIndex(DEFAULT_DOCS);

        KorapResult kr = new KorapSearch("{ query").run(ki);

        assertEquals(0, kr.getTotalResults());
        assertNotNull(kr.getError());
    }


    /**
     * Runs {@code bsp-fail1.jsonld} and checks the start index, total and
     * items-per-page values reported by the result.
     *
     * @throws IOException if building or searching the index fails
     */
    @Test
    public void searchJSONindexboundary () throws IOException {
        KorapIndex ki = buildIndex(DEFAULT_DOCS);

        String json = readResource("/queries/bsp-fail1.jsonld");
        KorapResult kr = new KorapSearch(json).run(ki);

        assertEquals(0, kr.getStartIndex());
        assertEquals(0, kr.getTotalResults());
        assertEquals(25, kr.getItemsPerPage());
    }


    /**
     * Runs {@code bsp-fail2.jsonld} and checks the items-per-page, start
     * index and total values reported by the result.
     *
     * @throws IOException if building or searching the index fails
     */
    @Test
    public void searchJSONindexboundary2 () throws IOException {
        KorapIndex ki = buildIndex(DEFAULT_DOCS);

        String json = readResource("/queries/bsp-fail2.jsonld");
        KorapResult kr = new KorapSearch(json).run(ki);

        assertEquals(50, kr.getItemsPerPage());
        assertEquals(49950, kr.getStartIndex());
        assertEquals(0, kr.getTotalResults());
    }


    /**
     * Runs {@code bsp-context.jsonld}, re-runs the same search after
     * changing count and start page, then runs
     * {@code bsp-context-2.jsonld}; checks totals, paging values and the
     * first snippet where asserted.
     *
     * @throws IOException if building or searching the index fails
     */
    @Test
    public void searchJSONcontext () throws IOException {
        KorapIndex ki = buildIndex(DEFAULT_DOCS);

        String json = readResource("/queries/bsp-context.jsonld");
        KorapSearch ks = new KorapSearch(json);
        KorapResult kr = ks.run(ki);
        assertEquals(10, kr.getTotalResults());
        assertEquals("A bzw. a ist der erste Buchstabe des lateinischen [Alphabets] und ein Vokal. Der Buchstabe A hat in deutschen Texten eine durchschnittliche Häufigkeit ...", kr.getMatch(0).getSnippetBrackets());

        // Same search object, second page of five items
        ks.setCount(5);
        ks.setStartPage(2);
        kr = ks.run(ki);
        assertEquals(10, kr.getTotalResults());
        assertEquals(5, kr.getStartIndex());
        assertEquals(5, kr.getItemsPerPage());

        json = readResource("/queries/bsp-context-2.jsonld");
        kr = new KorapSearch(json).run(ki);
        // NOTE(review): -1 presumably signals that no total was computed
        // for this query - confirm against the query file.
        assertEquals(-1, kr.getTotalResults());
        assertEquals("... lls seit den Griechen beibehalten worden. 3. Bedeutungen in der Biologie steht A für das Nukleosid Adenosin steht A die Base Adenin steht A für die Aminosäure Alanin in der Informatik steht a für den dezimalen [Wert] 97 sowohl im ASCII- als auch im Unicode-Zeichensatz steht A für den dezimalen Wert 65 sowohl im ASCII- als auch im Unicode-Zeichensatz als Kfz-Kennzeichen steht A in Deutschland für Augsburg. in Österreich auf ...", kr.getMatch(0).getSnippetBrackets());
    }


    /**
     * Runs {@code bsp-paging.jsonld} and {@code bsp-cutoff.jsonld} and
     * checks the total, start index and items-per-page values of each
     * result.
     *
     * @throws IOException if building or searching the index fails
     */
    @Test
    public void searchJSONstartPage () throws IOException {
        KorapIndex ki = buildIndex(DEFAULT_DOCS);

        String json = readResource("/queries/bsp-paging.jsonld");
        KorapSearch ks = new KorapSearch(json);
        KorapResult kr = ks.run(ki);
        assertEquals(10, kr.getTotalResults());
        assertEquals(5, kr.getStartIndex());
        assertEquals(5, kr.getItemsPerPage());

        json = readResource("/queries/bsp-cutoff.jsonld");
        ks = new KorapSearch(json);
        kr = ks.run(ki);
        assertEquals(-1, kr.getTotalResults());
        assertEquals(2, kr.getStartIndex());
        assertEquals(2, kr.getItemsPerPage());
    }


    /**
     * Reads a text file and returns its lines concatenated into one
     * string. Line terminators are not preserved, because the lines are
     * appended exactly as returned by {@code readLine()}.
     *
     * The file is decoded as UTF-8 and the reader is always closed. If the
     * file cannot be read, the running test is failed with the message of
     * the {@link IOException}.
     *
     * @param path file system path of the file to read
     * @return the concatenated lines of the file
     */
    public static String getString (String path) {
        StringBuilder contentBuilder = new StringBuilder();
        try (BufferedReader in = new BufferedReader(
                 new InputStreamReader(
                     new FileInputStream(path), StandardCharsets.UTF_8
                 )
             )) {
            String str;
            while ((str = in.readLine()) != null) {
                contentBuilder.append(str);
            }
        }
        catch (IOException e) {
            fail(e.getMessage());
        }
        return contentBuilder.toString();
    }


    /**
     * Creates a new index, adds {@code /wiki/<id>.json.gz} for every given
     * id, commits, and returns the index.
     *
     * @param docIds resource names below {@code /wiki/}, without extension
     * @return the committed index
     * @throws IOException if creating, filling or committing the index fails
     */
    private KorapIndex buildIndex (String... docIds) throws IOException {
        KorapIndex ki = new KorapIndex();
        for (String id : docIds) {
            ki.addDocFile(resourcePath("/wiki/" + id + ".json.gz"), true);
        }
        ki.commit();
        return ki;
    }


    /**
     * Reads the classpath resource with the given name via
     * {@link #getString(String)}.
     *
     * @param name resource name, e.g. {@code /queries/metaquery3.jsonld}
     * @return the concatenated lines of the resource
     */
    private String readResource (String name) {
        return getString(resourcePath(name));
    }


    /**
     * Resolves a classpath resource to the file part of its URL, failing
     * the test with the resource name if it cannot be found (instead of a
     * bare NullPointerException).
     *
     * @param name resource name as passed to {@code Class.getResource}
     * @return the value of {@code URL.getFile()} for the resource
     */
    private String resourcePath (String name) {
        java.net.URL url = getClass().getResource(name);
        assertNotNull("Test resource not found: " + name, url);
        return url.getFile();
    }
}