blob: dbad545f37d37f6537b57e715bd519e4f163d587 [file] [log] [blame]
package de.ids_mannheim.korap.index;
import java.util.*;
import java.io.*;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Bits;
import static org.junit.Assert.*;
import org.junit.Test;
import org.junit.Ignore;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import de.ids_mannheim.korap.KorapIndex;
import de.ids_mannheim.korap.KorapQuery;
import de.ids_mannheim.korap.KorapResult;
import de.ids_mannheim.korap.query.SpanNextQuery;
import de.ids_mannheim.korap.query.SpanMatchModifyClassQuery;
import de.ids_mannheim.korap.query.SpanClassQuery;
import de.ids_mannheim.korap.index.FieldDocument;
import de.ids_mannheim.korap.analysis.MultiTermTokenStream;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.index.Term;
// mvn -Dtest=TestMatchIndex#indexExample1 test
// The match is shrunk and split
/**
 * Tests match positions and snippet rendering (bracket and HTML form) for
 * span queries that use classes ({@link SpanClassQuery}) and match
 * modification ({@link SpanMatchModifyClassQuery}).
 *
 * All tests run against the same single-document fixture with the primary
 * text {@code abcabcabac}, see {@link #createSampleIndex()}.
 */
@RunWith(JUnit4.class)
public class TestMatchIndex {

    /** Name of the indexed field that every query in this class targets. */
    private static final String FIELD = "base";

    /**
     * Builds a fresh, committed index holding one document with the text
     * {@code abcabcabac}, one token per character. Each token carries an
     * {@code s:} and an {@code i:} term; the first token additionally
     * carries {@code -:t$<i>10} (presumably the token count reported by
     * {@code numberOf(FIELD, "t")} - TODO confirm).
     *
     * A new index is created on every call, so tests do not share state.
     *
     * @return the committed index
     * @throws IOException if creating, filling or committing the index fails
     */
    private static KorapIndex createSampleIndex () throws IOException {
        KorapIndex ki = new KorapIndex();

        // abcabcabac
        FieldDocument fd = new FieldDocument();
        fd.addTV(FIELD,
                 "abcabcabac",
                 "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]" +
                 "[(1-2)s:b|i:b|_1#1-2]" +
                 "[(2-3)s:c|i:c|_2#2-3]" +
                 "[(3-4)s:a|i:a|_3#3-4]" +
                 "[(4-5)s:b|i:b|_4#4-5]" +
                 "[(5-6)s:c|i:c|_5#5-6]" +
                 "[(6-7)s:a|i:a|_6#6-7]" +
                 "[(7-8)s:b|i:b|_7#7-8]" +
                 "[(8-9)s:a|i:a|_8#8-9]" +
                 "[(9-10)s:c|i:c|_9#9-10]");
        ki.addDoc(fd);
        ki.commit();
        return ki;
    }

    /**
     * Creates a span term query on {@link #FIELD}.
     *
     * @param key the term text, e.g. {@code "s:a"}
     * @return the span term query for that term
     */
    private static SpanTermQuery term (String key) {
        return new SpanTermQuery(new Term(FIELD, key));
    }

    /**
     * Checks positions and snippets for sequences with classes, with and
     * without modifying the match to a class.
     *
     * @throws IOException if the index cannot be built or searched
     */
    @Test
    public void indexExample1 () throws IOException {
        KorapIndex ki = createSampleIndex();
        SpanQuery sq;
        KorapResult kr;

        // "b" followed by a class-wrapped "a": the whole sequence is the
        // match, the class is only highlighted inside it.
        sq = new SpanNextQuery(
            term("s:b"),
            new SpanClassQuery(term("s:a"))
        );
        kr = ki.search(sq, (short) 10);

        assertEquals("totalResults", 1, kr.totalResults());
        assertEquals("StartPos (0)", 7, kr.match(0).startPos);
        assertEquals("EndPos (0)", 9, kr.match(0).endPos);
        assertEquals("SnippetBrackets (0)", "... bcabca[b{a}]c", kr.match(0).snippetBrackets());
        assertEquals("Test no 'more' context",
                     "<span class=\"context-left\"><span class=\"more\"></span>bcabca</span><span class=\"match\">b<em class=\"class-0 level-0\">a</em></span><span class=\"context-right\">c</span>",
                     kr.match(0).snippetHTML());

        // Same sequence, but the match is narrowed to the class span.
        sq = new SpanMatchModifyClassQuery(
            new SpanNextQuery(
                term("s:b"),
                new SpanClassQuery(term("s:a"))
            )
        );
        kr = ki.search(sq, (short) 10);

        assertEquals("totalResults", 1, kr.totalResults());
        assertEquals("StartPos (0)", 8, kr.match(0).startPos);
        assertEquals("EndPos (0)", 9, kr.match(0).endPos);
        assertEquals("SnippetBrackets (0)", "... cabcab[a]c", kr.match(0).snippetBrackets());

        // Two numbered classes; the match is narrowed to class 3 (the "b").
        sq = new SpanMatchModifyClassQuery(
            new SpanNextQuery(
                new SpanClassQuery(term("s:a"), (byte) 2),
                new SpanClassQuery(term("s:b"), (byte) 3)
            ), (byte) 3
        );
        kr = ki.search(sq, (short) 10);

        assertEquals("totalResults", 3, kr.totalResults());

        assertEquals("StartPos (0)", 1, kr.match(0).startPos);
        assertEquals("EndPos (0)", 2, kr.match(0).endPos);
        assertEquals("SnippetBrackets (0)", "a[b]cabcab ...", kr.match(0).snippetBrackets());
        assertEquals("SnippetHTML (0)",
                     "<span class=\"context-left\">a</span><span class=\"match\">b</span><span class=\"context-right\">cabcab<span class=\"more\"></span></span>",
                     kr.match(0).snippetHTML());

        assertEquals("StartPos (1)", 4, kr.match(1).startPos);
        assertEquals("EndPos (1)", 5, kr.match(1).endPos);
        assertEquals("SnippetBrackets (1)", "abca[b]cabac", kr.match(1).snippetBrackets());
        assertEquals("SnippetHTML (1)",
                     "<span class=\"context-left\">abca</span><span class=\"match\">b</span><span class=\"context-right\">cabac</span>",
                     kr.match(1).snippetHTML());

        assertEquals("StartPos (2)", 7, kr.match(2).startPos);
        assertEquals("EndPos (2)", 8, kr.match(2).endPos);
        assertEquals("SnippetBrackets (2)", "... bcabca[b]ac", kr.match(2).snippetBrackets());

        // abcabcabac
        // Nested classes: the match is narrowed to class 2 ("ba"), and the
        // inner default class on "a" stays highlighted.
        sq = new SpanMatchModifyClassQuery(
            new SpanNextQuery(
                term("s:a"),
                new SpanClassQuery(
                    new SpanNextQuery(
                        term("s:b"),
                        new SpanClassQuery(term("s:a"))
                    ), (byte) 2
                )
            ), (byte) 2
        );
        kr = ki.search(sq, (short) 10);

        assertEquals("totalResults", 1, kr.totalResults());
        assertEquals("SnippetBrackets (0)", "... bcabca[b{a}]c", kr.match(0).snippetBrackets());
        assertEquals("SnippetHTML (0) 1",
                     "<span class=\"context-left\"><span class=\"more\"></span>bcabca</span><span class=\"match\">b<em class=\"class-0 level-0\">a</em></span><span class=\"context-right\">c</span>",
                     kr.match(0).snippetHTML());

        // Offset tokens
        kr = ki.search(sq, 0, (short) 10, true, (short) 2, true, (short) 2);
        assertEquals("totalResults", 1, kr.totalResults());
        assertEquals("SnippetBrackets (0)", "... ca[b{a}]c", kr.match(0).snippetBrackets());

        // Offset Characters
        kr = ki.search(sq, 0, (short) 10, false, (short) 1, false, (short) 0);
        assertEquals("totalResults", 1, kr.totalResults());
        assertEquals("SnippetBrackets (0)", "... a[b{a}] ...", kr.match(0).snippetBrackets());
        assertEquals("SnippetHTML (0) 2",
                     "<span class=\"context-left\"><span class=\"more\"></span>a</span><span class=\"match\">b<em class=\"class-0 level-0\">a</em></span><span class=\"context-right\"><span class=\"more\"></span></span>",
                     kr.match(0).snippetHTML());

        // The query defines classes 1 and 2 only but asks for class 3; the
        // assertions expect the full "bc" match with both classes marked.
        sq = new SpanMatchModifyClassQuery(
            new SpanNextQuery(
                new SpanClassQuery(term("s:b"), (byte) 1),
                new SpanClassQuery(term("s:c"), (byte) 2)
            ), (byte) 3
        );
        kr = ki.search(sq, (short) 10);

        assertEquals("totalResults", 2, kr.totalResults());

        assertEquals("StartPos (0)", 1, kr.match(0).startPos);
        assertEquals("EndPos (0)", 3, kr.match(0).endPos);
        assertEquals("SnippetBrackets (0)", "a[{1:b}{2:c}]abcaba ...", kr.match(0).snippetBrackets());

        assertEquals("StartPos (1)", 4, kr.match(1).startPos);
        assertEquals("EndPos (1)", 6, kr.match(1).endPos);
        assertEquals("SnippetBrackets (1)", "abca[{1:b}{2:c}]abac", kr.match(1).snippetBrackets());

        assertEquals("Document count", 1, ki.numberOf(FIELD, "documents"));
        assertEquals("Token count", 10, ki.numberOf(FIELD, "t"));

        // "a" followed by a class around "bc"; the match is narrowed to the
        // class, so the leading "a" ends up in the left context.
        sq = new SpanMatchModifyClassQuery(
            new SpanNextQuery(
                term("s:a"),
                new SpanClassQuery(
                    new SpanNextQuery(
                        term("s:b"),
                        term("s:c")
                    )
                )
            )
        );
        kr = ki.search(sq, (short) 2);

        assertEquals("totalResults", 2, kr.totalResults());

        assertEquals("StartPos (0)", 1, kr.match(0).startPos);
        assertEquals("EndPos (0)", 3, kr.match(0).endPos);
        assertEquals("SnippetBrackets (0)", "a[bc]abcaba ...", kr.match(0).snippetBrackets());

        assertEquals("StartPos (1)", 4, kr.match(1).startPos);
        assertEquals("EndPos (1)", 6, kr.match(1).endPos);
        assertEquals("SnippetBrackets (1)", "abca[bc]abac", kr.match(1).snippetBrackets());

        // Searching must not have changed the index statistics.
        assertEquals("Document count", 1, ki.numberOf(FIELD, "documents"));
        assertEquals("Token count", 10, ki.numberOf(FIELD, "t"));
    }

    /**
     * Checks snippets of single-token matches at the start, in the middle
     * and at the end of the text, first with the default context and then
     * with a context of zero tokens on both sides.
     *
     * @throws IOException if the index cannot be built or searched
     */
    @Test
    public void indexExample2 () throws IOException {
        KorapIndex ki = createSampleIndex();
        KorapResult kr;

        SpanQuery sq = new SpanOrQuery(
            term("s:a"),
            term("s:c")
        );

        // Default contexts
        kr = ki.search(sq, (short) 20);

        assertEquals("totalResults", 7, kr.totalResults());

        assertEquals("SnippetHTML (0)",
                     "<span class=\"context-left\"></span><span class=\"match\">a</span><span class=\"context-right\">bcabca<span class=\"more\"></span></span>",
                     kr.match(0).snippetHTML());
        assertEquals("SnippetBrackets (0)", "[a]bcabca ...", kr.match(0).snippetBrackets());

        assertEquals("SnippetBrackets (1)", "ab[c]abcaba ...", kr.match(1).snippetBrackets());
        assertEquals("SnippetHTML (1)",
                     "<span class=\"context-left\">ab</span><span class=\"match\">c</span><span class=\"context-right\">abcaba<span class=\"more\"></span></span>",
                     kr.match(1).snippetHTML());

        assertEquals("SnippetBrackets (6)", "... abcaba[c]", kr.match(6).snippetBrackets());
        assertEquals("SnippetHTML (6)",
                     "<span class=\"context-left\"><span class=\"more\"></span>abcaba</span><span class=\"match\">c</span><span class=\"context-right\"></span>",
                     kr.match(6).snippetHTML());

        // No contexts: zero tokens to the left and to the right
        kr = ki.search(sq, 0, (short) 20, true, (short) 0, true, (short) 0);

        assertEquals("totalResults", 7, kr.totalResults());

        assertEquals("SnippetBrackets (0)", "[a] ...", kr.match(0).snippetBrackets());
        assertEquals("SnippetHTML (0)",
                     "<span class=\"context-left\"></span><span class=\"match\">a</span><span class=\"context-right\"><span class=\"more\"></span></span>",
                     kr.match(0).snippetHTML());

        assertEquals("SnippetBrackets (1)", "... [c] ...", kr.match(1).snippetBrackets());
        assertEquals("SnippetHTML (1)",
                     "<span class=\"context-left\"><span class=\"more\"></span></span><span class=\"match\">c</span><span class=\"context-right\"><span class=\"more\"></span></span>",
                     kr.match(1).snippetHTML());

        assertEquals("SnippetBrackets (6)", "... [c]", kr.match(6).snippetBrackets());
        assertEquals("SnippetHTML (6)",
                     "<span class=\"context-left\"><span class=\"more\"></span></span><span class=\"match\">c</span><span class=\"context-right\"></span>",
                     kr.match(6).snippetHTML());
    }

    /**
     * Checks that nested numbered classes built with the {@link KorapQuery}
     * builder are rendered nested in the bracket snippet.
     *
     * @throws IOException if the index cannot be built or searched
     */
    @Test
    public void indexExample3 () throws IOException {
        KorapIndex ki = createSampleIndex();
        KorapResult kr;

        KorapQuery kq = new KorapQuery(FIELD);

        // Class 1 wraps the sequence "b a c"; class 2 wraps the final "c".
        SpanQuery sq = kq._(
            1,
            kq.seq(kq.seg("s:b"))
              .append(kq.seg("s:a"))
              .append(kq._(2, kq.seg("s:c")))
        ).toQuery();

        kr = ki.search(sq, 0, (short) 20, true, (short) 2, true, (short) 5);

        assertEquals("totalResults", 1, kr.totalResults());
        assertEquals("SnippetBrackets (0)", "... ca[{1:ba{2:c}}]", kr.match(0).snippetBrackets());
    }
}