| package de.ids_mannheim.korap.index; |
| |
| import static org.junit.Assert.assertEquals; |
| |
| import java.io.IOException; |
| |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.search.spans.SpanQuery; |
| import org.apache.lucene.search.spans.SpanTermQuery; |
| import org.junit.Test; |
| import org.junit.runner.RunWith; |
| import org.junit.runners.JUnit4; |
| |
| import de.ids_mannheim.korap.KrillIndex; |
| import de.ids_mannheim.korap.query.DistanceConstraint; |
| import de.ids_mannheim.korap.query.SpanDistanceQuery; |
| import de.ids_mannheim.korap.query.SpanElementQuery; |
| import de.ids_mannheim.korap.query.SpanSegmentQuery; |
| import de.ids_mannheim.korap.response.Result; |
| |
| @RunWith(JUnit4.class) |
| public class TestDistanceIndex { |
| Result kr; |
| KrillIndex ki; |
| |
| |
| private FieldDocument createFieldDoc0 () { |
| FieldDocument fd = new FieldDocument(); |
| fd.addString("ID", "doc-0"); |
| fd.addTV("base", "text", |
| "[(0-1)s:b|s:c|_1$<i>0<i>1]" + "[(1-2)s:b|_2$<i>1<i>2]" |
| + "[(2-3)s:c|_3$<i>2<i>3]" + "[(3-4)s:c|_4$<i>3<i>4]" |
| + "[(4-5)s:d|_5$<i>4<i>5]" + "[(5-6)s:d|_6$<i>5<i>6]"); |
| return fd; |
| } |
| |
| |
| private FieldDocument createFieldDoc1 () { |
| FieldDocument fd = new FieldDocument(); |
| fd.addString("ID", "doc-1"); |
| fd.addTV("base", "text", |
| "[(0-1)s:c|_1$<i>0<i>1]" + "[(1-2)s:e|_2$<i>1<i>2]" |
| + "[(2-3)s:c|_3$<i>2<i>3|<>:y$<b>64<i>2<i>4<i>4<b>0]" |
| + "[(3-4)s:c|_4$<i>3<i>4|<>:x$<b>64<i>3<i>7<i>7<b>0]" |
| + "[(4-5)s:d|_5$<i>4<i>5|<>:y$<b>64<i>4<i>6<i>6<b>0]" |
| + "[(5-6)s:c|_6$<i>5<i>6|<>:y$<b>64<i>5<i>8<i>8<b>0]" |
| + "[(6-7)s:d|_7$<i>6<i>7]" |
| + "[(7-8)s:e|_8$<i>7<i>8|<>:x$<b>64<i>7<i>9<i>9<b>0]" |
| + "[(8-9)s:e|_9$<i>8<i>9|<>:x$<b>64<i>8<i>10<i>10<b>0]" |
| + "[(9-10)s:d|_10$<i>9<i>10]"); |
| return fd; |
| } |
| |
| |
| private FieldDocument createFieldDoc2 () { |
| FieldDocument fd = new FieldDocument(); |
| fd.addString("ID", "doc-2"); |
| fd.addTV("base", "text", |
| "[(0-1)s:b|_1$<i>0<i>1]" + "[(1-2)s:b|_2$<i>1<i>2]" |
| + "[(2-3)s:d|_3$<i>2<i>3]" + "[(3-4)s:e|_4$<i>3<i>4]" |
| + "[(4-5)s:d|_5$<i>4<i>5]" + "[(5-6)s:e|_6$<i>5<i>6]"); |
| return fd; |
| } |
| |
| |
| private SpanQuery createQuery (String x, String y, int min, int max, |
| boolean isOrdered) { |
| SpanQuery sq = new SpanDistanceQuery( |
| new SpanTermQuery(new Term("base", x)), |
| new SpanTermQuery(new Term("base", y)), |
| new DistanceConstraint(min, max, isOrdered, false), true); |
| return sq; |
| } |
| |
| |
| private SpanQuery createElementQuery (String x, String y, int min, int max, |
| boolean isOrdered) { |
| SpanQuery sq = new SpanDistanceQuery(new SpanElementQuery("base", x), |
| new SpanElementQuery("base", y), |
| new DistanceConstraint(min, max, isOrdered, false), true); |
| return sq; |
| } |
| |
| |
| /** |
| * - Intersection |
| * - Multiple occurrences in the same doc |
| * - hasMoreFirstSpans = false for the current secondspan |
| */ |
| @Test |
| public void testCase1 () throws IOException { |
| ki = new KrillIndex(); |
| ki.addDoc(createFieldDoc0()); |
| ki.commit(); |
| SpanQuery sq; |
| // ---- Distance 0 to 1 |
| sq = createQuery("s:b", "s:c", 0, 1, true); |
| kr = ki.search(sq, (short) 10); |
| // System.out.println(sq); |
| assertEquals(kr.getTotalResults(), 2); |
| assertEquals(0, kr.getMatch(0).startPos); |
| assertEquals(1, kr.getMatch(0).endPos); |
| assertEquals(1, kr.getMatch(1).startPos); |
| assertEquals(3, kr.getMatch(1).endPos); |
| |
| // ---- Distance 2 to 2 |
| sq = createQuery("s:b", "s:c", 2, 2, true); |
| kr = ki.search(sq, (short) 10); |
| |
| assertEquals(kr.getTotalResults(), 2); |
| assertEquals(0, kr.getMatch(0).startPos); |
| assertEquals(3, kr.getMatch(0).endPos); |
| assertEquals(1, kr.getMatch(1).startPos); |
| assertEquals(4, kr.getMatch(1).endPos); |
| |
| // ---- Distance 2 to 3 |
| sq = createQuery("s:b", "s:c", 2, 3, true); |
| kr = ki.search(sq, (short) 10); |
| |
| assertEquals(kr.getTotalResults(), 3); |
| |
| ki.close(); |
| } |
| |
| |
| /** |
| * - Check candidate list: |
| * - CandidateList should not contain firstspans that are too far |
| * from |
| * the current secondspan |
| * - Add new candidates |
| */ |
| @Test |
| public void testCase2 () throws IOException { |
| ki = new KrillIndex(); |
| ki.addDoc(createFieldDoc1()); |
| ki.commit(); |
| |
| // ---- Distance 1 to 3 |
| // Candidate list for the current secondspan, is empty |
| SpanQuery sq = createQuery("s:c", "s:d", 1, 3, true); |
| kr = ki.search(sq, (short) 10); |
| |
| assertEquals((long) 4, kr.getTotalResults()); |
| assertEquals(2, kr.getMatch(0).startPos); |
| assertEquals(5, kr.getMatch(0).endPos); |
| assertEquals(3, kr.getMatch(2).startPos); |
| assertEquals(7, kr.getMatch(2).endPos); |
| |
| ki.addDoc(createFieldDoc0()); |
| ki.commit(); |
| |
| // ---- Distance 3 to 3 |
| // Candidate list is empty, but there are secondspans in the other doc |
| sq = createQuery("s:c", "s:d", 3, 3, true); |
| kr = ki.search(sq, (short) 10); |
| assertEquals((long) 2, kr.getTotalResults()); |
| |
| ki.close(); |
| } |
| |
| |
| /** |
| * - Ensure the same document |
| * - Multiple matches in multiple documents and atomic indices |
| */ |
| @Test |
| public void testCase3 () throws IOException { |
| ki = new KrillIndex(); |
| ki.addDoc(createFieldDoc0()); |
| ki.commit(); |
| ki.addDoc(createFieldDoc2()); |
| ki.addDoc(createFieldDoc1()); |
| ki.commit(); |
| |
| SpanQuery sq; |
| sq = createQuery("s:c", "s:d", 3, 3, true); |
| kr = ki.search(sq, (short) 10); |
| |
| assertEquals(kr.getTotalResults(), 2); |
| } |
| |
| |
| /** |
| * - Firstspan.next() is in the other doc, but there is |
| * still a secondspans in the same doc |
| * - hasMoreFirstSpan and secondspans.next() are true, |
| * but ensureSameDoc() = false |
| */ |
| @Test |
| public void testCase4 () throws IOException { |
| ki = new KrillIndex(); |
| ki.addDoc(createFieldDoc0()); |
| ki.commit(); |
| ki.addDoc(createFieldDoc2()); |
| ki.addDoc(createFieldDoc1()); |
| ki.commit(); |
| |
| // ---- Distance 1 to 2 |
| SpanQuery sq = createQuery("s:b", "s:c", 1, 2, true); |
| kr = ki.search(sq, (short) 10); |
| |
| assertEquals(kr.getTotalResults(), 3); |
| assertEquals(0, kr.getMatch(0).startPos); |
| assertEquals(3, kr.getMatch(0).endPos); |
| assertEquals(1, kr.getMatch(1).startPos); |
| assertEquals(3, kr.getMatch(1).endPos); |
| assertEquals(1, kr.getMatch(2).startPos); |
| assertEquals(4, kr.getMatch(2).endPos); |
| ki.close(); |
| } |
| |
| |
| /** ElementQueries */ |
| @Test |
| public void testCase5 () throws IOException { |
| ki = new KrillIndex(); |
| ki.addDoc(createFieldDoc1()); |
| ki.commit(); |
| |
| // Intersection ---- Distance 0:0 |
| SpanQuery sq = createElementQuery("x", "y", 0, 0, true); |
| kr = ki.search(sq, (short) 10); |
| |
| assertEquals(kr.getTotalResults(), 4); |
| assertEquals(2, kr.getMatch(0).startPos); |
| assertEquals(7, kr.getMatch(0).endPos); |
| assertEquals(3, kr.getMatch(1).startPos); |
| assertEquals(7, kr.getMatch(1).endPos); |
| assertEquals(3, kr.getMatch(2).startPos); |
| assertEquals(8, kr.getMatch(2).endPos); |
| |
| // Next to ---- Distance 1:1 |
| sq = createElementQuery("y", "x", 1, 1, true); |
| kr = ki.search(sq, (short) 10); |
| |
| assertEquals(kr.getTotalResults(), 1); |
| assertEquals(5, kr.getMatch(0).startPos); |
| assertEquals(10, kr.getMatch(0).endPos); |
| |
| // ---- Distance 1:2 |
| sq = createElementQuery("y", "x", 1, 2, true); |
| kr = ki.search(sq, (short) 10); |
| |
| assertEquals(kr.getTotalResults(), 2); |
| assertEquals(4, kr.getMatch(0).startPos); |
| assertEquals(9, kr.getMatch(0).endPos); |
| assertEquals(5, kr.getMatch(1).startPos); |
| assertEquals(10, kr.getMatch(1).endPos); |
| |
| // The same element type ---- Distance 1:2 |
| sq = createElementQuery("x", "x", 1, 2, true); |
| kr = ki.search(sq, (short) 10); |
| |
| assertEquals(kr.getTotalResults(), 2); |
| } |
| |
| |
| /** Skip to */ |
| @Test |
| public void testCase6 () throws IOException { |
| ki = new KrillIndex(); |
| ki.addDoc(createFieldDoc2()); |
| ki.addDoc(createFieldDoc1()); |
| ki.commit(); |
| |
| SpanQuery firstClause = createQuery("s:d", "s:e", 3, 4, true); |
| kr = ki.search(firstClause, (short) 10); |
| |
| assertEquals(kr.getTotalResults(), 3); |
| assertEquals(0, kr.getMatch(0).getLocalDocID()); |
| assertEquals(2, kr.getMatch(0).startPos); |
| assertEquals(6, kr.getMatch(0).endPos); |
| assertEquals(1, kr.getMatch(1).getLocalDocID()); |
| assertEquals(4, kr.getMatch(1).startPos); |
| assertEquals(8, kr.getMatch(1).endPos); |
| assertEquals(4, kr.getMatch(2).startPos); |
| assertEquals(9, kr.getMatch(2).endPos); |
| |
| // The secondspans is skipped to doc# of the current firstspans |
| SpanQuery sq = new SpanSegmentQuery( |
| createQuery("s:d", "s:e", 3, 4, true), |
| createElementQuery("y", "x", 1, 2, true)); |
| kr = ki.search(sq, (short) 10); |
| |
| assertEquals(kr.getTotalResults(), 1); |
| assertEquals(4, kr.getMatch(0).startPos); |
| assertEquals(9, kr.getMatch(0).endPos); |
| } |
| |
| |
| /** Same tokens */ |
| @Test |
| public void testCase7 () throws IOException { |
| ki = new KrillIndex(); |
| ki.addDoc(createFieldDoc1()); |
| ki.commit(); |
| |
| SpanQuery sq = createQuery("s:c", "s:c", 1, 2, true); |
| kr = ki.search(sq, (short) 10); |
| |
| assertEquals(kr.getTotalResults(), 3); |
| assertEquals(0, kr.getMatch(0).startPos); |
| assertEquals(3, kr.getMatch(0).endPos); |
| assertEquals(2, kr.getMatch(1).startPos); |
| assertEquals(4, kr.getMatch(1).endPos); |
| assertEquals(3, kr.getMatch(2).startPos); |
| assertEquals(6, kr.getMatch(2).endPos); |
| |
| ki.addDoc(createFieldDoc2()); |
| ki.commit(); |
| |
| // with order |
| sq = createQuery("s:e", "s:e", 1, 1, true); |
| kr = ki.search(sq, (short) 10); |
| |
| assertEquals(kr.getTotalResults(), 1); |
| |
| // without order |
| sq = createQuery("s:e", "s:e", 1, 1, false); |
| kr = ki.search(sq, (short) 10); |
| |
| assertEquals(kr.getTotalResults(), 2); |
| } |
| |
| } |