| package de.ids_mannheim.korap.index; |
| |
| import static org.junit.Assert.assertEquals; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.List; |
| |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.search.spans.SpanQuery; |
| import org.apache.lucene.search.spans.SpanTermQuery; |
| import org.junit.Test; |
| import org.junit.runner.RunWith; |
| import org.junit.runners.JUnit4; |
| |
| import de.ids_mannheim.korap.KrillIndex; |
| import de.ids_mannheim.korap.query.DistanceConstraint; |
| import de.ids_mannheim.korap.query.SpanDistanceQuery; |
| import de.ids_mannheim.korap.query.SpanElementQuery; |
| import de.ids_mannheim.korap.query.SpanMultipleDistanceQuery; |
| import de.ids_mannheim.korap.query.SpanNextQuery; |
| import de.ids_mannheim.korap.response.Result; |
| |
| @RunWith(JUnit4.class) |
| public class TestMultipleDistanceIndex { |
| |
| private KrillIndex ki; |
| private Result kr; |
| |
| |
| public SpanQuery createQuery (String x, String y, |
| List<DistanceConstraint> constraints, boolean isOrdered) { |
| |
| SpanQuery sx = new SpanTermQuery(new Term("base", x)); |
| SpanQuery sy = new SpanTermQuery(new Term("base", y)); |
| |
| return new SpanMultipleDistanceQuery(sx, sy, constraints, isOrdered, |
| true); |
| } |
| |
| |
| public DistanceConstraint createConstraint (String unit, int min, int max, |
| boolean isOrdered, boolean exclusion) { |
| |
| if (unit.equals("w")) { |
| return new DistanceConstraint(min, max, isOrdered, exclusion); |
| } |
| return new DistanceConstraint(new SpanElementQuery("base", unit), min, |
| max, isOrdered, exclusion); |
| } |
| |
| |
| private FieldDocument createFieldDoc0 () { |
| FieldDocument fd = new FieldDocument(); |
| fd.addString("ID", "doc-0"); |
| fd.addTV( |
| "base", |
| "text", |
| "[(0-1)s:b|_1$<i>0<i>1|<>:s$<b>64<i>0<i>2<i>2<b>0|<>:p$<b>64<i>0<i>4<i>4<b>0]" |
| + "[(1-2)s:b|s:c|_2$<i>1<i>2]" |
| + "[(2-3)s:c|_3$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>4<b>0]" |
| + "[(3-4)s:b|_4$<i>3<i>4]" |
| + "[(4-5)s:c|_5$<i>4<i>5|<>:s$<b>64<i>4<i>6<i>6<b>0|<>:p$<b>64<i>4<i>6<i>6<b>0]" |
| + "[(5-6)s:e|_6$<i>5<i>6]"); |
| return fd; |
| } |
| |
| |
| private FieldDocument createFieldDoc1 () { |
| FieldDocument fd = new FieldDocument(); |
| fd.addString("ID", "doc-1"); |
| fd.addTV( |
| "base", |
| "text", |
| "[(0-1)s:c|_1$<i>0<i>1|<>:s$<b>64<i>0<i>2<i>2<b>0|<>:p$<b>64<i>0<i>4<i>4<b>0]" |
| + "[(1-2)s:c|s:e|_2$<i>1<i>2]" |
| + "[(2-3)s:e|_3$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>4<b>0]" |
| + "[(3-4)s:c|_4$<i>3<i>4]" |
| + "[(4-5)s:e|_5$<i>4<i>5|<>:s$<b>64<i>4<i>6<i>6<b>0|<>:p$<b>64<i>4<i>6<i>6<b>0]" |
| + "[(5-6)s:c|_6$<i>5<i>6]"); |
| return fd; |
| } |
| |
| |
| private FieldDocument createFieldDoc2 () { |
| FieldDocument fd = new FieldDocument(); |
| fd.addString("ID", "doc-2"); |
| fd.addTV( |
| "base", |
| "text", |
| "[(0-1)s:b|_1$<i>0<i>1|<>:s$<b>64<i>0<i>2<i>2<b>0|<>:p$<b>64<i>0<i>4<i>4<b>0]" |
| + "[(1-2)s:b|s:e|_2$<i>1<i>2]" |
| + "[(2-3)s:e|_3$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>4<b>0]" |
| + "[(3-4)s:b|s:c|_4$<i>3<i>4]" |
| + "[(4-5)s:e|_5$<i>4<i>5|<>:s$<b>64<i>4<i>6<i>6<b>0|<>:p$<b>64<i>4<i>6<i>6<b>0]" |
| + "[(5-6)s:d|_6$<i>5<i>6]" |
| + "[(6-7)s:b|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>7<b>0|<>:p$<b>64<i>6<i>7<i>7<b>0]"); |
| return fd; |
| } |
| |
| |
| private FieldDocument createFieldDoc3 () { |
| FieldDocument fd = new FieldDocument(); |
| fd.addString("ID", "doc-0"); |
| fd.addTV("base", "text", |
| "[(0-1)s:b|_1$<i>0<i>1|<>:s$<b>64<i>0<i>2<i>2<b>0|<>:p$<b>64<i>0<i>4<i>4<b>0]" |
| + "[(1-2)s:b|s:c|_2$<i>1<i>2]" |
| + "[(2-3)s:c|_3$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>5<b>0]" |
| + "[(3-4)s:b|_4$<i>3<i>4]" + "[(4-5)s:b|_5$<i>4<i>5]" |
| + "[(5-6)s:b|_6$<i>5<i>6]" + // gap |
| "[(6-7)s:c|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>7<b>0|<>:p$<b>64<i>6<i>7<i>7<b>0]"); |
| return fd; |
| } |
| |
| |
| /** |
| * Unordered, same sentence |
| * */ |
| @Test |
| public void testCase1 () throws IOException { |
| ki = new KrillIndex(); |
| ki.addDoc(createFieldDoc0()); |
| ki.commit(); |
| |
| List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>(); |
| constraints.add(createConstraint("w", 0, 2, false, false)); |
| constraints.add(createConstraint("s", 0, 0, false, false)); |
| |
| SpanQuery mdq; |
| mdq = createQuery("s:b", "s:c", constraints, false); |
| kr = ki.search(mdq, (short) 10); |
| // System.out.println(mdq); |
| |
| assertEquals((long) 3, kr.getTotalResults()); |
| assertEquals(0, kr.getMatch(0).getStartPos()); |
| assertEquals(2, kr.getMatch(0).getEndPos()); |
| assertEquals(1, kr.getMatch(1).getStartPos()); |
| assertEquals(2, kr.getMatch(1).getEndPos()); |
| assertEquals(2, kr.getMatch(2).getStartPos()); |
| assertEquals(4, kr.getMatch(2).getEndPos()); |
| } |
| |
| |
| /** |
| * Ordered |
| * Unordered |
| * Two constraints |
| * Three constraints |
| * */ |
| @Test |
| public void testCase2 () throws IOException { |
| ki = new KrillIndex(); |
| ki.addDoc(createFieldDoc0()); |
| ki.commit(); |
| |
| // Ordered - two constraints |
| List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>(); |
| constraints.add(createConstraint("w", 0, 2, true, false)); |
| constraints.add(createConstraint("s", 1, 1, true, false)); |
| |
| SpanQuery mdq; |
| mdq = createQuery("s:b", "s:c", constraints, true); |
| kr = ki.search(mdq, (short) 10); |
| assertEquals((long) 3, kr.getTotalResults()); |
| assertEquals(0, kr.getMatch(0).getStartPos()); |
| assertEquals(3, kr.getMatch(0).getEndPos()); |
| assertEquals(1, kr.getMatch(1).getStartPos()); |
| assertEquals(3, kr.getMatch(1).getEndPos()); |
| assertEquals(3, kr.getMatch(2).getStartPos()); |
| assertEquals(5, kr.getMatch(2).getEndPos()); |
| |
| // Three constraints |
| constraints.add(createConstraint("p", 0, 0, true, false)); |
| mdq = createQuery("s:b", "s:c", constraints, true); |
| kr = ki.search(mdq, (short) 10); |
| assertEquals((long) 2, kr.getTotalResults()); |
| |
| |
| // Unordered - two constraints |
| constraints.clear(); |
| constraints.add(createConstraint("w", 0, 2, false, false)); |
| constraints.add(createConstraint("s", 1, 1, false, false)); |
| |
| mdq = createQuery("s:c", "s:b", constraints, false); |
| kr = ki.search(mdq, (short) 10); |
| assertEquals((long) 4, kr.getTotalResults()); |
| assertEquals(1, kr.getMatch(2).getStartPos()); |
| assertEquals(4, kr.getMatch(2).getEndPos()); |
| |
| // Three constraints |
| constraints.add(createConstraint("p", 0, 0, false, false)); |
| mdq = createQuery("s:b", "s:c", constraints, false); |
| kr = ki.search(mdq, (short) 10); |
| assertEquals((long) 3, kr.getTotalResults()); |
| |
| } |
| |
| |
| /** |
| * Multiple documents |
| * Ensure same doc (inner term span) |
| * */ |
| @Test |
| public void testCase3 () throws IOException { |
| ki = new KrillIndex(); |
| ki.addDoc(createFieldDoc0()); |
| ki.addDoc(createFieldDoc1()); |
| ki.addDoc(createFieldDoc2()); |
| ki.commit(); |
| |
| List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>(); |
| constraints.add(createConstraint("w", 1, 2, false, false)); |
| constraints.add(createConstraint("s", 1, 2, false, false)); |
| |
| SpanQuery mdq; |
| mdq = createQuery("s:b", "s:e", constraints, false); |
| kr = ki.search(mdq, (short) 10); |
| |
| assertEquals((long) 5, kr.getTotalResults()); |
| assertEquals(3, kr.getMatch(0).getStartPos()); |
| assertEquals(6, kr.getMatch(0).getEndPos()); |
| assertEquals(2, kr.getMatch(1).getLocalDocID()); |
| assertEquals(1, kr.getMatch(2).getStartPos()); |
| assertEquals(4, kr.getMatch(2).getEndPos()); |
| assertEquals(3, kr.getMatch(3).getStartPos()); |
| assertEquals(5, kr.getMatch(3).getEndPos()); |
| assertEquals(4, kr.getMatch(4).getStartPos()); |
| assertEquals(7, kr.getMatch(4).getEndPos()); |
| |
| // System.out.print(kr.getTotalResults()+"\n"); |
| // for (int i=0; i< kr.getTotalResults(); i++){ |
| // System.out.println( |
| // kr.match(i).getLocalDocID()+" "+ |
| // kr.match(i).startPos + " " + |
| // kr.match(i).endPos |
| // ); |
| // } |
| |
| } |
| |
| |
| /** |
| * Skip to |
| * */ |
| @Test |
| public void testCase4 () throws IOException { |
| ki = new KrillIndex(); |
| ki.addDoc(createFieldDoc0()); |
| ki.addDoc(createFieldDoc3()); |
| ki.addDoc(createFieldDoc1()); |
| ki.addDoc(createFieldDoc2()); |
| ki.commit(); |
| |
| List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>(); |
| constraints.add(createConstraint("w", 1, 2, false, false)); |
| constraints.add(createConstraint("s", 1, 2, false, false)); |
| |
| SpanQuery mdq; |
| mdq = createQuery("s:b", "s:c", constraints, false); |
| |
| SpanQuery sq = new SpanNextQuery(mdq, new SpanTermQuery(new Term( |
| "base", "s:e"))); |
| kr = ki.search(sq, (short) 10); |
| |
| assertEquals((long) 2, kr.getTotalResults()); |
| assertEquals(3, kr.getMatch(0).getStartPos()); |
| assertEquals(6, kr.getMatch(0).getEndPos()); |
| assertEquals(3, kr.getMatch(1).getLocalDocID()); |
| assertEquals(1, kr.getMatch(1).getStartPos()); |
| assertEquals(5, kr.getMatch(1).getEndPos()); |
| |
| } |
| |
| |
| /** |
| * Same tokens: ordered and unordered yield the same results |
| * */ |
| @Test |
| public void testCase5 () throws IOException { |
| ki = new KrillIndex(); |
| ki.addDoc(createFieldDoc0()); |
| ki.addDoc(createFieldDoc1()); |
| ki.commit(); |
| |
| List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>(); |
| constraints.add(createConstraint("w", 1, 2, false, false)); |
| constraints.add(createConstraint("s", 1, 2, false, false)); |
| |
| SpanQuery mdq; |
| mdq = createQuery("s:c", "s:c", constraints, false); |
| kr = ki.search(mdq, (short) 10); |
| |
| assertEquals((long) 4, kr.getTotalResults()); |
| assertEquals(1, kr.getMatch(0).getStartPos()); |
| assertEquals(3, kr.getMatch(0).getEndPos()); |
| assertEquals(2, kr.getMatch(1).getStartPos()); |
| assertEquals(5, kr.getMatch(1).getEndPos()); |
| assertEquals(1, kr.getMatch(2).getLocalDocID()); |
| assertEquals(1, kr.getMatch(2).getStartPos()); |
| assertEquals(4, kr.getMatch(2).getEndPos()); |
| assertEquals(3, kr.getMatch(3).getStartPos()); |
| assertEquals(6, kr.getMatch(3).getEndPos()); |
| |
| } |
| |
| |
| /** |
| * Exclusion |
| * Gaps |
| * */ |
| @Test |
| public void testCase6 () throws IOException { |
| ki = new KrillIndex(); |
| ki.addDoc(createFieldDoc3()); |
| ki.commit(); |
| |
| // First constraint - token exclusion |
| SpanQuery sx = new SpanTermQuery(new Term("base", "s:b")); |
| SpanQuery sy = new SpanTermQuery(new Term("base", "s:c")); |
| |
| DistanceConstraint dc1 = createConstraint("w", 0, 1, false, true); |
| SpanDistanceQuery sq = new SpanDistanceQuery(sx, sy, dc1, true); |
| |
| kr = ki.search(sq, (short) 10); |
| assertEquals((long) 1, kr.getTotalResults()); |
| // 4-5 |
| |
| // Second constraint - element distance |
| DistanceConstraint dc2 = createConstraint("s", 1, 1, false, false); |
| sq = new SpanDistanceQuery(sx, sy, dc2, true); |
| kr = ki.search(sq, (short) 10); |
| // 0-3, 1-3, 1-4, 1-5, 3-7, 4-7 |
| assertEquals((long) 6, kr.getTotalResults()); |
| |
| |
| List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>(); |
| constraints.add(dc1); |
| constraints.add(dc2); |
| |
| SpanQuery mdq; |
| mdq = createQuery("s:b", "s:c", constraints, false); |
| kr = ki.search(mdq, (short) 10); |
| |
| assertEquals((long) 2, kr.getTotalResults()); |
| assertEquals(1, kr.getMatch(0).getStartPos()); |
| assertEquals(5, kr.getMatch(0).getEndPos()); |
| assertEquals(4, kr.getMatch(1).getStartPos()); |
| assertEquals(7, kr.getMatch(1).getEndPos()); |
| } |
| |
| |
| /** |
| * Exclusion, multiple documents |
| * */ |
| @Test |
| public void testCase7 () throws IOException { |
| ki = new KrillIndex(); |
| ki.addDoc(createFieldDoc2()); |
| ki.commit(); |
| |
| SpanQuery sx = new SpanTermQuery(new Term("base", "s:b")); |
| SpanQuery sy = new SpanTermQuery(new Term("base", "s:c")); |
| // Second constraint |
| SpanDistanceQuery sq = new SpanDistanceQuery(sx, sy, createConstraint( |
| "s", 0, 0, false, true), true); |
| kr = ki.search(sq, (short) 10); |
| assertEquals((long) 3, kr.getTotalResults()); |
| // 0-1, 1-2, 6-7 |
| |
| // Exclusion within the same sentence |
| List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>(); |
| constraints.add(createConstraint("w", 0, 2, false, true)); |
| constraints.add(createConstraint("s", 0, 0, false, true)); |
| |
| SpanQuery mdq; |
| mdq = createQuery("s:b", "s:c", constraints, false); |
| kr = ki.search(mdq, (short) 10); |
| assertEquals((long) 2, kr.getTotalResults()); |
| assertEquals(0, kr.getMatch(0).getStartPos()); |
| assertEquals(1, kr.getMatch(0).getEndPos()); |
| assertEquals(6, kr.getMatch(1).getStartPos()); |
| assertEquals(7, kr.getMatch(1).getEndPos()); |
| |
| |
| // Third constraint |
| sq = new SpanDistanceQuery(sx, sy, createConstraint("p", 0, 0, false, |
| true), true); |
| kr = ki.search(sq, (short) 10); |
| assertEquals((long) 1, kr.getTotalResults()); |
| // 6-7 |
| |
| constraints.add(createConstraint("p", 0, 0, false, true)); |
| mdq = createQuery("s:b", "s:c", constraints, false); |
| kr = ki.search(mdq, (short) 10); |
| |
| assertEquals((long) 1, kr.getTotalResults()); |
| assertEquals(6, kr.getMatch(0).getStartPos()); |
| assertEquals(7, kr.getMatch(0).getEndPos()); |
| |
| } |
| } |