Add totalResources to index search
Change-Id: I274997f0bab4ab4837c1f7811aaa2754e07f9d77
diff --git a/Changes b/Changes
index ba791db..185fd0f 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+0.61.3 2023-07-11
+ - [feature] Add totalResources to results (diewald)
+
0.61.2 2023-04-05
- [bugfix] Fix pagebreak retrieval (margaretha, diewald)
- [feature] Support token lists for match infos (solved #88,
diff --git a/pom.xml b/pom.xml
index 0796235..ff80e83 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,7 +35,7 @@
<groupId>de.ids_mannheim.korap</groupId>
<artifactId>Krill</artifactId>
- <version>0.61.2</version>
+ <version>0.61.3</version>
<packaging>jar</packaging>
<name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 6ec2c0e..6768bcf 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -459,7 +459,6 @@
if (this.reader.getRefCount() == 0) {
// Retry update
- // System.err.println("Retry update!");
break;
};
*/
@@ -1416,7 +1415,8 @@
};
// Some initializations ...
- int i = 0;
+ int i = 0; // number of matches (reported as totalResults)
+ int j = 0; // number of resources with matches (reported as totalResources)
int startIndex = kr.getStartIndex();
int count = kr.getItemsPerPage();
int hits = kr.getItemsPerPage() + startIndex;
@@ -1464,7 +1464,6 @@
if (DEBUG)
log.trace("Rewritten query is {}", query.toString());
-
// Todo: run this in a separated thread
for (LeafReaderContext atomic : this.reader().leaves()) {
@@ -1479,9 +1478,8 @@
*/
final FixedBitSet bitset = collection.bits(atomic);
- if (bitset.nextSetBit(0) == DocIdSetIterator.NO_MORE_DOCS) {
+ if (bitset.nextSetBit(0) == DocIdSetIterator.NO_MORE_DOCS)
continue;
- };
final PositionsToOffset pto = snippets ? new PositionsToOffset(atomic, field) : null;
@@ -1497,11 +1495,14 @@
if (DEBUG)
log.trace("Match Nr {}/{}", i, count);
-
+
// There are no more spans to find
if (!spans.next())
break;
+ // Increment resource counter
+ itemsPerResourceCounter++;
+
// Timeout!
if (tthread.getTime() > timeout) {
kr.setTimeExceeded(true);
@@ -1511,30 +1512,38 @@
localDocID = spans.doc();
- // Count hits per resource
- if (itemsPerResource > 0) {
+ // Document IDs are identical (or the previous ID is still -1)
+ if (localDocID == oldLocalDocID
+ || oldLocalDocID == -1) {
- // IDS are identical
- if (localDocID == oldLocalDocID
- || oldLocalDocID == -1) {
- if (itemsPerResourceCounter++ >= itemsPerResource) {
+ // Count hits per resource
+ if (itemsPerResource > 0) {
+
+ // Limit of items per resource is exceeded
+ if (itemsPerResourceCounter > itemsPerResource) {
+
+ // Skip to next resource
if (spans.skipTo(localDocID + 1) != true) {
break;
}
- else {
- itemsPerResourceCounter = 1;
- localDocID = spans.doc();
- };
+
+ itemsPerResourceCounter = 1;
+ localDocID = spans.doc();
};
}
+ }
- // Reset counter
- else
- itemsPerResourceCounter = 0;
+ // Match belongs to a new document
+ else
+ itemsPerResourceCounter = 1;
- oldLocalDocID = localDocID;
- };
+ if (itemsPerResourceCounter == 1)
+ j++;
+
+ oldLocalDocID = localDocID;
+
+
// The next matches are not yet part of the result
if (startIndex > i)
continue;
@@ -1612,20 +1621,26 @@
break;
};
- // Count hits per resource
- if (itemsPerResource > 0) {
- localDocID = spans.doc();
+ // Increment resource counter
+ itemsPerResourceCounter++;
+
+ localDocID = spans.doc();
- if (localDocID == DocIdSetIterator.NO_MORE_DOCS)
+ if (localDocID == DocIdSetIterator.NO_MORE_DOCS)
+ break;
+
+ // Document IDs are identical (or the previous ID is still -1)
+ if (localDocID == oldLocalDocID
+ || oldLocalDocID == -1) {
+
+ if (localDocID == -1)
break;
+
+ // Count hits per resource
+ if (itemsPerResource > 0) {
- // IDS are identical
- if (localDocID == oldLocalDocID
- || oldLocalDocID == -1) {
- if (localDocID == -1)
- break;
-
- if (itemsPerResourceCounter++ >= itemsPerResource) {
+ // Limit of items per resource is exceeded
+ if (itemsPerResourceCounter > itemsPerResource) {
if (spans.skipTo(localDocID + 1) != true) {
break;
};
@@ -1633,13 +1648,15 @@
localDocID = spans.doc();
};
}
+ }
+ // Reset counter
+ else
+ itemsPerResourceCounter = 1;
- // Reset counter
- else
- itemsPerResourceCounter = 0;
-
- oldLocalDocID = localDocID;
- };
+ if (itemsPerResourceCounter == 1)
+ j++;
+
+ oldLocalDocID = localDocID;
i++;
};
atomicMatches.clear();
@@ -1649,6 +1666,7 @@
kr.setItemsPerResource(itemsPerResource);
kr.setTotalResults(cutoff ? (long) -1 : (long) i);
+ kr.setTotalResources(cutoff ? (long) -1 : (long) j);
}
catch (IOException e) {
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
index 84d8636..f36cc28 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
@@ -440,8 +440,12 @@
String json = getJsonString(getClass()
.getResource("/queries/bsp-itemsPerResource.jsonld").getFile());
- Krill ks = new Krill(json);
- Result kr = ks.apply(ki);
+ Krill ks;
+ Result kr;
+ KrillMeta meta;
+
+ ks = new Krill(json);
+ kr = ks.apply(ki);
assertEquals(kr.getTotalResults(), 10);
assertEquals(0, kr.getStartIndex());
assertEquals(20, kr.getItemsPerPage());
@@ -452,6 +456,7 @@
assertEquals("WPD_AAA.00002", kr.getMatch(7).getDocID());
assertEquals("WPD_AAA.00002", kr.getMatch(8).getDocID());
assertEquals("WPD_AAA.00004", kr.getMatch(9).getDocID());
+ assertEquals(kr.getTotalResources(), 3);
ks = new Krill(json);
ks.getMeta().setItemsPerResource(1);
@@ -463,6 +468,7 @@
assertEquals("WPD_AAA.00004", kr.getMatch(2).getDocID());
assertEquals(kr.getTotalResults(), 3);
+ assertEquals(kr.getTotalResources(), 3);
assertEquals(0, kr.getStartIndex());
assertEquals(20, kr.getItemsPerPage());
@@ -478,11 +484,12 @@
assertEquals("WPD_AAA.00004", kr.getMatch(4).getDocID());
assertEquals(kr.getTotalResults(), 5);
+ assertEquals(kr.getTotalResources(), 3);
assertEquals(0, kr.getStartIndex());
assertEquals(20, kr.getItemsPerPage());
ks = new Krill(json);
- KrillMeta meta = ks.getMeta();
+ meta = ks.getMeta();
meta.setItemsPerResource(1);
meta.setStartIndex(1);
meta.setCount(1);
@@ -492,10 +499,28 @@
assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
assertEquals(kr.getTotalResults(), 3);
+ assertEquals(kr.getTotalResources(), 3);
assertEquals(1, kr.getStartIndex());
assertEquals(1, kr.getItemsPerPage());
assertEquals((short) 1, kr.getItemsPerResource());
+
+ ks = new Krill(json);
+ meta = ks.getMeta();
+ meta.setItemsPerResource(2);
+ meta.setStartIndex(2);
+ meta.setCount(1);
+
+ kr = ks.apply(ki);
+
+ assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
+
+ assertEquals(kr.getTotalResults(), 5);
+ assertEquals(kr.getTotalResources(), 3);
+ assertEquals(2, kr.getStartIndex());
+ assertEquals(1, kr.getItemsPerPage());
+
+
};