Added minor changes to collector system
diff --git a/pom.xml b/pom.xml
index 247f241..ffe154e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -88,6 +88,20 @@
<version>1.7.5</version>
</dependency>
+ <!-- SQLite for database connection tests -->
+ <dependency>
+ <groupId>org.xerial</groupId>
+ <artifactId>sqlite-jdbc</artifactId>
+ <version>3.7.2</version>
+ </dependency>
+
+ <!-- Database Connection Pool Manager -->
+ <dependency>
+ <groupId>c3p0</groupId>
+ <artifactId>c3p0</artifactId>
+ <version>0.9.1.2</version>
+ </dependency>
+
<!-- solr dependency -->
<!--
<dependency>
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index ddb616c..038f1cc 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -1224,9 +1224,9 @@
// Collect matches
- public String collect (KorapCollection collection,
- KorapSearch ks,
- MatchCollector mc) {
+ public MatchCollector collect (KorapCollection collection,
+ KorapSearch ks,
+ MatchCollector mc) {
if (DEBUG)
log.trace("Start collecting matches");
@@ -1247,6 +1247,8 @@
HashSet<String> fieldsToLoadLocal = new HashSet<>();
fieldsToLoadLocal.add("ID");
+ // List<KorapMatch> atomicMatches = new ArrayList<KorapMatch>(10);
+
try {
// Rewrite query (for regex and wildcard queries)
@@ -1256,8 +1258,12 @@
query = (SpanQuery) rewrittenQuery;
};
+ int matchcount = 0;
+ int uniqueDocID = -1;
+
for (AtomicReaderContext atomic : this.reader().leaves()) {
+ int previousDocID = -1;
int oldLocalDocID = -1;
// Use OpenBitSet;
@@ -1271,25 +1277,48 @@
while (spans.next()) {
int localDocID = spans.doc();
- // int docID = atomic.docBase + localDocID;
- Document doc = lreader.document(localDocID, fieldsToLoadLocal);
- // Do not load all of this, in case the doc is the same!
+ // New match
+ // MatchIdentifier possibly needs more
+ /*
KorapMatch match = new KorapMatch();
- match.setDocID(doc.get("ID"));
match.setStartPos(spans.start());
match.setEndPos(spans.end());
- // MatchIdentifier possibly needs more
// Add payload information to match
if (spans.isPayloadAvailable())
match.addPayload(spans.getPayload());
+ */
- if (DEBUG)
- log.trace("I've got a match in {}", match.getDocID());
-
- // Add match to the collector
- mc.add(match);
+ if (previousDocID != localDocID) {
+ // Flush the count of the document that just ended
+ if (matchcount > 0) {
+ mc.add(uniqueDocID, matchcount);
+ matchcount = 0;
+ };
+
+ // Read document id from index
+ /*
+ uniqueDocID = lreader.document(localDocID, fieldsToLoadLocal).get("ID");
+ */
+ // NOTE(review): localDocID is leaf-local - confirm whether atomic.docBase must be added
+ uniqueDocID = localDocID;
+ previousDocID = localDocID;
+ };
+ // Count every match, including the first one of each document
+ matchcount++;
+ };
+
+ /*
+ if (!atomicMatches.isEmpty()) {
+ // Add matches to the collector
+ mc.add(uniqueDocID, atomicMatches);
+ atomicMatches.clear();
+ };
+ */
+ if (matchcount > 0) {
+ mc.add(uniqueDocID, matchcount);
+ matchcount = 0;
};
};
@@ -1299,7 +1328,8 @@
mc.setError("There was an IO error");
log.warn( e.getLocalizedMessage() );
};
-
- return mc.toJSON();
+
+ mc.commit();
+ return mc;
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapMatch.java b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
index 77d6b4d..b014903 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapMatch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
@@ -528,11 +528,19 @@
if (this.localDocID == -1)
return null;
- MatchIdentifier id = new MatchIdentifier();
+ MatchIdentifier id = this.getMatchIdentifier();
// Get prefix string corpus/doc
id.setCorpusID(this.getCorpusID());
id.setDocID(this.getDocID());
+
+ return (this.identifier = id.toString());
+ };
+
+ @JsonIgnore
+ public MatchIdentifier getMatchIdentifier () {
+ MatchIdentifier id = new MatchIdentifier();
+
id.setStartPos(startPos);
id.setEndPos(endPos);
@@ -547,7 +555,7 @@
};
};
- return (this.identifier = id.toString());
+ return id;
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapNode.java b/src/main/java/de/ids_mannheim/korap/KorapNode.java
index 14c57c3..e50a990 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapNode.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapNode.java
@@ -37,8 +37,6 @@
*/
private static String path = new String("/home/ndiewald/Repositories/korap/KorAP-modules/KorAP-lucene-index/sandbox/index");
-
-
/**
* Starts Grizzly HTTP server exposing JAX-RS resources defined in this application.
* @return Grizzly HTTP server.
diff --git a/src/main/java/de/ids_mannheim/korap/index/MatchCollector.java b/src/main/java/de/ids_mannheim/korap/index/MatchCollector.java
index c1f46b8..8c2faaf 100644
--- a/src/main/java/de/ids_mannheim/korap/index/MatchCollector.java
+++ b/src/main/java/de/ids_mannheim/korap/index/MatchCollector.java
@@ -1,8 +1,9 @@
package de.ids_mannheim.korap.index;
import de.ids_mannheim.korap.KorapMatch;
+import java.util.*;
public interface MatchCollector {
- public int add (KorapMatch km);
+ public void add (int uniqueDocID, int matchcount);
/*
* The following methods are shared and should be used from KorapResult
@@ -13,10 +14,8 @@
public void setError(String s);
public void setBenchmarkHitCounter(long t1, long t2);
- public String getBenchmarkHitCounter();
-
-
public int getMatchCount ();
public int getDocumentCount ();
public String toJSON();
+ public void commit();
};
diff --git a/src/main/java/de/ids_mannheim/korap/index/MatchIdentifier.java b/src/main/java/de/ids_mannheim/korap/index/MatchIdentifier.java
index fcf0dc4..758aaad 100644
--- a/src/main/java/de/ids_mannheim/korap/index/MatchIdentifier.java
+++ b/src/main/java/de/ids_mannheim/korap/index/MatchIdentifier.java
@@ -79,8 +79,27 @@
};
sb.append(this.docID);
- sb.append("-p");
- sb.append(this.startPos).append('-').append(this.endPos);
+ sb.append('-');
+ sb.append(this.getPositionString());
+ return sb.toString();
+ };
+
+ /*
+ public String getPositionBytes () {
+ ByteBuffer b = new ByteBuffer(8);
+ b.putInt(this.startPos);
+ b.putInt(this.endPos);
+
+ // Get Position information
+ for (int[] i : this.pos) {
+ b.putInt(i[2]).putInt(i[0]).putInt(i[1]);
+ };
+ };
+ */
+
+ public String getPositionString () {
+ StringBuilder sb = new StringBuilder();
+ sb.append('p').append(this.startPos).append('-').append(this.endPos);
// Get Position information
for (int[] i : this.pos) {
@@ -88,6 +107,8 @@
sb.append(i[0]).append('-').append(i[1]);
};
+ return sb.toString();
+
/*
if (this.processed) {
sb.append('c');
@@ -103,7 +124,5 @@
};
};
*/
-
- return sb.toString();
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/index/collector/MatchCollectorDB.java b/src/main/java/de/ids_mannheim/korap/index/collector/MatchCollectorDB.java
new file mode 100644
index 0000000..9c35b6f
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/index/collector/MatchCollectorDB.java
@@ -0,0 +1,116 @@
+package de.ids_mannheim.korap.index.collector;
+import de.ids_mannheim.korap.KorapMatch;
+import de.ids_mannheim.korap.index.MatchCollector;
+import java.util.*;
+
+/**
+ * Match collector that buffers {uniqueDocID, matchcount} pairs and flushes
+ * the buffer once it has grown beyond the configured buffer size.
+ *
+ * Writing to a database is not implemented yet: commit() currently only
+ * discards the buffered pairs.
+ *
+ * Todo: In case there are multiple threads searching,
+ * the list should be synchronized, e.g. via Collections.synchronizedList().
+ */
+public class MatchCollectorDB implements MatchCollector {
+
+    // Last message passed to setError(); only stored so far
+    private String error;
+
+    // Running totals over all add() calls; never reset by commit()
+    private int doccount = 0;
+    private int matchcount = 0;
+
+    // Number of pairs buffered since the last commit()
+    private int doccollect = 0;
+
+    // Buffered {uniqueDocID, matchcount} pairs
+    private final List<int[]> matchCollector;
+    private final int bufferSize;
+
+    // Target table name; stored but not used yet
+    private final String tableName;
+
+    /**
+     * Create a new collector for database connections.
+     *
+     * @param bufferSize threshold of buffered pairs that triggers a commit
+     * @param tableName  name of the table the matches are meant for
+     */
+    public MatchCollectorDB (int bufferSize, String tableName) {
+        this.bufferSize = bufferSize;
+        this.tableName = tableName;
+        // add() lets the buffer grow to bufferSize + 2 pairs before flushing
+        this.matchCollector = new ArrayList<>(bufferSize + 2);
+    };
+
+    /**
+     * Add the match count of one document to the totals and to the buffer.
+     * The buffer is committed as soon as more than bufferSize pairs had
+     * already been buffered before this call.
+     *
+     * @param uniqueDocID identifier of the document
+     * @param matchcount  number of matches found in that document
+     */
+    @Override
+    public void add (int uniqueDocID, int matchcount) {
+        this.doccount++;
+        this.matchcount += matchcount;
+        this.matchCollector.add(new int[]{uniqueDocID, matchcount});
+        if (this.doccollect++ > bufferSize)
+            this.commit();
+    };
+
+    /**
+     * Store an error message.
+     *
+     * @param msg the message to remember
+     */
+    @Override
+    public void setError(String msg) {
+        this.error = msg;
+    };
+
+    /**
+     * Benchmark timings are ignored by this collector.
+     */
+    @Override
+    public void setBenchmarkHitCounter(long t1, long t2) {
+    };
+
+    /**
+     * @return the sum of all match counts passed to add()
+     */
+    @Override
+    public int getMatchCount () {
+        return matchcount;
+    };
+
+    /**
+     * @return the number of add() calls made so far
+     */
+    @Override
+    public int getDocumentCount () {
+        return doccount;
+    };
+
+    /**
+     * @return the document and match totals as a JSON object string
+     */
+    @Override
+    public String toJSON () {
+        // This may also be a commit!
+        return "{ \"documents\" : " + doccount + ", \"matches\" : " + matchcount + " }";
+    };
+
+    /**
+     * Flush the buffer. Nothing is written anywhere yet: the buffered
+     * pairs are dropped and the buffer counter starts again at zero.
+     */
+    @Override
+    public void commit () {
+        this.matchCollector.clear();
+        this.doccollect = 0;
+    };
+};
diff --git a/src/main/java/de/ids_mannheim/korap/index/collector/MatchCollectorTest.java b/src/main/java/de/ids_mannheim/korap/index/collector/MatchCollectorTest.java
new file mode 100644
index 0000000..9935919
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/index/collector/MatchCollectorTest.java
@@ -0,0 +1,75 @@
+package de.ids_mannheim.korap.index.collector;
+import de.ids_mannheim.korap.index.MatchCollector;
+import de.ids_mannheim.korap.KorapMatch;
+import java.util.*;
+
+/**
+ * Minimal collector that only keeps running totals of documents and matches.
+ */
+public class MatchCollectorTest implements MatchCollector {
+
+    private String errorMessage;
+    private int documentTotal = 0;
+    private int matchTotal = 0;
+
+    /**
+     * Register one document together with its number of matches.
+     */
+    @Override
+    public void add (int uniqueDocID, int matchcount) {
+        this.matchTotal = this.matchTotal + matchcount;
+        this.documentTotal += 1;
+    };
+
+    /**
+     * Keep the given error message.
+     */
+    @Override
+    public void setError(String msg) {
+        this.errorMessage = msg;
+    };
+
+    /**
+     * Benchmark timings are not recorded by this collector.
+     */
+    @Override
+    public void setBenchmarkHitCounter(long t1, long t2) {
+    };
+
+    /**
+     * @return the sum of all match counts added so far
+     */
+    @Override
+    public int getMatchCount () {
+        return this.matchTotal;
+    };
+
+    /**
+     * @return the number of documents added so far
+     */
+    @Override
+    public int getDocumentCount () {
+        return this.documentTotal;
+    };
+
+    /**
+     * @return both totals serialized as a JSON object string
+     */
+    @Override
+    public String toJSON () {
+        // This is also a commit!
+        StringBuilder json = new StringBuilder("{ \"documents\" : ");
+        json.append(this.documentTotal);
+        json.append(", \"matches\" : ");
+        json.append(this.matchTotal);
+        json.append(" }");
+        return json.toString();
+    };
+
+    /**
+     * Nothing is buffered, so there is nothing to commit.
+     */
+    @Override
+    public void commit() {
+    };
+};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java
index 11b6fda..fe1a416 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java
@@ -213,7 +213,7 @@
payload.addAll(firstSpans.getPayload());
}
if (classNumber > 0 ){
- System.out.println("Extension offsets "+start+","+end);
+ // System.out.println("Extension offsets "+start+","+end);
payload.add(calculateExtensionOffsets(start, end));
}
return payload;
diff --git a/src/main/java/de/ids_mannheim/korap/server/Resource.java b/src/main/java/de/ids_mannheim/korap/server/Resource.java
index f71e9cb..e500a81 100644
--- a/src/main/java/de/ids_mannheim/korap/server/Resource.java
+++ b/src/main/java/de/ids_mannheim/korap/server/Resource.java
@@ -23,6 +23,8 @@
import java.util.regex.Pattern;
import java.util.regex.Matcher;
+import com.mchange.v2.c3p0.*;
+
/**
* Root resource (exposed at root path)
*
diff --git a/src/test/java/de/ids_mannheim/korap/server/DBTest.java b/src/test/java/de/ids_mannheim/korap/server/DBTest.java
new file mode 100644
index 0000000..11f3a23
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/server/DBTest.java
@@ -0,0 +1,109 @@
+package de.ids_mannheim.korap.server;
+
+import de.ids_mannheim.korap.index.MatchCollector;
+import de.ids_mannheim.korap.index.collector.MatchCollectorDB;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.Statement;
+import java.sql.SQLException;
+
+/*
+  bitbucket.org/xerial/sqlite-jdbc
+*/
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Smoke tests for the in-memory SQLite connection and for MatchCollectorDB.
+ */
+public class DBTest {
+
+    private Connection conn;
+    private Statement stat;
+
+    /**
+     * Open a fresh in-memory SQLite database and create the people table.
+     */
+    @Before
+    public void setUp() throws Exception {
+        Class.forName("org.sqlite.JDBC");
+        conn = DriverManager.getConnection("jdbc:sqlite::memory:");
+        this.stat = conn.createStatement();
+        stat.executeUpdate("CREATE TABLE IF NOT EXISTS people (name TEXT, age INTEGER);");
+        conn.setAutoCommit(false);
+    };
+
+    /**
+     * Insert two rows as one batch and read them back.
+     */
+    @Test
+    public void TestDatabase () throws Exception {
+        // Close the statement even if one of the calls below throws
+        try (PreparedStatement prep = this.conn.prepareStatement(
+                "INSERT INTO people VALUES (?, ?);")) {
+
+            prep.setString(1, "Peter");
+            prep.setInt(2, 24);
+            prep.addBatch();
+
+            prep.setString(1, "Klaus");
+            prep.setInt(2, 31);
+            prep.addBatch();
+
+            prep.executeBatch();
+        }
+
+        // Switching auto-commit back on commits the open transaction
+        conn.setAutoCommit(true);
+
+        try (ResultSet rs = stat.executeQuery("SELECT * FROM people;")) {
+
+            // Fail with an assertion instead of reading from a missing row
+            assertTrue(rs.next());
+            assertEquals("Peter", rs.getString("name"));
+            assertEquals(24, rs.getInt("age"));
+
+            assertTrue(rs.next());
+            assertEquals("Klaus", rs.getString("name"));
+            assertEquals(31, rs.getInt("age"));
+        }
+    };
+
+    /**
+     * Check the totals reported after adding four documents.
+     */
+    @Test
+    public void TestMatchCollectorDB () throws Exception {
+        MatchCollector mc = new MatchCollectorDB(2000, "matchXYZ");
+        mc.add(5,7);
+        mc.add(8,2);
+        mc.add(9,10);
+        mc.add(16,90);
+        mc.commit();
+        assertEquals(109, mc.getMatchCount());
+        assertEquals(4, mc.getDocumentCount());
+    };
+
+    /**
+     * Release the statement and the connection.
+     */
+    @After
+    public void shutDown () throws Exception {
+        try {
+            // setUp() may have failed before the fields were assigned
+            if (this.stat != null)
+                this.stat.close();
+        }
+        finally {
+            if (this.conn != null)
+                this.conn.close();
+        }
+    };
+};