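Bugfix for multithreaded environments: create the IndexWriter lazily in addDoc instead of in the constructor; also start distance-constraint support in SpanSequenceQueryWrapper and add COSMAS test queries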
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index 037452a..34f4962 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -110,6 +110,7 @@
public IndexReader reader;
private IndexWriter writer;
+ private IndexWriterConfig config;
private IndexSearcher searcher;
private boolean readerOpen = false;
private int commitCounter = 0;
@@ -166,9 +167,7 @@
);
// Create configuration with base analyzer
- IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer);
-
- this.writer = new IndexWriter(this.directory, config);
+ this.config = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer);
};
@@ -193,7 +192,8 @@
};
public void closeWriter () throws IOException {
- this.writer.close();
+ if (this.writer != null)
+ this.writer.close();
};
@@ -222,6 +222,10 @@
public FieldDocument addDoc (FieldDocument fd) throws IOException {
+ if (this.writer == null)
+ this.writer = new IndexWriter(this.directory, this.config);
+
+
// Add document to writer
this.writer.addDocument( fd.doc );
if (++commitCounter > autoCommit) {
@@ -261,6 +265,9 @@
};
public void commit () throws IOException {
+ if (this.writer == null)
+ return;
+
if (commitCounter > 0) {
this.writer.commit();
commitCounter = 0;
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
index 977b556..ebaa2a5 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
@@ -2,6 +2,7 @@
import java.util.*;
import de.ids_mannheim.korap.query.SpanNextQuery;
+import de.ids_mannheim.korap.query.SpanDistanceQuery;
import de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper;
import de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper;
@@ -15,11 +16,47 @@
*/
public class SpanSequenceQueryWrapper implements SpanQueryWrapperInterface {
private String field;
- public ArrayList<SpanQuery> segments;
+ private ArrayList<SpanQuery> segments;
+ private ArrayList<DistanceConstraint> constraints;
+ private boolean isInOrder = true;
+
+
+ private class DistanceConstraint {
+ private int min = 0;
+ private int max = 0;
+ private String element = null;
+
+ public DistanceConstraint (int min, int max) {
+ this.min = min;
+ this.max = max;
+ };
+
+ public DistanceConstraint (int min, int max, String element) {
+ this.min = min;
+ this.max = max;
+ this.element = element;
+ };
+
+ public boolean hasElement () {
+ return (this.element != null ? true : false);
+ };
+
+ public String getElement () {
+ return this.element;
+ };
+
+ public int getMin () {
+ return this.min;
+ };
+
+ public int getMax () {
+ return this.max;
+ };
+ };
public SpanSequenceQueryWrapper (String field) {
this.field = field;
- this.segments = new ArrayList<SpanQuery>();
+ this.segments = new ArrayList<SpanQuery>(2);
};
public SpanSequenceQueryWrapper (String field, String ... terms) {
@@ -82,6 +119,17 @@
return this;
};
+ public SpanSequenceQueryWrapper withConstraint (int min, int max) {
+ this.constraints.add(new DistanceConstraint(min, max));
+ return this;
+ };
+
+ public SpanSequenceQueryWrapper withConstraint (int min, int max, String element) {
+ this.constraints.add(new DistanceConstraint(min, max, element));
+ return this;
+ };
+
+
public SpanQuery toQuery () {
if (this.segments.size() == 0) {
return (SpanQuery) null;
@@ -89,13 +137,31 @@
SpanQuery query = this.segments.get(0);
- for (int i = 1; i < this.segments.size(); i++) {
- query = new SpanNextQuery(
- query,
- this.segments.get(i),
- false
- );
+ // NextQueries:
+ if (this.constraints == null) {
+ for (int i = 1; i < this.segments.size(); i++) {
+ query = new SpanNextQuery(
+ query,
+ this.segments.get(i) // Todo: Maybe payloads are not necessary
+ );
+ };
+ return (SpanQuery) query;
};
- return (SpanQuery) query;
+
+ // DistanceQueries
+ if (this.constraints.size() == 1) {
+ };
+
+ // MultiDistanceQueries
+
+ return (SpanQuery) null;
+ };
+
+ public void setInOrder (boolean isInOrder) {
+ this.isInOrder = isInOrder;
+ };
+
+ public boolean isInOrder () {
+ return this.isInOrder;
};
};
diff --git a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java b/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java
index 4296da4..2d10b6f 100644
--- a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java
@@ -50,7 +50,8 @@
contentBuilder.append(str);
};
in.close();
- } catch (IOException e) {
+ }
+ catch (IOException e) {
fail(e.getMessage());
}
return contentBuilder.toString();
diff --git a/src/test/resources/queries/cosmas10.json b/src/test/resources/queries/cosmas10.json
new file mode 100644
index 0000000..d958810
--- /dev/null
+++ b/src/test/resources/queries/cosmas10.json
@@ -0,0 +1,41 @@
+{
+ "context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query" : {
+ "@type" : "korap:group",
+ "operation" : "operation:sequence",
+ "operands" : [ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "Institut",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }, {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "für",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }, {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "caseInsensitive" : true,
+ "key" : "deutsche",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }, {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "Sprache",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ } ]
+ }
+}
\ No newline at end of file
diff --git a/src/test/resources/queries/cosmas16.json b/src/test/resources/queries/cosmas16.json
new file mode 100644
index 0000000..61333fd
--- /dev/null
+++ b/src/test/resources/queries/cosmas16.json
@@ -0,0 +1,31 @@
+{
+ "context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query" : {
+ "@type" : "korap:group",
+ "operation" : "operation:submatch",
+ "classRef" : [ 1 ],
+ "operands" : [ {
+ "@type" : "korap:group",
+ "operation" : "operation:position",
+ "frame" : "frame:startswith",
+ "operands" : [ {
+ "@type" : "korap:span",
+ "key" : "s"
+ }, {
+ "@type" : "korap:group",
+ "operation" : "operation:class",
+ "class" : 1,
+ "operands" : [ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "caseInsensitive" : true,
+ "key" : "wegen",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ } ]
+ } ]
+ } ]
+ }
+}
\ No newline at end of file
diff --git a/src/test/resources/queries/cosmas17.json b/src/test/resources/queries/cosmas17.json
new file mode 100644
index 0000000..32e073b
--- /dev/null
+++ b/src/test/resources/queries/cosmas17.json
@@ -0,0 +1,21 @@
+{
+ "context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query" : {
+ "@type" : "korap:group",
+ "operation" : "operation:position",
+ "frame" : "frame:startswith",
+ "operands" : [ {
+ "@type" : "korap:span",
+ "key" : "s"
+ }, {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "caseInsensitive" : true,
+ "key" : "wegen",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ } ]
+ }
+}
\ No newline at end of file
diff --git a/src/test/resources/queries/cosmas20.json b/src/test/resources/queries/cosmas20.json
new file mode 100644
index 0000000..32887c7
--- /dev/null
+++ b/src/test/resources/queries/cosmas20.json
@@ -0,0 +1,30 @@
+{
+ "context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query" : {
+ "@type" : "korap:group",
+ "operation" : "operation:submatch",
+ "classRef" : [ 1 ],
+ "operands" : [ {
+ "@type" : "korap:group",
+ "operation" : "operation:position",
+ "frame" : "frame:endswith",
+ "operands" : [ {
+ "@type" : "korap:span",
+ "key" : "s"
+ }, {
+ "@type" : "korap:group",
+ "operation" : "operation:class",
+ "class" : 1,
+ "operands" : [ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "V",
+ "layer" : "pos",
+ "match" : "match:eq"
+ }
+ } ]
+ } ]
+ } ]
+ }
+}
\ No newline at end of file
diff --git a/src/test/resources/queries/cosmas3.json b/src/test/resources/queries/cosmas3.json
new file mode 100644
index 0000000..2c77f42
--- /dev/null
+++ b/src/test/resources/queries/cosmas3.json
@@ -0,0 +1,31 @@
+{
+ "context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query" : {
+ "@type" : "korap:group",
+ "operation" : "operation:sequence",
+ "inOrder" : true,
+ "distances" : [ {
+ "@type" : "korap:distance",
+ "key" : "w",
+ "min" : 1,
+ "max" : 3
+ } ],
+ "operands" : [ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "das",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }, {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "Buch",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ } ]
+ }
+}
\ No newline at end of file
diff --git a/src/test/resources/queries/cosmas4.json b/src/test/resources/queries/cosmas4.json
new file mode 100644
index 0000000..2040995
--- /dev/null
+++ b/src/test/resources/queries/cosmas4.json
@@ -0,0 +1,40 @@
+{
+ "context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query" : {
+ "@type" : "korap:group",
+ "operation" : "operation:sequence",
+ "inOrder" : true,
+ "distances" : [ {
+ "@type" : "korap:group",
+ "operation" : "operation:and",
+ "operands" : [ {
+ "@type" : "korap:distance",
+ "key" : "w",
+ "min" : 1,
+ "max" : 3
+ }, {
+ "@type" : "korap:distance",
+ "key" : "s",
+ "min" : 1,
+ "max" : 1
+ } ]
+ } ],
+ "operands" : [ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "das",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }, {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "Buch",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ } ]
+ }
+}
\ No newline at end of file
diff --git a/src/test/resources/queries/readme.txt b/src/test/resources/queries/readme.txt
index f6f2584..2aa2f2e 100644
--- a/src/test/resources/queries/readme.txt
+++ b/src/test/resources/queries/readme.txt
@@ -1,19 +1,27 @@
-bsp1.json: [base=foo]|([base=foo][base=bar])* meta author=Goethe&year=1815
-bsp1b.json: [base=foo]|([base=foo][base=bar]) meta author=Goethe&year=1815
-bsp2.json: ([base=foo]|[base=bar])[base=foobar]
-bsp3.json: shrink({[base=Mann]})
-bsp4.json: shrink({[base=foo]}[orth=bar])
-bsp5.json: shrink(1:[base=Der]{1:[base=Mann]})
-bsp6.json: [base=Katze]
-bsp7.json: [base!=Katze]
-bsp8.json: [!base=Katze]
-bsp9.json: [base=Katze&orth=Katzen]
-bsp10.json: [base=Katze][orth=und][orth=Hunde]
-bsp11.json: [!(base=Katze&orth=Katzen)]
-bsp12.json: contains(<np>,[base=Mann])
-bsp13.json: startswith(<np>,[!pos=Det])
-bsp13b.json: startswith(<np>,[pos=Det])
-bsp14.json: 'vers{2,3}uch'
-bsp15.json: [orth='vers.*ch']
-bsp16.json: [(base=bar|base=foo)&orth=foobar]
-bsp17.json: within(<np>,[base=Mann])
+bsp1: [base=foo]|([base=foo][base=bar])* meta author=Goethe&year=1815
+bsp1b: [base=foo]|([base=foo][base=bar]) meta author=Goethe&year=1815
+bsp2: ([base=foo]|[base=bar])[base=foobar]
+bsp3: shrink({[base=Mann]})
+bsp4: shrink({[base=foo]}[orth=bar])
+bsp5: shrink(1:[base=Der]{1:[base=Mann]})
+bsp6: [base=Katze]
+bsp7: [base!=Katze]
+bsp8: [!base=Katze]
+bsp9: [base=Katze&orth=Katzen]
+bsp10: [base=Katze][orth=und][orth=Hunde]
+bsp11: [!(base=Katze&orth=Katzen)]
+bsp12: contains(<np>,[base=Mann])
+bsp13: startswith(<np>,[!pos=Det])
+bsp13b: startswith(<np>,[pos=Det])
+bsp14: 'vers{2,3}uch'
+bsp15: [orth='vers.*ch']
+bsp16: [(base=bar|base=foo)&orth=foobar]
+bsp17: within(<np>,[base=Mann])
+
+// Based on KorAP-querySerialization/examples/
+cosmas3: "das /+w1:3 Buch" # word-distance constraint
+cosmas4: "das /+w1:3,s1 Buch" # combined word-distance and sentence-distance constraint
+cosmas10: "Institut für $deutsche Sprache" # finds both 'deutsche' and 'Deutsche' ($ makes the term case-insensitive)
+cosmas16: "$wegen #IN(L) <s>" # finds 'wegen' at beginning of sentence, also when capitalised
+cosmas17: "#BED($wegen , +sa)" # equivalent to cosmas16
+cosmas20: "MORPH(V) #IN(R) #ELEM(S)" # finds a verb at the end of a sentence, e.g. in subordinate clauses