Add batch foundry/layer retrieval and input validation to getMatchInfo()
diff --git a/CHANGES b/CHANGES
index 60987bf..f39b7d0 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,9 @@
+0.31.5 2014-06-17
+ - [feature] Batch return of defined foundries and layers in
+ getMatchInfo() (diewald)
+ - [security] Improve foundry and layer check before injection in
+ regex (diewald)
+
0.31.4 2014-06-16
- [feature] MatchModifyClassQuery now can extend (diewald)
- [bugfix] Make matchinfo a bit more robust (diewald)
diff --git a/pom.xml b/pom.xml
index 7502b08..7006850 100644
--- a/pom.xml
+++ b/pom.xml
@@ -11,7 +11,7 @@
-->
<groupId>KorAP-modules</groupId>
<artifactId>KorAP-lucene-index</artifactId>
- <version>0.31.4</version>
+ <version>0.31.5</version>
<packaging>jar</packaging>
<name>KorAP-lucene-index</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index 5a04288..83718d8 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -8,6 +8,7 @@
import java.nio.ByteBuffer;
import java.util.zip.GZIPInputStream;
+import java.util.regex.Pattern;
import java.io.FileInputStream;
import org.apache.lucene.search.IndexSearcher;
@@ -76,6 +77,7 @@
import de.ids_mannheim.korap.index.SearchContext;
import de.ids_mannheim.korap.index.MatchIdentifier;
import de.ids_mannheim.korap.query.SpanElementQuery;
+import de.ids_mannheim.korap.util.QueryException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -502,25 +504,26 @@
};
- public KorapMatch getMatch (String id) {
+ public KorapMatch getMatch (String id) throws QueryException {
return this.getMatchInfo(
id, // MatchID
"tokens", // field
false, // info
- null, // foundry
- null, // layer
+ (ArrayList) null, // foundry
+ (ArrayList) null, // layer
false, // includeSpans
true, // includeHighlights
false // extendToSentence
);
};
+ // NOTE: Some of these getMatchInfo() overloads are likely to be removed in the future.
public KorapMatch getMatchInfo (String id,
String field,
String foundry,
String layer,
boolean includeSpans,
- boolean includeHighlights) {
+ boolean includeHighlights) throws QueryException {
return this.getMatchInfo(id, field, true, foundry, layer, includeSpans, includeHighlights, false);
};
@@ -530,14 +533,30 @@
String layer,
boolean includeSpans,
boolean includeHighlights,
- boolean extendToSentence) {
+ boolean extendToSentence) throws QueryException {
return this.getMatchInfo(id, field, true, foundry, layer, includeSpans, includeHighlights, extendToSentence);
};
+ public KorapMatch getMatchInfo (String id,
+ String field,
+ boolean info,
+ String foundry,
+ String layer,
+ boolean includeSpans,
+ boolean includeHighlights,
+ boolean extendToSentence) throws QueryException {
+ ArrayList<String> foundryList = new ArrayList<>(1);
+ if (foundry != null)
+ foundryList.add(foundry);
+ ArrayList<String> layerList = new ArrayList<>(1);
+ if (layer != null)
+ layerList.add(layer);
+ return this.getMatchInfo(id, field, info, foundryList, layerList, includeSpans, includeHighlights, extendToSentence);
+ };
+
/**
* Get a match.
- * BE AWARE - THIS IS STILL A PLAYGROUND!
*/
/*
KorapInfo is associated with a KorapMatch and has an array with all informations
@@ -546,22 +565,20 @@
public KorapMatch getMatchInfo (String idString,
String field,
boolean info,
- String foundry,
- String layer,
+ ArrayList<String> foundry,
+ ArrayList<String> layer,
boolean includeSpans,
boolean includeHighlights,
- boolean extendToSentence) {
+ boolean extendToSentence) throws QueryException {
KorapMatch match = new KorapMatch(idString, includeHighlights);
if (this.getVersion() != null)
match.setVersion(this.getVersion());
-
if (match.getStartPos() == -1)
return match;
-
// Create a filter based on the corpusID and the docID
BooleanQuery bool = new BooleanQuery();
bool.add(new TermQuery(new Term("ID", match.getDocID())), BooleanClause.Occur.MUST);
@@ -577,24 +594,69 @@
* are of interest.
*/
StringBuilder regex = new StringBuilder();
-
- // Todo: Only support one direction!
+ Pattern harmlessFoundry = Pattern.compile("^[-a-zA-Z0-9_]+$");
+ Pattern harmlessLayer = Pattern.compile("^[-a-zA-Z0-9_:]+$");
+ Iterator<String> iter;
+ int i = 0;
+
if (includeSpans)
- regex.append("((\">\"|\"<\"\">\"?)\":\")?");
- if (foundry != null) {
- regex.append(foundry).append('/');
- if (layer != null)
- regex.append(layer).append(":");
+ regex.append("((\">\"|\"<\"\">\")\":\")?");
+
+ // There is a foundry given
+ if (foundry != null && foundry.size() > 0) {
+
+ // Reject bad foundries (throws instead of silently dropping them)
+ for (i = foundry.size() - 1; i >= 0 ; i--) {
+ if (!harmlessFoundry.matcher(foundry.get(i)).matches()) {
+ throw new QueryException("Invalid foundry requested: " + foundry.get(i));
+ // foundry.remove(i);
+ };
+ };
+
+ // Build regex for multiple foundries
+ if (foundry.size() > 0) {
+ regex.append("(");
+ iter = foundry.iterator();
+ while (iter.hasNext()) {
+ regex.append(iter.next()).append("|");
+ };
+ regex.replace(regex.length() - 1, regex.length(), ")");
+ regex.append("\"/\"");
+
+ // There is a layer given
+ if (layer != null && layer.size() > 0) {
+
+ // Reject bad layers (throws instead of silently dropping them)
+ for (i = layer.size() - 1; i >= 0 ; i--) {
+ if (!harmlessLayer.matcher(layer.get(i)).matches()) {
+ throw new QueryException("Invalid layer requested: " + layer.get(i));
+ // layer.remove(i);
+ };
+ };
+
+ // Build regex for multiple layers
+ if (layer.size() > 0) {
+ regex.append("(");
+ iter = layer.iterator();
+ while (iter.hasNext()) {
+ regex.append(iter.next()).append("|");
+ };
+ regex.replace(regex.length() - 1, regex.length(), ")");
+ regex.append("\":\"");
+ };
+ };
+ };
}
else if (includeSpans) {
+ // No foundries - but spans
regex.append("([^-is]|[-is][^:])");
}
else {
- regex.append("([^-is<>]|([-is>][^:])|<[^:>])");
+ // No foundries - no spans
+ regex.append("([^-is<>]|[-is>][^:]|<[^:>])");
};
regex.append("(.){1,}|_[0-9]+");
-
if (DEBUG)
log.trace("The final regexString is {}", regex.toString());
RegExp regexObj = new RegExp(regex.toString(), RegExp.COMPLEMENT);
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index f7f1293..6d0a303 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -17,6 +17,8 @@
import de.ids_mannheim.korap.KorapSearch;
import de.ids_mannheim.korap.KorapResult;
import de.ids_mannheim.korap.KorapMatch;
+import de.ids_mannheim.korap.util.QueryException;
+
import de.ids_mannheim.korap.index.FieldDocument;
@@ -24,7 +26,7 @@
public class TestMatchIdentifier {
@Test
- public void identifierExample1 () throws IOException {
+ public void identifierExample1 () throws IOException, QueryException {
MatchIdentifier id = new MatchIdentifier("match-c1!d1-p4-20");
assertEquals(id.getCorpusID(), "c1");
assertEquals(id.getDocID(), "d1");
@@ -104,7 +106,7 @@
};
@Test
- public void indexExample2 () throws IOException {
+ public void indexExample2 () throws IOException, QueryException {
KorapIndex ki = new KorapIndex();
ki.addDoc(createSimpleFieldDoc());
ki.commit();
@@ -207,7 +209,7 @@
@Test
- public void indexExample3 () throws IOException {
+ public void indexExample3 () throws IOException, QueryException {
KorapIndex ki = new KorapIndex();
ki.addDoc(createSimpleFieldDoc());
ki.commit();
@@ -257,7 +259,7 @@
};
@Test
- public void indexExample4 () throws IOException {
+ public void indexExample4 () throws IOException, QueryException {
KorapIndex ki = new KorapIndex();
ki.addDoc(createSimpleFieldDoc());
ki.commit();
@@ -303,7 +305,7 @@
};
@Test
- public void indexExample5Spans () throws IOException {
+ public void indexExample5Spans () throws IOException, QueryException {
KorapIndex ki = new KorapIndex();
ki.addDoc(createSimpleFieldDoc());
ki.commit();
@@ -322,7 +324,7 @@
};
@Test
- public void indexExample6Spans () throws IOException {
+ public void indexExample6Spans () throws IOException, QueryException {
KorapIndex ki = new KorapIndex();
ki.addDoc(createSimpleFieldDoc());
ki.commit();
@@ -341,7 +343,7 @@
};
@Test
- public void indexExample7Spans () throws IOException {
+ public void indexExample7Spans () throws IOException, QueryException {
KorapIndex ki = new KorapIndex();
ki.addDoc(createSimpleFieldDoc());
ki.commit();
@@ -404,7 +406,7 @@
};
@Test
- public void indexExample6Relations () throws IOException {
+ public void indexExample6Relations () throws IOException, QueryException {
KorapIndex ki = new KorapIndex();
ki.addDoc(createSimpleFieldDoc());
ki.commit();
@@ -496,7 +498,7 @@
@Test
- public void indexExample7SentenceExpansion () throws IOException {
+ public void indexExample7SentenceExpansion () throws IOException, QueryException {
KorapIndex ki = new KorapIndex();
ki.addDoc(createSimpleFieldDoc());
ki.addDoc(createSimpleFieldDoc2());
@@ -552,7 +554,7 @@
};
@Test
- public void indexExample7Dependencies () throws IOException {
+ public void indexExample7Dependencies () throws IOException, QueryException {
KorapIndex ki = new KorapIndex();
ki.addDoc(createSimpleFieldDoc2());
ki.commit();
@@ -610,7 +612,104 @@
"</span>",
km.getSnippetHTML());
};
+
+
+ @Test
+ public void indexExampleMultipleFoundries () throws IOException, QueryException {
+ KorapIndex ki = new KorapIndex();
+ ki.addDoc(createSimpleFieldDoc4());
+ ki.commit();
+
+ KorapMatch km = ki.getMatchInfo("match-c1!d4-p3-9",
+ "tokens",
+ "f",
+ "m",
+ false,
+ false);
+ assertEquals("f:m info",
+ km.getSnippetBrackets(),
+ "... [{f/m:vier:a}{f/m:fuenf:b}{f/m:sechs:c}{f/m:sieben:a}{f/m:acht:b}{f/m:neun:a}] ...");
+
+ km = ki.getMatchInfo("match-c1!d4-p3-9",
+ "tokens",
+ "f",
+ null,
+ false,
+ false);
+ assertEquals("f info",
+ km.getSnippetBrackets(),
+ "... [{f/m:vier:{f/y:four:a}}{f/m:fuenf:{f/y:five:b}}{f/m:sechs:{f/y:six:c}}{f/m:sieben:{f/y:seven:a}}{f/m:acht:{f/y:eight:b}}{f/m:neun:{f/y:nine:a}}] ..."
+ );
+
+
+ km = ki.getMatchInfo("match-c1!d4-p3-4",
+ "tokens",
+ null,
+ null,
+ false,
+ false);
+ assertEquals("all info",
+ km.getSnippetBrackets(),
+ "... [{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}] ..."
+ );
+
+ ArrayList<String> foundryList = new ArrayList<>(2);
+ foundryList.add("f");
+ foundryList.add("x");
+
+ km = ki.getMatchInfo("match-c1!d4-p3-4",
+ "tokens",
+ true,
+ foundryList,
+ (ArrayList<String>) null,
+ false,
+ false,
+ false);
+ assertEquals("f|x info",
+ km.getSnippetBrackets(),
+ "... [{f/m:vier:{f/y:four:{x/o:viertens:a}}}] ..."
+ );
+
+ foundryList.clear();
+ foundryList.add("y");
+ foundryList.add("x");
+
+ km = ki.getMatchInfo("match-c1!d4-p3-4",
+ "tokens",
+ true,
+ foundryList,
+ (ArrayList<String>) null,
+ false,
+ false,
+ false);
+ assertEquals("y|x info",
+ km.getSnippetBrackets(),
+ "... [{x/o:viertens:a}] ..."
+ );
+
+
+ foundryList.clear();
+ foundryList.add("f");
+ foundryList.add("it");
+
+ ArrayList<String> layerList = new ArrayList<>(2);
+ layerList.add("is");
+
+ km = ki.getMatchInfo("match-c1!d4-p3-4",
+ "tokens",
+ true,
+ foundryList,
+ layerList,
+ false,
+ false,
+ false);
+ assertEquals("f|it/is",
+ km.getSnippetBrackets(),
+ "... [{it/is:4:a}] ..."
+ );
+ };
+
private FieldDocument createSimpleFieldDoc(){
FieldDocument fd = new FieldDocument();
fd.addString("corpusID", "c1");