Fixed case-sensitive tests in collection suite (terms of the text field are lowercased)
Change-Id: I534f76238d082f924f0270880bdaf747fd0f4a55
diff --git a/src/main/java/de/ids_mannheim/korap/KrillCollectionNew.java b/src/main/java/de/ids_mannheim/korap/KrillCollectionNew.java
index f94b407..3409ff5 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillCollectionNew.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillCollectionNew.java
@@ -27,7 +27,7 @@
.getLogger(KrillCollection.class);
// This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = true;
+ public static final boolean DEBUG = false;
/**
@@ -97,13 +97,17 @@
};
*/
-
+ /**
+ * Get the matching documents of an atomic reader as a bitset, respecting deleted documents.
+ */
public FixedBitSet bits (AtomicReaderContext atomic) throws IOException {
AtomicReader r = atomic.reader();
FixedBitSet bitset = new FixedBitSet(r.maxDoc());
DocIdSet docids = this.getDocIdSet(atomic, (Bits) r.getLiveDocs());
+ // No document id set for this reader: hand null through to the caller
if (docids == null)
return null;
+ // Set a bit for every document id contained in the set
bitset.or(docids.iterator());
return bitset;
};
@@ -144,6 +148,7 @@
);
};
+
/**
* Search for the number of occurrences of different types,
* e.g. <i>documents</i>, <i>sentences</i> etc. in the virtual
diff --git a/src/main/java/de/ids_mannheim/korap/collection/BooleanGroupFilter.java b/src/main/java/de/ids_mannheim/korap/collection/BooleanGroupFilter.java
index 7efcc60..60c4ebb 100644
--- a/src/main/java/de/ids_mannheim/korap/collection/BooleanGroupFilter.java
+++ b/src/main/java/de/ids_mannheim/korap/collection/BooleanGroupFilter.java
@@ -33,7 +33,7 @@
private final static Logger log = LoggerFactory.getLogger(KrillCollection.class);
// This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = true;
+ public static final boolean DEBUG = false;
// Init operands list
private final List<GroupFilterOperand> operands = new ArrayList<>(3);
diff --git a/src/main/java/de/ids_mannheim/korap/index/TextAnalyzer.java b/src/main/java/de/ids_mannheim/korap/index/TextAnalyzer.java
index bd5181b..bca39b5 100644
--- a/src/main/java/de/ids_mannheim/korap/index/TextAnalyzer.java
+++ b/src/main/java/de/ids_mannheim/korap/index/TextAnalyzer.java
@@ -11,7 +11,7 @@
public class TextAnalyzer extends Analyzer {
@Override
- protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
+ protected TokenStreamComponents createComponents (final String fieldName, final Reader reader) {
final Tokenizer source = new StandardTokenizer(reader);
TokenStream sink = new LowerCaseFilter(source);
return new TokenStreamComponents(source, sink);
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index eacf929..5fed33b 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -1,4 +1,4 @@
-# log4j.rootLogger = ERROR, stdout
+log4j.rootLogger = ERROR, stdout
# Queries:
# log4j.logger.de.ids_mannheim.korap.query.SpanNextQuery = TRACE, stdout
@@ -21,7 +21,7 @@
# Collections:
# log4j.logger.de.ids_mannheim.korap.collection.Filter = TRACE, stdout
-log4j.logger.de.ids_mannheim.korap.KrillCollection = TRACE, stdout
+# log4j.logger.de.ids_mannheim.korap.KrillCollection = TRACE, stdout
# Responses:
# log4j.logger.de.ids_mannheim.korap.server.Node = TRACE, stdout
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
index e764123..7cd11f0 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
@@ -5,6 +5,11 @@
import de.ids_mannheim.korap.KrillCollectionNew;
import de.ids_mannheim.korap.collection.CollectionBuilderNew;
import de.ids_mannheim.korap.index.FieldDocument;
+import de.ids_mannheim.korap.index.TextAnalyzer;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import static org.junit.Assert.*;
import org.junit.Test;
@@ -92,10 +97,10 @@
kcn.fromBuilder(cb.andGroup(cb.term("textClass", "finanzen")).with(cb.term("textClass", "kultur")));
assertEquals(0, kcn.docCount());
- kcn.fromBuilder(cb.term("text", "Mann"));
+ kcn.fromBuilder(cb.term("text", "mann"));
assertEquals(3, kcn.docCount());
- kcn.fromBuilder(cb.term("text", "Frau"));
+ kcn.fromBuilder(cb.term("text", "frau"));
assertEquals(1, kcn.docCount());
};
@@ -191,6 +196,35 @@
};
@Test
+ public void testIndexStream () throws IOException {
+ // Verify that the analyzed "text" field of doc 1 yields lowercased tokens in order
+ ki = new KrillIndex();
+ FieldDocument fd = ki.addDoc(createDoc1());
+ ki.commit();
+
+ // TextAnalyzer applies a LowerCaseFilter, so only lowercased terms are expected
+ String[] expected = { "der", "alte", "mann", "ging", "über", "die", "straße" };
+
+ Analyzer ana = new TextAnalyzer();
+ TokenStream ts = fd.doc.getField("text").tokenStream(ana, null);
+ try {
+ CharTermAttribute charTermAttribute = ts.addAttribute(CharTermAttribute.class);
+ // reset() is mandatory before the first incrementToken()
+ ts.reset();
+ for (String token : expected) {
+ // A false return means the stream ended early; fail right there
+ assertTrue("Stream ended before " + token, ts.incrementToken());
+ assertEquals(token, charTermAttribute.toString());
+ }
+ ts.end();
+ }
+ finally {
+ // Release the stream even when an assertion fails
+ ts.close();
+ }
+ };
+
+ @Test
public void testIndexWithDateRanges () throws IOException {
ki = new KrillIndex();
ki.addDoc(createDoc1());
@@ -270,14 +304,14 @@
kcn.fromBuilder(cb.re("author", "Frank|Peter"));
assertEquals(2, kcn.docCount());
- kcn.fromBuilder(cb.term("text", "Frau"));
+ // "Frau" doesn't match: TextAnalyzer lowercases tokens, so presumably only "frau" is indexed
+ kcn.fromBuilder(cb.term("text", "frau"));
assertEquals(1, kcn.docCount());
- kcn.fromBuilder(cb.re("text", "Frau"));
+ kcn.fromBuilder(cb.re("text", "frau"));
assertEquals(1, kcn.docCount());
- kcn.fromBuilder(cb.re("text", "Frau|Mann"));
- System.err.println(kcn.toString());
+ kcn.fromBuilder(cb.re("text", "frau|mann"));
assertEquals(3, kcn.docCount());
};