Improve readme and synopsis
Change-Id: I277ea470f0ac2aa505260c7f0db0f45f9b4b54c7
diff --git a/LICENSE b/LICENSE
new file mode 100755
index 0000000..f1083af
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,24 @@
+Copyright (c) 2015, IDS Mannheim
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
\ No newline at end of file
diff --git a/README.md b/README.md
index a4e8a5f..dd9780c 100644
--- a/README.md
+++ b/README.md
@@ -3,18 +3,104 @@
The Krawfish Prototype is a testbed search backend for KorAP,
to implement design concepts both for Kanalito and Krill.
-Krawfish focusses on
+Krawfish Prototype focuses on
- Patterns for distribution
(data aggregation, sorting, grouping ...)
- Implementation of a forward index
- Normalization and optimization of complex queries
- Implementation of experimental features
+Krawfish Prototype is still a work in progress.
The segment handling of Krawfish is based on Krill and therefore
heavily inspired by Lucene and Lucy.
+**! This software is in its early stages and not stable yet! Use it at your own risk!**
+
+## SETUP
+
+Krawfish Prototype requires Perl v5.10.1 or later.
+The recommended environment is based on [Perlbrew](http://perlbrew.pl/)
+with [App::cpanminus](http://search.cpan.org/~miyagawa/App-cpanminus/).
+
+```
+$ git clone https://github.com/KorAP/Krawfish-Prototype
+$ cd Krawfish-Prototype
+$ cpanm --installdeps .
+$ perl Makefile.PL
+$ make test
+```
+
+## SYNOPSIS
+
+```
+use Krawfish::Koral;
+use Krawfish::Index;
+
+# Add documents to index
+my $index = Krawfish::Index->new;
+$index->introduce_field('docID' => 'de_DE');
+$index->add_doc('t/data/doc1.jsonld');
+$index->add_doc('t/data/doc2.jsonld');
+$index->commit;
+
+# Start KoralQuery object
+my $koral = Krawfish::Koral->new;
+
+# Define a query
+# [einen|"d.*"][][Hut]
+my $query = $koral->query_builder;
+$koral->query(
+ $query->seq(
+ $query->token(
+ $query->bool_or(
+ 'einen',
+ $query->term_re('d.*')
+ )
+ ),
+ $query->anywhere,
+ $query->term('Hut')
+ )
+);
+
+# Define a virtual corpus
+my $corpus = $koral->corpus_builder;
+$koral->corpus(
+ $corpus->bool_and(
+ $corpus->string('license=free'),
+ $corpus->string('corpus=corpus-2')
+ )
+);
+
+# Define a compilation target
+my $compile = $koral->compile_builder;
+$koral->compile(
+ $compile->aggregate(
+ $compile->a_fields('license'),
+ $compile->a_frequencies
+ ),
+ $compile->enrich(
+ $compile->e_fields('textLength')
+ ),
+ $compile->sort_by(
+ $compile->s_field('docID')
+ )
+);
+
+# Execute KoralQuery
+my $request = $koral->to_query
+ ->identify($index->dict)
+ ->optimize($index->segment);
+
+if ($request->next) {
+ print $request->current_match->to_string;
+};
+
+```
+
+## COPYRIGHT AND LICENSE
+
Copyright (C) 2017, [IDS Mannheim](http://www.ids-mannheim.de/)<br>
Author: [Nils Diewald](http://nils-diewald.de/)
Krawfish Prototype is free software published under the
-[BSD-2 License](https://raw.githubusercontent.com/KorAP/Kalamar/master/LICENSE).
\ No newline at end of file
+[BSD-2 License](https://raw.githubusercontent.com/KorAP/Krawfish-Prototype/master/LICENSE).
\ No newline at end of file
diff --git a/lib/Krawfish/Compile.pm b/lib/Krawfish/Compile.pm
index 705b46b..fab80a6 100644
--- a/lib/Krawfish/Compile.pm
+++ b/lib/Krawfish/Compile.pm
@@ -68,4 +68,8 @@
};
+sub max_freq {
+ $_[0]->{query}->max_freq;
+};
+
1;
diff --git a/lib/Krawfish/Compile/Segment/Sort.pm b/lib/Krawfish/Compile/Segment/Sort.pm
index 3c69732..825399a 100644
--- a/lib/Krawfish/Compile/Segment/Sort.pm
+++ b/lib/Krawfish/Compile/Segment/Sort.pm
@@ -319,24 +319,41 @@
# Get the current match object
sub current_match {
- # my $self = shift;
- # my $current = $self->current or return;
- # my $match = Krawfish::Koral::Result::Match->new(
- # doc_id => $current->doc_id,
- # start => $current->start,
- # end => $current->end,
- # payload => $current->payload,
- # );
- #
- # if (DEBUG) {
- # print_log('sort', 'Current match is ' . $match->to_string);
- # };
- #
- # return $match;
- ...
+ my $self = shift;
+ my $match = $self->match_from_query or return;
+
+ if (DEBUG) {
+ print_log('sort', 'Current match is ' . $match->to_string);
+ };
+
+ return $match;
};
+# Get match from query
+sub match_from_query {
+ my $self = shift;
+
+ print_log('sort', 'Get match from query') if DEBUG;
+
+ # Get current match from query
+ my $current = $self->current or return;
+
+ if (DEBUG) {
+ print_log('sort', 'Current posting is '. $self->{query}->to_string);
+ };
+
+ # Create new match
+ return Krawfish::Koral::Result::Match->new(
+ doc_id => $current->doc_id,
+ start => $current->start,
+ end => $current->end,
+ payload => $current->payload->clone
+ );
+};
+
+
+
# Return the current posting
sub current {
my $self = shift;
diff --git a/lib/Krawfish/Compile/Segment/Sort/No.pm b/lib/Krawfish/Compile/Segment/Sort/No.pm
index 61f3b90..0f930a8 100644
--- a/lib/Krawfish/Compile/Segment/Sort/No.pm
+++ b/lib/Krawfish/Compile/Segment/Sort/No.pm
@@ -23,6 +23,10 @@
};
+sub type {
+ 'no';
+};
+
sub criterion {
$_[0]->{field};
};
diff --git a/lib/Krawfish/Index.pm b/lib/Krawfish/Index.pm
index 272dbb9..dec7932 100644
--- a/lib/Krawfish/Index.pm
+++ b/lib/Krawfish/Index.pm
@@ -190,6 +190,23 @@
};
+# Add to index
+sub add_doc {
+ my ($self, $file) = @_;
+
+ # Get Koral document
+ my $kq = Krawfish::Koral::Document->new($file) or return;
+
+ # Identify document
+ $kq = $kq->identify($self->dict) or return;
+
+ # Add to segment
+ $self->segment->add($kq) or return;
+
+ return $self;
+};
+
+
# Commit all pending data
sub commit {
my $self = shift;
diff --git a/t/koral/synopsis.t b/t/koral/synopsis.t
new file mode 100644
index 0000000..99f376b
--- /dev/null
+++ b/t/koral/synopsis.t
@@ -0,0 +1,75 @@
+use Test::More;
+use strict;
+use warnings;
+
+use Krawfish::Koral;
+use Krawfish::Index;
+
+# Add documents to index
+my $index = Krawfish::Index->new;
+$index->introduce_field('docID' => 'de_DE');
+$index->add_doc('t/data/doc1.jsonld');
+$index->add_doc('t/data/doc2.jsonld');
+$index->commit;
+
+# Start KoralQuery object
+my $koral = Krawfish::Koral->new;
+
+# Define a query
+# [einen|"d.*"][][Hut]
+my $query = $koral->query_builder;
+$koral->query(
+ $query->seq(
+ $query->token(
+ $query->bool_or(
+ 'einen',
+ $query->term_re('d.*')
+ )
+ ),
+ $query->anywhere,
+ $query->term('Hut')
+ )
+);
+
+# Define a virtual corpus
+my $corpus = $koral->corpus_builder;
+$koral->corpus(
+ $corpus->bool_and(
+ $corpus->string('license=free'),
+ $corpus->string('corpus=corpus-2')
+ )
+);
+
+# Define a compilation target
+my $compile = $koral->compile_builder;
+$koral->compile(
+ $compile->aggregate(
+ $compile->a_fields('license'),
+ $compile->a_frequencies
+ ),
+ $compile->enrich(
+ $compile->e_fields('textLength')
+ ),
+ $compile->sort_by(
+ $compile->s_field('docID')
+ )
+);
+
+my $request = $koral->to_query
+ ->identify($index->dict)
+ ->optimize($index->segment);
+
+# TODO:
+# Serialize ->to_koral_query, that will
+
+my $str = '';
+if ($request->next) {
+ $str .= $request->current_match->to_string;
+};
+
+# warn $request->collection->to_string;
+
+is($str, '[0:9-12]');
+
+done_testing;
+__END__