blob: b935db83bd0c0662186f62b55be66babb19627f6 [file] [log] [blame]
Nils Diewalde0725012014-09-25 19:32:52 +00001package de.ids_mannheim.korap;
2import java.util.*;
3import java.io.*;
4import org.apache.lucene.store.MMapDirectory;
5import de.ids_mannheim.korap.KorapIndex;
6import org.slf4j.Logger;
7import org.slf4j.LoggerFactory;
8
9public class KorapIndexer {
10 KorapIndex index;
11 String indexDir;
12 int count;
13 int commitCount;
14
15 // Init logger
16 private final static Logger log = LoggerFactory.getLogger(KorapIndexer.class);
17
18 public KorapIndexer(Properties prop) throws IOException {
19 this.indexDir = prop.getProperty("lucene.indexDir");
20
21 System.out.println("Index to " + this.indexDir);
22
23 String commitCount = prop.getProperty("lucene.index.commit.count", "1000");
24
25 this.index = new KorapIndex(new MMapDirectory(new File(indexDir)));
26 this.count = 0;
27 this.commitCount = Integer.parseInt(commitCount);
28 };
29
30
31 public void parse (File dir) {
32 for (String file : dir.list()) {
33 if (file.matches("^[^\\.].+?\\.json\\.gz$")) {
34 String found = dir.getPath() + '/' + file;
35 System.out.print(" Index " + found + " ... ");
36 if (this.index.addDocFile(found, true) == null) {
37 System.out.println("fail.");
38 continue;
39 };
40 System.out.println("done (" + count + ").");
41 this.count++;
42
43 if ((this.count % this.commitCount) == 0)
44 this.commit();
45 };
46 };
47 };
48
49
50 public void commit () {
51 System.out.println("-----");
52 System.out.print(" Commit ... ");
53 try {
54 this.index.commit();
55 }
56 catch (IOException e) {
57 System.err.println("Unable to commit to index " + this.indexDir);
58 };
59 System.out.println("done.");
60 };
61
62
63
64 public static void main (String[] argv) throws IOException {
65 Properties prop = new Properties();
66 InputStream fr = new FileInputStream(argv[0]);
67 prop.load(fr);
68 KorapIndexer ki = new KorapIndexer(prop);
69 System.out.println();
70
71 for (String arg : Arrays.copyOfRange(argv, 1, argv.length)) {
72 File f = new File(arg);
73 if (f.isDirectory())
74 ki.parse(f);
75 };
76
77
78 // Final commit
79 ki.commit();
80
81 // Finish indexing
82 System.out.println("-----");
83 System.out.println(" Indexed " + ki.count + " files.");
84 System.out.println();
85 };
86};