| Nils Diewald | e072501 | 2014-09-25 19:32:52 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap; |
| 2 | import java.util.*; |
| 3 | import java.io.*; |
| 4 | import org.apache.lucene.store.MMapDirectory; |
| 5 | import de.ids_mannheim.korap.KorapIndex; |
| 6 | import org.slf4j.Logger; |
| 7 | import org.slf4j.LoggerFactory; |
| 8 | |
| 9 | public class KorapIndexer { |
| 10 | KorapIndex index; |
| 11 | String indexDir; |
| 12 | int count; |
| 13 | int commitCount; |
| 14 | |
| 15 | // Init logger |
| 16 | private final static Logger log = LoggerFactory.getLogger(KorapIndexer.class); |
| 17 | |
| 18 | public KorapIndexer(Properties prop) throws IOException { |
| 19 | this.indexDir = prop.getProperty("lucene.indexDir"); |
| 20 | |
| 21 | System.out.println("Index to " + this.indexDir); |
| 22 | |
| 23 | String commitCount = prop.getProperty("lucene.index.commit.count", "1000"); |
| 24 | |
| 25 | this.index = new KorapIndex(new MMapDirectory(new File(indexDir))); |
| 26 | this.count = 0; |
| 27 | this.commitCount = Integer.parseInt(commitCount); |
| 28 | }; |
| 29 | |
| 30 | |
| 31 | public void parse (File dir) { |
| 32 | for (String file : dir.list()) { |
| 33 | if (file.matches("^[^\\.].+?\\.json\\.gz$")) { |
| 34 | String found = dir.getPath() + '/' + file; |
| 35 | System.out.print(" Index " + found + " ... "); |
| 36 | if (this.index.addDocFile(found, true) == null) { |
| 37 | System.out.println("fail."); |
| 38 | continue; |
| 39 | }; |
| 40 | System.out.println("done (" + count + ")."); |
| 41 | this.count++; |
| 42 | |
| 43 | if ((this.count % this.commitCount) == 0) |
| 44 | this.commit(); |
| 45 | }; |
| 46 | }; |
| 47 | }; |
| 48 | |
| 49 | |
| 50 | public void commit () { |
| 51 | System.out.println("-----"); |
| 52 | System.out.print(" Commit ... "); |
| 53 | try { |
| 54 | this.index.commit(); |
| 55 | } |
| 56 | catch (IOException e) { |
| 57 | System.err.println("Unable to commit to index " + this.indexDir); |
| 58 | }; |
| 59 | System.out.println("done."); |
| 60 | }; |
| 61 | |
| 62 | |
| 63 | |
| 64 | public static void main (String[] argv) throws IOException { |
| 65 | Properties prop = new Properties(); |
| 66 | InputStream fr = new FileInputStream(argv[0]); |
| 67 | prop.load(fr); |
| 68 | KorapIndexer ki = new KorapIndexer(prop); |
| 69 | System.out.println(); |
| 70 | |
| 71 | for (String arg : Arrays.copyOfRange(argv, 1, argv.length)) { |
| 72 | File f = new File(arg); |
| 73 | if (f.isDirectory()) |
| 74 | ki.parse(f); |
| 75 | }; |
| 76 | |
| 77 | |
| 78 | // Final commit |
| 79 | ki.commit(); |
| 80 | |
| 81 | // Finish indexing |
| 82 | System.out.println("-----"); |
| 83 | System.out.println(" Indexed " + ki.count + " files."); |
| 84 | System.out.println(); |
| 85 | }; |
| 86 | }; |