/*
 * Decompiled with CFR 0.152.
 */
package experimental.morfessor;

import experimental.morfessor.CharEncoder;
import java.io.IOException;
import java.io.Serializable;
import java.io.Writer;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import marmot.util.Counter;
import marmot.util.FileUtils;
import marmot.util.LineIterator;
import marmot.util.StringUtils;

/**
 * Weighted vocabulary built from a count file.
 *
 * <p>Each input row supplies a numeric count in its first field and a piece
 * of text in its second field. The text is split by {@link #tokenize(String)},
 * single-character non-alphanumeric tokens are dropped (see
 * {@link #isSpecial(String)}), the remaining tokens are normalized with
 * {@code StringUtils.Mode.lower}, and the row's count is added to each of them.
 */
public class Vocab {
    /** Accumulated count per normalized token. */
    private final Counter<String> counter_ = new Counter<String>();

    /**
     * Reads the whole file {@code filename} into a new vocabulary and reports
     * the number of distinct entries on standard error.
     *
     * @param filename path of the count file to read
     */
    public Vocab(String filename) {
        this.init(filename, -1);
        System.err.println("|Vocab| = " + this.counter_.size());
    }

    /**
     * Splits {@code word} into tokens.
     *
     * <p>Maximal runs of letters and digits form one token each. Every other
     * character that is not whitespace becomes a single-character token of
     * its own; whitespace only separates tokens and is never emitted.
     *
     * <p>The scan works on {@code char} units, so the halves of a surrogate
     * pair are classified individually (neither is a letter or digit for
     * {@link Character#isLetter(char)} / {@link Character#isDigit(char)}).
     *
     * @param word text to split; must not be {@code null}
     * @return the tokens in order of appearance, possibly empty
     */
    public static List<String> tokenize(String word) {
        List<String> tokens = new LinkedList<String>();
        StringBuilder run = new StringBuilder(word.length());
        for (int index = 0; index < word.length(); ++index) {
            char c = word.charAt(index);
            if (Character.isDigit(c) || Character.isLetter(c)) {
                run.append(c);
                continue;
            }
            // Any non-alphanumeric character terminates the current run.
            if (run.length() > 0) {
                tokens.add(run.toString());
                run.setLength(0);
            }
            if (Character.isWhitespace(c)) {
                continue;
            }
            tokens.add(Character.toString(c));
        }
        // Flush a run that extends to the end of the input.
        if (run.length() > 0) {
            tokens.add(run.toString());
        }
        return tokens;
    }

    /**
     * Accumulates counts from {@code filename} into {@link #counter_}.
     *
     * @param filename path of the count file
     * @param limit    maximum number of rows to read; a negative value means
     *                 no limit
     * @throws NumberFormatException     if a row's first field is not a number
     * @throws IndexOutOfBoundsException if a row has fewer than two fields
     */
    private void init(String filename, int limit) {
        LineIterator rows = new LineIterator(filename);
        for (int lines = 0; rows.hasNext() && (limit < 0 || lines < limit); ++lines) {
            // NOTE(review): assumes LineIterator yields each row as a
            // List<String> of fields - confirm against marmot.util.LineIterator.
            List<String> row = rows.next();
            double count = Double.parseDouble(row.get(0));
            for (String token : Vocab.tokenize(row.get(1))) {
                if (Vocab.isSpecial(token)) {
                    continue;
                }
                String normalized = StringUtils.normalize(token, StringUtils.Mode.lower);
                this.counter_.increment(normalized, count);
            }
        }
    }

    /**
     * Writes the vocabulary in encoded form.
     *
     * <p>Two files are produced: {@code filename + ".map"} holds the
     * serialized {@link CharEncoder} derived from this vocabulary, and
     * {@code filename + ".ascii"} holds one {@code "<count> <form>"} line per
     * encoded form. Counts are accumulated per encoded form and truncated to
     * {@code int} when written.
     *
     * @param filename common prefix of the two output files
     * @throws RuntimeException wrapping any {@link IOException} raised while
     *                          writing the {@code .ascii} file
     */
    public void saveToAsciiFile(String filename) {
        CharEncoder encoder = CharEncoder.fromVocab(this);
        FileUtils.saveToFile((Serializable)encoder, filename + ".map");
        Counter<String> encodedCounts = new Counter<String>();
        for (Map.Entry<String, Double> entry : this.counter_.entrySet()) {
            String form = encoder.encode(entry.getKey());
            Double count = entry.getValue();
            encodedCounts.increment(form, count);
        }
        // try-with-resources closes the writer even when a write fails.
        try (Writer writer = FileUtils.openFileWriter(filename + ".ascii")) {
            for (Map.Entry<String, Double> entry : encodedCounts.entrySet()) {
                // Plain concatenation keeps the digits locale-independent,
                // unlike String.format("%d", ...) with the default locale.
                writer.write(entry.getValue().intValue() + " " + entry.getKey() + "\n");
            }
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the entry set of the underlying counter (token to count).
     *
     * @return whatever {@code Counter.entrySet()} returns for the internal
     *         counter; it is not copied here
     */
    public Set<Map.Entry<String, Double>> entrySet() {
        return this.counter_.entrySet();
    }

    /**
     * Command-line entry point: reads the count file {@code args[0]} and
     * writes the encoded vocabulary using {@code args[1]} as output prefix.
     *
     * @param args input file followed by output prefix
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.err.println("Usage: Vocab <input-file> <output-prefix>");
            System.exit(1);
        }
        Vocab vocab = new Vocab(args[0]);
        vocab.saveToAsciiFile(args[1]);
    }

    /**
     * Tells whether {@code word} is a single character that is neither a
     * digit nor a letter.
     *
     * @param word token to classify; must not be {@code null}
     * @return {@code true} only for one-character non-alphanumeric strings
     */
    public static boolean isSpecial(String word) {
        if (word.length() != 1) {
            return false;
        }
        char c = word.charAt(0);
        return !Character.isDigit(c) && !Character.isLetter(c);
    }
}

