/*
 * Decompiled with CFR 0.152.
 */
package org.cogroo.gc.cmdline.dictionary;

import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.HashSet;
import opennlp.tools.cmdline.BasicCmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.postag.ExtendedPOSDictionary;
import opennlp.tools.postag.MyPOSDictionary;
import org.cogroo.formats.ad.ADFeaturizerSampleStream;
import org.cogroo.gc.cmdline.dictionary.POSDictionaryBuilderParams;
import org.cogroo.interpreters.FlorestaTagInterpreter;
import org.cogroo.interpreters.JspellTagInterpreter;
import org.cogroo.tools.featurizer.FeatureSample;

public class POSDictionaryBuilderTool
extends BasicCmdLineTool {
    public String getShortDescription() {
        return "builds a new POS Tag dictionary";
    }

    public String getHelp() {
        return this.getBasicHelp(Params.class);
    }

    public void run(String[] args) {
        Params params = (Params)this.validateAndParseParams(args, Params.class);
        File dictInFile = params.getInputFile();
        File dictOutFile = params.getOutputFile();
        File corpusFile = params.getCorpus();
        Charset encoding = params.getEncoding();
        CmdLineUtil.checkInputFile((String)"dictionary input file", (File)dictInFile);
        CmdLineUtil.checkOutputFile((String)"dictionary output file", (File)dictOutFile);
        CmdLineUtil.checkInputFile((String)"corpus input file", (File)corpusFile);
        InputStreamReader in = null;
        OutputStream out = null;
        try {
            ADFeaturizerSampleStream sentenceStream = new ADFeaturizerSampleStream(new FileInputStream(corpusFile), "ISO-8859-1", false);
            HashSet<String> knownFeats = new HashSet<String>();
            HashSet<String> knownPostags = new HashSet<String>();
            FeatureSample sample = sentenceStream.read();
            while (sample != null) {
                Collections.addAll(knownFeats, sample.getFeatures());
                Collections.addAll(knownPostags, sample.getTags());
                sample = sentenceStream.read();
            }
            in = new InputStreamReader((InputStream)new FileInputStream(dictInFile), encoding);
            out = new FileOutputStream(dictOutFile);
            ExtendedPOSDictionary dict = MyPOSDictionary.parseOneEntryPerLine(in, new JspellTagInterpreter(), new FlorestaTagInterpreter(), knownFeats, knownPostags, params.getAllowInvalidFeats());
            dict.serialize(out);
        }
        catch (IOException e) {
            throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.getMessage());
        }
        finally {
            try {
                in.close();
                out.close();
            }
            catch (IOException e) {}
        }
    }

    static interface Params
    extends POSDictionaryBuilderParams {
    }
}

