/*
 * Decompiled with CFR 0.152.
 */
package edu.nyu.jet.chunk;

import edu.nyu.jet.JetTest;
import edu.nyu.jet.aceJet.Ace;
import edu.nyu.jet.chunk.MaxEntNE;
import edu.nyu.jet.chunk.Onoma;
import edu.nyu.jet.hmm.HMMNameTagger;
import edu.nyu.jet.hmm.HMMannotator;
import edu.nyu.jet.lex.Lexicon;
import edu.nyu.jet.lex.Tokenizer;
import edu.nyu.jet.scorer.NameTagger;
import edu.nyu.jet.tipster.Annotation;
import edu.nyu.jet.tipster.Document;
import edu.nyu.jet.tipster.DocumentCollection;
import edu.nyu.jet.tipster.ExternalDocument;
import edu.nyu.jet.tipster.Span;
import edu.nyu.jet.zoner.SentenceSplitter;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.Vector;

/**
 * Name tagger that pairs a {@link MaxEntNE} model with an {@link HMMannotator}
 * (constructed over that model) to train on, store, load, and apply name
 * annotations to documents.
 *
 * <p>The annotator is configured in the constructor with
 * {@code setBItag(true)} and {@code setAnnotateEachToken(false)}.
 */
public class MENameTagger
implements NameTagger {
    /** The maximum-entropy model driven by this tagger. */
    public MaxEntNE mene = new MaxEntNE();
    /** Annotator built on top of {@link #mene}; used for both training and tagging. */
    HMMannotator annotator = new HMMannotator(this.mene);

    /**
     * Creates a tagger and configures its annotator (BI tags on, per-token
     * annotation off).
     */
    public MENameTagger() {
        this.annotator.setBItag(true);
        this.annotator.setAnnotateEachToken(false);
    }

    /**
     * Prepares the tagger for training.
     *
     * @param tagTableFile file from which the annotator reads its tag table
     * @param featureFile  file name passed to {@code MaxEntNE.resetForTraining}
     */
    public void initialize(String tagTableFile, String featureFile) {
        this.mene.resetForTraining(featureFile);
        this.annotator.readTagTable(tagTableFile);
    }

    /**
     * Trains on every document of a document collection.
     *
     * @param trainingCollection name of the collection file to open
     * @throws IOException declared for callers; propagated from the underlying calls
     */
    public void train(String trainingCollection) throws IOException {
        DocumentCollection trainCol = new DocumentCollection(trainingCollection);
        trainCol.open();
        for (int i = 0; i < trainCol.size(); ++i) {
            ExternalDocument doc = trainCol.get(i);
            this.train(doc);
        }
    }

    /**
     * Trains on a single document: opens it, splits each TEXT segment into
     * sentences, tokenizes and lexicon-annotates each sentence, and feeds each
     * sentence span to the annotator's trainer. Annotations are cleared from the
     * document once all sentences have been processed.
     *
     * <p>If the document has no TEXT annotations, or no sentence annotations
     * after splitting, the method returns early without training.
     *
     * @param doc the document to train from
     */
    public void train(ExternalDocument doc) {
        doc.setAllTags(true);
        doc.open();
        doc.stretchAll();
        System.out.println("Training from " + doc.fileName());
        this.mene.newDocument();
        Vector<Annotation> textSegments = doc.annotationsOfType("TEXT");
        if (textSegments == null) {
            return;
        }
        for (Annotation ann : textSegments) {
            Span textSpan = ann.span();
            // Set the global monocase flag before splitting each segment.
            Ace.monocase = Ace.allLowerCase(doc);
            SentenceSplitter.split(doc, textSpan);
        }
        Vector<Annotation> sentences = doc.annotationsOfType("sentence");
        if (sentences == null) {
            return;
        }
        for (Annotation sentence : sentences) {
            Span sentenceSpan = sentence.span();
            Tokenizer.tokenize(doc, sentenceSpan);
            Lexicon.annotateWithDefinitions(doc, sentenceSpan.start(), sentenceSpan.end());
            this.annotator.trainOnSpan(doc, sentenceSpan);
        }
        doc.clearAnnotations();
    }

    /**
     * Trains on every document named in a file list, one document name per line.
     * Each name is resolved against {@code directory} and read as an "sgml"
     * {@link ExternalDocument}.
     *
     * @param directory directory containing the documents
     * @param fileList  path of a text file listing one document name per line
     * @throws IOException if the file list cannot be opened or read
     */
    public void train(String directory, String fileList) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(fileList));
        try {
            String currentDoc;
            int docCount = 0;
            while ((currentDoc = reader.readLine()) != null) {
                System.out.println("\nTraining from document " + ++docCount + ": " + currentDoc);
                ExternalDocument doc = new ExternalDocument("sgml", directory, currentDoc);
                this.train(doc);
            }
        } finally {
            // Release the file handle even if training on a document fails.
            reader.close();
        }
    }

    /**
     * Writes the tag table, an {@code endtags} separator line, and the model to
     * a file, using the encoding in {@code JetTest.encoding}.
     *
     * @param fileName path of the file to write
     * @throws IOException if the file cannot be created or written
     */
    public void store(String fileName) throws IOException {
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter((OutputStream)new FileOutputStream(fileName), JetTest.encoding));
        try {
            this.annotator.writeTagTable(out);
            out.write("endtags");
            out.newLine();
            this.mene.store(out);
        } finally {
            // Closing flushes any buffered output; BufferedWriter.close() is a
            // no-op if the model writer has already closed the stream.
            out.close();
        }
    }

    /**
     * Reads the tag table and then the model from a file previously written in
     * the layout produced by {@link #store(String)}, using the encoding in
     * {@code JetTest.encoding}.
     *
     * @param fileName path of the file to read
     * @throws IOException if the file cannot be opened or read
     */
    public void load(String fileName) throws IOException {
        BufferedReader in = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(fileName), JetTest.encoding));
        try {
            this.annotator.readTagTable(in);
            // NOTE(review): assumes MaxEntNE.load consumes the reader before
            // returning, so it is safe to close afterwards -- confirm.
            this.mene.load(in);
        } finally {
            in.close();
        }
    }

    /**
     * Tags a whole document: splits each TEXT segment into sentences, then
     * tokenizes, lexicon-annotates, and tags each sentence.
     *
     * <p>A document with no TEXT annotations, or with no sentence annotations
     * after splitting, is left untagged (mirroring the null handling in
     * {@link #train(ExternalDocument)}).
     *
     * @param doc the document to tag
     */
    public void tagDocument(Document doc) {
        this.mene.newDocument();
        Vector<Annotation> textSegments = doc.annotationsOfType("TEXT");
        if (textSegments == null) {
            return;
        }
        for (Annotation ann : textSegments) {
            Span textSpan = ann.span();
            SentenceSplitter.split(doc, textSpan);
        }
        Vector<Annotation> sentences = doc.annotationsOfType("sentence");
        if (sentences == null) {
            return;
        }
        for (Annotation sentence : sentences) {
            Span sentenceSpan = sentence.span();
            Tokenizer.tokenize(doc, sentenceSpan);
            Lexicon.annotateWithDefinitions(doc, sentenceSpan.start(), sentenceSpan.end());
            this.tag(doc, sentenceSpan);
        }
    }

    /**
     * Signals the start of a new document by forwarding to
     * {@code MaxEntNE.newDocument()}.
     */
    public void newDocument() {
        this.mene.newDocument();
    }

    /**
     * Tags one span of a document. Spans inside a POSTER or SPEAKER zone are
     * handed to {@code HMMNameTagger.tagPersonZone}; all other spans are
     * annotated by the annotator and then passed to {@code Onoma.tagDrugs}.
     *
     * @param doc  the document containing the span
     * @param span the span to tag
     */
    public void tag(Document doc, Span span) {
        if (HMMNameTagger.inZone(doc, span, "POSTER") || HMMNameTagger.inZone(doc, span, "SPEAKER")) {
            HMMNameTagger.tagPersonZone(doc, span, this.annotator);
        } else {
            this.annotator.annotateSpan(doc, span);
            Onoma.tagDrugs(doc, span);
        }
    }

    /**
     * Command-line trainer. Expects {@code 4 + 2n} arguments for {@code n >= 1}
     * training corpora:
     * {@code state-file feature-file model-file props-file directory1 filelist1 ...}.
     * Runs two passes over all corpora, then builds the model and stores it in
     * {@code model-file}. Exits with status 1 if the argument count is invalid.
     *
     * @param args command-line arguments as described above
     * @throws IOException if a file list cannot be read or the model cannot be written
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 6 || args.length % 2 == 1) {
            System.err.println("MENameTagger requires 4 + 2n arguments for n training corpora:");
            System.err.println("  state-file feature-file model-file props-file directory1 filelist1 [directory2 filelist2] ...");
            System.exit(1);
        }
        String stateFile = args[0];
        String featureFile = args[1];
        String modelFile = args[2];
        String configFile = args[3];
        JetTest.initializeFromConfig(configFile);
        MENameTagger nt = new MENameTagger();
        nt.initialize(stateFile, featureFile);
        for (int pass = 1; pass <= 2; ++pass) {
            // The pass number and document counter are static state on MaxEntNE;
            // reset the counter at the start of each pass.
            MaxEntNE.pass = pass;
            MaxEntNE.trainingDocCount = 0;
            // Corpora are given as (directory, filelist) pairs starting at args[4].
            for (int iarg = 4; iarg < args.length; iarg += 2) {
                String directory = args[iarg];
                String fileList = args[iarg + 1];
                nt.train(directory, fileList);
            }
        }
        nt.mene.createModel();
        nt.store(modelFile);
    }
}

