/*
 * Decompiled with CFR 0.152.
 */
package edu.nyu.jet.hmm;

import edu.nyu.jet.hmm.HMMNameTagger;
import edu.nyu.jet.hmm.InteractiveAnnotator;
import edu.nyu.jet.hmm.SentenceWithMargin;
import edu.nyu.jet.hmm.WordFeatureHMMemitter;
import edu.nyu.jet.lex.Tokenizer;
import edu.nyu.jet.scorer.SGMLScorer;
import edu.nyu.jet.tipster.Annotation;
import edu.nyu.jet.tipster.AnnotationColor;
import edu.nyu.jet.tipster.Document;
import edu.nyu.jet.tipster.DocumentCollection;
import edu.nyu.jet.tipster.ExternalDocument;
import edu.nyu.jet.tipster.Span;
import edu.nyu.jet.zoner.SentenceSplitter;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Vector;

/**
 * Driver for active learning of the HMM name tagger.
 *
 * <p>{@link #initialize()} reads a document collection, splits and tokenizes
 * it, and trains {@link #nt} on the sentences of the first
 * {@link #initialTrainingSetSize} documents. Each call to {@link #learn()}
 * then tags the remaining ("pool") sentences, keeps the
 * {@link #sentencesPerSweep} sentences with the smallest margin reported by
 * the HMM, scores the last {@link #testSetSize} documents, and hands the
 * selected sentences to an {@link InteractiveAnnotator} thread. The sentences
 * given to the previous annotator thread are added to the training data once
 * that thread has finished.
 *
 * <p>All state is static; the class is driven from {@link #main(String[])}.
 */
public class ActiveLearner {
    static HMMNameTagger nt;
    static String[] tagsToRead = new String[]{"ENAMEX", "TIMEX", "NUMEX"};
    /** Number of leading documents whose sentences form the initial training set. */
    static final int initialTrainingSetSize = 50;
    /** Number of trailing documents that are scored on every sweep. */
    static final int testSetSize = 50;
    static final boolean activeTraining = true;
    static final boolean simulatedTraining = false;
    static final boolean multithread = true;
    /** Number of smallest-margin sentences selected for annotation per sweep. */
    static final int sentencesPerSweep = 5;
    /** Candidates ({@link SentenceWithMargin}) selected during the current sweep. */
    static ArrayList sentencesWithSmallestMargin;
    /** Sentences ({@link SentenceWithMargin}) handed to the current annotator thread. */
    static ArrayList sentencesToAnnotate;
    /** Documents owning {@link #sentencesToAnnotate}; skipped while tagging the pool. */
    static ArrayList documentsBeingAnnotated = new ArrayList();
    static InteractiveAnnotator annotationThread = null;
    static ArrayList poolSentences;
    /** Cleared (outside this class) to stop the learning loop. */
    public static volatile boolean keepLearning = true;
    static int sentencesInPool = 0;
    static DocumentCollection col;
    static PrintWriter logFile = null;
    /**
     * Running total of annotated sentences that have been added to the
     * training data across all sweeps; written to the log next to the scores.
     */
    private static int totalSentencesAnnotated = 0;

    /**
     * Opens the log file and the document collection, trains the initial
     * model, and runs learning sweeps until {@link #keepLearning} is cleared
     * or the sweep budget is used up.
     *
     * @param args ignored
     * @throws IOException if the log file cannot be created
     */
    public static void main(String[] args) throws IOException {
        String home = "C:/Documents and Settings/Ralph Grishman/My Documents/";
        String logFileName = home + "active.log";
        logFile = new PrintWriter(new BufferedWriter(new FileWriter(logFileName)));
        try {
            // Constructed only for its side effect (as in the original code).
            new AnnotationColor(home + "HMM");
            col = new DocumentCollection(home + "HMM/NE/ACE training Collection.txt");
            ActiveLearner.initialize();
            for (int rep = 0; rep <= 500; rep += sentencesPerSweep) {
                ActiveLearner.learn();
                if (!keepLearning) {
                    break;
                }
            }
        } finally {
            // Always flush and close the log, even if a sweep fails part-way.
            logFile.close();
        }
    }

    /**
     * Loads and preprocesses every document in {@link #col}, marks the
     * initial training sentences, moves the reference ENAMEX annotations to
     * TRUENAMEX, and trains the HMM on the initial training sentences.
     */
    static void initialize() {
        col.open();
        for (int i = 0; i < col.size(); ++i) {
            ExternalDocument doc = col.get(i);
            System.out.println("Reading " + doc.fileName());
            doc.setAllTags(true);
            doc.open();
            Vector<Annotation> textSegments = doc.annotationsOfType("TEXT");
            if (textSegments == null) {
                // No TEXT segment: nothing to split (other lookups in this
                // class treat a null result the same way).
                continue;
            }
            for (Annotation textSegment : textSegments) {
                SentenceSplitter.split(doc, textSegment.span());
                // NOTE(review): this fetches every sentence annotation of the
                // document, so with more than one TEXT segment the earlier
                // sentences are tokenized again — confirm this is harmless.
                Vector<Annotation> sentences = doc.annotationsOfType("sentence");
                if (sentences == null) {
                    continue;
                }
                for (Annotation sentence : sentences) {
                    Tokenizer.tokenize(doc, sentence.span());
                }
            }
        }

        // Mark every sentence of the leading documents as training data.
        // Clamped so that a collection smaller than the nominal training-set
        // size does not index past the end.
        int trainingDocCount = Math.min(initialTrainingSetSize, col.size());
        int initialTrainingSentenceCount = 0;
        for (int i = 0; i < trainingDocCount; ++i) {
            Vector<Annotation> sentences = col.get(i).annotationsOfType("sentence");
            if (sentences == null) {
                continue;
            }
            for (Annotation sentence : sentences) {
                sentence.put("training", "true");
                ++initialTrainingSentenceCount;
            }
        }
        System.out.println(initialTrainingSentenceCount + " sentences in initial training set");

        // Copy every ENAMEX to TRUENAMEX; outside the initial training
        // documents the ENAMEX itself is then removed.
        for (int i = 0; i < col.size(); ++i) {
            ExternalDocument doc = col.get(i);
            Vector<Annotation> enamexList = doc.annotationsOfType("ENAMEX");
            if (enamexList == null) {
                continue;
            }
            // Iterate over a snapshot because annotations are removed inside
            // the loop (eraseAnnotationsInside takes the same precaution).
            for (Annotation enamex : new ArrayList<Annotation>(enamexList)) {
                doc.annotate("TRUENAMEX", enamex.span(), enamex.attributes());
                if (i >= initialTrainingSetSize) {
                    doc.removeAnnotation(enamex);
                }
            }
        }

        nt = new HMMNameTagger(WordFeatureHMMemitter.class);
        nt.buildNameHMM("data/ACEnameTags.txt");
        ActiveLearner.nt.nameHMM.recordMargin();
        for (int i = 0; i < col.size(); ++i) {
            ExternalDocument doc = col.get(i);
            ActiveLearner.nt.nameHMM.newDocument();
            Vector<Annotation> sentences = doc.annotationsOfType("sentence");
            if (sentences == null) {
                continue;
            }
            for (Annotation sentence : sentences) {
                if (sentence.get("training") == null) {
                    continue;
                }
                ActiveLearner.nt.annotator.trainOnSpan(doc, sentence.span());
            }
        }
        ActiveLearner.nt.nameHMM.computeProbabilities();
    }

    /**
     * Performs one learning sweep: tags the pool and selects the
     * smallest-margin sentences, scores the test documents, erases the
     * hypothesized names, folds in the sentences annotated by the previous
     * annotator thread, and (unless {@link #keepLearning} has been cleared)
     * starts a new annotator thread on the newly selected sentences.
     */
    static void learn() {
        selectSmallestMarginSentences();
        scoreTestSet();
        eraseHypothesizedNames();
        if (annotationThread != null) {
            absorbCompletedAnnotations();
        }
        if (!keepLearning) {
            return;
        }
        startAnnotationThread();
    }

    /**
     * Tags every non-training sentence of the documents not currently being
     * annotated, counts them in {@link #sentencesInPool}, and keeps the
     * {@link #sentencesPerSweep} with the smallest margin in
     * {@link #sentencesWithSmallestMargin}.
     */
    private static void selectSmallestMarginSentences() {
        sentencesInPool = 0;
        sentencesWithSmallestMargin = new ArrayList(sentencesPerSweep);
        // Largest margin among the currently selected sentences.
        double maxSmallestMargin = 0.0;
        for (int i = 0; i < col.size() && keepLearning; ++i) {
            ExternalDocument doc = col.get(i);
            if (documentsBeingAnnotated.contains(doc)) {
                continue;
            }
            ActiveLearner.nt.nameHMM.newDocument();
            Vector<Annotation> sentences = doc.annotationsOfType("sentence");
            if (sentences == null) {
                continue;
            }
            for (Annotation sentence : sentences) {
                if (sentence.get("training") != null) {
                    continue;
                }
                ActiveLearner.nt.annotator.annotateSpan(doc, sentence.span());
                double margin = ActiveLearner.nt.nameHMM.getMargin();
                if (sentencesWithSmallestMargin.size() < sentencesPerSweep) {
                    sentencesWithSmallestMargin.add(new SentenceWithMargin(doc, sentence, margin));
                    if (maxSmallestMargin < margin) {
                        maxSmallestMargin = margin;
                    }
                } else if (margin < maxSmallestMargin) {
                    // Replace the current largest-margin entry, then refresh
                    // the threshold from the new largest entry.
                    SentenceWithMargin evicted =
                            (SentenceWithMargin) Collections.max(sentencesWithSmallestMargin);
                    sentencesWithSmallestMargin.remove(evicted);
                    sentencesWithSmallestMargin.add(new SentenceWithMargin(doc, sentence, margin));
                    SentenceWithMargin newMax =
                            (SentenceWithMargin) Collections.max(sentencesWithSmallestMargin);
                    maxSmallestMargin = newMax.margin;
                }
                ++sentencesInPool;
            }
        }
    }

    /**
     * Scores ENAMEX against TRUENAMEX on the trailing test documents, prints
     * the overall recall/precision figures, and appends one line to the log.
     */
    private static void scoreTestSet() {
        int tagsInResponses = 0;
        int tagsInKeys = 0;
        int matchingTags = 0;
        int matchingAttrs = 0;
        // Clamped so that a collection smaller than the nominal test-set size
        // does not produce a negative index.
        int firstTestDoc = Math.max(0, col.size() - testSetSize);
        for (int i = firstTestDoc; i < col.size(); ++i) {
            ExternalDocument doc = col.get(i);
            SGMLScorer scorer = new SGMLScorer(doc, doc);
            scorer.match("TRUENAMEX", "ENAMEX");
            tagsInResponses += scorer.totalTagsInDoc1;
            tagsInKeys += scorer.totalTagsInDoc2;
            matchingTags += scorer.totalMatchingTags;
            matchingAttrs += scorer.totalMatchingAttrs;
        }
        System.out.println("Overall Type Recall:          " + (float)matchingTags / (float)tagsInKeys);
        System.out.println("Overall Type Precision:       " + (float)matchingTags / (float)tagsInResponses);
        System.out.println("Overall Attribute Recall:     " + (float)matchingAttrs / (float)tagsInKeys);
        System.out.println("Overall Attribute Precision:  " + (float)matchingAttrs / (float)tagsInResponses);
        if (logFile != null) {
            // The count is the number of annotated sentences trained on so
            // far, i.e. what the model being scored here has seen beyond the
            // initial training set.
            logFile.println(totalSentencesAnnotated + ", " + (float)matchingAttrs / (float)tagsInKeys + ", " + (float)matchingAttrs / (float)tagsInResponses);
        }
    }

    /** Removes the ENAMEX annotations lying inside every non-training sentence. */
    private static void eraseHypothesizedNames() {
        for (int i = 0; i < col.size(); ++i) {
            ExternalDocument doc = col.get(i);
            Vector<Annotation> sentences = doc.annotationsOfType("sentence");
            if (sentences == null) {
                continue;
            }
            for (Annotation sentence : sentences) {
                if (sentence.get("training") != null) {
                    continue;
                }
                ActiveLearner.eraseAnnotationsInside(doc, "ENAMEX", sentence.span());
            }
        }
    }

    /**
     * Waits for the running annotator thread, then trains on every sentence
     * that was handed to it and recomputes the HMM probabilities.
     */
    private static void absorbCompletedAnnotations() {
        try {
            if (annotationThread.isAlive()) {
                System.out.println("Waiting for annotation thread.");
            }
            annotationThread.join();
            System.out.println("Annotation thread finished.");
        } catch (InterruptedException e) {
            // Original behavior kept: report and carry on. The interrupt flag
            // is intentionally not restored, because a set flag would make
            // the join() of every later sweep throw immediately instead of
            // waiting for the annotator.
            System.out.println(e);
        }
        for (int k = 0; k < sentencesToAnnotate.size(); ++k) {
            SentenceWithMargin swm = (SentenceWithMargin) sentencesToAnnotate.get(k);
            ActiveLearner.nt.annotator.trainOnSpan(swm.document, swm.sentence.span());
            ++totalSentencesAnnotated;
        }
        ActiveLearner.nt.nameHMM.computeProbabilities();
    }

    /**
     * Hands the sentences selected in this sweep to a new
     * {@link InteractiveAnnotator} thread and records their documents in
     * {@link #documentsBeingAnnotated}.
     */
    private static void startAnnotationThread() {
        sentencesToAnnotate = new ArrayList(sentencesWithSmallestMargin);
        documentsBeingAnnotated = new ArrayList();
        for (int k = 0; k < sentencesToAnnotate.size(); ++k) {
            SentenceWithMargin swm = (SentenceWithMargin) sentencesToAnnotate.get(k);
            documentsBeingAnnotated.add(swm.document);
        }
        annotationThread = new InteractiveAnnotator(sentencesToAnnotate);
        // Priority 6, one step above the default thread priority.
        annotationThread.setPriority(Thread.NORM_PRIORITY + 1);
        annotationThread.start();
        System.out.println("*** initiated annotation Thread ***");
    }

    /**
     * Removes from {@code doc} every annotation of the given type whose span
     * lies within {@code span}.
     *
     * @param doc  document to modify
     * @param type annotation type to remove
     * @param span enclosing span
     */
    private static void eraseAnnotationsInside(Document doc, String type, Span span) {
        Vector<Annotation> annotations = doc.annotationsOfType(type);
        if (annotations == null) {
            return;
        }
        // Work on a copy: annotations are removed from the document while
        // the list is being walked.
        for (Annotation a : new ArrayList<Annotation>(annotations)) {
            if (a.span().within(span)) {
                doc.removeAnnotation(a);
            }
        }
    }

    /**
     * Copies the TRUENAMEX annotations starting inside {@code sentence} back
     * to ENAMEX, marks the sentence as training data, and trains on it.
     * Not called from within this class.
     *
     * @param doc      document containing the sentence
     * @param sentence sentence annotation to add to the training data
     */
    private static void addToTraining(Document doc, Annotation sentence) {
        Span span = sentence.span();
        System.out.println("Now annotating:");
        System.out.println(doc.text(sentence));
        int start = span.start();
        int end = span.end();
        for (int i = start; i < end; ++i) {
            Vector<Annotation> enamexList = doc.annotationsAt(i, "TRUENAMEX");
            if (enamexList == null) {
                continue;
            }
            for (Annotation enamex : enamexList) {
                doc.annotate("ENAMEX", enamex.span(), enamex.attributes());
            }
        }
        sentence.put("training", "true");
        ActiveLearner.nt.annotator.trainOnSpan(doc, span);
    }
}

