/*
 * Decompiled with CFR 0.152.
 */
package edu.nyu.jet.hmm;

import edu.nyu.jet.Console;
import edu.nyu.jet.chunk.TokenClassifier;
import edu.nyu.jet.lex.Tokenizer;
import edu.nyu.jet.lisp.FeatureSet;
import edu.nyu.jet.tipster.Annotation;
import edu.nyu.jet.tipster.Document;
import edu.nyu.jet.tipster.DocumentCollection;
import edu.nyu.jet.tipster.ExternalDocument;
import edu.nyu.jet.tipster.Span;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Vector;

/**
 * Connects a {@link TokenClassifier} to the annotations of a {@link Document}.
 *
 * <p>For training, annotations found on the document are translated into one
 * tag per token and handed to the classifier. For tagging, the tag sequence
 * produced by the classifier is translated back into annotations which are
 * added to the document.
 *
 * <p>The translation is driven by the <em>tag table</em>. Each row has four
 * columns:
 * <ol start="0">
 *   <li>annotation type,</li>
 *   <li>feature name, or {@code null} if the row matches on type alone,</li>
 *   <li>feature value (only meaningful when column 1 is non-null),</li>
 *   <li>the classifier tag associated with that annotation.</li>
 * </ol>
 */
public class HMMannotator {
    /** Classifier that is trained and queried for tag sequences. */
    TokenClassifier hmm;
    /** Rows of {annotation type, feature name, feature value, tag}. */
    String[][] tagTable;
    /** If true, tags are prefixed with "B-" (first token) and "I-" (later tokens). */
    boolean BItag;
    /** If true (and BItag is false), every token gets its own annotation. */
    boolean annotateEachToken;
    /** Type of the annotations whose spans are trained on / tagged. */
    String zoneToTag;
    /** If true, each annotation added is reported through {@link Console}. */
    boolean trace;
    /** If true, a "margin" feature is stored on each annotation added. */
    boolean recordMargin = false;
    /** If true, n-best tagging adds an "HMMtags" annotation carrying "prob". */
    boolean recordProbability = false;
    /** Runs longer than this many tokens are never turned into an annotation. */
    final int LONGEST_ANNOTATION_SPAN = 10;

    /**
     * Creates an annotator with an empty tag table, no B-/I- prefixes, one
     * annotation per token, zone "S" and tracing off.
     *
     * @param h the classifier to train and to query for tags
     */
    public HMMannotator(TokenClassifier h) {
        this.hmm = h;
        this.tagTable = new String[0][4];
        this.BItag = false;
        this.annotateEachToken = true;
        this.zoneToTag = "S";
        this.trace = false;
    }

    /**
     * Replaces the tag table. The array is stored as is, not copied.
     *
     * @param table rows of {annotation type, feature name, feature value, tag}
     */
    public void setTagTable(String[][] table) {
        this.tagTable = table;
    }

    /**
     * Reads the tag table from the named file. I/O errors are reported with
     * a stack trace and otherwise ignored.
     *
     * @param tagFileName path of the tag table file
     */
    public void readTagTable(String tagFileName) {
        try {
            BufferedReader in = new BufferedReader(new FileReader(tagFileName));
            try {
                this.readTagTable(in);
            } finally {
                // This method opened the reader, so it is responsible for
                // releasing the underlying file handle.
                in.close();
            }
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the tag table from {@code in}, one row per line, with fields
     * separated by whitespace. A line has either two fields (annotation type,
     * tag) or four fields (annotation type, feature name, feature value,
     * tag). Reading stops at end of input or at a line consisting of the
     * single word {@code endtags}. Any other line is reported on standard
     * output and skipped.
     *
     * <p>The reader is deliberately left open: reading stops right after the
     * {@code endtags} line, so a caller may continue reading from it.
     *
     * @param in source of the tag table lines
     */
    public void readTagTable(BufferedReader in) {
        try {
            ArrayList<String[]> tagTableList = new ArrayList<String[]>();
            String line;
            while ((line = in.readLine()) != null) {
                String[] tags = line.split("\\s+");
                if (tags.length == 1 && tags[0].equals("endtags")) {
                    break;
                }
                String[] tagTableEntry = new String[4];
                if (tags.length == 2) {
                    // Type-only row: columns 1 and 2 stay null.
                    tagTableEntry[0] = tags[0].intern();
                    tagTableEntry[3] = tags[1].intern();
                } else if (tags.length == 4) {
                    tagTableEntry[0] = tags[0].intern();
                    tagTableEntry[1] = tags[1].intern();
                    tagTableEntry[2] = tags[2].intern();
                    tagTableEntry[3] = tags[3].intern();
                } else {
                    System.out.println("*** Invalid entry in tag table file: " + line);
                    continue;
                }
                tagTableList.add(tagTableEntry);
            }
            this.tagTable = tagTableList.toArray(new String[tagTableList.size()][]);
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes the tag table to {@code pw}, one row per line, in the two- or
     * four-field layout accepted by {@link #readTagTable(BufferedReader)}.
     * The writer is neither flushed nor closed here. On an I/O error a
     * message is printed and the JVM is terminated.
     *
     * @param pw destination for the tag table lines
     */
    public void writeTagTable(BufferedWriter pw) {
        try {
            for (int i = 0; i < this.tagTable.length; ++i) {
                String[] row = this.tagTable[i];
                if (row[1] == null) {
                    pw.write(row[0] + " " + row[3]);
                } else {
                    pw.write(row[0] + " " + row[1] + " " + row[2] + " " + row[3]);
                }
                pw.newLine();
            }
        }
        catch (IOException e) {
            System.out.println("Error in HMMannotator.writeTagTable: " + e);
            // NOTE(review): exits with status 0 even though this is a failure
            // path; kept as is because callers may rely on it.
            System.exit(0);
        }
    }

    /**
     * @return the current tag table (the internal array, not a copy)
     */
    public String[][] getTagTable() {
        return this.tagTable;
    }

    /**
     * @param flag true to use "B-" / "I-" prefixed tags
     */
    public void setBItag(boolean flag) {
        this.BItag = flag;
    }

    /**
     * @param flag true to add one annotation per token; false to merge runs
     *             of identically tagged tokens (only consulted when B-/I-
     *             tagging is off)
     */
    public void setAnnotateEachToken(boolean flag) {
        this.annotateEachToken = flag;
    }

    /**
     * @param zone type of the annotations whose spans are processed by
     *             {@link #train(Document)} and {@link #annotate(Document)}
     */
    public void setZoneToTag(String zone) {
        this.zoneToTag = zone;
    }

    /**
     * @param trace true to report every annotation added via {@link Console}
     */
    public void setTrace(boolean trace) {
        this.trace = trace;
    }

    /**
     * @param recordMargin true to store a "margin" feature on added annotations
     */
    public void setRecordMargin(boolean recordMargin) {
        this.recordMargin = recordMargin;
    }

    /**
     * @param recordProbability true to have n-best tagging add an "HMMtags"
     *                          annotation with a "prob" feature per hypothesis
     */
    public void setRecordProb(boolean recordProbability) {
        this.recordProbability = recordProbability;
    }

    /**
     * Finds the tag of the first tag table row matching {@code ann}. A row
     * matches when the annotation type equals column 0 and, if column 1 is
     * non-null, the annotation's value for that feature equals column 2.
     *
     * @param ann the annotation to look up
     * @return the tag of the first matching row, or {@code null} if none matches
     */
    private String tagForAnnotation(Annotation ann) {
        for (int i = 0; i < this.tagTable.length; ++i) {
            String[] tagEntry = this.tagTable[i];
            if (!ann.type().equals(tagEntry[0])) {
                continue;
            }
            if (tagEntry[1] != null) {
                // Guard against an annotation of the right type that does not
                // carry the feature at all: treat it as a non-match rather
                // than dereferencing a null value.
                Object value = ann.get(tagEntry[1]);
                if (value == null || !value.equals(tagEntry[2])) {
                    continue;
                }
            }
            return tagEntry[3];
        }
        return null;
    }

    /**
     * Adds to {@code doc} the annotation associated with {@code tag}, spanning
     * {@code tokens[first]} through {@code tokens[last]}. Only the first tag
     * table row whose tag equals {@code tag} is used; if no row matches, or
     * the run is longer than {@link #LONGEST_ANNOTATION_SPAN} tokens, nothing
     * is added.
     *
     * @param doc    document receiving the annotation
     * @param tag    classifier tag (without any "B-" / "I-" prefix)
     * @param tokens token annotations of the span being tagged
     * @param first  index of the first token of the run
     * @param last   index of the last token of the run (inclusive)
     */
    private void annotateForTag(Document doc, String tag, Annotation[] tokens, int first, int last) {
        if (last - first + 1 > this.LONGEST_ANNOTATION_SPAN) {
            return;
        }
        Span span = new Span(tokens[first].start(), tokens[last].end());
        for (int i = 0; i < this.tagTable.length; ++i) {
            String[] tagEntry = this.tagTable[i];
            if (!tag.equals(tagEntry[3])) {
                continue;
            }
            // The margin is requested before the document is modified.
            double margin = 0.0;
            if (this.recordMargin) {
                margin = this.hmm.getLocalMargin(doc, tokens, tag, first, last);
            }
            FeatureSet fs = tagEntry[1] == null
                    ? new FeatureSet()
                    : new FeatureSet(tagEntry[1], tagEntry[2]);
            Annotation ann = new Annotation(tagEntry[0], span, fs);
            doc.addAnnotation(ann);
            if (this.recordMargin) {
                // The margin is stored truncated to an integer.
                ann.put("margin", Integer.valueOf((int) margin));
            }
            if (this.trace) {
                Console.println("Annotate " + doc.normalizedText(span) + " == " + ann.toSGMLString());
            }
            return;
        }
    }

    /**
     * Trains the classifier on every annotation of type {@code zoneToTag} in
     * {@code doc}. Each such span is tokenized and then passed to
     * {@link #trainOnSpan(Document, Span)}. If the document has no such
     * annotations, a message is printed and nothing is trained.
     *
     * @param doc the annotated training document
     */
    public void train(Document doc) {
        Vector<Annotation> textSegments = doc.annotationsOfType(this.zoneToTag);
        if (textSegments == null) {
            System.out.println("HMMAnnotate.train:  no " + this.zoneToTag + " annotations in document.");
            return;
        }
        for (Annotation para : textSegments) {
            Span textSpan = para.span();
            Tokenizer.tokenize(doc, textSpan);
            this.trainOnSpan(doc, textSpan);
        }
    }

    /**
     * Trains the classifier on the tokens of {@code textSpan}. Walks the span
     * token by token and derives a tag for each token from the annotations
     * starting at the token's position (via the tag table); tokens not covered
     * by a tagged annotation get the tag "other". The resulting token and tag
     * arrays are passed to the classifier's {@code train} method.
     *
     * <p>Tagged annotations that start inside another tagged annotation are
     * reported and ignored. Annotations that do not end on a token boundary,
     * or that extend past the end of the span, are reported.
     *
     * @param doc      the document containing the tokens and annotations
     * @param textSpan the (already tokenized) span to train on
     */
    public void trainOnSpan(Document doc, Span textSpan) {
        ArrayList<Annotation> tokens = new ArrayList<Annotation>();
        ArrayList<String> tags = new ArrayList<String>();
        // Tag given to tokens that do not start a new tagged annotation.
        String continuationTag = "other";
        // End offset of the tagged annotation currently open; 0 means none.
        int markupEnd = 0;
        int end = textSpan.end();
        int posn = Tokenizer.skipWSX(doc, textSpan.start(), end);
        Annotation token;
        while (posn < end && (token = doc.tokenAt(posn)) != null) {
            // Look for the first annotation at this position that the tag
            // table knows about.
            Vector<Annotation> anns = doc.annotationsAt(posn);
            String tag = null;
            Annotation ann = null;
            for (int i = 0; i < anns.size(); ++i) {
                ann = anns.get(i);
                tag = this.tagForAnnotation(ann);
                if (tag != null) {
                    break;
                }
            }
            String tokenTag;
            if (tag == null) {
                tokenTag = continuationTag;
            } else if (markupEnd == 0) {
                if (this.BItag) {
                    tokenTag = ("B-" + tag).intern();
                    continuationTag = ("I-" + tag).intern();
                } else {
                    tokenTag = tag;
                    continuationTag = tag;
                }
                markupEnd = ann.span().end();
            } else {
                System.out.println("Nested tag " + tag + " ignored.");
                System.out.println("(tag from annotation " + ann + ")");
                tokenTag = continuationTag;
            }
            tokens.add(token);
            tags.add(tokenTag);
            posn = token.span().end();
            if (markupEnd != 0 && posn > markupEnd) {
                System.out.println("Annotation does not end at token boundary");
                System.out.println("(annotation ends at " + markupEnd + ", token boundary is " + posn);
            }
            if (posn >= markupEnd) {
                // The open annotation (if any) has been passed: back to "other".
                markupEnd = 0;
                continuationTag = "other";
            }
        }
        if (markupEnd != 0) {
            System.out.println("Annotation extends past text [sentence] boundary");
            System.out.println("(annotation ends at " + markupEnd + ")");
        }
        int size = tokens.size();
        Annotation[] tokenArray = tokens.toArray(new Annotation[size]);
        String[] tagArray = tags.toArray(new String[size]);
        this.hmm.train(doc, tokenArray, tagArray);
    }

    /**
     * Trains the classifier on every document of {@code col}. The collection
     * and each of its documents are opened before use.
     *
     * @param col collection of annotated training documents
     */
    public void train(DocumentCollection col) {
        col.open();
        for (int i = 0; i < col.size(); ++i) {
            ExternalDocument doc = col.get(i);
            doc.open();
            System.out.println("Training from " + doc.fileName());
            this.train(doc);
        }
    }

    /**
     * Tags every annotation of type {@code zoneToTag} in {@code doc} using
     * {@link #annotateSpan(Document, Span)}. If the document has no such
     * annotations, a message is printed.
     *
     * @param doc the document to annotate
     */
    public void annotate(Document doc) {
        Vector<Annotation> textSegments = doc.annotationsOfType(this.zoneToTag);
        if (textSegments == null) {
            System.out.println("HMMAnnotate.annotate:  no " + this.zoneToTag + " annotations in document.");
            return;
        }
        for (Annotation para : textSegments) {
            this.annotateSpan(doc, para.span());
        }
    }

    /**
     * Tags every annotation of type {@code zoneToTag} in {@code doc} using
     * {@link #annotateSpanNbest(Document, Span, int, String)}. If the document
     * has no such annotations, a message is printed.
     *
     * @param doc   the document to annotate
     * @param n     maximum number of hypotheses per span
     * @param hypId prefix of the hypothesis identifiers
     */
    public void annotateNbest(Document doc, int n, String hypId) {
        Vector<Annotation> textSegments = doc.annotationsOfType(this.zoneToTag);
        if (textSegments == null) {
            System.out.println("HMMAnnotate.annotate:  no " + this.zoneToTag + " annotations in document.");
            return;
        }
        for (Annotation para : textSegments) {
            this.annotateSpanNbest(doc, para.span(), n, hypId);
        }
    }

    /**
     * Tags the tokens of {@code textSpan} with the tag sequence returned by
     * the classifier's {@code viterbi} method and adds the corresponding
     * annotations to {@code doc}. Does nothing if the span has no tokens or
     * the classifier returns no tags.
     *
     * @param doc      the document to annotate
     * @param textSpan the span to tag
     */
    public void annotateSpan(Document doc, Span textSpan) {
        Annotation[] tokens = Tokenizer.gatherTokens(doc, textSpan);
        if (tokens.length == 0) {
            return;
        }
        String[] tags = this.hmm.viterbi(doc, tokens);
        if (tags == null) {
            return;
        }
        this.tagsToAnnotations(doc, tokens, tags);
    }

    /**
     * Tags the tokens of {@code textSpan} with up to {@code n} tag sequences.
     * The first sequence comes from the classifier's {@code viterbi} method,
     * the following ones from successive calls to {@code nextBest}. The
     * annotations of hypothesis number {@code i} are added while the
     * document's current hypothesis is set to {@code hypId + "-" + i}; the
     * current hypothesis is reset to {@code null} afterwards.
     *
     * @param doc      the document to annotate
     * @param textSpan the span to tag
     * @param n        maximum number of hypotheses to generate
     * @param hypId    prefix of the hypothesis identifiers
     * @return the identifiers (Strings) of the hypotheses generated; empty if
     *         the span has no tokens or the classifier returns no tags
     */
    public ArrayList annotateSpanNbest(Document doc, Span textSpan, int n, String hypId) {
        ArrayList<String> hypotheses = new ArrayList<String>();
        Annotation[] tokens = Tokenizer.gatherTokens(doc, textSpan);
        if (tokens.length == 0) {
            return hypotheses;
        }
        String[] tags = this.hmm.viterbi(doc, tokens);
        if (tags == null) {
            return hypotheses;
        }
        int rank = 0;
        while (true) {
            String hypothesis = hypId + "-" + rank;
            doc.setCurrentHypothesis(hypothesis);
            this.tagsToAnnotations(doc, tokens, tags);
            hypotheses.add(hypothesis);
            if (this.recordProbability) {
                this.recordPathProbability(doc, textSpan);
            }
            ++rank;
            // nextBest() is only asked for when another hypothesis is wanted.
            if (rank >= n || (tags = this.hmm.nextBest()) == null) {
                break;
            }
        }
        doc.setCurrentHypothesis(null);
        return hypotheses;
    }

    /**
     * Adds an "HMMtags" annotation over {@code textSpan} whose "prob" feature
     * holds the classifier's current path probability, truncated to an integer.
     */
    private void recordPathProbability(Document doc, Span textSpan) {
        Integer prob = Integer.valueOf((int) this.hmm.getPathProbability());
        doc.annotate("HMMtags", textSpan, new FeatureSet("prob", prob));
    }

    /**
     * Converts a tag sequence into annotations on {@code doc}.
     * <ul>
     *   <li>With B-/I- tagging, a run starts at a "B-" tag and is closed by
     *       the next tag longer than two characters that does not start with
     *       "I-", or by the end of the token array.</li>
     *   <li>Otherwise, if {@code annotateEachToken} is set, each token is
     *       annotated on its own.</li>
     *   <li>Otherwise, maximal runs of identical tags are annotated.</li>
     * </ul>
     *
     * @param doc    the document to annotate
     * @param tokens the tokens that were tagged (non-empty in the run-merging mode)
     * @param tags   one tag per token
     */
    private void tagsToAnnotations(Document doc, Annotation[] tokens, String[] tags) {
        if (this.BItag) {
            int start = -1;
            String xtag = "";
            for (int i = 0; i < tokens.length; ++i) {
                String tag = tags[i];
                if (tag.length() > 2 && !tag.startsWith("I-") && start >= 0) {
                    this.annotateForTag(doc, xtag, tokens, start, i - 1);
                    start = -1;
                }
                if (tag.length() > 2 && tag.startsWith("B-")) {
                    start = i;
                    xtag = tag.substring(2);
                }
            }
            // A run still open at the last token must be emitted as well,
            // just as the run-merging branch below flushes its final run.
            if (start >= 0) {
                this.annotateForTag(doc, xtag, tokens, start, tokens.length - 1);
            }
        } else if (this.annotateEachToken) {
            for (int i = 0; i < tokens.length; ++i) {
                this.annotateForTag(doc, tags[i], tokens, i, i);
            }
        } else {
            int first = 0;
            String tag = tags[0];
            for (int i = 1; i < tokens.length; ++i) {
                if (tags[i].equals(tag)) {
                    continue;
                }
                this.annotateForTag(doc, tag, tokens, first, i - 1);
                tag = tags[i];
                first = i;
            }
            this.annotateForTag(doc, tag, tokens, first, tokens.length - 1);
        }
    }
}

