/*
 * Decompiled with CFR 0.152.
 */
package edu.nyu.jet.chunk;

import edu.nyu.jet.MaxEntModel;
import edu.nyu.jet.aceJet.Ace;
import edu.nyu.jet.aceJet.Datum;
import edu.nyu.jet.chunk.NameConstraints;
import edu.nyu.jet.chunk.TokenClassifier;
import edu.nyu.jet.tipster.Annotation;
import edu.nyu.jet.tipster.Document;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Vector;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class MaxEntNE
extends TokenClassifier {
    /** Maximum-entropy model that this tagger trains, stores, loads and queries. */
    MaxEntModel model;
    /** Outcome names of {@link #model}, indexed by outcome number; refilled by each {@code viterbi} call. */
    String[] state;
    /**
     * Per-document cache mapping a word (as it appears in the text) to the
     * distinct tag-type initials assigned to it so far; cleared by {@code newDocument}.
     */
    Map<String, String> cache = new HashMap<String, String>();
    /** Training pass selector: when 1, {@code train} only records word types instead of adding model events. */
    public static int pass = 0;
    /** Lower-cased word -> distinct tag-type characters seen for it in any training document; written by {@code store} and read by {@code load}. */
    Map<String, String> wordType = new HashMap<String, String>();
    /** Same as {@link #wordType}, but filled only while {@link #trainingDocCount} is even. */
    Map<String, String> wordTypeEvens = new HashMap<String, String>();
    /** Same as {@link #wordType}, but filled only while {@link #trainingDocCount} is odd. */
    Map<String, String> wordTypeOdds = new HashMap<String, String>();
    /**
     * Document counter: reset to 0 by {@code resetForTraining}, incremented by
     * {@code newDocument}, and set to -1 by {@code viterbi} (a negative value
     * makes {@code typeFeature} consult the full {@link #wordType} table).
     */
    static int trainingDocCount = 0;
    /** When true, features derived from "onoma" annotations are generated. */
    boolean useOnoma = true;
    /** Amount added to the log probability of the "other" state during {@code viterbi} decoding. */
    public static double otherOffset = 0.0;
    /** For the token sequence currently being processed: the "type" of the first "onoma" annotation at each token, or null. */
    String[] onomaType;
    /** Word -> cluster string, filled by {@code loadWordClusters}. */
    static Map<String, String> word2cluster = new HashMap<String, String>();
    /** Set to true once {@code loadWordClusters} has completed; enables the word-cluster prefix features. */
    static boolean useWordClusters = false;

    /**
     * Creates a tagger backed by a fresh {@link MaxEntModel} whose iteration
     * count is set to 80.
     */
    public MaxEntNE() {
        MaxEntModel freshModel = new MaxEntModel();
        freshModel.setIterations(80);
        this.model = freshModel;
    }

    /**
     * Prepares the tagger for a new training run: initializes the model for
     * training and restarts the document counter at zero.
     *
     * @param featureFile handed unchanged to {@code MaxEntModel.initializeForTraining}
     */
    public void resetForTraining(String featureFile) {
        model.initializeForTraining(featureFile);
        MaxEntNE.trainingDocCount = 0;
    }

    /**
     * Signals the start of a new document: advances the document counter and
     * empties the per-document word/tag cache.
     */
    @Override
    public void newDocument() {
        trainingDocCount += 1;
        cache.clear();
    }

    /**
     * Consumes one tagged token sequence for training.
     *
     * <p>When {@code pass == 1} the sequence is only used to record which tag
     * types each word occurs with. Otherwise one model event is added per
     * token, using the gold tag of the preceding token as the prior tag, and
     * the per-document cache is updated.
     *
     * @param doc    document containing the tokens
     * @param tokens token annotations, in order
     * @param tags   gold tag of each token, parallel to {@code tokens}
     */
    @Override
    public void train(Document doc, Annotation[] tokens, String[] tags) {
        final int count = tokens.length;
        final String[] words = new String[count];
        if (useOnoma) {
            onomaType = new String[count];
        }
        for (int k = 0; k < count; k++) {
            Annotation token = tokens[k];
            words[k] = doc.text(token).trim();
            if (useOnoma) {
                onomaType[k] = onomaFeature(doc, token);
            }
        }
        String previousTag = "other";
        for (int k = 0; k < count; k++) {
            if (pass != 1) {
                Datum event = NEfeatures(k, words, tokens, previousTag, doc);
                event.setOutcome(tags[k]);
                model.addEvent(event);
                previousTag = tags[k];
                addToCache(words[k], tags[k]);
            } else {
                recordWord(words[k], tags[k]);
            }
        }
    }

    /**
     * Records that {@code word} was seen with {@code tag} in the word-type
     * tables: always in {@code wordType}, and additionally in the odd or even
     * table depending on the parity of {@code trainingDocCount}.
     *
     * <p>A two-character prefix whose second character is '-' (e.g. "B-") is
     * stripped from the tag; the type is then represented by its first
     * character, except that "other" is represented by 'x'.
     *
     * @param word token text as it appears in the document (may be empty)
     * @param tag  gold tag of the token; must be non-empty after prefix removal
     */
    private void recordWord(String word, String tag) {
        String token = word.toLowerCase();
        // Length guard: a one-character tag has no position 1 to inspect.
        if (tag.length() > 1 && tag.charAt(1) == '-') {
            tag = tag.substring(2);
        }
        char tagChar = tag.equals("other") ? 'x' : tag.charAt(0);
        this.recordWord(token, tagChar, this.wordType);
        if (trainingDocCount % 2 == 1) {
            this.recordWord(token, tagChar, this.wordTypeOdds);
        } else {
            this.recordWord(token, tagChar, this.wordTypeEvens);
        }
        // Diagnostic only. The emptiness guard matters because token text is
        // trimmed by the caller and can therefore be the empty string.
        if (!Ace.monocase && tagChar == 'p' && word.length() > 0 && Character.isLowerCase(word.charAt(0))) {
            System.out.println("Lower case person token " + token);
        }
    }

    /**
     * Adds {@code tagChar} to the set of type characters stored for
     * {@code token} in {@code typeTable}, unless it is already present.
     *
     * @param token     lower-cased word used as the table key
     * @param tagChar   one-character code of the tag type
     * @param typeTable table to update
     */
    private void recordWord(String token, char tagChar, Map<String, String> typeTable) {
        String seen = typeTable.get(token);
        if (seen == null) {
            typeTable.put(token, String.valueOf(tagChar));
            return;
        }
        if (seen.indexOf(tagChar) >= 0) {
            return;
        }
        typeTable.put(token, seen + tagChar);
    }

    /**
     * Builds the feature set for token {@code i} given the tag assumed for the
     * preceding token.
     *
     * <p>Features cover the surrounding words, their orthographic shape (see
     * {@code wordFeature}), the per-document cache, the word-type table, and,
     * when enabled, onoma annotations and word-cluster prefixes. Feature names
     * and value formats are kept exactly as before so that existing models
     * remain usable.
     *
     * @param i        index of the token being described
     * @param words    trimmed text of every token in the sequence
     * @param tokens   token annotations, parallel to {@code words}
     * @param priorTag tag assumed for token {@code i - 1}
     * @param doc      document containing the tokens (not used here)
     * @return the populated datum (no outcome set)
     */
    private Datum NEfeatures(int i, String[] words, Annotation[] tokens, String priorTag, Document doc) {
        Datum d = new Datum();
        int last = words.length - 1;
        String prior1 = i > 0 ? words[i - 1].toLowerCase() : "^";
        String prior2 = i > 1 ? words[i - 2].toLowerCase() : "^";
        String current = words[i].toLowerCase();
        String next = i >= last ? "$" : words[i + 1].toLowerCase();
        String next2 = i >= last - 1 ? "$" : words[i + 2].toLowerCase();
        d.addFV("p", prior1 + ":" + priorTag);
        d.addFV("c", current + ":" + priorTag);
        d.addFV("n", next + ":" + priorTag);
        d.addFV("n2", next2 + ":" + priorTag);
        // Shape of the current, previous and next word; computed once and reused below.
        String cf = MaxEntNE.wordFeature(words[i], MaxEntNE.isForcedCap(tokens[i]));
        String pf = i > 0 ? MaxEntNE.wordFeature(words[i - 1], MaxEntNE.isForcedCap(tokens[i - 1])) : "^";
        String nf = i < last ? MaxEntNE.wordFeature(words[i + 1], MaxEntNE.isForcedCap(tokens[i + 1])) : "$";
        d.addFV("pcnf", pf + cf + nf);
        d.addFV(Ace.monocase ? "cfmono" : "cf", cf + ":" + priorTag);
        d.addFV("pt", priorTag);
        String cacheValue = this.cache.get(words[i]);
        if (cacheValue == null) {
            cacheValue = "";
        }
        d.addFV("ca", cacheValue + ":" + priorTag);
        d.addFV("pc", prior1 + ":" + words[i] + priorTag);
        d.addFV("p2", prior2 + ":" + prior1 + priorTag);
        if (i > 0) {
            d.addFV(Ace.monocase ? "pfmono" : "pf", pf + ":" + priorTag);
        } else {
            d.addFV("pf", "^:" + priorTag);
        }
        if (i < last) {
            d.addFV(Ace.monocase ? "nfmono" : "nf", nf + ":" + priorTag);
        } else {
            d.addFV("nf", "$:" + priorTag);
        }
        d.addFV("tt", this.typeFeature(words[i]) + ":" + priorTag);
        d.addFV("w", words[i] + ":" + priorTag);
        if (this.useOnoma) {
            if (this.onomaType[i] != null) {
                d.addFV("onoma", this.onomaType[i]);
            }
            if (words[i].length() > 0 && Character.isUpperCase(words[i].charAt(0)) && i < words.length - 2) {
                // Skip over the run of capitalized words that starts at i; j ends on
                // the first non-capitalized word (or at index length - 2).
                int j = i + 1;
                while (words[j].length() > 0 && Character.isUpperCase(words[j].charAt(0)) && j < words.length - 2) {
                    ++j;
                }
                String onomaType2 = this.onomaType[j + 1];
                // Compare by value: onoma types are not guaranteed to be interned,
                // so reference comparison could silently never match.
                if (words[j].equals(",") && ("country".equals(onomaType2) || "usstate".equals(onomaType2))) {
                    d.addF("cityContext");
                } else if (words[j].equals("-") && words[j + 1].equals("based")) {
                    d.addF("-basedContext");
                }
            }
        }
        if (useWordClusters) {
            // Cluster lookups use the original (not lower-cased) word forms.
            prior1 = i > 0 ? words[i - 1] : "^";
            current = words[i];
            next = i >= last ? "$" : words[i + 1];
            d.addFV("p1px4", MaxEntNE.getWordClusterPrefix(prior1, 4));
            d.addFV("p1px6", MaxEntNE.getWordClusterPrefix(prior1, 6));
            d.addFV("p1px10", MaxEntNE.getWordClusterPrefix(prior1, 10));
            d.addFV("p1px20", MaxEntNE.getWordClusterPrefix(prior1, 20));
            d.addFV("cpx4", MaxEntNE.getWordClusterPrefix(current, 4));
            d.addFV("cpx6", MaxEntNE.getWordClusterPrefix(current, 6));
            d.addFV("cpx10", MaxEntNE.getWordClusterPrefix(current, 10));
            d.addFV("cpx20", MaxEntNE.getWordClusterPrefix(current, 20));
            d.addFV("npx4", MaxEntNE.getWordClusterPrefix(next, 4));
            d.addFV("npx6", MaxEntNE.getWordClusterPrefix(next, 6));
            d.addFV("npx10", MaxEntNE.getWordClusterPrefix(next, 10));
            d.addFV("npx20", MaxEntNE.getWordClusterPrefix(next, 20));
        }
        return d;
    }

    /** Returns true when the token's "case" attribute equals "forcedCap" (null-safe, value comparison). */
    private static boolean isForcedCap(Annotation token) {
        return "forcedCap".equals(token.get("case"));
    }

    /**
     * Looks up the tag types recorded for a word.
     *
     * <p>When {@code trainingDocCount} is negative the full table is used;
     * otherwise an even count selects the table built from odd documents and
     * an odd count selects the table built from even documents.
     *
     * @param word word to look up (lower-cased before lookup)
     * @return the recorded type characters, or "OOV" if the word is not in the table
     */
    private String typeFeature(String word) {
        Map<String, String> table;
        if (trainingDocCount < 0) {
            table = this.wordType;
        } else if (trainingDocCount % 2 == 0) {
            table = this.wordTypeOdds;
        } else {
            table = this.wordTypeEvens;
        }
        String types = table.get(word.toLowerCase());
        return types == null ? "OOV" : types;
    }

    /**
     * Returns the "type" attribute of the first "onoma" annotation that starts
     * where {@code token} starts.
     *
     * @param doc   document holding the annotations
     * @param token token whose start position is queried
     * @return the annotation's type, or null when no such annotation exists
     */
    private String onomaFeature(Document doc, Annotation token) {
        Vector<Annotation> found = doc.annotationsAt(token.start(), "onoma");
        if (found != null && !found.isEmpty()) {
            Annotation first = found.get(0);
            return (String) first.get("type");
        }
        return null;
    }

    /**
     * Remembers, for the current document, that {@code word} received a tag of
     * the given type. Tags equal to "other" are ignored. A two-character prefix
     * ending in '-' is stripped, and the type is stored as its first character.
     *
     * @param word token text used as the cache key
     * @param tag  tag assigned to the token
     */
    private void addToCache(String word, String tag) {
        if (!tag.equals("other")) {
            String type = tag.charAt(1) == '-' ? tag.substring(2) : tag;
            char initial = type.charAt(0);
            String known = this.cache.get(word);
            if (known == null) {
                this.cache.put(word, String.valueOf(initial));
            } else if (known.indexOf(initial) < 0) {
                this.cache.put(word, known + initial);
            }
        }
    }

    /** Builds the model by delegating to {@code MaxEntModel.buildModel}. */
    @Override
    public void createModel() {
        model.buildModel();
    }

    /**
     * Writes the word-type table and the model to the named file.
     *
     * <p>The writer opened here is always closed here, so buffered output is
     * flushed and the file handle released even if the model writer leaves the
     * stream open. Closing an already-closed writer has no effect. On any I/O
     * error a message is printed and the JVM exits with status 1.
     *
     * @param fileName path of the file to write
     */
    @Override
    public void store(String fileName) {
        BufferedWriter writer = null;
        try {
            writer = new BufferedWriter(new FileWriter(fileName));
            this.store(writer);
        }
        catch (IOException e) {
            System.err.println("Error in MaxEntNE.store: " + e);
            System.exit(1);
        }
        finally {
            if (writer != null) {
                try {
                    writer.close();
                }
                catch (IOException e) {
                    // A failed close can mean lost output, so treat it like any other store error.
                    System.err.println("Error in MaxEntNE.store: " + e);
                    System.exit(1);
                }
            }
        }
    }

    /**
     * Writes the word-type table, one "word types" pair per line, followed by
     * a line containing "endWordType", and then saves the model to the same
     * writer. On an I/O error while writing the table a message is printed and
     * the JVM exits with status 1. The writer is not closed by this method.
     *
     * @param writer destination for the table and the model
     */
    public void store(BufferedWriter writer) {
        try {
            for (Map.Entry<String, String> entry : this.wordType.entrySet()) {
                writer.write(entry.getKey() + " " + entry.getValue());
                writer.newLine();
            }
            writer.write("endWordType");
            writer.newLine();
        }
        catch (IOException e) {
            System.err.println("Error in MaxEntNE.store: " + e);
            System.exit(1);
        }
        this.model.saveModel(writer);
    }

    /**
     * Reads the word-type table and the model from the named file.
     *
     * <p>The reader opened here is always closed here so the file handle is
     * released; closing an already-closed reader has no effect. If the file
     * cannot be opened a message is printed and the JVM exits with status 1.
     *
     * @param fileName path of the file to read
     */
    @Override
    public void load(String fileName) {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(fileName));
            this.load(reader);
        }
        catch (IOException e) {
            System.err.println("Error in MaxEntNE.load: " + e);
            System.exit(1);
        }
        finally {
            if (reader != null) {
                try {
                    reader.close();
                }
                catch (IOException e) {
                    // Everything has already been read; report but do not abort.
                    System.err.println("Error in MaxEntNE.load: " + e);
                }
            }
        }
    }

    /**
     * Reads the word-type table (lines of "word types" up to a line equal to
     * "endWordType") and then loads the model from the same reader.
     *
     * <p>Lines that do not split into exactly two whitespace-separated fields
     * are reported and skipped. If the input ends before "endWordType" is
     * seen, or an I/O error occurs, a message is printed and the JVM exits
     * with status 1. The reader is not closed by this method.
     *
     * @param reader source positioned at the start of the word-type table
     */
    public void load(BufferedReader reader) {
        try {
            String line;
            // readLine() returns null at end of input; test for that before comparing.
            while ((line = reader.readLine()) != null && !line.equals("endWordType")) {
                String[] tags = line.split("\\s+");
                if (tags.length != 2) {
                    System.err.println("MaxEntNE.load:  invalid line " + line);
                    continue;
                }
                this.wordType.put(tags[0], tags[1]);
            }
            if (line == null) {
                System.err.println("Error in MaxEntNE.load: end of input reached before endWordType");
                System.exit(1);
            }
        }
        catch (IOException e) {
            System.err.println("Error in MaxEntNE.load: " + e);
            System.exit(1);
        }
        this.model.loadModel(reader);
    }

    /**
     * Tags a token sequence greedily, left to right: each token receives the
     * model's best outcome given the tag chosen for the previous token.
     *
     * <p>As in {@code viterbi}, the per-token onoma types are computed before
     * any features are generated (feature generation indexes that array when
     * {@code useOnoma} is set), and {@code trainingDocCount} is set to -1 so
     * that {@code typeFeature} consults the full word-type table.
     *
     * @param doc    document containing the tokens
     * @param tokens token annotations, in order
     * @return the tag chosen for each token, parallel to {@code tokens}
     */
    public String[] simpleDecoder(Document doc, Annotation[] tokens) {
        trainingDocCount = -1;
        int nTokens = tokens.length;
        String[] words = new String[nTokens];
        String[] tags = new String[nTokens];
        if (this.useOnoma) {
            // Must match this sequence's length; a missing or stale array would
            // make feature generation fail or read another sequence's values.
            this.onomaType = new String[nTokens];
        }
        for (int iToken = 0; iToken < nTokens; ++iToken) {
            words[iToken] = doc.text(tokens[iToken]).trim();
            if (this.useOnoma) {
                this.onomaType[iToken] = this.onomaFeature(doc, tokens[iToken]);
            }
        }
        String priorTag = "other";
        for (int iToken = 0; iToken < nTokens; ++iToken) {
            Datum d = this.NEfeatures(iToken, words, tokens, priorTag, doc);
            tags[iToken] = this.model.bestOutcome(d);
            this.addToCache(words[iToken], tags[iToken]);
            priorTag = tags[iToken];
        }
        return tags;
    }

    /**
     * Finds the most probable tag sequence for the tokens with a Viterbi
     * search over the model's outcomes.
     *
     * <p>States beginning with 'I' are not allowed on the first token, an
     * "I-x" state may only follow a state with the same suffix (see
     * {@code allowedTransition}), and {@code NameConstraints} may rule out
     * further states. {@code otherOffset} is added to the log probability of
     * the "other" state. The chosen tags are added to the per-document cache.
     *
     * @param doc    document containing the tokens
     * @param tokens token annotations, in order
     * @return the tag of each token; an empty array for an empty input; or
     *         null (after printing "No valid path.") when no sequence
     *         satisfies the constraints
     */
    @Override
    public String[] viterbi(Document doc, Annotation[] tokens) {
        // A negative count makes typeFeature use the full word-type table.
        trainingDocCount = -1;
        int nTokens = tokens.length;
        if (nTokens == 0) {
            return new String[0];
        }
        String[] words = new String[nTokens];
        if (this.useOnoma) {
            this.onomaType = new String[nTokens];
        }
        for (int iToken = 0; iToken < nTokens; ++iToken) {
            words[iToken] = doc.text(tokens[iToken]).trim();
            if (!this.useOnoma) continue;
            this.onomaType[iToken] = this.onomaFeature(doc, tokens[iToken]);
        }
        int nStates = this.model.getNumOutcomes();
        this.state = new String[nStates];
        for (int i = 0; i < nStates; ++i) {
            this.state[i] = this.model.getOutcome(i);
        }
        // prob[t][s]: best log probability of any path ending in state s at token t.
        // prior[t][s]: predecessor state on that path (unused for t == 0).
        double[][] prob = new double[nTokens][nStates];
        int[][] prior = new int[nTokens][nStates];
        NameConstraints constraints = new NameConstraints(doc, tokens, this.state);
        int[] path = new int[nTokens];
        final double IMPOSSIBLE = -1000000.0;

        // First token: the prior tag is always "other", so the outcome
        // distribution is the same for every state and is computed at most once.
        double[] firstOutcome = null;
        for (int iState = 0; iState < nStates; ++iState) {
            if (this.state[iState].charAt(0) == 'I' || !constraints.allowedState(0, iState)) {
                prob[0][iState] = IMPOSSIBLE;
                continue;
            }
            if (firstOutcome == null) {
                Datum d = this.NEfeatures(0, words, tokens, "other", doc);
                firstOutcome = this.model.getOutcomeProbabilities(d);
            }
            double p = Math.log(firstOutcome[iState]);
            if (this.state[iState].equals("other")) {
                p += otherOffset;
            }
            prob[0][iState] = p;
        }

        for (int iToken = 1; iToken < nTokens; ++iToken) {
            for (int iState = 0; iState < nStates; ++iState) {
                prob[iToken][iState] = IMPOSSIBLE;
                prior[iToken][iState] = -1;
            }
            for (int iPrior = 0; iPrior < nStates; ++iPrior) {
                double priorProb = prob[iToken - 1][iPrior];
                // Never extend a path through an unreachable state. Without this
                // check a positive otherOffset could lift such a path above
                // IMPOSSIBLE; it also avoids evaluating the model needlessly.
                if (priorProb <= IMPOSSIBLE) {
                    continue;
                }
                Datum d = this.NEfeatures(iToken, words, tokens, this.state[iPrior], doc);
                double[] outcome = this.model.getOutcomeProbabilities(d);
                for (int iState = 0; iState < nStates; ++iState) {
                    double p = Math.log(outcome[iState]);
                    if (this.state[iState].equals("other")) {
                        p += otherOffset;
                    }
                    if (!this.allowedTransition(iPrior, iState) || !constraints.allowedState(iToken, iState) || !(priorProb + p > prob[iToken][iState])) continue;
                    prob[iToken][iState] = priorProb + p;
                    prior[iToken][iState] = iPrior;
                }
            }
        }

        // Pick the best final state, then follow the back-pointers.
        double bestProb = IMPOSSIBLE;
        int bestState = -1;
        for (int iState = 0; iState < nStates; ++iState) {
            if (!(prob[nTokens - 1][iState] > bestProb)) continue;
            bestProb = prob[nTokens - 1][iState];
            bestState = iState;
        }
        if (bestState < 0) {
            System.err.println("No valid path.");
            return null;
        }
        path[nTokens - 1] = bestState;
        for (int iToken = nTokens - 2; iToken >= 0; --iToken) {
            int iPrior = prior[iToken + 1][path[iToken + 1]];
            if (iPrior < 0) {
                System.err.println("No valid path.");
                return null;
            }
            path[iToken] = iPrior;
        }
        String[] tags = new String[nTokens];
        for (int iToken = 0; iToken < nTokens; ++iToken) {
            tags[iToken] = this.state[path[iToken]];
            this.addToCache(words[iToken], tags[iToken]);
        }
        return tags;
    }

    /**
     * Tells whether state {@code iState} may directly follow state
     * {@code iPrior}. A state whose name starts with "I-" may only follow a
     * state whose name is identical from the second character on (so "I-x"
     * may follow "B-x" or "I-x"); every other transition is allowed.
     *
     * @param iPrior index of the preceding state in {@code state}
     * @param iState index of the candidate state in {@code state}
     * @return true if the transition is permitted
     */
    private boolean allowedTransition(int iPrior, int iState) {
        String previous = this.state[iPrior];
        String candidate = this.state[iState];
        boolean continuation = "I-".equals(candidate.substring(0, 2));
        return !continuation || previous.substring(1).equals(candidate.substring(1));
    }

    /**
     * Classifies the orthographic shape of a word. The first matching rule,
     * in this order, determines the result:
     * <ol>
     *   <li>only digits (this includes the empty string): "twoDigitNum",
     *       "fourDigitNum" or "otherNum", depending on the length;</li>
     *   <li>only upper-case characters: "allCaps";</li>
     *   <li>{@code forcedCap} is set: "forcedCap";</li>
     *   <li>upper-case first character followed only by lower-case characters: "initCap";</li>
     *   <li>only lower-case characters: "lowerCase";</li>
     *   <li>only letters and periods: "abbrev";</li>
     *   <li>only letters and hyphens: "hyphenated";</li>
     *   <li>anything else: "other".</li>
     * </ol>
     *
     * @param word      word to classify
     * @param forcedCap whether the caller marked the word's capitalization as forced
     * @return the shape label
     */
    static String wordFeature(String word, boolean forcedCap) {
        final int length = word.length();
        boolean digitsOnly = true;
        boolean upperOnly = true;
        boolean lowerOnly = true;
        boolean lettersOrHyphen = true;
        boolean lettersOrPeriod = true;
        boolean capitalized = true;
        for (int pos = 0; pos < length; pos++) {
            char ch = word.charAt(pos);
            boolean upper = Character.isUpperCase(ch);
            boolean lower = Character.isLowerCase(ch);
            boolean letter = Character.isLetter(ch);
            digitsOnly &= Character.isDigit(ch);
            upperOnly &= upper;
            lowerOnly &= lower;
            lettersOrHyphen &= letter || ch == '-';
            lettersOrPeriod &= letter || ch == '.';
            // First character must be upper case, all later ones lower case.
            capitalized &= (pos == 0) ? upper : lower;
        }
        if (digitsOnly) {
            switch (length) {
                case 2:
                    return "twoDigitNum";
                case 4:
                    return "fourDigitNum";
                default:
                    return "otherNum";
            }
        }
        if (upperOnly) {
            return "allCaps";
        }
        if (forcedCap) {
            return "forcedCap";
        }
        if (capitalized) {
            return "initCap";
        }
        if (lowerOnly) {
            return "lowerCase";
        }
        if (lettersOrPeriod) {
            return "abbrev";
        }
        return lettersOrHyphen ? "hyphenated" : "other";
    }

    /**
     * Loads word clusters from a tab-separated file and enables the
     * word-cluster features. Each line is expected to hold the cluster string
     * in its first field and the word in its second; lines with fewer than two
     * fields are skipped. The file is closed before this method returns,
     * whether or not reading succeeds.
     *
     * @param wordClusterFile path of the cluster file
     * @throws IOException if the file cannot be opened or read
     */
    public static void loadWordClusters(String wordClusterFile) throws IOException {
        System.out.println("Loading word clusters from " + wordClusterFile);
        File paths = new File(wordClusterFile);
        BufferedReader rdr = new BufferedReader(new FileReader(paths));
        try {
            String sline;
            while ((sline = rdr.readLine()) != null) {
                String[] tokens = sline.split("\t");
                if (tokens.length < 2) {
                    continue;
                }
                word2cluster.put(tokens[1], tokens[0]);
            }
        } finally {
            rdr.close();
        }
        // Only reached when the whole file was read successfully.
        useWordClusters = true;
    }

    /**
     * Returns the leading {@code bits} characters of the cluster string
     * recorded for {@code word}. A cluster string shorter than {@code bits}
     * is returned whole; a word with no recorded cluster yields "nil".
     *
     * @param word word to look up in {@code word2cluster}
     * @param bits maximum number of leading characters to keep
     * @return the (possibly truncated) cluster string, or "nil"
     */
    private static String getWordClusterPrefix(String word, int bits) {
        if (!word2cluster.containsKey(word)) {
            return "nil";
        }
        String cluster = word2cluster.get(word);
        return cluster.length() >= bits ? cluster.substring(0, bits) : cluster;
    }
}

