/*
 * Decompiled with CFR 0.152.
 */
package edu.nyu.jet.aceJet;

import edu.nyu.jet.Control;
import edu.nyu.jet.JetTest;
import edu.nyu.jet.aceJet.Ace;
import edu.nyu.jet.aceJet.AceDocument;
import edu.nyu.jet.aceJet.AceEntityMention;
import edu.nyu.jet.aceJet.EDTtypeData;
import edu.nyu.jet.aceJet.LearnRelations;
import edu.nyu.jet.aceJet.NameSubtyper;
import edu.nyu.jet.aceJet.PerfectAce;
import edu.nyu.jet.lex.EnglishLex;
import edu.nyu.jet.lisp.FeatureSet;
import edu.nyu.jet.parser.SynFun;
import edu.nyu.jet.pat.Pat;
import edu.nyu.jet.refres.Resolve;
import edu.nyu.jet.tipster.Annotation;
import edu.nyu.jet.tipster.Document;
import edu.nyu.jet.tipster.ExternalDocument;
import edu.nyu.jet.tipster.Span;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.Vector;

/**
 * Trains, stores and applies a dictionary that maps the head word of a
 * nominal mention to an ACE EDT type (optionally with a subtype), together
 * with a companion dictionary of heads that are predominantly generic.
 *
 * <p>Run as a program ({@link #main}), the class reads ACE training documents
 * and writes both dictionaries.  At analysis time, {@link #readTypeDict()} and
 * {@link #readGenericDict()} load the dictionaries and
 * {@link #getTypeSubtype} / {@link #hasGenericHead} consult them.
 *
 * <p>All state is held in static fields; nothing here is synchronized.
 */
public class EDTtype {
    /** Upper bound on the number of mentions used for training; later mentions are used for evaluation. */
    private static final int MAX_TRAINING_MENTIONS = 200000;

    /** The document currently being processed by {@link #trainFromFileList}. */
    static ExternalDocument doc;
    /*
     * NOTE(review): this flag is never assigned inside this class, while
     * trainFromFileList stores the per-document case check in Ace.monocase.
     * Confirm which of the two flags processMentions is meant to consult.
     */
    static boolean monocase;
    /** Output stream for the type dictionary (opened in {@link #main}). */
    static PrintStream writer;
    /** Output stream for the generic dictionary (opened in {@link #main}). */
    static PrintStream gwriter;
    /** Head word -> accumulated type statistics. */
    static TreeMap<String, EDTtypeData> typeDataMap = new TreeMap<String, EDTtypeData>();
    /** Head words classified as generic by {@link #readGenericDict(String)}. */
    static TreeSet<String> genericHeads = new TreeSet<String>();
    static int trainingMentions = 0;
    static int correct = 0;
    static int incorrect = 0;
    /** Number of times {@link #getTypeSubtype} fell through every rule. */
    static int unknown = 0;
    /** Suffix appended to a document name to obtain its APF file name. */
    static String apfFileSuffix;
    static String dataDir;
    /** When false, subtypes are recorded as {@code "*"} during training. */
    static boolean useSubtype;
    /** Heads whose type is taken from their "of" complement (see {@link #getTypeSubtype}). */
    private static final String[] partitives = new String[]{"group", "part", "member", "portion", "center", "bunch", "couple", "remainder", "rest", "lot", "percent", "%", "dozen", "hundred", "thousand", "some", "either", "neither", "any", "each", "all", "both", "none", "most", "many", "afew", "one", "q"};
    private static final String[] governmentTitles = new String[]{"Vice-President", "Vice-Premier", "Prime-Minister", "Foreign-Minister", "Foreign-Secretary", "Secretary-of-State", "Attorney-General", "Justice-Minister", "Secretary-General"};
    /** Hand-coded head word -> type[:subtype] table used by {@link #handCodedEDTtype}. */
    static HashMap<String, String> specifiedEDTtype = new HashMap<String, String>();

    /**
     * Command-line entry point for training.
     *
     * <p>Expects {@code 4n+3} arguments (n &gt;= 1): a property file, the type
     * dictionary to write, the generic dictionary to write, and then one or
     * more groups of {@code year useSubtypes directory filelist}.  A
     * {@code useSubtypes} value of {@code "-"} disables subtypes for that
     * group.  Only the years 2002 through 2005 are accepted.
     *
     * @param args command-line arguments as described above
     * @throws IOException if an output file cannot be opened or a file list cannot be read
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 7 || (args.length - 3) % 4 != 0) {
            System.err.println("EDTtype must have 4n+3 arguments:");
            System.err.print("  property-file type-dict generic-dict ");
            // println so the usage text ends with a newline before exiting
            System.err.println("(year useSubtypes directory filelist)+");
            System.exit(1);
        }
        System.out.println("Starting ACE EDT Type / Generic Training ...");
        JetTest.initializeFromConfig(args[0]);
        Pat.trace = false;
        Resolve.ACE = true;
        writer = new PrintStream(new FileOutputStream(args[1]));
        gwriter = new PrintStream(new FileOutputStream(args[2]));
        for (int iarg = 3; iarg < args.length; iarg += 4) {
            String year = args[iarg];
            useSubtype = !args[iarg + 1].equals("-");
            String directory = args[iarg + 2];
            if (!directory.endsWith("/")) {
                directory = directory + "/";
            }
            String filelist = args[iarg + 3];
            // The year selects both the AceDocument format flags and the APF file suffix.
            if (year.equals("2002")) {
                AceDocument.ace2004 = false;
                AceDocument.ace2005 = false;
                apfFileSuffix = ".sgm.tmx.rdc.xml";
            } else if (year.equals("2003")) {
                AceDocument.ace2004 = false;
                AceDocument.ace2005 = false;
                apfFileSuffix = ".apf.xml";
            } else if (year.equals("2004")) {
                AceDocument.ace2004 = true;
                AceDocument.ace2005 = false;
                apfFileSuffix = ".apf.xml";
            } else if (year.equals("2005")) {
                AceDocument.ace2004 = true;
                AceDocument.ace2005 = true;
                apfFileSuffix = ".apf.xml";
            } else {
                System.err.println("Invalid year " + year + " in argument list.");
                System.err.println("(Only 2002 - 2005 allowed.)");
                System.exit(1);
            }
            EDTtype.trainFromFileList(directory, filelist);
        }
        EDTtype.writeTypeDict(writer);
        EDTtype.writeGenericDict(gwriter);
        EDTtypeData.reportSubtypeTotals();
        System.out.println(trainingMentions + " training mentions");
        System.out.println(correct + " correct predictions, " + incorrect + " incorrect");
        System.out.println(unknown + " unknown");
    }

    /**
     * Processes every document named in {@code fileList} (one name per line,
     * without extension): opens {@code dataDir + name + ".sgm"}, runs the
     * document through {@code Control.processDocument}, loads the matching
     * APF file, and passes the document to {@link #processMentions}.
     *
     * <p>Blank lines in the file list are skipped.  The list reader is closed
     * even if processing fails.
     *
     * @param dataDir  directory prefix (expected to end with a separator)
     * @param fileList path of the file listing the document names
     * @throws IOException if the file list cannot be read
     */
    static void trainFromFileList(String dataDir, String fileList) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(fileList));
        try {
            int docCount = 0;
            String currentDoc;
            while ((currentDoc = reader.readLine()) != null) {
                if (currentDoc.trim().length() == 0) {
                    // an empty entry would otherwise be turned into the bogus path "<dataDir>.sgm"
                    continue;
                }
                System.out.println("\nProcessing document " + ++docCount + ": " + currentDoc);
                String textFileName = dataDir + currentDoc + ".sgm";
                doc = new ExternalDocument("sgml", textFileName);
                doc.setAllTags(true);
                if (!AceDocument.ace2005) {
                    doc.setEmptyTags(new String[]{"W", "TURN"});
                }
                doc.open();
                Ace.monocase = Ace.allLowerCase(doc);
                Control.processDocument(doc, null, false, docCount);
                String apfFileName = dataDir + currentDoc + apfFileSuffix;
                AceDocument aceDoc = new AceDocument(textFileName, apfFileName);
                LearnRelations.findEntityMentions(aceDoc);
                EDTtype.processMentions(doc);
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Gathers the mentions of {@code doc} and, for each one whose head is not
     * a pronoun, determiner or name, either adds it to the training
     * statistics (while fewer than {@value #MAX_TRAINING_MENTIONS} mentions
     * have been seen) or evaluates the current prediction against the key.
     *
     * <p>The key type comes from the APF mention starting at the same offset
     * as the head (looked up in {@code LearnRelations.mentionStartMap}); when
     * there is none the key type is {@code "OTHER"}.
     *
     * @param doc the processed document
     */
    static void processMentions(ExternalDocument doc) {
        Vector<Annotation> mentions = Resolve.gatherMentions(doc, new Span(0, doc.length()));
        for (Annotation mention : mentions) {
            Annotation head = Resolve.getHeadC(mention);
            String cat = (String) head.get("cat");
            if (cat == null) {
                // same default as getTypeSubtype, instead of failing on a missing category
                cat = "nn";
            }
            if (cat.equals("pro") || cat.equals("det") || cat.equals("name")) {
                continue;
            }
            String headString = Resolve.normalizeName(doc.text(head));
            if (monocase) {
                headString = headString.toLowerCase();
            }
            AceEntityMention apfMention = (AceEntityMention) LearnRelations.mentionStartMap.get(Integer.valueOf(head.start()));
            String keyType = "OTHER";
            String keySubtype = "";
            if (apfMention != null) {
                keyType = apfMention.entity.type;
                keySubtype = apfMention.entity.subtype;
            }
            if (!useSubtype) {
                keySubtype = "*";
            }
            if (trainingMentions < MAX_TRAINING_MENTIONS) {
                ++trainingMentions;
                EDTtypeData wt = typeDataMap.get(headString);
                if (wt == null) {
                    wt = new EDTtypeData(headString);
                    typeDataMap.put(headString, wt);
                }
                wt.incrementTypeCount(keyType, keySubtype, 1);
                if (apfMention != null) {
                    wt.incrementGenericCount(apfMention.entity.generic);
                }
                continue;
            }
            // Past the training cap: score the prediction against the key.
            String prediction = EDTtype.bareType(EDTtype.getTypeSubtype(doc, null, mention));
            if (prediction.equals(keyType)) {
                ++correct;
            } else {
                ++incorrect;
                System.out.print("Word: " + headString);
                System.out.println(" predict " + prediction + ", should be " + keyType);
            }
        }
    }

    /**
     * Writes every entry of the type dictionary to {@code writer} and then
     * closes {@code writer}.
     *
     * @param writer destination stream; closed on return
     */
    static void writeTypeDict(PrintStream writer) {
        for (EDTtypeData data : typeDataMap.values()) {
            data.write(writer);
        }
        writer.close();
    }

    /**
     * Resets the type dictionary and reloads it from the file named by the
     * {@code Ace.EDTtype.fileName} configuration entry, followed by the
     * optional auxiliary file named by {@code Ace.EDTtype.auxFileName} when
     * that file exists and is not a directory.
     */
    public static void readTypeDict() {
        typeDataMap = new TreeMap<String, EDTtypeData>();
        String fileName = JetTest.getConfigFile("Ace.EDTtype.fileName");
        if (fileName != null) {
            EDTtype.readTypeDict(fileName);
        } else {
            System.err.println("EDTtype.readTypeDict:  no file name specified in config file");
        }
        String auxFileName = JetTest.getConfigFile("Ace.EDTtype.auxFileName");
        if (auxFileName != null) {
            File f = new File(auxFileName);
            if (f.exists() && !f.isDirectory()) {
                EDTtype.readTypeDict(auxFileName);
            }
        }
    }

    /**
     * Adds the entries of {@code dictFile} to the type dictionary; an entry
     * for a word already present replaces the earlier one.  Lines for which
     * {@code EDTtypeData.readLine} returns {@code null} are ignored.  I/O
     * failures are reported on standard error and not propagated.
     *
     * @param dictFile path of the dictionary file
     */
    public static void readTypeDict(String dictFile) {
        System.err.println("Loading type dictionary " + dictFile);
        try {
            BufferedReader reader = new BufferedReader(new FileReader(dictFile));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    EDTtypeData data = EDTtypeData.readLine(line);
                    if (data != null) {
                        typeDataMap.put(data.word, data);
                    }
                }
            } finally {
                reader.close();
            }
            System.err.println("Type dictionary loaded.");
        }
        catch (IOException e) {
            System.err.print("Unable to load dictionary due to exception: ");
            System.err.println(e);
        }
    }

    /**
     * @return true if the type dictionary contains at least one entry
     */
    public static boolean isDictLoaded() {
        return !typeDataMap.isEmpty();
    }

    /**
     * Writes one line {@code word | genericCount nonGenericCount} for every
     * dictionary entry with at least one positive count, then closes
     * {@code writer}.
     *
     * @param writer destination stream; closed on return
     */
    static void writeGenericDict(PrintStream writer) {
        for (EDTtypeData td : typeDataMap.values()) {
            if (td.genericCount > 0 || td.nonGenericCount > 0) {
                writer.println(td.word + " | " + td.genericCount + " " + td.nonGenericCount);
            }
        }
        writer.close();
    }

    /** Replaces the generic-head set with an empty one. */
    public static void emptyGenericDict() {
        genericHeads = new TreeSet<String>();
    }

    /**
     * Loads the generic dictionary from the file named by the
     * {@code Ace.generic.fileName} configuration entry, or reports on
     * standard error that none was specified.
     */
    public static void readGenericDict() {
        String fileName = JetTest.getConfigFile("Ace.generic.fileName");
        if (fileName != null) {
            EDTtype.readGenericDict(fileName);
        } else {
            System.err.println("EDTtype.readGenericDict:  no file name specified in config file");
        }
    }

    /**
     * Rebuilds the generic-head set from {@code dictFile}, whose lines have
     * the form {@code term | genericCount nonGenericCount}.  A term is kept
     * when its generic count exceeds its non-generic count and the two counts
     * sum to more than 2.
     *
     * <p>Loading stops at the first malformed line (no {@code '|'} after the
     * term, missing counts, or counts that are not integers), which is
     * reported on standard error.  I/O failures are likewise reported and not
     * propagated.  The reader is always closed.
     *
     * @param dictFile path of the generic dictionary file
     */
    public static void readGenericDict(String dictFile) {
        System.err.println("Loading generic dictionary.");
        genericHeads = new TreeSet<String>();
        try {
            BufferedReader reader = new BufferedReader(new FileReader(dictFile));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    int split = line.indexOf('|');
                    // need "<term> |" before the bar and at least "| " after it
                    if (split <= 1 || split + 2 > line.length()) {
                        reportGenericDictError(line);
                        return;
                    }
                    String term = line.substring(0, split - 1);
                    StringTokenizer st = new StringTokenizer(line.substring(split + 2));
                    if (st.countTokens() < 2) {
                        reportGenericDictError(line);
                        return;
                    }
                    int genericCount;
                    int nonGenericCount;
                    try {
                        genericCount = Integer.parseInt(st.nextToken());
                        nonGenericCount = Integer.parseInt(st.nextToken());
                    }
                    catch (NumberFormatException e) {
                        reportGenericDictError(line);
                        return;
                    }
                    if (genericCount > nonGenericCount && genericCount + nonGenericCount > 2) {
                        genericHeads.add(term);
                    }
                }
                System.err.println("Generic dictionary loaded.");
            } finally {
                reader.close();
            }
        }
        catch (IOException e) {
            System.err.print("Unable to load dictionary due to exception: ");
            System.err.println(e);
        }
    }

    /** Reports a malformed generic-dictionary line on standard error. */
    private static void reportGenericDictError(String line) {
        System.err.println("** error in generic dict: " + line);
    }

    /**
     * @param doc     the document containing {@code mention}
     * @param mention the mention to test
     * @return true if the normalized head of {@code mention} is in the generic-head set
     */
    public static boolean hasGenericHead(Document doc, Annotation mention) {
        Annotation headC = Resolve.getHeadC(mention);
        String headWord = Resolve.normalizeName(doc.text(headC).trim());
        return genericHeads.contains(headWord);
    }

    /**
     * Determines the EDT type of {@code mention}, as {@code TYPE} or
     * {@code TYPE:Subtype}.  The rules are tried in this order:
     * <ol>
     * <li>key information from {@code PerfectAce}, when {@code Ace.perfectMentions} is set;</li>
     * <li>the gazetteer (nationals / nationalities);</li>
     * <li>for names, the type recorded as the mention's head, with special
     *     cases for "otherName", "url", "email" and for GPE names preceded by
     *     a directional modifier;</li>
     * <li>for partitives and quantifiers, the type of the "of" complement;</li>
     * <li>{@code "OTHER"} for pronouns, determiners and quantifiers;</li>
     * <li>the hand-coded table;</li>
     * <li>the type dictionary, looked up by head word, by the head returned
     *     by {@code SynFun.getHead}, and by that head's plural;</li>
     * <li>{@code "PERSON:Individual"} for human mentions when {@code Ace.preferRelations} is set.</li>
     * </ol>
     * If nothing applies, {@link #unknown} is incremented and {@code "OTHER"} is returned.
     *
     * @param doc     the document containing the mention
     * @param entity  the entity the mention belongs to, or null
     * @param mention the mention to classify
     * @return the type, optionally followed by {@code ':'} and a subtype
     */
    public static String getTypeSubtype(Document doc, Annotation entity, Annotation mention) {
        // Lower-case null-safely so that the null checks on paHead below are meaningful.
        String rawHead = SynFun.getHead(doc, mention);
        String paHead = rawHead == null ? null : rawHead.toLowerCase();
        String det = SynFun.getDet(mention);
        boolean isHumanMention = SynFun.getHuman(mention);
        Annotation headC = Resolve.getHeadC(mention);
        if (Ace.perfectMentions) {
            String tsubt = PerfectAce.getTypeSubtype(headC);
            if (tsubt != null && !tsubt.equals("")) {
                return tsubt;
            }
            System.err.println("*** no type info for " + doc.text(headC));
        }
        String gazetteerType = EDTtype.getGazetteerTypeSubtype(doc, mention);
        if (gazetteerType != null) {
            return gazetteerType;
        }
        String headWord = Resolve.normalizeName(doc.text(headC).trim());
        String name = SynFun.getName(doc, mention);
        String cat = (String) headC.get("cat");
        if (cat == null) {
            cat = "nn";
        }
        if (name != null) {
            if (paHead == null || paHead.equalsIgnoreCase("otherName") || paHead.equals("url")) {
                return "OTHER";
            }
            if (paHead.equals("email")) {
                return AceDocument.ace2005 ? EDTtype.typeAndSubtype("PERSON", "Individual") : "PERSON";
            }
            String nameType = paHead.toUpperCase().intern();
            if (EDTtype.sectionOfGPE(doc, mention, nameType, headC)) {
                return EDTtype.typeAndSubtype("LOCATION", AceDocument.ace2005 ? "Region-General" : "Region-National");
            }
            String subtype = null;
            Object pa = SynFun.getPA(mention);
            if (pa instanceof FeatureSet) {
                subtype = (String) ((FeatureSet) pa).get("subtype");
            }
            if (subtype == null) {
                subtype = NameSubtyper.classify(name, nameType);
            }
            return EDTtype.typeAndSubtype(nameType, subtype);
        }
        if (EDTtype.in(paHead, partitives) || cat.equals("q")) {
            // Walk down the chain of heads until one carries an "of" complement.
            Annotation x = mention;
            while (x != null && x.get("of") == null) {
                x = (Annotation) x.get("headC");
            }
            if (x != null) {
                Annotation of = (Annotation) x.get("of");
                if (Ace.entityTrace) {
                    System.out.println("Using computed type for " + paHead);
                }
                String ofType = EDTtype.getTypeSubtype(doc, null, of);
                // "part of <GPE>" / "portion of <GPE>" denotes a region rather than the GPE itself
                if (EDTtype.bareType(ofType).equals("GPE") && ("part".equals(paHead) || "portion".equals(paHead))) {
                    ofType = AceDocument.ace2005 ? "LOCATION:Region-General" : "LOCATION:Region-Subnational";
                }
                return ofType;
            }
        }
        if (cat.equals("pro") || cat.equals("det") || cat.equals("q")) {
            return "OTHER";
        }
        String type = EDTtype.handCodedEDTtype(det, headWord);
        if (type != null) {
            return type;
        }
        type = EDTtype.lookUpEDTtype(headWord.toLowerCase());
        if (type != null) {
            return type.intern();
        }
        // Head word unknown but the head from SynFun.getHead is known: report the Group variant.
        type = EDTtype.lookUpEDTtype(paHead);
        if (type != null) {
            return type.replace("Individual", "Group").intern();
        }
        if (paHead != null) {
            String[] plural = EnglishLex.nounPlural(new String[]{paHead});
            if (plural != null && plural.length > 0) {
                // Only the plural is known: report the Individual variant.
                type = EDTtype.lookUpEDTtype(plural[0]);
                if (type != null) {
                    return type.replace("Group", "Individual").intern();
                }
            }
        }
        if (Ace.preferRelations && (isHumanMention || entity != null && "t".equals(entity.get("human")))) {
            return "PERSON:Individual";
        }
        ++unknown;
        return "OTHER";
    }

    /**
     * Consults {@code Ace.gazetteer} for the head tokens of {@code mention}.
     * For a mention whose {@code cat} is {@code "np"}: a national yields
     * PERSON (Individual) and nationals yield PERSON (Group); for any other
     * mention a nationality yields {@code "GPE:Nation"}.
     *
     * @return the type, or null if there is no gazetteer or no match
     */
    private static String getGazetteerTypeSubtype(Document doc, Annotation mention) {
        if (Ace.gazetteer == null) {
            return null;
        }
        String[] headTokens = Resolve.getHeadTokens(doc, mention);
        // equals, not ==: the comparison must not depend on the feature value being interned
        if ("np".equals(mention.get("cat"))) {
            if (Ace.gazetteer.isNational(headTokens)) {
                return AceDocument.ace2005 ? "PERSON:Individual" : "PERSON";
            }
            if (Ace.gazetteer.isNationals(headTokens)) {
                return AceDocument.ace2005 ? "PERSON:Group" : "PERSON";
            }
        } else if (Ace.gazetteer.isNationality(headTokens)) {
            return "GPE:Nation";
        }
        return null;
    }

    /**
     * Tests whether a GPE name is preceded, inside the mention's extent, by a
     * modifier containing "north", "south", "east", "west" or "central".
     * When it is, the mention is marked with {@code nameWithModifier = "t"}.
     *
     * @return true if {@code type} is "GPE" and such a modifier precedes the head
     */
    private static boolean sectionOfGPE(Document doc, Annotation mention, String type, Annotation headC) {
        if (!"GPE".equals(type)) {
            return false;
        }
        int startOfExtent = mention.start();
        int startOfHead = headC.start();
        if (startOfExtent == startOfHead) {
            return false;
        }
        String modifier = doc.text(new Span(startOfExtent, startOfHead));
        if (modifier.contains("north") || modifier.contains("south") || modifier.contains("east") || modifier.contains("west") || modifier.contains("central")) {
            mention.put("nameWithModifier", "t");
            return true;
        }
        return false;
    }

    /**
     * Looks up {@code head} in the hand-coded table.
     *
     * @param determiner the mention's determiner, or null if it has none
     * @param head       the head word
     * @return null if {@code head} is not in the table; {@code "OTHER"} if it
     *         is in the table but the mention has no determiner; otherwise
     *         the table's type
     */
    static String handCodedEDTtype(String determiner, String head) {
        String type = specifiedEDTtype.get(head);
        if (type == null) {
            return null;
        }
        return determiner == null ? "OTHER" : type;
    }

    /**
     * @param word the word to look up (lower-cased before the lookup); may be null
     * @return the best type/subtype recorded for {@code word}, or null if
     *         {@code word} is null or not in the type dictionary
     */
    static String lookUpEDTtype(String word) {
        if (word == null) {
            return null;
        }
        EDTtypeData data = typeDataMap.get(word.toLowerCase());
        return data == null ? null : data.getBestTypeSubtype();
    }

    /**
     * @return true if some non-null element of {@code array} equals {@code o}
     */
    private static boolean in(Object o, Object[] array) {
        for (Object element : array) {
            if (element != null && element.equals(o)) {
                return true;
            }
        }
        return false;
    }

    /**
     * @param typeSubtype a string of the form {@code TYPE} or {@code TYPE:Subtype}
     * @return the part before the first {@code ':'}, or the whole string if
     *         there is no {@code ':'} after the first character
     */
    public static String bareType(String typeSubtype) {
        int p = typeSubtype.indexOf(':');
        return p > 0 ? typeSubtype.substring(0, p) : typeSubtype;
    }

    /**
     * @param typeSubtype a string of the form {@code TYPE} or {@code TYPE:Subtype}
     * @return the part after the first {@code ':'}, or the empty string if
     *         there is no {@code ':'} after the first character
     */
    static String subtype(String typeSubtype) {
        int p = typeSubtype.indexOf(':');
        return p > 0 ? typeSubtype.substring(p + 1) : "";
    }

    /**
     * @return {@code type} in upper case, a {@code ':'}, and {@code subtype}
     */
    static String typeAndSubtype(String type, String subtype) {
        return type.toUpperCase() + ":" + subtype;
    }

    static {
        /*
         * NOTE(review): the ace2005-dependent entries below are evaluated once,
         * when this class is initialized, so they reflect the value
         * AceDocument.ace2005 has at that moment; main() assigns that flag
         * later, per year group.  Confirm whether the table is meant to follow
         * the flag.
         */
        specifiedEDTtype.put("force", "ORGANIZATION:Government");
        specifiedEDTtype.put("board", "ORGANIZATION:Commercial");
        specifiedEDTtype.put("prison", "FACILITY:" + (AceDocument.ace2005 ? "Building-Grounds" : "Building"));
        specifiedEDTtype.put("room", "FACILITY:" + (AceDocument.ace2005 ? "Subarea-Facility" : "Subarea-Building"));
        specifiedEDTtype.put("home", "FACILITY:" + (AceDocument.ace2005 ? "Building-Grounds" : "Building"));
        specifiedEDTtype.put("state", "GPE:State-or-Province");
        specifiedEDTtype.put("land", "LOCATION:" + (AceDocument.ace2005 ? "Region-General" : "Region-Subnational"));
        specifiedEDTtype.put("minister", "PERSON" + (AceDocument.ace2005 ? ":Individual" : ""));
    }
}

