/*
 * Decompiled with CFR 0.152.
 */
package edu.nyu.jet.parser;

import edu.nyu.jet.Control;
import edu.nyu.jet.JetTest;
import edu.nyu.jet.aceJet.Ace;
import edu.nyu.jet.hmm.BigramHMMemitter;
import edu.nyu.jet.hmm.HMMstate;
import edu.nyu.jet.hmm.Retagger;
import edu.nyu.jet.lex.Tokenizer;
import edu.nyu.jet.lisp.FeatureSet;
import edu.nyu.jet.parser.ParseTreeNode;
import edu.nyu.jet.parserStub.HeadFinder;
import edu.nyu.jet.parserStub.Parser;
import edu.nyu.jet.parserStub.Settings;
import edu.nyu.jet.parserStub.lisp.Sexp;
import edu.nyu.jet.parserStub.lisp.SexpList;
import edu.nyu.jet.parserStub.lisp.Symbol;
import edu.nyu.jet.pat.Pat;
import edu.nyu.jet.tipster.Annotation;
import edu.nyu.jet.tipster.Document;
import edu.nyu.jet.tipster.DocumentCollection;
import edu.nyu.jet.tipster.ExternalDocument;
import edu.nyu.jet.tipster.Span;
import edu.nyu.jet.zoner.SpecialZoner;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Properties;
import java.util.Set;
import java.util.Vector;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Bridge between Jet documents and the statistical parser
 * ({@link edu.nyu.jet.parserStub.Parser}).
 *
 * <p>For a span of a document this class collects the words, their spans, their
 * existing {@code constit} annotations and Penn Treebank part-of-speech tags,
 * hands them to the parser as an S-expression, and converts the S-expression
 * that comes back into {@link ParseTreeNode}s and {@code constit} annotations.
 *
 * <p>All working state ({@link #words}, {@link #spans}, {@link #wordDefns},
 * {@link #pennPOS}, {@link #nextToken}) lives in static fields that are
 * rewritten on every call, so concurrent calls would interfere with each other.
 */
public class StatParser {
    /** Index into the parallel token lists of the next token to match against a parse-tree leaf. */
    static int nextToken = 0;
    /** Text of each input token (after bracket/quote normalization and hyphen merging). */
    static ArrayList<String> words;
    /** Document span of each input token; parallel to {@link #words}. */
    static ArrayList<Span> spans;
    /** Existing {@code constit} annotation for each token, or null if none; parallel to {@link #words}. */
    static ArrayList<Annotation> wordDefns;
    /** Penn Treebank POS of each token, or null if POS was not requested; parallel to {@link #words}. */
    static ArrayList<String> pennPOS;
    /** The parser instance created by {@link #initialize(String, String)}. */
    static Parser parser;
    /** True once {@link #initialize(String, String)} has completed successfully. */
    static boolean initialized = false;
    static final boolean findHeads = true;
    /** Lazily created by {@link #parse}; stays null if its construction failed. */
    static HeadFinder headFinder = null;

    /**
     * Command-line entry point: parses every document of a collection.
     *
     * @param args exactly four values: properties-file, input-directory,
     *             output-directory, list-of-files; the process exits with
     *             status 1 otherwise
     * @throws IOException propagated from {@link #parseCollection}
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 4) {
            System.err.println("StatParser requires 4 arguments");
            System.err.println("  properties-file input-directory output-directory list-of-files");
            System.exit(1);
        }
        String properties = args[0];
        String inputDir = args[1];
        String outputDir = args[2];
        String fileList = args[3];
        System.out.println("Starting Jet StatParser ...");
        JetTest.initializeFromConfig(properties);
        Pat.trace = false;
        StatParser.parseCollection(inputDir, outputDir, fileList);
    }

    /**
     * Initializes the parser from the Jet configuration, if it asks for one.
     *
     * <p>Reads {@code StatParser.properties.fileName} and
     * {@code StatParser.grammar.fileName}. If neither is set this is a no-op;
     * if only one is set an error is reported on stderr and nothing is loaded.
     *
     * @param dataPath directory against which both file names are resolved
     * @param config   the Jet configuration properties
     */
    public static void initialize(String dataPath, Properties config) {
        String properties = config.getProperty("StatParser.properties.fileName");
        String grammar = config.getProperty("StatParser.grammar.fileName");
        if (properties == null && grammar == null) {
            return;
        }
        if (properties == null || grammar == null) {
            System.err.println("Error in properties file:  for StatParser, both");
            System.err.println("properties.fileName and grammar.fileName must be specified");
            return;
        }
        StatParser.initialize(dataPath + File.separatorChar + properties, dataPath + File.separatorChar + grammar);
    }

    /**
     * Loads the parser settings and grammar. On any exception the failure is
     * printed and {@link #isInitialized()} is left unchanged.
     *
     * @param propertiesFile path handed to {@code Settings.load}
     * @param grammarFile    path handed to the {@code Parser} constructor
     */
    public static void initialize(String propertiesFile, String grammarFile) {
        try {
            Settings.load(propertiesFile);
            parser = new Parser(grammarFile);
            initialized = true;
        }
        catch (Exception e) {
            System.out.println(e);
            System.out.println("Unable to initialize parser.");
        }
    }

    /** @return true if {@link #initialize(String, String)} has succeeded at least once */
    public static boolean isInitialized() {
        return initialized;
    }

    /**
     * Parses the text of {@code span}.
     *
     * <p>Runs the tagger over the span, builds the parser input, invokes the
     * parser, converts the result to a parse tree with {@code constit}
     * annotations, removes {@code constit} annotations in the span that are
     * not part of the tree, and records the root annotation as the
     * {@code parse} feature of the first {@code sentence} annotation starting
     * at the span start (if there is one).
     *
     * @param doc  document containing the text
     * @param span span to parse
     * @return root of the parse tree, or null if the span has no tokens, the
     *         parser fails, or the parser output cannot be aligned with the input
     */
    public static ParseTreeNode parse(Document doc, Span span) {
        JetTest.tagger.annotate(doc, span, "tagger");
        int start = span.start();
        int end = span.end();
        StatParser.buildParserInput(doc, start, end, true);
        StatParser.fixHyphenatedItems(doc);
        if (words.isEmpty()) {
            System.out.println("StatParse:  no tokens in span");
            return null;
        }
        SexpList sentence = StatParser.buildSentenceSexp();
        System.out.println("Sentence = " + sentence.toString());
        SexpList parseTreeSexp;
        try {
            parseTreeSexp = parser.parse(sentence);
        }
        catch (Exception e) {
            // Also covers an uninitialized (null) parser.
            System.out.println(e);
            System.out.println("No parse possible.");
            return null;
        }
        if (parseTreeSexp == null) {
            System.out.println("No parse possible.");
            return null;
        }
        System.out.println("Parse = " + parseTreeSexp);
        if (headFinder == null) {
            try {
                headFinder = new HeadFinder();
            }
            catch (IOException e) {
                // Parsing continues without head information; see makeParseTree.
                System.out.println("StatParser: " + e);
                System.out.println("Unable to generate heads.");
            }
        }
        nextToken = 0;
        ParseTreeNode parseTree = StatParser.makeParseTree(doc, parseTreeSexp);
        if (parseTree == null) {
            // makeParseTree has already reported why the tree could not be built.
            return null;
        }
        Annotation rootAnnotation = ParseTreeNode.makeParseAnnotations(doc, parseTree);
        StatParser.deleteUnusedConstits(doc, span, rootAnnotation);
        Vector<Annotation> sentences = doc.annotationsAt(start, "sentence");
        if (sentences != null && sentences.size() > 0) {
            sentences.get(0).put("parse", rootAnnotation);
        }
        return parseTree;
    }

    /**
     * Builds the parser input: a list with one {@code (word (POS))} entry per
     * token, taken from {@link #words} and {@link #pennPOS}.
     */
    private static SexpList buildSentenceSexp() {
        SexpList sentence = new SexpList();
        for (int i = 0; i < words.size(); ++i) {
            SexpList posSx = new SexpList();
            posSx.add(Symbol.get(pennPOS.get(i)));
            SexpList wordSx = new SexpList();
            wordSx.add(Symbol.get(words.get(i)));
            wordSx.add(posSx);
            sentence.add(wordSx);
        }
        return sentence;
    }

    /**
     * Removes from {@code doc} every {@code constit} annotation within
     * {@code span} that is not {@code rootAnnotation} or one of its descendants.
     *
     * @param doc            document to clean up
     * @param span           region whose {@code constit} annotations are examined
     * @param rootAnnotation root of the parse whose annotations are kept
     */
    public static void deleteUnusedConstits(Document doc, Span span, Annotation rootAnnotation) {
        Set<Annotation> annotationsInTree = StatParser.descendants(rootAnnotation);
        Vector<Annotation> constits = doc.annotationsOfType("constit", span);
        if (constits == null) {
            return;
        }
        // Iterate over a snapshot so that removals cannot disturb the iteration
        // should the document hand back a live view of its annotations.
        for (Annotation candidate : new ArrayList<Annotation>(constits)) {
            if (!annotationsInTree.contains(candidate)) {
                doc.removeAnnotation(candidate);
            }
        }
    }

    /**
     * Fills the parallel lists {@link #words}, {@link #spans},
     * {@link #wordDefns} and {@link #pennPOS} for the text between
     * {@code start} and {@code end}.
     *
     * <p>At each position the first {@code constit} annotation without a
     * {@code hidden} feature defines the token; if there is none, the token
     * annotation at that position is used. Scanning stops early when neither
     * exists. The four lists always end up the same length.
     *
     * @param doc    document to read
     * @param start  start of the region
     * @param end    end of the region
     * @param setPOS if true, compute a Penn POS for each token; otherwise
     *               {@link #pennPOS} is filled with nulls
     */
    public static void buildParserInput(Document doc, int start, int end, boolean setPOS) {
        wordDefns = new ArrayList<Annotation>();
        pennPOS = new ArrayList<String>();
        words = new ArrayList<String>();
        spans = new ArrayList<Span>();
        int posn = Tokenizer.skipWSX(doc, start, end);
        while (posn < end) {
            Annotation constit = StatParser.firstVisibleConstit(doc, posn);
            String cat = constit == null ? null : (String)constit.get("cat");
            // Resolve the token span before touching any list so that an early
            // exit cannot leave the lists with different lengths.
            Span wspan;
            if (constit != null) {
                wspan = constit.span();
            } else {
                Annotation token = doc.tokenAt(posn);
                if (token == null) break;
                wspan = token.span();
            }
            wordDefns.add(constit);
            pennPOS.add(setPOS ? StatParser.ptbPOS(doc, posn, constit, cat) : null);
            spans.add(wspan);
            words.add(StatParser.normalizeWord(doc.text(wspan).trim()));
            posn = wspan.end();
        }
    }

    /** Returns the first {@code constit} annotation at {@code posn} with no {@code hidden} feature, or null. */
    private static Annotation firstVisibleConstit(Document doc, int posn) {
        Vector<Annotation> constits = doc.annotationsAt(posn, "constit");
        if (constits == null) {
            return null;
        }
        for (Annotation candidate : constits) {
            if (candidate.get("hidden") == null) {
                return candidate;
            }
        }
        return null;
    }

    /** Maps the tokens "_", "(", ")" and the double quote to the forms passed to the parser; others are unchanged. */
    private static String normalizeWord(String word) {
        if (word.equals("_")) {
            return "--";
        }
        if (word.equals("(")) {
            return "-LRB-";
        }
        if (word.equals(")")) {
            return "-RRB-";
        }
        if (word.equals("\"")) {
            return "''";
        }
        return word;
    }

    /**
     * Chooses the Penn POS for the token at {@code posn}.
     *
     * <p>Names are always {@code NNP}. Otherwise the tagger's POS is used when
     * there is no {@code constit} at the position, or when some {@code constit}
     * with the same span as the tagger annotation is consistent with it;
     * failing that, a POS derived from {@code constit}'s features is used if
     * one is available, and the tagger's POS as a last resort.
     *
     * <p>Assumes a {@code tagger} annotation exists at {@code posn};
     * {@link #parse} runs the tagger before building the parser input.
     */
    private static String ptbPOS(Document doc, int posn, Annotation constit, String cat) {
        if ("name".equals(cat)) {
            return "NNP";
        }
        Vector<Annotation> taggerAnns = doc.annotationsAt(posn, "tagger");
        Annotation tagged = taggerAnns.get(0);
        String pos = (String)tagged.get("cat");
        String textSpanned = doc.text(tagged).trim().toLowerCase();
        Vector<Annotation> constits = doc.annotationsAt(posn, "constit");
        if (constits == null || constits.size() == 0) {
            return pos;
        }
        for (Annotation jetDefn : constits) {
            if (!tagged.span().equals(jetDefn.span())) continue;
            for (FeatureSet candidate : Retagger.ptbToJetFS(textSpanned, pos)) {
                if (candidate.subsetOf(jetDefn.attributes())) {
                    return pos;
                }
            }
        }
        if (constit != null) {
            String fromJet = Retagger.jetToPtbPos(constit.attributes());
            if (fromJet != null) {
                return fromJet;
            }
        }
        return pos;
    }

    /**
     * Merges each word / "-" / word triple whose spans are directly adjacent
     * into a single token with POS {@code JJ}.
     *
     * <p>For every merge a {@code constit} with cat {@code "-"} is added for
     * the hyphen, placeholder constits with cat {@code "?"} are added for
     * neighbours that had no definition, and a {@code hyphword} constit with
     * the three as children is added for the whole. The four token lists are
     * updated in step.
     *
     * <p>NOTE(review): after a merge the loop index moves past the token that
     * now follows the merged item, so a hyphen immediately after it (as in
     * "a-b-c") is not examined.
     *
     * @param doc document to which the new annotations are added
     */
    public static void fixHyphenatedItems(Document doc) {
        for (int i = 1; i < words.size() - 1; ++i) {
            if (!StatParser.isTightHyphen(i)) continue;
            Annotation hyphenDefn = new Annotation("constit", spans.get(i), new FeatureSet("cat", "-"));
            doc.addAnnotation(hyphenDefn);
            wordDefns.set(i, hyphenDefn);
            Span hwSpan = new Span(spans.get(i - 1).start(), spans.get(i + 1).end());
            Annotation[] children = new Annotation[3];
            children[0] = StatParser.defnOrPlaceholder(doc, i - 1);
            children[1] = hyphenDefn;
            children[2] = StatParser.defnOrPlaceholder(doc, i + 1);
            Annotation hwDefn = new Annotation("constit", hwSpan, new FeatureSet("cat", "hyphword", "children", children));
            doc.addAnnotation(hwDefn);
            // Collapse entries i-1, i, i+1 of every list into a single entry at i-1.
            spans.set(i - 1, hwSpan);
            spans.remove(i + 1);
            spans.remove(i);
            words.set(i - 1, doc.text(hwSpan).trim());
            words.remove(i + 1);
            words.remove(i);
            pennPOS.set(i - 1, "JJ");
            pennPOS.remove(i + 1);
            pennPOS.remove(i);
            wordDefns.set(i - 1, hwDefn);
            wordDefns.remove(i + 1);
            wordDefns.remove(i);
        }
    }

    /** True if token {@code i} is "-" and its span touches the spans of both neighbours. */
    private static boolean isTightHyphen(int i) {
        return words.get(i).equals("-")
            && spans.get(i - 1).end() == spans.get(i).start()
            && spans.get(i).end() == spans.get(i + 1).start();
    }

    /**
     * Returns the existing definition of token {@code index}, or, if it has
     * none, a new {@code constit} with cat {@code "?"} that is added to {@code doc}.
     */
    private static Annotation defnOrPlaceholder(Document doc, int index) {
        Annotation defn = wordDefns.get(index);
        if (defn == null) {
            defn = new Annotation("constit", spans.get(index), new FeatureSet("cat", "?"));
            doc.addAnnotation(defn);
        }
        return defn;
    }

    /**
     * Recursively converts parser output into a parse tree, consuming tokens
     * from the parallel token lists via {@link #nextToken}.
     *
     * @param doc document to which new word definitions are added
     * @param sx  a list whose first element is a category symbol, followed
     *            either by a word symbol (leaf) or by child lists
     * @return the node for {@code sx}, or null if {@code sx} is malformed or
     *         cannot be aligned with the input tokens
     */
    private static ParseTreeNode makeParseTree(Document doc, Sexp sx) {
        if (!sx.isList()) {
            System.out.println("StatParse:  invalid Sexp for parse node " + sx.toString());
            return null;
        }
        SexpList s = (SexpList)sx;
        if (s.length() < 2) {
            System.out.println("StatParse:  invalid Sexp for parse node " + s.toString());
            return null;
        }
        Sexp catSx = s.get(0);
        if (!catSx.isSymbol()) {
            System.out.println("StatParse:  invalid Sexp for parse node " + s.toString());
            return null;
        }
        String pennTag = ((Symbol)catSx).toString().intern();
        String cat = pennTag.toLowerCase().intern();
        Sexp wordSx = s.get(1);
        if (wordSx.isSymbol()) {
            return StatParser.makeLeaf(doc, cat, pennTag, ((Symbol)wordSx).toString());
        }
        // Without a head finder (its construction failed) fall back to head index 0.
        int head = headFinder != null ? headFinder.findHead(sx) : 0;
        int childCount = s.length() - 1;
        int startToken = nextToken;
        if (startToken >= spans.size()) {
            System.out.println("*** Unable to align sentence and parse tree.");
            return null;
        }
        int start = spans.get(startToken).start();
        ParseTreeNode[] children = new ParseTreeNode[childCount];
        for (int iChild = 0; iChild < childCount; ++iChild) {
            children[iChild] = StatParser.makeParseTree(doc, s.get(iChild + 1));
        }
        int end = start;
        if (nextToken > startToken) {
            end = spans.get(nextToken - 1).end();
        }
        return new ParseTreeNode(cat, children, start, end, head);
    }

    /**
     * Builds the leaf node for {@code expectedWord}, skipping (and reporting)
     * input tokens the parser left out of its output.
     *
     * @return the leaf, or null if no remaining token matches
     */
    private static ParseTreeNode makeLeaf(Document doc, String cat, String pennTag, String expectedWord) {
        if (nextToken >= words.size()) {
            System.out.println("*** Unable to align sentence and parse tree.");
            return null;
        }
        String word = words.get(nextToken);
        while (!word.equals(expectedWord)) {
            System.out.println("StatParser:  parse skips " + word + " in sentence.");
            if (++nextToken >= words.size()) {
                System.out.println("*** Unable to align sentence and parse tree.");
                return null;
            }
            word = words.get(nextToken);
        }
        Annotation wordDefn = StatParser.buildWordDefn(doc, word, spans.get(nextToken), wordDefns.get(nextToken), pennTag);
        ++nextToken;
        return new ParseTreeNode((Object)cat, null, wordDefn.start(), wordDefn.end(), wordDefn, word);
    }

    /**
     * Selects or creates the {@code constit} annotation for a parse-tree leaf.
     *
     * <p>In order of preference: {@code wordDefn} itself if it is a name or a
     * hyphenated word; the first non-hidden {@code constit} at the span start
     * that {@code Retagger.compatible} accepts for this word and POS;
     * {@code wordDefn} if it exists and the POS is not {@code "POS"}; a new
     * annotation built from the first feature set {@code Retagger.ptbToJetFS}
     * offers; and finally a new annotation whose cat is the lower-cased POS.
     * Newly created annotations are added to {@code doc}.
     *
     * @param doc      document holding the annotations
     * @param word     text of the token
     * @param span     span of the token
     * @param wordDefn definition found while building the parser input, or null
     * @param pennPOS  Penn POS assigned to the token by the parser
     * @return the annotation to use for the leaf; never null
     */
    public static Annotation buildWordDefn(Document doc, String word, Span span, Annotation wordDefn, String pennPOS) {
        if (wordDefn != null) {
            Object defnCat = wordDefn.get("cat");
            if ("name".equals(defnCat) || "hyphword".equals(defnCat)) {
                return wordDefn;
            }
        }
        Vector<Annotation> jetAnns = doc.annotationsAt(span.start(), "constit");
        if (jetAnns != null) {
            for (Annotation jetAnn : jetAnns) {
                if (jetAnn.get("hidden") == null && Retagger.compatible(word, pennPOS, jetAnn)) {
                    return jetAnn;
                }
            }
        }
        if (wordDefn != null && !"POS".equals(pennPOS)) {
            return wordDefn;
        }
        FeatureSet[] fromPenn = Retagger.ptbToJetFS(word, pennPOS);
        FeatureSet jetFS;
        if (fromPenn.length > 0) {
            jetFS = new FeatureSet(fromPenn[0]);
            if (StatParser.takesHeadFeature((String)jetFS.get("cat"))) {
                jetFS.put("pa", new FeatureSet("head", word.toLowerCase().intern()));
            }
        } else {
            jetFS = new FeatureSet("cat", pennPOS.toLowerCase().intern());
        }
        Annotation created = new Annotation("constit", span, jetFS);
        doc.addAnnotation(created);
        return created;
    }

    /** True for the categories that receive a {@code pa} feature naming the head word. */
    private static boolean takesHeadFeature(String cat) {
        return "n".equals(cat) || "v".equals(cat) || "tv".equals(cat) || "ving".equals(cat) || "ven".equals(cat);
    }

    /**
     * Collects {@code node} and every annotation reachable from it through
     * {@code ParseTreeNode.children}; null children are skipped.
     *
     * @param node root of the subtree
     * @return a new set containing the node and all its descendants
     */
    public static Set<Annotation> descendants(Annotation node) {
        HashSet<Annotation> collected = new HashSet<Annotation>();
        StatParser.collectDescendants(node, collected);
        return collected;
    }

    /** Adds {@code node} and, recursively, its non-null children to {@code into}. */
    private static void collectDescendants(Annotation node, Set<Annotation> into) {
        into.add(node);
        Annotation[] children = ParseTreeNode.children(node);
        if (children == null) {
            return;
        }
        for (Annotation child : children) {
            if (child != null) {
                StatParser.collectDescendants(child, into);
            }
        }
    }

    /**
     * Processes every document of a collection and saves the result.
     *
     * <p>A document is skipped when a file of the same name already exists in
     * {@code outputDir}. Otherwise it is opened, processed by
     * {@code Control.processDocument}, stripped of {@code ENAMEX},
     * {@code ANNOTATION} and {@code tagger} annotations, and saved.
     *
     * @param inputDir  directory of the input collection
     * @param outputDir directory in which processed documents are saved
     * @param fileList  file listing the documents of the collection
     * @throws IOException declared for I/O failures while reading or saving
     */
    private static void parseCollection(String inputDir, String outputDir, String fileList) throws IOException {
        DocumentCollection col = new DocumentCollection(inputDir, fileList);
        col.open();
        for (int docCount = 0; docCount < col.size(); ++docCount) {
            ExternalDocument doc = col.get(docCount);
            String docFile = doc.fileName();
            if (new File(outputDir, docFile).exists()) {
                System.out.println("\nSkipping document   " + docCount + ": " + doc.fileName());
                continue;
            }
            System.out.println("\nProcessing document " + docCount + ": " + doc.fileName());
            doc.setAllTags(true);
            doc.open();
            SpecialZoner.findSpecialZones(doc);
            BigramHMMemitter.useBigrams = Ace.monocase = Ace.allLowerCase(doc);
            HMMstate.otherPreference = Ace.monocase ? 1.0 : 0.0;
            Control.processDocument(doc, null, docCount == -1, docCount);
            doc.removeAnnotationsOfType("ENAMEX");
            StatParser.clearInputAnnotations(doc);
            doc.removeAnnotationsOfType("tagger");
            doc.saveIn(outputDir);
        }
    }

    /**
     * Overwrites every non-whitespace character covered by an
     * {@code ANNOTATION} annotation with a blank, then removes those
     * annotations. Whitespace is left untouched, so character offsets in the
     * rest of the document do not change.
     *
     * @param doc document to modify
     */
    public static void clearInputAnnotations(Document doc) {
        Vector<Annotation> anns = doc.annotationsOfType("ANNOTATION");
        if (anns == null) {
            return;
        }
        for (Annotation ann : anns) {
            int start = ann.span().start();
            int end = ann.span().end();
            for (int j = start; j < end; ++j) {
                if (!Character.isWhitespace(doc.charAt(j))) {
                    doc.setCharAt(j, ' ');
                }
            }
        }
        doc.removeAnnotationsOfType("ANNOTATION");
    }
}

