/*
 * Decompiled with CFR 0.152.
 */
package edu.nyu.jet.format;

import edu.nyu.jet.format.InvalidFormatException;
import edu.nyu.jet.format.Treebank;
import edu.nyu.jet.lisp.FeatureSet;
import edu.nyu.jet.parser.HeadRule;
import edu.nyu.jet.parser.ParseTreeNode;
import edu.nyu.jet.parser.StatParser;
import edu.nyu.jet.tipster.Annotation;
import edu.nyu.jet.tipster.Document;
import edu.nyu.jet.tipster.Span;
import edu.nyu.jet.util.IOUtils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackReader;
import java.io.Reader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class PTBReader {
    // Splits a (lower-cased) tag into its parts. Flag 4 is Pattern.COMMENTS, so the
    // blanks inside the expression are ignored. readNode uses group 1 as the category
    // and group 2 as the function tag; group 3 (a trailing "-digits" / "=digits" part)
    // is matched but not read anywhere in this class.
    static Pattern tagNamePattern = Pattern.compile("([^-=]+) (?: - ([\\-a-zA-Z]+)*)? (?: [-=] ([\\-\\d]+))?", 4);
    // Tags fully enclosed in dashes; readNode accepts these when tagNamePattern
    // does not match.
    static Pattern specialTagNamePattern = Pattern.compile("-.*-");
    // Maps bracket codes read from the treebank ("-LRB-" etc.) to the literal bracket;
    // filled in the static initializer and consulted by readWord.
    private static final Map<String, String> TRANSFORM_TABLE = new HashMap<String, String>();
    // Populated in the static initializer; not referenced by any method of this class.
    private static final Set<String> PUNCTUATIONS;
    // Words after which no blank is generated when document text is rebuilt.
    private static final Set<String> NO_FOLLOWING_SPACE;
    // Words that remove the blank generated after the preceding word.
    private static final Set<String> DELETE_PREVIOUS_SPACE;
    // When true, readWord drops a backslash and takes the character after it instead.
    private boolean backslashAsEscapeChar = true;
    // When true, annotate() adds a "token" annotation for every node without children.
    private boolean isAddingTokens = false;
    // Head rule applied by setJetAnnotations; created on first use when still null.
    HeadRule hr = null;
    // Text fragments that addAnnotations skips in the document before matching a word.
    private static final String[] skip;
    // Flat list of (document text, tree word) pairs consulted by matchTextToTree.
    private static final String[] match;
    // One entry per tree read by loadParseTrees(Reader): the number parsed from the
    // '#' comment line preceding the tree, or -1 when there was none.
    List<Integer> offsets;
    // Scratch buffer holding the text of the comment currently being skipped.
    private StringBuffer comment = new StringBuffer();
    // Number found in the most recently skipped comment, or -1.
    private int offset = -1;

    /**
     * Aligns the words of <CODE>tree</CODE> with the text of <CODE>doc</CODE>
     * inside <CODE>span</CODE> and then adds annotations for the tree.
     * <p>
     * Each terminal receives a start/end covering its matched text plus any
     * trailing whitespace.  If a word cannot be located, a diagnostic is
     * written to <CODE>System.err</CODE> and the method returns without
     * adding any annotation.
     *
     * @param tree          the parse tree to align
     * @param doc           the document containing the sentence text
     * @param span          the part of the document covered by the tree
     * @param jetCategories if true, annotations are built through
     *                      <CODE>setJetAnnotations</CODE>; otherwise plain
     *                      "constit" annotations are added
     */
    public void addAnnotations(ParseTreeNode tree, Document doc, Span span, boolean jetCategories) {
        List<ParseTreeNode> leaves = this.getTerminalNodes(tree);
        String text = doc.text();
        int cursor = span.start();
        for (ParseTreeNode leaf : leaves) {
            cursor = PTBReader.skipBlanks(text, cursor, span);
            // at most one skip string is consumed in front of each word
            for (int k = 0; k < skip.length; ++k) {
                String ignorable = skip[k];
                if (text.startsWith(ignorable, cursor)) {
                    cursor = PTBReader.skipBlanks(text, cursor + ignorable.length(), span);
                    break;
                }
            }
            int matched = PTBReader.matchTextToTree(text, cursor, leaf.word);
            if (matched <= 0) {
                System.err.println("PTBReader.addAnnotations:  Cannot determine parse tree offset for word " + leaf.word);
                System.err.println("  at document offset " + cursor + " in sentence");
                System.err.println("  " + doc.text(span));
                return;
            }
            int wordEnd = PTBReader.skipBlanks(text, cursor + matched, span);
            leaf.start = cursor;
            leaf.end = wordEnd;
            cursor = wordEnd;
        }
        if (!jetCategories) {
            this.determineNonTerminalSpans(tree, span.start());
            this.setAnnotations(tree, doc);
            return;
        }
        this.setJetAnnotations(tree, span, doc);
        StatParser.deleteUnusedConstits(doc, span, tree.ann);
    }

    /** Returns the first position at or after <CODE>from</CODE> that is not whitespace, stopping at the end of <CODE>span</CODE>. */
    private static int skipBlanks(String text, int from, Span span) {
        int pos = from;
        while (pos < span.end() && Character.isWhitespace(text.charAt(pos))) {
            ++pos;
        }
        return pos;
    }

    /**
     * Determines how many characters of <CODE>text</CODE>, starting at
     * <CODE>offset</CODE>, correspond to the tree word <CODE>word</CODE>.
     *
     * @return the number of matched text characters, or -1 if the text at
     *         <CODE>offset</CODE> does not correspond to <CODE>word</CODE>
     */
    private static int matchTextToTree(String text, int offset, String word) {
        // "can" in the tree against "can't" in the text consumes only two characters
        boolean lowerCan = word.equals("can") && text.startsWith("can't", offset);
        if (lowerCan || (word.equals("Can") && text.startsWith("Can't", offset))) {
            return 2;
        }
        // table of (text form, tree form) pairs
        for (int i = 0; i < match.length; i += 2) {
            String inText = match[i];
            String inTree = match[i + 1];
            if (text.startsWith(inText, offset) && word.equals(inTree)) {
                return inText.length();
            }
        }
        if (text.startsWith(word, offset)) {
            return word.length();
        }
        // the word may be preceded by a period in the text
        return text.startsWith("." + word, offset) ? word.length() + 1 : -1;
    }

    /**
     * Adds parse annotations for a list of trees, pairing the i-th tree with
     * the i-th annotation of type <CODE>targetAnnotation</CODE> within
     * <CODE>span</CODE> (annotations ordered by span).  Each paired annotation
     * receives a "parse" feature holding the annotation of the tree root.
     * <p>
     * If the number of trees and target annotations differ, a message is
     * written to <CODE>System.err</CODE> and only the common prefix is
     * processed.
     *
     * @param trees            the parse trees, in document order
     * @param doc              the document to annotate
     * @param targetAnnotation the annotation type delimiting each tree's text
     * @param span             the part of the document to search
     * @param jetCategories    passed through to the single-tree overload
     */
    public void addAnnotations(List<ParseTreeNode> trees, Document doc, String targetAnnotation, Span span, boolean jetCategories) {
        Vector<Annotation> targetList = doc.annotationsOfType(targetAnnotation, span);
        // Defensive: treat a null result as "no annotations" (the offset-based
        // overload applies the same check to doc.annotationsAt) so that the
        // mismatch is reported instead of failing inside Collections.sort.
        if (targetList == null) {
            targetList = new Vector<Annotation>();
        }
        Comparator<Annotation> cmp = new Comparator<Annotation>(){

            @Override
            public int compare(Annotation a, Annotation b) {
                return a.span().compareTo(b.span());
            }
        };
        Collections.sort(targetList, cmp);
        if (trees.size() != targetList.size()) {
            System.err.println("PTBReader.addAnnotations:  mismatch between number of " + targetAnnotation + " (" + targetList.size() + ") and number of trees (" + trees.size() + ")");
        }
        int n = Math.min(trees.size(), targetList.size());
        for (int i = 0; i < n; ++i) {
            ParseTreeNode tree = trees.get(i);
            Annotation target = targetList.get(i);
            this.addAnnotations(tree, doc, target.span(), jetCategories);
            target.put("parse", tree.ann);
        }
    }

    /**
     * Adds parse annotations for a list of trees whose starting positions in
     * the document are given explicitly.
     * <p>
     * Tree <CODE>i</CODE> is aligned with the text running from
     * <CODE>offsets.get(i)</CODE> up to the next known offset, or to the end
     * of <CODE>span</CODE> if no later offset is known.  A negative offset
     * marks a tree whose position is unknown: such a tree is reported on
     * <CODE>System.err</CODE> and skipped.  If an annotation of type
     * <CODE>targetAnnotation</CODE> starts at the tree's offset, its "parse"
     * feature is set to the annotation of the tree root.
     *
     * @param trees            the parse trees
     * @param offsets          document offset of each tree (negative = missing);
     *                         must have the same size as <CODE>trees</CODE>
     * @param doc              the document to annotate
     * @param targetAnnotation the annotation type to receive the "parse" feature
     * @param span             supplies the end position for the last tree
     * @param jetCategories    passed through to the single-tree overload
     */
    public void addAnnotations(List<ParseTreeNode> trees, List<Integer> offsets, Document doc, String targetAnnotation, Span span, boolean jetCategories) {
        if (trees.size() != offsets.size()) {
            System.err.println("PTBReader.addAnnotations:  mismatch between number of trees (" + trees.size() + ") and number of offsets (" + offsets.size() + ")");
            return;
        }
        for (int i = 0; i < trees.size(); ++i) {
            ParseTreeNode tree = trees.get(i);
            int start = offsets.get(i);
            if (start < 0) {
                System.err.println("PTBReader.addAnnotations:  offset missing for  parse tree " + i);
                continue;
            }
            // The sentence ends where the next tree with a known offset begins.
            // Skipping "missing" (-1) entries avoids building a span whose end
            // is -1 when the following tree has no offset.
            int end = span.end();
            for (int j = i + 1; j < offsets.size(); ++j) {
                int next = offsets.get(j);
                if (next >= 0) {
                    end = next;
                    break;
                }
            }
            Span sentenceSpan = new Span(start, end);
            this.addAnnotations(tree, doc, sentenceSpan, jetCategories);
            Vector<Annotation> anns = doc.annotationsAt(start, targetAnnotation);
            if (anns == null || anns.size() <= 0) continue;
            Annotation ann = anns.get(0);
            ann.put("parse", tree.ann);
        }
    }

    /**
     * Reads all parse trees from <CODE>in</CODE>.  White space and '#' comment
     * lines between trees are skipped; the number found in the comment
     * preceding each tree (or -1 if there is none) is recorded and can be
     * retrieved afterwards through {@link #getOffsets()}.
     *
     * @param in source of bracketed trees
     * @return the trees in the order read
     * @throws IOException            if reading fails
     * @throws InvalidFormatException if a tree is malformed
     */
    public List<ParseTreeNode> loadParseTrees(Reader in) throws IOException, InvalidFormatException {
        ArrayList<ParseTreeNode> trees = new ArrayList<ParseTreeNode>();
        this.offsets = new ArrayList<Integer>();
        PushbackReader input = new PushbackReader(in);
        this.skipWhitespaceAndComment(input);
        while (this.lookAhead(input) != -1) {
            // offset was set (or reset to -1) by the skip just performed
            this.offsets.add(this.offset);
            trees.add(this.readNode(input));
            this.skipWhitespaceAndComment(input);
        }
        return trees;
    }

    /**
     * Reads all parse trees from <CODE>file</CODE>, decoding it with the
     * platform default character set (as <CODE>FileReader</CODE> does).
     * The file is closed before returning, whether or not reading succeeds.
     *
     * @param file file of bracketed trees
     * @return the trees in the order read
     * @throws IOException            if the file cannot be opened or read
     * @throws InvalidFormatException if a tree is malformed
     */
    public List<ParseTreeNode> loadParseTrees(File file) throws IOException, InvalidFormatException {
        BufferedReader in = null;
        try {
            in = new BufferedReader(new FileReader(file));
            return this.loadParseTrees(in);
        }
        finally {
            // in is still null here if the file could not be opened
            IOUtils.closeQuietly(in);
        }
    }

    /**
     * Returns the list of offsets collected by the most recent call of
     * <CODE>loadParseTrees</CODE> (one entry per tree, -1 where no offset
     * comment preceded the tree).  The internal list itself is returned,
     * not a copy; it is null until <CODE>loadParseTrees</CODE> has been called.
     */
    public List<Integer> getOffsets() {
        return offsets;
    }

    /**
     * Reads all trees from <CODE>in</CODE> and builds a document from them:
     * the text is regenerated from the words of the trees, each tree gets a
     * "sentence" annotation, and every node gets a "constit" annotation.
     *
     * @param in source of bracketed trees
     * @return a treebank holding the generated document and the trees
     * @throws IOException            if reading fails
     * @throws InvalidFormatException if a tree is malformed
     */
    public Treebank load(Reader in) throws IOException, InvalidFormatException {
        ArrayList<ParseTreeNode> trees = new ArrayList<ParseTreeNode>();
        PushbackReader input = new PushbackReader(in);
        int nextStart = 0;
        this.skipWhitespace(input);
        while (this.lookAhead(input) != -1) {
            ParseTreeNode parsed = this.readNode(input);
            trees.add(parsed);
            this.determineSpans(parsed, nextStart);
            // annotations are created now but attached to the document later
            this.setAnnotations(parsed, null);
            nextStart = parsed.end;
            this.skipWhitespace(input);
        }
        Document doc = new Document(this.buildDocumentString(trees));
        for (ParseTreeNode sentence : trees) {
            Span sentenceSpan = new Span(sentence.start, sentence.end);
            doc.annotate("sentence", sentenceSpan, new FeatureSet());
            this.annotate(doc, sentence);
        }
        return new Treebank(doc, trees);
    }

    /**
     * Builds a treebank from <CODE>file</CODE>, decoding it with the platform
     * default character set (as <CODE>FileReader</CODE> does).  The file is
     * closed before returning, whether or not reading succeeds.
     *
     * @param file file of bracketed trees
     * @return the treebank read from the file
     * @throws IOException            if the file cannot be opened or read
     * @throws InvalidFormatException if a tree is malformed
     */
    public Treebank load(File file) throws IOException, InvalidFormatException {
        BufferedReader in = null;
        try {
            in = new BufferedReader(new FileReader(file));
            return this.load(in);
        }
        finally {
            // in is still null here if the file could not be opened
            IOUtils.closeQuietly(in);
        }
    }

    /**
     * Builds a treebank from <CODE>file</CODE>, decoding it with the named
     * character encoding.  Both the reader and the underlying stream are
     * closed before returning, whether or not reading succeeds.
     *
     * @param file     file of bracketed trees
     * @param encoding name of the character encoding of the file
     * @return the treebank read from the file
     * @throws IOException            if the file cannot be opened or read, or
     *                                the encoding is not supported
     * @throws InvalidFormatException if a tree is malformed
     */
    public Treebank load(File file, String encoding) throws IOException, InvalidFormatException {
        FileInputStream fin = null;
        Reader in = null;
        try {
            fin = new FileInputStream(file);
            in = new InputStreamReader((InputStream)fin, encoding);
            in = new BufferedReader(in);
            return this.load(in);
        }
        finally {
            // the stream is closed separately because the reader is never
            // created when the encoding name is rejected
            IOUtils.closeQuietly(in);
            IOUtils.closeQuietly(fin);
        }
    }

    /**
     * Selects whether a backslash inside a word is treated as an escape
     * character (the backslash is dropped and the following character is
     * taken in its place).  Enabled by default.
     *
     * @param b true to enable backslash escapes
     */
    public void setBackslashAsEscapeCharacter(boolean b) {
        backslashAsEscapeChar = b;
    }

    /**
     * Selects whether <CODE>load</CODE> also adds a "token" annotation for
     * every childless node of the trees.  Disabled by default.
     *
     * @param b true to add token annotations
     */
    public void setAddingToken(boolean b) {
        isAddingTokens = b;
    }

    /**
     * Returns true if the category of <CODE>node</CODE> is "-none-"
     * (tag names are lower-cased when read).
     */
    private static boolean isNullNode(ParseTreeNode node) {
        Object category = node.category;
        return category.equals("-none-");
    }

    /**
     * Removes a trailing whitespace character from <CODE>buffer</CODE> and
     * shortens by one every annotation at the tail of <CODE>annotations</CODE>
     * that ends exactly at the current end of the buffer.  Does nothing if
     * the buffer is empty or does not end in whitespace.
     *
     * @param annotations annotations over the buffer text; entries are
     *                    replaced in place
     * @param buffer      the text being built
     */
    private void modifyAnnotationEnd(List<Annotation> annotations, StringBuilder buffer) {
        ListIterator<Annotation> cursor = annotations.listIterator(annotations.size());
        int length = buffer.length();
        if (length == 0 || !Character.isWhitespace(buffer.charAt(length - 1))) {
            return;
        }
        // walk backwards over the annotations ending at the end of the buffer
        while (cursor.hasPrevious()) {
            Annotation current = cursor.previous();
            if (current.end() != length) {
                break;
            }
            Span shortened = new Span(current.start(), current.end() - 1);
            cursor.set(new Annotation(current.type(), shortened, current.attributes()));
        }
        buffer.deleteCharAt(length - 1);
    }

    /**
     * Reads one bracketed node, starting at its opening parenthesis and
     * consuming its closing parenthesis.
     * <p>
     * A parenthesis that is followed directly by white space or by another
     * opening parenthesis is an unlabeled wrapper: the node inside it is
     * returned.  Otherwise a tag is read, followed either by child nodes or
     * by a single word.  Children whose category is "-none-" are dropped.
     *
     * @param in the input, positioned at '('
     * @return the node read
     * @throws IOException            if reading fails
     * @throws InvalidFormatException if the input is not a well-formed node
     */
    private ParseTreeNode readNode(PushbackReader in) throws IOException, InvalidFormatException {
        if (in.read() != '(') {
            throw new InvalidFormatException();
        }
        int next = this.lookAhead(in);
        if (next == -1) {
            throw new InvalidFormatException();
        }
        if (next == '(' || Character.isWhitespace(next)) {
            // unlabeled outer bracket:  ( (S ...) )
            this.skipWhitespace(in);
            ParseTreeNode inner = this.readNode(in);
            this.skipWhitespace(in);
            if ((char)in.read() != ')') {
                throw new InvalidFormatException();
            }
            return inner;
        }

        String tag = this.readTagName(in);
        String function = null;
        Matcher tagMatcher = tagNamePattern.matcher(tag);
        if (tagMatcher.matches()) {
            tag = tagMatcher.group(1);
            function = tagMatcher.group(2);
        } else if (!specialTagNamePattern.matcher(tag).matches()) {
            throw new InvalidFormatException(tag + " is invalid format.");
        }
        if (this.skipWhitespace(in) == 0) {
            return null;
        }

        ParseTreeNode result;
        if (this.lookAhead(in) != '(') {
            // leaf:  (tag word)
            String word = this.readWord(in);
            result = new ParseTreeNode(tag, null, 0, 0, null, word, function);
        } else {
            // internal node:  (tag (child) (child) ...)
            ArrayList<ParseTreeNode> kids = new ArrayList<ParseTreeNode>();
            while (true) {
                ParseTreeNode kid = this.readNode(in);
                if (!PTBReader.isNullNode(kid)) {
                    kids.add(kid);
                }
                this.skipWhitespace(in);
                if (this.lookAhead(in) == ')') {
                    break;
                }
            }
            result = new ParseTreeNode((Object)tag, kids.toArray(new ParseTreeNode[0]), 0, 0, 0, function);
        }
        this.skipWhitespace(in);
        if (in.read() != ')') {
            throw new InvalidFormatException();
        }
        return result;
    }

    /**
     * Consumes white space from <CODE>in</CODE>, leaving the reader at the
     * first non-whitespace character (or at end of input).
     *
     * @return the number of whitespace characters skipped
     * @throws IOException if reading fails
     */
    private int skipWhitespace(PushbackReader in) throws IOException {
        int skipped = 0;
        int c = in.read();
        while (c != -1 && Character.isWhitespace(c)) {
            ++skipped;
            c = in.read();
        }
        if (c != -1) {
            // give back the character that ended the run
            in.unread(c);
        }
        return skipped;
    }

    /**
     * Consumes white space and comments from <CODE>in</CODE>.  A comment runs
     * from '#' to the end of the line.  When a comment line ends, its text is
     * parsed as an integer and stored in the <CODE>offset</CODE> field; text
     * that is not a number leaves the field as it was.  The field is reset to
     * -1 on entry.
     *
     * @return the number of characters consumed
     * @throws IOException if reading fails
     */
    private int skipWhitespaceAndComment(PushbackReader in) throws IOException {
        int reads = 0;
        boolean inComment = false;
        this.offset = -1;
        int c;
        while (true) {
            c = in.read();
            ++reads;
            if (c == '#' && !inComment) {
                inComment = true;
                this.comment.setLength(0);
            } else if (c == '\n' && inComment) {
                try {
                    this.offset = Integer.parseInt(this.comment.toString().trim());
                }
                catch (NumberFormatException ignored) {
                    // the comment does not hold an offset
                }
                inComment = false;
            } else if (inComment) {
                this.comment.append((char)c);
            }
            boolean skippable = Character.isWhitespace(c) || inComment;
            if (c == -1 || !skippable) {
                break;
            }
        }
        if (c != -1) {
            in.unread(c);
        }
        // the final read is either end of input or the character pushed back
        return reads - 1;
    }

    /**
     * Reads a tag name: all characters up to, but not including, the next
     * whitespace character.  The name is returned in lower case and interned.
     * <p>
     * Lower-casing uses a fixed locale so that tags are normalized the same
     * way on every platform (with a Turkish default locale, for instance,
     * the no-argument <CODE>toLowerCase()</CODE> does not map "I" to "i").
     *
     * @param in the input, positioned at the first character of the tag
     * @return the lower-cased, interned tag name
     * @throws IOException            if reading fails
     * @throws InvalidFormatException if the input ends before white space is
     *                                found, or the tag is empty
     */
    private String readTagName(PushbackReader in) throws IOException, InvalidFormatException {
        StringBuilder name = new StringBuilder();
        int c = in.read();
        while (c != -1 && !Character.isWhitespace(c)) {
            name.append((char)c);
            c = in.read();
        }
        if (c == -1) {
            throw new InvalidFormatException();
        }
        // leave the terminating white space for the caller
        in.unread(c);
        if (name.length() == 0) {
            throw new InvalidFormatException();
        }
        return name.toString().toLowerCase(java.util.Locale.ENGLISH).intern();
    }

    /**
     * Reads the word of a leaf node: all characters up to, but not including,
     * the closing parenthesis.  If backslash escapes are enabled, a backslash
     * is dropped and the character after it is taken in its place.  Bracket
     * codes listed in <CODE>TRANSFORM_TABLE</CODE> are replaced by the
     * corresponding bracket.
     *
     * @param in the input, positioned at the first character of the word
     * @return the word, after bracket-code replacement
     * @throws IOException            if reading fails
     * @throws InvalidFormatException if the input ends before ')' is found
     */
    private String readWord(PushbackReader in) throws IOException, InvalidFormatException {
        StringBuilder text = new StringBuilder();
        int c;
        for (;;) {
            c = in.read();
            if (c == '\\' && this.backslashAsEscapeChar) {
                c = in.read();
            }
            if (c == -1) {
                throw new InvalidFormatException();
            }
            // NOTE(review): a ')' that followed a backslash also ends the word here
            if (c == ')') {
                break;
            }
            text.append((char)c);
        }
        in.unread(c);
        String word = text.toString();
        return TRANSFORM_TABLE.containsKey(word) ? TRANSFORM_TABLE.get(word) : word;
    }

    /**
     * Returns the next character of <CODE>in</CODE> without consuming it,
     * or -1 at end of input.
     *
     * @throws IOException if reading fails
     */
    private int lookAhead(PushbackReader in) throws IOException {
        int peeked = in.read();
        if (peeked == -1) {
            return -1;
        }
        in.unread(peeked);
        return peeked;
    }

    /**
     * Converts every ".mrg" file below an input directory into a text file
     * holding the sentence text regenerated from its trees.  The output file
     * has the same path relative to the output directory, with the file
     * suffix removed.
     *
     * @param args input directory, output directory
     * @throws Exception if a file cannot be read, parsed or written
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.out.println("usage: java " + PTBReader.class.getName() + " <input-dir> <output-dir>");
            System.exit(1);
        }
        File inputDir = new File(args[0]);
        File outputDir = new File(args[1]);
        PTBReader parser = new PTBReader();
        for (File file : PTBReader.getFiles(inputDir, ".mrg")) {
            String outFilename = PTBReader.removeSuffix(PTBReader.getRelativePath(inputDir, file));
            File outFile = new File(outputDir, outFilename);
            outFile.getParentFile().mkdirs();
            // parse before opening the writer so that a malformed input file
            // does not leave an empty output file behind
            Document doc = parser.load(file).getDocument();
            FileWriter out = new FileWriter(outFile);
            try {
                out.write(doc.text());
            }
            finally {
                out.close();
            }
        }
    }

    /**
     * Collects, recursively, all regular files below <CODE>dir</CODE> whose
     * name ends with <CODE>suffix</CODE>.
     *
     * @param dir    directory to search
     * @param suffix required ending of the file name
     * @return the matching files
     * @throws IOException if <CODE>dir</CODE> (or a directory below it)
     *                     cannot be listed
     */
    private static List<File> getFiles(File dir, String suffix) throws IOException {
        // listFiles() returns null when dir is not a directory or cannot be read
        File[] entries = dir.listFiles();
        if (entries == null) {
            throw new IOException("Cannot list directory: " + dir);
        }
        ArrayList<File> list = new ArrayList<File>();
        for (File file : entries) {
            if (file.isFile() && file.getName().endsWith(suffix)) {
                list.add(file);
            } else if (file.isDirectory()) {
                list.addAll(PTBReader.getFiles(file, suffix));
            }
        }
        return list;
    }

    /**
     * Returns the absolute path of <CODE>file</CODE> with the leading
     * characters occupied by the absolute path of <CODE>base</CODE> cut off.
     * No check is made that <CODE>file</CODE> actually lies below
     * <CODE>base</CODE>.
     */
    private static String getRelativePath(File base, File file) {
        String fullPath = file.getAbsolutePath();
        int prefixLength = base.getAbsolutePath().length();
        return fullPath.substring(prefixLength);
    }

    /**
     * Removes the suffix (the last '.' and everything after it) from the
     * final component of <CODE>filename</CODE>.  A dot that belongs to a
     * directory name is left alone, so "a.b/file" is returned unchanged
     * rather than being cut down to "a".
     *
     * @param filename a file name or path
     * @return the name without its suffix, or the name itself if the final
     *         component contains no dot
     */
    private static String removeSuffix(String filename) {
        int dot = filename.lastIndexOf('.');
        // both separator styles are checked since the path may come from any platform
        int separator = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
        if (dot > separator) {
            return filename.substring(0, dot);
        }
        return filename;
    }

    /**
     * Regenerates the document text from the words of <CODE>trees</CODE>.
     * Each word is followed by as many blanks as are needed to reach the end
     * offset previously assigned to its terminal; a blank at the end of a
     * tree is turned into a newline.
     *
     * @param trees trees whose terminals already carry their spans
     * @return the generated text
     */
    private String buildDocumentString(List<ParseTreeNode> trees) {
        StringBuilder buffer = new StringBuilder();
        for (ParseTreeNode tree : trees) {
            for (ParseTreeNode terminal : this.getTerminalNodes(tree)) {
                if (terminal.word == null) continue;
                buffer.append(terminal.word);
                while (buffer.length() < terminal.end) {
                    buffer.append(' ');
                }
            }
            // The buffer is still empty if no tree so far contained a word;
            // there is then no trailing blank to replace.
            int last = buffer.length() - 1;
            if (last >= 0 && buffer.charAt(last) == ' ') {
                buffer.setCharAt(last, '\n');
            }
        }
        return buffer.toString();
    }

    /**
     * Assigns start and end offsets to every node of <CODE>tree</CODE>,
     * first to the terminals and then to the nodes above them.
     *
     * @param tree   the tree to process
     * @param offset document offset at which the tree begins
     */
    private void determineSpans(ParseTreeNode tree, int offset) {
        this.determineTerminalSpans(this.getTerminalNodes(tree), offset);
        this.determineNonTerminalSpans(tree, offset);
    }

    /**
     * Assigns consecutive spans to <CODE>terminals</CODE>, beginning at
     * <CODE>offset</CODE>.  The span of a word normally includes one
     * following blank.  No blank follows an opening bracket, and a word that
     * attaches to its predecessor (closing bracket, period, comma,
     * contraction) takes over the position of the blank that followed the
     * preceding word.
     * <p>
     * The shift by one position is applied only when such a blank is actually
     * removed; applying it to every word would place the first word at
     * <CODE>offset - 1</CODE> and leave no blanks between words.
     *
     * @param terminals the terminals of one tree, in text order
     * @param offset    document offset of the first terminal
     */
    private void determineTerminalSpans(List<ParseTreeNode> terminals, int offset) {
        int start = offset;
        int n = terminals.size();
        for (int i = 0; i < n; ++i) {
            ParseTreeNode current = terminals.get(i);
            ParseTreeNode prev = i > 0 ? terminals.get(i - 1) : null;
            String word = current.word;
            int end = start + (word != null ? word.length() + 1 : 0);
            if (!this.hasAfterSpace(word)) {
                --end;
            }
            if (this.hasBeforeSpace(word) && prev != null && this.hasAfterSpace(prev.word)) {
                // remove the blank after the previous word and move this
                // word into its place
                --prev.end;
                --start;
                --end;
            }
            current.start = start;
            current.end = end;
            start = end;
        }
    }

    /**
     * Computes the spans of the non-terminal nodes of <CODE>tree</CODE> from
     * the spans of their children.  A node with an empty child array gets an
     * empty span at the position reached so far.
     *
     * @param tree   the (sub)tree to process
     * @param offset position at which the subtree begins
     * @return the end offset of <CODE>tree</CODE>
     */
    private int determineNonTerminalSpans(ParseTreeNode tree, int offset) {
        ParseTreeNode[] kids = tree.children;
        if (kids == null) {
            // terminal: its span has been set already
            return tree.end;
        }
        if (kids.length == 0) {
            tree.start = offset;
            tree.end = offset;
            return tree.end;
        }
        int position = offset;
        for (int i = 0; i < kids.length; ++i) {
            position = this.determineNonTerminalSpans(kids[i], position);
        }
        tree.start = kids[0].start;
        tree.end = kids[kids.length - 1].end;
        return tree.end;
    }

    /**
     * Returns true unless <CODE>word</CODE> is listed in
     * <CODE>NO_FOLLOWING_SPACE</CODE>.
     */
    private boolean hasAfterSpace(String word) {
        boolean suppressesBlank = NO_FOLLOWING_SPACE.contains(word);
        return !suppressesBlank;
    }

    /**
     * Returns true if <CODE>word</CODE> is listed in
     * <CODE>DELETE_PREVIOUS_SPACE</CODE> or is part of a shortened form.
     */
    private boolean hasBeforeSpace(String word) {
        return DELETE_PREVIOUS_SPACE.contains(word) || this.isPartOfShortenedForm(word);
    }

    /**
     * Returns true if <CODE>word</CODE> begins with an apostrophe or is
     * "n't"; false for null.
     */
    private boolean isPartOfShortenedForm(String word) {
        return word != null && (word.startsWith("'") || word.equals("n't"));
    }

    /**
     * Adds the annotation of <CODE>node</CODE>, and of all nodes below it, to
     * <CODE>doc</CODE>.  The annotation of a node with children gets a
     * "children" feature holding the annotations of its children.  If token
     * generation is enabled, a node without a child array also gets a
     * "token" annotation over the same span.
     *
     * @param doc  the document to annotate
     * @param node a node whose <CODE>ann</CODE> field has been set
     */
    private void annotate(Document doc, ParseTreeNode node) {
        doc.addAnnotation(node.ann);
        ParseTreeNode[] kids = node.children;
        if (kids == null) {
            if (this.isAddingTokens) {
                doc.annotate("token", node.ann.span(), new FeatureSet());
            }
            return;
        }
        Annotation[] childAnns = new Annotation[kids.length];
        for (int i = 0; i < kids.length; ++i) {
            childAnns[i] = kids[i].ann;
        }
        node.ann.put("children", childAnns);
        for (int i = 0; i < kids.length; ++i) {
            this.annotate(doc, kids[i]);
        }
    }

    /**
     * Returns the nodes of <CODE>tree</CODE> that have no children and carry
     * a word, in left-to-right order.
     */
    private List<ParseTreeNode> getTerminalNodes(ParseTreeNode tree) {
        ParseTreeNode[] kids = tree.children;
        if (kids != null && kids.length != 0) {
            ArrayList<ParseTreeNode> leaves = new ArrayList<ParseTreeNode>();
            for (int i = 0; i < kids.length; ++i) {
                leaves.addAll(this.getTerminalNodes(kids[i]));
            }
            return leaves;
        }
        // childless node: it counts only if it has a word
        if (tree.word == null) {
            return Collections.emptyList();
        }
        return Collections.singletonList(tree);
    }

    /**
     * Returns true if <CODE>node</CODE> has no child array (an empty child
     * array does not count as terminal).
     */
    private boolean isTerminalNode(ParseTreeNode node) {
        ParseTreeNode[] kids = node.children;
        return kids == null;
    }

    /**
     * Creates a "constit" annotation for <CODE>node</CODE> and for every node
     * below it, storing each in the node's <CODE>ann</CODE> field.  The
     * annotation carries the features "cat", "head" (only if the head is
     * non-zero) and "func" (only if the node has a function tag).
     *
     * @param node the root of the (sub)tree; spans must be set already
     * @param doc  the document to add the annotations to, or null to create
     *             the annotations without adding them to a document
     */
    private void setAnnotations(ParseTreeNode node, Document doc) {
        FeatureSet features = new FeatureSet();
        features.put("cat", node.category);
        if (node.head != 0) {
            features.put("head", node.head);
        }
        if (node.function != null) {
            features.put("func", node.function);
        }
        node.ann = new Annotation("constit", new Span(node.start, node.end), features);
        if (doc != null) {
            doc.addAnnotation(node.ann);
        }
        ParseTreeNode[] kids = node.children;
        if (kids == null) {
            return;
        }
        for (int i = 0; i < kids.length; ++i) {
            this.setAnnotations(kids[i], doc);
        }
    }

    /**
     * Annotates the tree rooted at <CODE>node</CODE> using the constituents
     * that <CODE>StatParser</CODE> builds for the text of the sentence.
     * <p>
     * For each terminal, the "constit" annotations starting at the terminal
     * are examined.  A "hyphword" constituent takes precedence over a "name"
     * constituent.  A terminal at which a name starts is extended to the end
     * of the name and takes the name's annotation; the following terminals up
     * to the end of the name lose their word and are pruned from the tree.
     * Every other terminal gets a word definition built by
     * <CODE>StatParser.buildWordDefn</CODE>.  Finally spans of non-terminals
     * are recomputed, the head rule is applied and parse annotations are
     * generated.
     * <p>
     * Category values are compared with <CODE>equals</CODE> so that the
     * result does not depend on whether the feature values are interned
     * strings, and the Penn tag is upper-cased with a fixed locale so that
     * it is the same on every platform.
     *
     * @param node     the root of the tree; terminal spans must be set
     * @param treeSpan the span of the sentence in the document
     * @param doc      the document holding the sentence
     */
    private void setJetAnnotations(ParseTreeNode node, Span treeSpan, Document doc) {
        StatParser.buildParserInput(doc, treeSpan.start(), treeSpan.end(), false);
        StatParser.fixHyphenatedItems(doc);
        // end of the name currently being consumed, or -1 if not inside a name
        int nameConstitEnd = -1;
        List<ParseTreeNode> terminals = this.getTerminalNodes(node);
        for (ParseTreeNode terminal : terminals) {
            // remembered before terminal.end is possibly extended below
            int terminalEnd = terminal.end;
            Vector<Annotation> constits = doc.annotationsAt(terminal.start, "constit");
            Annotation constit = null;
            Annotation nameConstit = null;
            Annotation hyphword = null;
            if (constits != null) {
                for (Annotation c : constits) {
                    Object cat = c.get("cat");
                    if ("name".equals(cat)) {
                        nameConstit = c;
                    } else if ("hyphword".equals(cat)) {
                        hyphword = c;
                    }
                    // the first constituent found is the default choice
                    if (constit == null) {
                        constit = c;
                    }
                }
            }
            if (hyphword != null) {
                nameConstit = null;
                constit = hyphword;
            }
            if (nameConstit != null) {
                terminal.end = nameConstit.end();
                terminal.ann = nameConstit;
                nameConstitEnd = nameConstit.end();
            } else if (nameConstitEnd >= 0) {
                // inside a name: mark the terminal for removal by pruneTree
                terminal.word = null;
            } else {
                Span span = new Span(terminal.start, terminal.end);
                String pennPOS = ((String)terminal.category).toUpperCase(java.util.Locale.ENGLISH).intern();
                String word = terminal.word;
                terminal.ann = StatParser.buildWordDefn(doc, word, span, constit, pennPOS);
            }
            if (nameConstitEnd == terminalEnd) {
                // this terminal was the last one covered by the name
                nameConstitEnd = -1;
            }
        }
        this.pruneTree(node);
        this.determineNonTerminalSpans(node, treeSpan.start());
        if (this.hr == null) {
            this.hr = HeadRule.createDefaultRule();
        }
        this.hr.apply(node);
        ParseTreeNode.makeParseAnnotations(doc, node);
    }

    /**
     * Removes from the tree every node that has neither a word nor any
     * remaining children.  The child arrays of the surviving nodes are
     * replaced in place; a node left without children gets a null child
     * array.
     *
     * @param node the root of the (sub)tree to prune
     * @return <CODE>node</CODE>, or null if the node itself is to be removed
     */
    private ParseTreeNode pruneTree(ParseTreeNode node) {
        ParseTreeNode[] children = node.children;
        if (children != null) {
            ArrayList<ParseTreeNode> newChildren = new ArrayList<ParseTreeNode>();
            for (ParseTreeNode child : children) {
                ParseTreeNode c = this.pruneTree(child);
                if (c != null) {
                    newChildren.add(c);
                }
            }
            children = newChildren.isEmpty() ? null : newChildren.toArray(new ParseTreeNode[0]);
            node.children = children;
        }
        if (node.word == null && children == null) {
            return null;
        }
        return node;
    }

    // Fills the lookup tables used when reading words and when aligning or
    // regenerating text.
    static {
        // bracket codes and the brackets they stand for
        String[][] bracketCodes = {
            {"-LRB-", "("}, {"-LCB-", "{"}, {"-LSB-", "["},
            {"-RRB-", ")"}, {"-RCB-", "}"}, {"-RSB-", "]"}
        };
        for (String[] entry : bracketCodes) {
            TRANSFORM_TABLE.put(entry[0], entry[1]);
        }

        PUNCTUATIONS = new HashSet<String>();
        for (String mark : new String[]{".", ",", "?", "!"}) {
            PUNCTUATIONS.add(mark);
        }

        NO_FOLLOWING_SPACE = new HashSet<String>();
        for (String opener : new String[]{"(", "{", "["}) {
            NO_FOLLOWING_SPACE.add(opener);
        }

        DELETE_PREVIOUS_SPACE = new HashSet<String>();
        for (String attached : new String[]{")", "}", "]", ".", ","}) {
            DELETE_PREVIOUS_SPACE.add(attached);
        }

        // text fragments ignored in front of a word; tried in this order
        skip = new String[]{
            "....", "...",
            "uh,", "Uh,", "um,", "Um,",
            "&lt;", "&LT;", "&gt;", "&GT;",
            "_"
        };

        // pairs of (form in the document text, form in the tree)
        match = new String[]{
            "\"", "``",
            "\"", "''",
            "&quot;", "``",
            "&quot;", "''",
            "&QUOT;", "``",
            "&QUOT;", "''",
            "&amp;", "&",
            "&AMP;", "&",
            "wo", "will",
            "Wo", "Will",
            "((", "(",
            "))", ")"
        };
    }
}

