/*
 * Decompiled with CFR 0.152.
 */
package edu.nyu.jet.util;

import edu.nyu.jet.lisp.FeatureSet;
import edu.nyu.jet.tipster.Annotation;
import edu.nyu.jet.tipster.Document;
import edu.nyu.jet.tipster.ExternalDocument;
import edu.nyu.jet.tipster.Span;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class AceUtils {
    /** Annotation type looked up by the name annotators to find a document's text segment. */
    private static final String TEXT_SEGMENT_TAG_NAME = "TEXT";
    /** Factory from which {@link #loadAnnotatedDocument(File)} obtains its XML parser. */
    private static final DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
    /** Unmodifiable entity-type renaming table; populated by the static initializer. */
    private static final Map<String, String> typeMap;
    /** Unmodifiable map from DOCTYPE system identifier to the annotator that handles it; populated by the static initializer. */
    private static final Map<String, NameAnnotator> annotators;

    /**
     * Parses an annotation XML file, opens the source text it refers to and
     * adds the name annotations described by the XML to that text.
     *
     * <p>The annotator is chosen from the system identifier of the XML
     * file's DOCTYPE declaration.
     *
     * @param file the annotation XML file; the source text is looked up in
     *             the same directory
     * @return the opened source document with name annotations added
     * @throws ParserConfigurationException if no XML parser can be created
     * @throws SAXException if the annotation file is not well-formed
     * @throws IOException if the annotation file cannot be read, or the
     *         source text cannot be located or opened
     * @throws RuntimeException if the annotation file has no DOCTYPE or its
     *         DOCTYPE is not one of the supported ones
     */
    public static Document loadAnnotatedDocument(File file) throws ParserConfigurationException, SAXException, IOException {
        DocumentBuilder parser = builderFactory.newDocumentBuilder();
        parser.setEntityResolver(new ACEEntityResolver());
        Element root = parser.parse(file).getDocumentElement();

        // getSourceFile reports "not found" as null; turn that into a
        // descriptive I/O failure instead of a NullPointerException.
        File sourceFile = AceUtils.getSourceFile(file.getParentFile(), root);
        if (sourceFile == null) {
            throw new IOException("Cannot locate the source text for " + file.getPath());
        }
        ExternalDocument doc = new ExternalDocument("sgml", sourceFile.getPath());
        doc.setAllTags(true);
        if (!doc.open()) {
            throw new IOException("Cannot open source document " + sourceFile.getPath());
        }

        // getDoctype() is null when the XML carries no DOCTYPE declaration.
        DocumentType docType = root.getOwnerDocument().getDoctype();
        if (docType == null) {
            throw new RuntimeException("No DOCTYPE declaration found in " + file.getPath());
        }
        String systemId = docType.getSystemId();
        NameAnnotator annotator = annotators.get(systemId);
        if (annotator == null) {
            throw new RuntimeException("DOCTYPE " + systemId + " is not supported.");
        }
        annotator.annotate(doc, root);
        return doc;
    }

    /**
     * Loads every annotation file in a directory.
     *
     * <p>Only direct children whose lower-cased name ends with
     * {@code .apf.xml} are loaded; each one is passed to
     * {@link #loadAnnotatedDocument(File)}.
     *
     * @param dir the directory to scan (not recursive)
     * @return the annotated documents, in the order the files were listed
     * @throws ParserConfigurationException if no XML parser can be created
     * @throws SAXException if an annotation file is not well-formed
     * @throws IOException if {@code dir} cannot be listed or a file cannot
     *         be loaded
     */
    public static Collection<Document> loadAnnotatedDocumentsFromDirectory(File dir) throws ParserConfigurationException, SAXException, IOException {
        // listFiles() yields null (not an empty array) when dir is not a
        // directory or cannot be read.
        File[] files = dir.listFiles();
        if (files == null) {
            throw new IOException("Cannot list directory " + dir.getPath());
        }
        ArrayList<Document> docs = new ArrayList<Document>();
        for (File file : files) {
            if (!file.getName().toLowerCase().endsWith(".apf.xml")) {
                continue;
            }
            docs.add(AceUtils.loadAnnotatedDocument(file));
        }
        return docs;
    }

    /**
     * Locates the source text that an annotation file refers to.
     *
     * <p>Two candidates are tried in order: the file named by the root
     * element's {@code URI} attribute, then {@code <DOCID>.sgm} where
     * {@code DOCID} is read from the {@code document} child element.
     *
     * @param base directory against which both candidates are resolved
     * @param root root element of the annotation XML
     * @return the first candidate that is an existing regular file, or
     *         {@code null} if neither is
     */
    private static File getSourceFile(File base, Element root) {
        // getAttribute returns "" for a missing attribute, which makes the
        // candidate denote the base directory itself; isFile() (rather than
        // exists()) keeps a directory from being returned as the source.
        File source = new File(base, root.getAttribute("URI"));
        if (source.isFile()) {
            return source;
        }
        Element document = AceUtils.getChild(root, "document");
        if (document == null) {
            return null;
        }
        source = new File(base, document.getAttribute("DOCID") + ".sgm");
        if (source.isFile()) {
            return source;
        }
        return null;
    }

    /**
     * Writes the ENAMEX annotations of a document as an XML document.
     *
     * <p>The output has a {@code source_file} root containing one
     * {@code document} element, which holds one {@code entity} per ENAMEX
     * annotation. Each entity carries its type and a single {@code NAME}
     * mention whose head and extent both cover the annotation's span.
     * The text of the first DOCNO annotation is used as the document id and
     * as the prefix of the entity ids, which are numbered from 1.
     *
     * @param doc the document whose ENAMEX annotations are exported
     * @param out destination of the indented XML
     * @throws IllegalArgumentException if the document has no DOCNO annotation
     * @throws ParserConfigurationException if no DOM builder can be created
     * @throws TransformerFactoryConfigurationError if no transformer factory
     *         is available
     * @throws TransformerException if serialization fails
     */
    public static void writeNamedEntities(Document doc, Writer out) throws ParserConfigurationException, TransformerFactoryConfigurationError, TransformerException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        org.w3c.dom.Document dom = builder.newDocument();

        // Fail with a clear message instead of a NullPointerException or an
        // index error when the document id cannot be determined.
        Vector<Annotation> docnos = doc.annotationsOfType("DOCNO");
        if (docnos == null || docnos.isEmpty()) {
            throw new IllegalArgumentException("Document has no DOCNO annotation to take the document id from.");
        }
        String id = doc.normalizedText(docnos.get(0));

        Element source_file = dom.createElement("source_file");
        dom.appendChild(source_file);
        source_file.setAttribute("TYPE", "text");
        source_file.setAttribute("VERSION", "1.2");
        source_file.setAttribute("URI", id);
        Element document = dom.createElement("document");
        source_file.appendChild(document);
        document.setAttribute("DOCID", id);

        Vector<Annotation> names = doc.annotationsOfType("ENAMEX");
        // A document without names still produces a valid, entity-free file.
        if (names != null) {
            int index = 1;
            for (Annotation name : names) {
                Element entity = dom.createElement("entity");
                document.appendChild(entity);
                entity.setAttribute("ID", id + "-" + index);
                Element entityType = dom.createElement("entity_type");
                entity.appendChild(entityType);
                entityType.appendChild(dom.createTextNode((String)name.get("TYPE")));
                Element entityMention = dom.createElement("entity_mention");
                entity.appendChild(entityMention);
                entityMention.setAttribute("TYPE", "NAME");
                // Head and extent are both the full span of the name.
                Element head = dom.createElement("head");
                entityMention.appendChild(head);
                head.appendChild(AceUtils.createCharseqElement(dom, doc, name.span()));
                Element extent = dom.createElement("extent");
                entityMention.appendChild(extent);
                extent.appendChild(AceUtils.createCharseqElement(dom, doc, name.span()));
                ++index;
            }
        }

        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty("indent", "yes");
        DOMSource source = new DOMSource(dom);
        StreamResult result = new StreamResult(out);
        transformer.transform(source, result);
    }

    /**
     * Builds a {@code charseq} element with {@code start} and {@code end}
     * children describing a span of the document.
     *
     * <p>The written end offset is inclusive: it starts at
     * {@code span.end() - 1} and is moved left past trailing whitespace.
     * The trimming never moves the end before the span's start, so a span
     * consisting only of whitespace yields {@code end == start}.
     *
     * @param dom  DOM document used to create the elements
     * @param doc  document whose characters are inspected for whitespace
     * @param span span to describe
     * @return the new, unattached {@code charseq} element
     */
    private static Element createCharseqElement(org.w3c.dom.Document dom, Document doc, Span span) {
        int start = span.start();
        int end = span.end() - 1;
        // Bound the trim by the span start (not by 0) so that it cannot run
        // backwards into text that precedes the span.
        while (end > start && Character.isWhitespace(doc.charAt(end))) {
            --end;
        }
        Element charseq = dom.createElement("charseq");
        Element startElement = dom.createElement("start");
        charseq.appendChild(startElement);
        startElement.appendChild(dom.createTextNode(Integer.toString(start)));
        Element endElement = dom.createElement("end");
        charseq.appendChild(endElement);
        endElement.appendChild(dom.createTextNode(Integer.toString(end)));
        return charseq;
    }

    /**
     * Advances an offset over whitespace without leaving a span.
     *
     * @param doc    document whose characters are inspected
     * @param span   span whose end is the upper limit for the result
     * @param offset position at which scanning begins
     * @return the first position at or after {@code offset} that holds a
     *         non-whitespace character or has reached {@code span.end()};
     *         {@code offset} itself if it is already at or past that end
     */
    private static final int skipWhitespace(Document doc, Span span, int offset) {
        final int limit = span.end();
        int position = offset;
        for (; position < limit; ++position) {
            if (!Character.isWhitespace(doc.charAt(position))) {
                break;
            }
        }
        return position;
    }

    /**
     * Collects the direct child elements of an element, optionally
     * restricted to one tag name.
     *
     * @param element parent whose immediate children are examined
     * @param name    tag name to keep, or {@code null} to keep every child
     *                element
     * @return the matching child elements in document order; empty if none
     */
    private static List<Element> getChildren(Element element, String name) {
        List<Element> matches = new ArrayList<Element>();
        for (Node current = element.getFirstChild(); current != null; current = current.getNextSibling()) {
            // Text, comment and other non-element nodes are ignored.
            if (current.getNodeType() == Node.ELEMENT_NODE) {
                Element candidate = (Element)current;
                if (name == null || name.equals(candidate.getTagName())) {
                    matches.add(candidate);
                }
            }
        }
        return matches;
    }

    /**
     * Finds the first direct child element with a given tag name.
     *
     * @param element parent whose immediate children are examined
     * @param tagName tag name to look for
     * @return the first matching child element in document order, or
     *         {@code null} if there is none
     */
    private static Element getChild(Element element, String tagName) {
        for (Node current = element.getFirstChild(); current != null; current = current.getNextSibling()) {
            // Only element nodes can match; everything else is skipped.
            if (current.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element candidate = (Element)current;
            if (candidate.getTagName().equals(tagName)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Loads every annotation file found in {@code ../corpus/test} and prints
     * each resulting document as SGML on standard output.
     *
     * @param args ignored
     * @throws Exception if loading any document fails
     */
    public static void main(String[] args) throws Exception {
        File corpusDir = new File("../corpus/test");
        for (Document annotated : AceUtils.loadAnnotatedDocumentsFromDirectory(corpusDir)) {
            // A wrap margin of 0 is set before the document is serialized.
            annotated.setSGMLwrapMargin(0);
            System.out.println(annotated.writeSGML(null));
        }
    }

    // Populates the two lookup tables and publishes them as read-only views.
    static {
        // Entity-type codes that are replaced by another name before being
        // stored in an ENAMEX annotation's TYPE feature.
        Map<String, String> renamedTypes = new HashMap<String, String>();
        renamedTypes.put("GSP", "GPE");
        renamedTypes.put("PER", "PERSON");
        renamedTypes.put("ORG", "ORGANIZATION");
        renamedTypes.put("LOC", "LOCATION");
        renamedTypes.put("FAC", "FACILITY");
        typeMap = Collections.unmodifiableMap(renamedTypes);

        // DOCTYPE system identifier -> annotator; two DTD versions share
        // the same annotator instance.
        Map<String, NameAnnotator> byDtd = new HashMap<String, NameAnnotator>();
        NameAnnotator apfAnnotator = Ace2005and2004NameAnnotator.getInstance();
        byDtd.put("apf.v5.1.1.dtd", apfAnnotator);
        byDtd.put("apf.v4.0.1.dtd", apfAnnotator);
        byDtd.put("ace-rdc.v2.0.1.dtd", Ace2003NameAnnotator.getInstance());
        byDtd.put("ace-pilot-ref.dtd", Ace2001NameAnnotator.getInstance());
        annotators = Collections.unmodifiableMap(byDtd);
    }

    /**
     * Entity resolver that serves DTDs from the class path.
     *
     * <p>Only the last path segment of the requested system identifier is
     * used; it is looked up under {@code edu/nyu/jet/util/dtd/} through this
     * class's class loader.
     */
    private static class ACEEntityResolver
    implements EntityResolver {
        /** Class-path directory that holds the bundled DTD files. */
        private static final String DTD_RESOURCE_DIR = "edu/nyu/jet/util/dtd/";

        private ACEEntityResolver() {
        }

        /**
         * Resolves an external entity to a bundled DTD resource.
         *
         * @param publicId ignored
         * @param systemId system identifier of the requested entity
         * @return an input source reading the bundled resource, or
         *         {@code null} if {@code systemId} is {@code null} or no
         *         matching resource exists
         */
        public InputSource resolveEntity(String publicId, String systemId) {
            if (systemId == null) {
                return null;
            }
            String path = DTD_RESOURCE_DIR + ACEEntityResolver.basename(systemId);
            ClassLoader loader = this.getClass().getClassLoader();
            InputStream in = loader.getResourceAsStream(path);
            if (in == null) {
                // Report on stderr, and say what is missing: stdout is used
                // for document output by AceUtils.main.
                System.err.println("ACE DTD resource not found: " + path);
                return null;
            }
            return new InputSource(in);
        }

        /**
         * Returns the part of a URI after its last {@code '/'}.
         *
         * @param uri the URI or path to shorten
         * @return the text following the last slash, or {@code uri}
         *         unchanged if it contains no slash
         */
        private static String basename(String uri) {
            int index = uri.lastIndexOf("/");
            if (index >= 0) {
                return uri.substring(index + 1);
            }
            return uri;
        }
    }

    /**
     * Annotator for annotation files in which the entity type is the text of
     * an {@code entity_type} child element and mention offsets are the text
     * of {@code start} and {@code end} elements inside {@code charseq}.
     * Registered for the {@code ace-pilot-ref.dtd} DOCTYPE.
     */
    private static class Ace2001NameAnnotator
    implements NameAnnotator {
        private static final Ace2001NameAnnotator instance = new Ace2001NameAnnotator();

        private Ace2001NameAnnotator() {
        }

        /** Returns the shared instance. */
        public static Ace2001NameAnnotator getInstance() {
            return instance;
        }

        /**
         * Adds an ENAMEX annotation for every {@code NAME} mention of every
         * entity under the {@code document} element.
         *
         * <p>The annotated span runs from the mention head's start offset to
         * one past its end offset, extended over any whitespace that follows
         * (but not beyond the end of the document's first TEXT segment).
         *
         * @param doc  document that receives the annotations
         * @param root root element of the annotation XML
         * @throws IllegalStateException if the document has no TEXT annotation
         */
        public void annotate(Document doc, Element root) {
            Vector<Annotation> textSegments = doc.annotationsOfType(AceUtils.TEXT_SEGMENT_TAG_NAME);
            // Checked explicitly because assertions are normally disabled.
            if (textSegments == null || textSegments.isEmpty()) {
                throw new IllegalStateException("Document has no " + AceUtils.TEXT_SEGMENT_TAG_NAME + " annotation.");
            }
            assert (textSegments.size() == 1);
            Annotation textSegment = textSegments.get(0);
            Element document = AceUtils.getChild(root, "document");
            List<Element> entities = AceUtils.getChildren(document, "entity");
            for (Element entity : entities) {
                String entityType = AceUtils.getChild(entity, "entity_type").getTextContent();
                // Replace the type by its mapped name when one is defined.
                String mappedType = typeMap.get(entityType);
                if (mappedType != null) {
                    entityType = mappedType;
                }
                List<Element> entityMentions = AceUtils.getChildren(entity, "entity_mention");
                for (Element entityMention : entityMentions) {
                    String type = entityMention.getAttribute("TYPE");
                    if (!type.equals("NAME")) {
                        continue;
                    }
                    Element head = AceUtils.getChild(entityMention, "head");
                    Element charseq = AceUtils.getChild(head, "charseq");
                    String startText = AceUtils.getChild(charseq, "start").getTextContent();
                    String endText = AceUtils.getChild(charseq, "end").getTextContent();
                    int start = Integer.parseInt(startText);
                    // The end offset in the file is inclusive; make it exclusive.
                    int end = Integer.parseInt(endText) + 1;
                    end = AceUtils.skipWhitespace(doc, textSegment.span(), end);
                    FeatureSet attrs = new FeatureSet();
                    attrs.put("TYPE", entityType);
                    doc.annotate("ENAMEX", new Span(start, end), attrs);
                }
            }
        }
    }

    /**
     * Annotator for annotation files in which the entity type is the text of
     * an {@code entity_type} child element and mention offsets are the text
     * of {@code start} and {@code end} elements inside {@code charseq}.
     * Registered for the {@code ace-rdc.v2.0.1.dtd} DOCTYPE.
     */
    private static class Ace2003NameAnnotator
    implements NameAnnotator {
        private static final Ace2003NameAnnotator instance = new Ace2003NameAnnotator();

        private Ace2003NameAnnotator() {
        }

        /** Returns the shared instance. */
        public static Ace2003NameAnnotator getInstance() {
            return instance;
        }

        /**
         * Adds an ENAMEX annotation for every {@code NAME} mention of every
         * entity under the {@code document} element.
         *
         * <p>The annotated span runs from the mention head's start offset to
         * one past its end offset, extended over any whitespace that follows
         * (but not beyond the end of the document's first TEXT segment).
         *
         * @param doc  document that receives the annotations
         * @param root root element of the annotation XML
         * @throws IllegalStateException if the document has no TEXT annotation
         */
        public void annotate(Document doc, Element root) {
            Element document = AceUtils.getChild(root, "document");
            List<Element> entities = AceUtils.getChildren(document, "entity");
            Vector<Annotation> textSegments = doc.annotationsOfType(AceUtils.TEXT_SEGMENT_TAG_NAME);
            // Fail with a message rather than a NullPointerException or an
            // index error when the text segment is missing.
            if (textSegments == null || textSegments.isEmpty()) {
                throw new IllegalStateException("Document has no " + AceUtils.TEXT_SEGMENT_TAG_NAME + " annotation.");
            }
            Annotation textSegment = textSegments.get(0);
            for (Element entity : entities) {
                String entityType = AceUtils.getChild(entity, "entity_type").getTextContent();
                // Replace the type by its mapped name when one is defined.
                String mappedType = typeMap.get(entityType);
                if (mappedType != null) {
                    entityType = mappedType;
                }
                List<Element> entityMentions = AceUtils.getChildren(entity, "entity_mention");
                for (Element entityMention : entityMentions) {
                    String type = entityMention.getAttribute("TYPE");
                    if (!type.equals("NAME")) {
                        continue;
                    }
                    Element head = AceUtils.getChild(entityMention, "head");
                    Element charseq = AceUtils.getChild(head, "charseq");
                    String startText = AceUtils.getChild(charseq, "start").getTextContent();
                    String endText = AceUtils.getChild(charseq, "end").getTextContent();
                    int start = Integer.parseInt(startText);
                    // The end offset in the file is inclusive; make it exclusive.
                    int end = Integer.parseInt(endText) + 1;
                    end = AceUtils.skipWhitespace(doc, textSegment.span(), end);
                    FeatureSet fs = new FeatureSet();
                    fs.put("TYPE", entityType);
                    doc.annotate("ENAMEX", new Span(start, end), fs);
                }
            }
        }
    }

    /**
     * Annotator for annotation files in which the entity type is the
     * {@code TYPE} attribute of the {@code entity} element and mention
     * offsets are the {@code START} and {@code END} attributes of
     * {@code charseq}. Registered for the {@code apf.v5.1.1.dtd} and
     * {@code apf.v4.0.1.dtd} DOCTYPEs.
     */
    private static class Ace2005and2004NameAnnotator
    implements NameAnnotator {
        private static final Ace2005and2004NameAnnotator instance = new Ace2005and2004NameAnnotator();

        private Ace2005and2004NameAnnotator() {
        }

        /** Returns the shared instance. */
        public static Ace2005and2004NameAnnotator getInstance() {
            return instance;
        }

        /**
         * Adds an ENAMEX annotation for every {@code NAM} mention of every
         * entity under the {@code document} element.
         *
         * <p>The annotated span runs from the mention head's START offset to
         * one past its END offset, extended over any whitespace that follows
         * (but not beyond the end of the document's first TEXT segment).
         *
         * @param doc  document that receives the annotations
         * @param root root element of the annotation XML
         * @throws IllegalStateException if the document has no TEXT annotation
         */
        public void annotate(Document doc, Element root) {
            Element document = AceUtils.getChild(root, "document");
            List<Element> entities = AceUtils.getChildren(document, "entity");
            Vector<Annotation> textSegments = doc.annotationsOfType(AceUtils.TEXT_SEGMENT_TAG_NAME);
            // Fail with a message rather than a NullPointerException or an
            // index error when the text segment is missing.
            if (textSegments == null || textSegments.isEmpty()) {
                throw new IllegalStateException("Document has no " + AceUtils.TEXT_SEGMENT_TAG_NAME + " annotation.");
            }
            Annotation textSegment = textSegments.get(0);
            for (Element entity : entities) {
                String type = entity.getAttribute("TYPE");
                // Replace the type by its mapped name when one is defined.
                String mappedType = typeMap.get(type);
                if (mappedType != null) {
                    type = mappedType;
                }
                List<Element> entityMentions = AceUtils.getChildren(entity, "entity_mention");
                for (Element entityMention : entityMentions) {
                    String nameType = entityMention.getAttribute("TYPE");
                    if (!nameType.equals("NAM")) {
                        continue;
                    }
                    Element head = AceUtils.getChild(entityMention, "head");
                    Element charseq = AceUtils.getChild(head, "charseq");
                    int start = Integer.parseInt(charseq.getAttribute("START"));
                    int end = Integer.parseInt(charseq.getAttribute("END"));
                    // END is inclusive, so scanning begins one past it.
                    end = AceUtils.skipWhitespace(doc, textSegment.span(), end + 1);
                    FeatureSet attrs = new FeatureSet();
                    attrs.put("TYPE", type);
                    doc.annotate("ENAMEX", new Span(start, end), attrs);
                }
            }
        }
    }

    private static interface NameAnnotator {
        public void annotate(Document var1, Element var2);
    }
}

