/*
 * Decompiled with CFR 0.152.
 */
package edu.nyu.jet.ne;

import edu.nyu.jet.aceJet.AceDocument;
import edu.nyu.jet.aceJet.AceEntity;
import edu.nyu.jet.aceJet.AceEntityMention;
import edu.nyu.jet.hmm.HMMNameTagger;
import edu.nyu.jet.hmm.HMMTagger;
import edu.nyu.jet.hmm.WordFeatureHMMemitter;
import edu.nyu.jet.lex.Tokenizer;
import edu.nyu.jet.lisp.FeatureSet;
import edu.nyu.jet.ne.NameAnnotator;
import edu.nyu.jet.ne.RuleFormatException;
import edu.nyu.jet.ne.TrieDictionary;
import edu.nyu.jet.tipster.Annotation;
import edu.nyu.jet.tipster.Document;
import edu.nyu.jet.tipster.Span;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Command-line driver that scores named-entity tagging against gold annotations.
 *
 * <p>For every {@code .sgm}/{@code .sgml} file in the input directory the gold
 * {@code ENAMEX} annotations are read (either from inline SGML tags or from an
 * accompanying APF file), the document text is re-tagged with the configured
 * tokenizer, HMM taggers and {@link NameAnnotator}, and the two annotation sets
 * are compared.  Per-document and total recall/precision are printed to
 * {@code System.out}; the gold and system entities are written to
 * {@code <name>.gold} and {@code <name>.out} in the output directory.
 */
public class Evaluate {
    /** Properties that must be present and non-blank before any model is loaded. */
    private static final String[] REQUIRED_PROPERTIES = new String[]{
        "ne.dict.trie", "ne.dict.cdb", "ne.hierarchy", "hmm.model.pos", "ne.rule", "format", "target"
    };

    /** Character encoding used for the SGML input files and the alias map file. */
    private static final String FILE_ENCODING = "ISO-8859-1";

    private File inputDir;
    private File outputDir;
    private HMMTagger hmmTagger;
    /** Optional name tagger; stays {@code null} when {@code hmm.model.ne} is not configured. */
    private HMMNameTagger hmmNameTagger;
    private NameAnnotator annotator = new NameAnnotator();
    /** Input format, {@code "sgml"} or {@code "sgml+apf"} (compared case-insensitively). */
    private String format;
    /** Name of the SGML tag whose content is tagged and evaluated. */
    private String target;
    private int countOfExtracted;
    private int countOfGold;
    private int countOfExactMatch;
    private int countOfPositionMatch;

    /**
     * Creates an evaluator and loads all resources named in {@code props}.
     *
     * @param inputDirectory  existing directory containing the files to evaluate
     * @param outputDirectory directory for the {@code .gold}/{@code .out} files; created if absent
     * @param props           configuration (see {@link #init(Properties)})
     * @throws IllegalArgumentException if {@code inputDirectory} is not a directory, if
     *         {@code outputDirectory} exists but is not a directory, or if a required
     *         property is missing
     * @throws IOException if the output directory cannot be created or a resource cannot be read
     * @throws RuleFormatException declared by the rule-loading code invoked from {@code init}
     */
    public Evaluate(String inputDirectory, String outputDirectory, Properties props) throws IOException, RuleFormatException {
        this.inputDir = new File(inputDirectory);
        this.outputDir = new File(outputDirectory);
        if (!this.inputDir.isDirectory()) {
            throw new IllegalArgumentException("inputDirectory must be directory");
        }
        if (this.outputDir.exists()) {
            if (!this.outputDir.isDirectory()) {
                throw new IllegalArgumentException("outputDirectory must be directory");
            }
        } else if (!this.outputDir.mkdirs() && !this.outputDir.isDirectory()) {
            // mkdirs() also returns false when another process created the directory
            // in the meantime, hence the second isDirectory() check.
            throw new IOException("cannot create output directory: " + this.outputDir.getPath());
        }
        this.init(props);
    }

    /**
     * Reads the configuration and loads the dictionary, HMM models, rules,
     * class hierarchy and (optionally) the alias map.
     *
     * <p>Required properties are listed in {@link #REQUIRED_PROPERTIES};
     * {@code hmm.model.ne} and {@code ne.map} are optional.
     *
     * @throws IllegalArgumentException if a required property is missing or blank
     */
    private void init(Properties props) throws IOException, RuleFormatException {
        String dictTrie = props.getProperty("ne.dict.trie");
        String dictCdb = props.getProperty("ne.dict.cdb");
        String neHierarchyFile = props.getProperty("ne.hierarchy");
        String hmmPosFile = props.getProperty("hmm.model.pos");
        String hmmNameFile = props.getProperty("hmm.model.ne");
        String ruleFile = props.getProperty("ne.rule");
        String neMapFile = props.getProperty("ne.map");
        this.target = props.getProperty("target");
        this.format = props.getProperty("format");

        // Validate before touching any file so the user gets a clear message.
        for (String name : REQUIRED_PROPERTIES) {
            String value = props.getProperty(name);
            if (value == null || value.trim().length() == 0) {
                throw new IllegalArgumentException(name + " must be specified");
            }
        }

        TrieDictionary dict = new TrieDictionary(dictTrie, dictCdb);
        this.annotator.setDictionary(dict);

        this.hmmTagger = new HMMTagger();
        this.hmmTagger.load(hmmPosFile);

        if (hmmNameFile != null) {
            this.hmmNameTagger = new HMMNameTagger(WordFeatureHMMemitter.class);
            this.hmmNameTagger.load(hmmNameFile);
            this.hmmNameTagger.nameHMM.newDocument();
        }

        this.annotator.loadRules(new File(ruleFile));
        this.annotator.loadClassHierarchy(new File(neHierarchyFile));

        if (neMapFile != null) {
            Map<String, String> neMap = this.loadNamedEntityMap(neMapFile);
            this.annotator.setAliasMap(neMap);
        }
    }

    /**
     * Evaluates every {@code .sgm}/{@code .sgml} file in the input directory and
     * prints the accumulated recall and precision to {@code System.out}.
     *
     * @throws IOException      if the input directory cannot be listed or a file cannot be read/written
     * @throws RuntimeException if the configured format is neither {@code sgml} nor {@code sgml+apf}
     */
    public void run() throws IOException {
        this.countOfGold = 0;
        this.countOfExtracted = 0;
        this.countOfExactMatch = 0;
        this.countOfPositionMatch = 0;

        File[] files = this.inputDir.listFiles();
        if (files == null) {
            // listFiles() returns null on I/O error or if the directory vanished.
            throw new IOException("cannot list input directory: " + this.inputDir.getPath());
        }

        for (File file : files) {
            String filename = file.getPath();
            if (!filename.endsWith(".sgm") && !filename.endsWith(".sgml")) {
                continue;
            }
            Document doc;
            if (this.format.equalsIgnoreCase("sgml")) {
                doc = this.readSGML(file);
            } else if (this.format.equalsIgnoreCase("sgml+apf")) {
                File apfFile = new File(file.getParent(), file.getName() + ".apf.xml");
                doc = this.readSGMLAndAPF(file, apfFile);
            } else {
                throw new RuntimeException("Illegal format : " + this.format);
            }
            this.processDocument(doc, file);
        }

        System.out.println("total (exactMatch) :");
        this.printScore("recall    : ", this.countOfExactMatch, this.countOfGold);
        this.printScore("precision : ", this.countOfExactMatch, this.countOfExtracted);
        System.out.println("total: (positionMatch)");
        this.printScore("recall    : ", this.countOfPositionMatch, this.countOfGold);
        this.printScore("precision : ", this.countOfPositionMatch, this.countOfExtracted);
        System.out.println();
    }

    /** Prints {@code label} followed by the percentage with two decimals and a line break. */
    private void printScore(String label, int count, int total) {
        System.out.printf("%s%.2f", label, this.percentage(count, total));
        System.out.println();
    }

    /**
     * Returns {@code count / total} as a percentage.
     *
     * @return the percentage, or {@code 0.0} when {@code total} is zero (the ratio
     *         is undefined there and would otherwise be printed as NaN)
     */
    private double percentage(int count, int total) {
        if (total == 0) {
            return 0.0;
        }
        return (double)count / (double)total * 100.0;
    }

    /**
     * Tags a fresh copy of the gold document's text, compares the resulting
     * {@code ENAMEX} annotations with the gold ones, writes both entity lists
     * to the output directory and updates the running totals.
     *
     * @param taggedDoc document carrying the gold {@code ENAMEX} annotations
     * @param file      source file; only its name is used (for output file names and reporting)
     */
    private void processDocument(Document taggedDoc, File file) throws IOException {
        Document doc = new Document(taggedDoc.text());
        doc.annotateWithTag(this.target);
        Vector<Annotation> texts = doc.annotationsOfType(this.target);
        // extractEntities() treats a null result as "no annotations"; do the same here.
        if (texts != null) {
            for (Annotation text : texts) {
                Span span = text.span();
                Tokenizer.tokenize(doc, span);
                this.hmmTagger.tagJet(doc, span);
                if (this.hmmNameTagger != null) {
                    this.hmmNameTagger.tag(doc, span);
                }
                this.annotator.annotate(doc, span);
            }
        }

        SortedMap<Span, String> gold = this.extractEntities(taggedDoc);
        SortedMap<Span, String> extracted = this.extractEntities(doc);

        this.writeEntityFile(taggedDoc, gold, new File(this.outputDir, file.getName() + ".gold"));
        this.writeEntityFile(doc, extracted, new File(this.outputDir, file.getName() + ".out"));

        int exactMatch = 0;
        int positionMatch = 0;
        for (Map.Entry<Span, String> entry : extracted.entrySet()) {
            String goldType = gold.get(entry.getKey());
            if (goldType == null) {
                continue;
            }
            ++positionMatch;
            // Compare from the non-null gold side so a system annotation without
            // a TYPE feature counts as a mismatch instead of throwing.
            if (goldType.equals(entry.getValue())) {
                ++exactMatch;
            }
        }

        System.out.println(file.getName() + " (exactMatch) :");
        this.printScore("recall:    ", exactMatch, gold.size());
        this.printScore("precision: ", exactMatch, extracted.size());
        System.out.println(file.getName() + " (positionMatch) :");
        this.printScore("recall:    ", positionMatch, gold.size());
        this.printScore("precision: ", positionMatch, extracted.size());

        this.countOfGold += gold.size();
        this.countOfExactMatch += exactMatch;
        this.countOfExtracted += extracted.size();
        this.countOfPositionMatch += positionMatch;
    }

    /**
     * Writes {@code entities} to {@code outFile}, always closing the stream.
     *
     * @throws IOException if the file cannot be opened or the stream reports a write error
     */
    private void writeEntityFile(Document doc, SortedMap<Span, String> entities, File outFile) throws IOException {
        PrintStream out = new PrintStream(outFile);
        try {
            this.writeEntities(doc, entities, out);
            // PrintStream swallows IOExceptions; checkError() flushes and reports them.
            if (out.checkError()) {
                throw new IOException("failed to write " + outFile.getPath());
            }
        } finally {
            out.close();
        }
    }

    /**
     * Prints one line per entity: the normalized text of the span with runs of
     * whitespace collapsed to a single space, a space, and the entity type.
     */
    private void writeEntities(Document doc, SortedMap<Span, String> entities, PrintStream out) {
        for (Map.Entry<Span, String> entry : entities.entrySet()) {
            String word = doc.normalizedText(entry.getKey()).replaceAll("\\s+", " ");
            out.print(word);
            out.print(' ');
            out.print(entry.getValue());
            out.println();
        }
    }

    /**
     * Reads an SGML file whose target element contains inline entity tags such
     * as {@code <PERSON>John</PERSON>}.
     *
     * <p>The entity tags inside each target element are removed from the text and
     * replaced by {@code ENAMEX} annotations whose {@code TYPE} feature is the tag
     * name.  Each annotation span starts at the first non-whitespace character
     * after the opening tag and extends over the whitespace that follows the
     * closing tag.  Everything outside entity tags (including the target tags
     * themselves) is copied unchanged.
     *
     * @param file SGML file, read as ISO-8859-1
     * @return a document holding the stripped text and the gold annotations
     */
    private Document readSGML(File file) throws IOException {
        StringBuilder buffer = this.readText(file);

        Matcher targetMatcher = this.makeTargetTagPattern(this.target).matcher(buffer);
        Pattern tagPattern = Pattern.compile("<(.*?)>(\\s*)(.*?)</\\1>(\\s*)", Pattern.DOTALL);
        Matcher tagMatcher = tagPattern.matcher(buffer);

        StringBuilder text = new StringBuilder();
        TreeMap<Span, String> entities = new TreeMap<Span, String>();
        int copiedUpTo = 0;
        while (targetMatcher.find()) {
            int contentStart = targetMatcher.start(1);
            // Copy everything before the content, including the opening target tag.
            text.append(buffer, copiedUpTo, contentStart);

            // Search for entity tags only inside the element's content.  The cursor
            // starts at the content too, so the opening target tag is copied once.
            tagMatcher.region(contentStart, targetMatcher.end(1));
            int cursor = contentStart;
            while (tagMatcher.find()) {
                text.append(buffer, cursor, tagMatcher.start());
                String type = tagMatcher.group(1);
                String headingSpaces = tagMatcher.group(2);
                String word = tagMatcher.group(3);
                String trailingSpaces = tagMatcher.group(4);
                int start = text.length() + headingSpaces.length();
                int end = start + word.length() + trailingSpaces.length();
                entities.put(new Span(start, end), type);
                text.append(headingSpaces);
                text.append(word);
                text.append(trailingSpaces);
                cursor = tagMatcher.end();
            }
            // Remaining content plus the closing target tag.
            text.append(buffer, cursor, targetMatcher.end());
            copiedUpTo = targetMatcher.end();
        }
        text.append(buffer, copiedUpTo, buffer.length());

        Document doc = new Document(text.toString());
        for (Map.Entry<Span, String> entry : entities.entrySet()) {
            FeatureSet attrs = new FeatureSet();
            attrs.put("TYPE", entry.getValue());
            doc.annotate("ENAMEX", entry.getKey(), attrs);
        }
        return doc;
    }

    /**
     * Reads a whole file as ISO-8859-1 text.  Every line is terminated with a
     * single {@code '\n'}, regardless of the terminator used in the file.
     */
    private StringBuilder readText(File file) throws IOException {
        StringBuilder buffer = new StringBuilder();
        FileInputStream fin = new FileInputStream(file);
        try {
            BufferedReader in = new BufferedReader(new InputStreamReader(fin, FILE_ENCODING));
            String line;
            while ((line = in.readLine()) != null) {
                buffer.append(line);
                buffer.append('\n');
            }
        } finally {
            // Closing the underlying stream releases the only system resource held.
            fin.close();
        }
        return buffer;
    }

    /**
     * Reads an SGML file together with its APF annotation file and adds an
     * {@code ENAMEX} annotation (with the entity's type as {@code TYPE}) for the
     * head of every mention whose type is {@code "NAME"}.
     *
     * <p>Side effect: sets the static flags {@code AceDocument.ace2004 = false}
     * and {@code AceDocument.ace2005 = true}.
     *
     * <p>The head offsets are translated through {@link #computeOffsetMap(CharSequence)}
     * before the annotation is created.
     */
    private Document readSGMLAndAPF(File sgml, File apf) {
        AceDocument.ace2004 = false;
        AceDocument.ace2005 = true;
        AceDocument aceDoc = new AceDocument(sgml.getPath(), apf.getPath());
        Document doc = aceDoc.JetDocument();
        ArrayList<AceEntity> entities = aceDoc.entities;
        String originalText = aceDoc.JetDocument().text();
        int[] map = this.computeOffsetMap(originalText);
        for (AceEntity entity : entities) {
            for (AceEntityMention mention : entity.mentions) {
                if (!mention.type.equals("NAME")) {
                    continue;
                }
                int start = map[mention.getJetHead().start()];
                int end = map[mention.getJetHead().end()];
                FeatureSet attrs = new FeatureSet();
                attrs.put("TYPE", entity.type);
                doc.annotate("ENAMEX", new Span(start, end), attrs);
            }
        }
        return doc;
    }

    /**
     * Builds a table mapping an offset in the tag-free text to the corresponding
     * offset in {@code text}.
     *
     * <p>Characters from a {@code '<'} up to and including the next {@code '>'}
     * are treated as markup and skipped; entry {@code k} of the result is the
     * index in {@code text} of the {@code k}-th remaining character.
     *
     * @return an array with one entry per non-markup character
     */
    private int[] computeOffsetMap(CharSequence text) {
        int len = text.length();
        int[] map = new int[len];
        boolean inTag = false;
        int pos = 0;
        for (int i = 0; i < len; ++i) {
            char c = text.charAt(i);
            if (c == '<') {
                inTag = true;
            } else if (c == '>') {
                inTag = false;
            } else if (!inTag) {
                map[pos++] = i;
            }
        }
        int[] result = new int[pos];
        System.arraycopy(map, 0, result, 0, pos);
        return result;
    }

    /**
     * Collects the {@code ENAMEX} annotations of {@code doc}.
     *
     * @return a map from annotation span to the value of its {@code TYPE} feature,
     *         sorted by span; empty when the document has no such annotations
     */
    private SortedMap<Span, String> extractEntities(Document doc) {
        TreeMap<Span, String> entities = new TreeMap<Span, String>();
        Vector<Annotation> list = doc.annotationsOfType("ENAMEX");
        if (list == null) {
            return entities;
        }
        for (Annotation a : list) {
            entities.put(a.span(), (String)a.get("TYPE"));
        }
        return entities;
    }

    /**
     * Loads the alias map: one entry per line, key and value separated by
     * whitespace (the value is everything after the first whitespace run).
     * Leading and trailing whitespace is ignored and blank lines are skipped.
     *
     * @param filename file to read, as ISO-8859-1
     * @throws IOException if the file cannot be read or a non-blank line has no value
     */
    private Map<String, String> loadNamedEntityMap(String filename) throws IOException {
        HashMap<String, String> map = new HashMap<String, String>();
        FileInputStream fin = new FileInputStream(filename);
        try {
            BufferedReader in = new BufferedReader(new InputStreamReader(fin, FILE_ENCODING));
            String line;
            int lineNumber = 0;
            while ((line = in.readLine()) != null) {
                ++lineNumber;
                String trimmed = line.trim();
                if (trimmed.length() == 0) {
                    continue;
                }
                String[] fields = trimmed.split("\\s+", 2);
                if (fields.length != 2) {
                    // Explicit check: an assert would be skipped when assertions are disabled.
                    throw new IOException(filename + ":" + lineNumber
                            + ": expected \"<name> <alias>\" but found \"" + trimmed + "\"");
                }
                map.put(fields[0], fields[1]);
            }
        } finally {
            fin.close();
        }
        return map;
    }

    /**
     * Builds a pattern matching {@code <type>...</type>} (shortest match, across
     * line breaks) with the element content as group 1.
     */
    private Pattern makeTargetTagPattern(String type) {
        String quoted = Pattern.quote(type);
        return Pattern.compile("<" + quoted + ">(.*?)</" + quoted + ">", Pattern.DOTALL);
    }

    /**
     * Entry point: {@code java edu.nyu.jet.ne.Evaluate propertyFile inputDir outputDir}.
     * Exits with status -1 on wrong usage and 1 if the property file cannot be read.
     */
    public static void main(String[] args) throws IOException, RuleFormatException {
        if (args.length != 3) {
            Evaluate.usage();
            System.exit(-1);
        }
        String propFile = args[0];
        String inputDir = args[1];
        String outputDir = args[2];
        Properties props = new Properties();
        try {
            FileInputStream propStream = new FileInputStream(propFile);
            try {
                props.load(propStream);
            } finally {
                propStream.close();
            }
        }
        catch (IOException ex) {
            System.err.println(ex.getMessage());
            System.exit(1);
        }
        Evaluate eval = new Evaluate(inputDir, outputDir, props);
        eval.run();
    }

    /** Prints the command-line synopsis to {@code System.err}. */
    private static void usage() {
        System.err.printf("Usage: java %s propertyFile inputDir outputDir\n", Evaluate.class.getName());
    }
}

