/*
 * Decompiled with CFR 0.152.
 */
package edu.nyu.jet.ne;

import edu.nyu.jet.lisp.FeatureSet;
import edu.nyu.jet.ne.Dictionary;
import edu.nyu.jet.ne.DictionaryTagger;
import edu.nyu.jet.ne.NamedEntityAttribute;
import edu.nyu.jet.tipster.Annotation;
import edu.nyu.jet.tipster.Document;
import edu.nyu.jet.tipster.Span;
import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.Token;
import edu.umass.cs.mallet.base.types.TokenSequence;
import java.util.HashSet;
import java.util.Set;
import java.util.Vector;
import java.util.logging.Logger;

/**
 * Mallet pipe that adds dictionary-category features to the tokens of an instance.
 *
 * <p>The pipe reads the following from the carrier instance:
 * <ul>
 *   <li>its data, cast to a {@link TokenSequence};</li>
 *   <li>the {@code "dictionary"} property ({@link Dictionary}); when it is absent the
 *       instance is returned untouched;</li>
 *   <li>the {@code "document"} ({@link Document}) and {@code "span"} ({@link Span})
 *       properties.</li>
 * </ul>
 * Each token must carry its own {@code "span"} property. For every token a temporary
 * {@code NE_INTERNAL} annotation holding an empty {@code "categories"} set is placed on
 * the document, a {@link DictionaryTagger} is run over the span, and every
 * {@link NamedEntityAttribute} subsequently found in a token's category set becomes a
 * feature named {@code prefix + attribute} with value {@code 1.0}. The temporary
 * annotations are removed before the method returns.
 */
public class NamedEntityInDictionaryFeature
extends Pipe {
    /** Annotation type used for the temporary per-token annotations. */
    private static final String INTERNAL_TYPE = "NE_INTERNAL";

    /** Feature key under which each temporary annotation stores its category set. */
    private static final String CATEGORIES_KEY = "categories";

    // NOTE(review): deliberately left non-final. If Pipe is serializable, changing this
    // field's modifiers would alter the default serialVersionUID — confirm before changing.
    private String prefix;

    /**
     * Creates the pipe.
     *
     * @param prefix string prepended to each category name to build the feature name
     */
    public NamedEntityInDictionaryFeature(String prefix) {
        this.prefix = prefix;
    }

    /**
     * Adds one feature per dictionary category to each token of the instance.
     *
     * @param carrier instance whose data is a {@link TokenSequence} and which carries the
     *                {@code "dictionary"}, {@code "document"} and {@code "span"} properties
     * @return the same {@code carrier}; its tokens are modified in place
     */
    public Instance pipe(Instance carrier) {
        TokenSequence tokens = (TokenSequence)carrier.getData();
        Dictionary dict = (Dictionary)carrier.getProperty("dictionary");
        if (dict == null) {
            // No dictionary configured: nothing to look up, leave the instance as is.
            return carrier;
        }
        Document doc = (Document)carrier.getProperty("document");
        Span span = (Span)carrier.getProperty("span");
        DictionaryTagger tagger = new DictionaryTagger();
        tagger.setDictionary(dict);
        // Logger.global is deprecated; this looks up the very same global logger.
        Logger.getLogger(Logger.GLOBAL_LOGGER_NAME).info(Integer.toString(tokens.size()));
        try {
            this.annotateNETokens(doc, tokens);
            // Presumably the tagger fills the category sets of the NE_INTERNAL
            // annotations created above — verify against DictionaryTagger.
            tagger.annotate(doc, span);
            Vector<Annotation> neTokens = doc.annotationsOfType(INTERNAL_TYPE, span);
            // NOTE(review): annotationsOfType may return null when the document holds no
            // annotation of this type (e.g. an empty token sequence) — confirm against
            // Document. Treated here as "zero annotations".
            int neCount = neTokens == null ? 0 : neTokens.size();
            assert (tokens.size() == neCount) : tokens.size() + " != " + neCount;
            for (int i = 0; i < neCount; ++i) {
                Token token = tokens.getToken(i);
                Annotation neToken = neTokens.get(i);
                // The set was created by annotateNETokens; the cast only restores the
                // element type that the untyped feature lookup loses.
                @SuppressWarnings("unchecked")
                Set<NamedEntityAttribute> categories =
                        (Set<NamedEntityAttribute>)neToken.get(CATEGORIES_KEY);
                for (NamedEntityAttribute attr : categories) {
                    String name = (this.prefix + attr.toString()).intern();
                    token.setFeatureValue(name, 1.0);
                }
            }
        } finally {
            // Always drop the temporary annotations, even if tagging or feature
            // extraction failed, so they never leak into the caller's document.
            doc.removeAnnotationsOfType(INTERNAL_TYPE);
        }
        return carrier;
    }

    /**
     * Places one temporary {@code NE_INTERNAL} annotation on the document for every token,
     * located at the token's {@code "span"} property and carrying an empty, mutable
     * {@code "categories"} set.
     *
     * @param doc    document that receives the annotations
     * @param tokens tokens to annotate; each must have a {@code "span"} property
     */
    private void annotateNETokens(Document doc, TokenSequence tokens) {
        for (int i = 0; i < tokens.size(); ++i) {
            Token token = tokens.getToken(i);
            Span span = (Span)token.getProperty("span");
            Set<NamedEntityAttribute> categories = new HashSet<NamedEntityAttribute>();
            FeatureSet fs = new FeatureSet();
            fs.put(CATEGORIES_KEY, categories);
            doc.annotate(INTERNAL_TYPE, span, fs);
        }
    }
}

