/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.ae.jnet.uima;

import cc.mallet.fst.CRF;
import cc.mallet.types.Alphabet;
import de.julielab.jcore.ae.jnet.uima.ConsistencyPreservation;
import de.julielab.jcore.ae.jnet.uima.Interval;
import de.julielab.jcore.ae.jnet.uima.NegativeList;
import de.julielab.jcore.types.Abbreviation;
import de.julielab.jcore.types.EntityMention;
import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.types.Token;
import de.julielab.jcore.utility.JCoReAnnotationTools;
import de.julielab.jcore.utility.index.JCoReCoverIndex;
import de.julielab.jnet.tagger.NETagger;
import de.julielab.jnet.tagger.Unit;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;
import java.util.StringTokenizer;
import java.util.TreeSet;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorContextException;
import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class EntityAnnotator
extends JCasAnnotator_ImplBase {
    /** Component id written to every entity mention created in {@code addAnnotation}. */
    private static final String COMPONENT_ID = "de.julielab.jules.ae.netagger.EntityAnnotator";
    private static final Logger LOGGER = LoggerFactory.getLogger(EntityAnnotator.class);
    /** Label that marks a unit as not belonging to any entity mention. */
    private static final String OUTSIDE_LABEL = "O";
    /** Regex for abbreviation-like strings: two or three capital letters with an optional trailing "s". */
    protected static final String ABBREV_PATTERN = "[A-Z]{2,3}s?";
    /** Compiled {@link #ABBREV_PATTERN}; only set when "IgnoreNotIntroducedAbbreviations" is configured as true. */
    public Pattern abbrevPattern = null;
    /** Maps a tagger label (left side of an "EntityTypes" entry) to the class name on its right side. */
    private HashMap<String, String> entityMap;
    /** The JNET tagger; created and loaded in {@code setModel}. */
    private NETagger tagger;
    /** Value of the "ExpandAbbreviations" parameter; false when the parameter is absent. */
    protected boolean expandAbbr = false;
    /** Built from the "ConsistencyPreservation" parameter; null when the parameter is absent. */
    protected ConsistencyPreservation consistencyPreservation = null;
    /** Value of "ConfidenceThresholdForConsistencyPreservation"; -1 when the parameter is absent. */
    protected float confidenceThresholdForConsistencyPreservation = -1.0f;
    /** Value of "ShowSegmentConfidence"; passed to the tagger and controls whether a confidence is stored. */
    protected boolean showSegmentConf = false;
    /** Right-hand sides of all "EntityTypes" entries; handed to consistency preservation. */
    protected TreeSet<String> entityMentionTypes = null;
    /** Optional negative list; null when the "NegativeList" parameter is absent. */
    protected NegativeList negativeList;
    /** Feature configuration obtained from the tagger; refreshed for every processed CAS. */
    Properties featureConfig = null;
    /** Names of the meta features whose "_feat_enabled" property is "true". */
    ArrayList<String> activatedMetas = null;
    /** One annotation iterator per entry of {@link #activatedMetas}, in the same order. */
    ArrayList<FSIterator<Annotation>> annotationIterators = null;
    /** Values of the "_feat_valMethod" properties; accessed by index in {@code getMetas}. */
    ArrayList<String> valueMethods = null;
    /** Name of the configuration parameter that switches the tagger to maximum entropy. */
    private String maxEnt_parameter = "maxEnt";
    /** Name of the configuration parameter holding the number of iterations. */
    private String iteration_parameter = "iterations";
    private boolean maxEnt = false;
    private int iterations_number = 0;

    /**
     * Reads the component configuration: loads the model, the entity type
     * mapping, the segment confidence switch and the negative list, then the
     * remaining optional parameters. Any annotator configuration problem is
     * rethrown as a {@link ResourceInitializationException}.
     *
     * @param aContext the UIMA context holding the configuration parameters
     * @throws ResourceInitializationException if the configuration is incomplete or the model cannot be loaded
     */
    @Override
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        LOGGER.info("initialize() - initializing JNET...");
        super.initialize(aContext);
        try {
            // The model must be loaded first: later steps talk to the tagger.
            this.setModel(aContext);
            this.setEntityTypes(aContext);
            this.setShowSegmentConfidence(aContext);
            this.setNegativeList(aContext);

            Object expandValue = aContext.getConfigParameterValue("ExpandAbbreviations");
            if (expandValue != null) {
                this.expandAbbr = (Boolean) expandValue;
            }

            Object consistencyValue = aContext.getConfigParameterValue("ConsistencyPreservation");
            if (consistencyValue != null) {
                this.consistencyPreservation = new ConsistencyPreservation((String) consistencyValue);
            }

            Object thresholdValue = aContext.getConfigParameterValue("ConfidenceThresholdForConsistencyPreservation");
            if (thresholdValue != null) {
                this.confidenceThresholdForConsistencyPreservation = (Float) thresholdValue;
            }

            Object ignoreAbbrevValue = aContext.getConfigParameterValue("IgnoreNotIntroducedAbbreviations");
            if (ignoreAbbrevValue != null && (Boolean) ignoreAbbrevValue) {
                this.abbrevPattern = Pattern.compile(ABBREV_PATTERN);
            }

            Object maxEntValue = aContext.getConfigParameterValue(this.maxEnt_parameter);
            if (maxEntValue != null && (Boolean) maxEntValue) {
                this.maxEnt = true;
                this.tagger.set_Max_Ent(this.maxEnt);
            }

            Object iterationsValue = aContext.getConfigParameterValue(this.iteration_parameter);
            if (iterationsValue != null) {
                this.iterations_number = (Integer) iterationsValue;
                this.tagger.set_Number_Iterations(this.iterations_number);
            }

            String consistencyInfo = this.consistencyPreservation != null ? this.consistencyPreservation.toString() : "none";
            LOGGER.info("initialize() - abbreviation expansion: " + this.expandAbbr);
            LOGGER.info("initialize() - negative list: " + (this.negativeList != null));
            LOGGER.info("initialize() - show confidence: " + this.showSegmentConf);
            LOGGER.info("initialize() - consistency preservation: " + consistencyInfo);
            LOGGER.info("initialize() - ignore not introduces abbreviations: " + (this.abbrevPattern != null));
        }
        catch (AnnotatorContextException contextProblem) {
            throw new ResourceInitializationException(contextProblem);
        }
        catch (AnnotatorConfigurationException configurationProblem) {
            throw new ResourceInitializationException(configurationProblem);
        }
        catch (AnnotatorInitializationException initializationProblem) {
            throw new ResourceInitializationException(initializationProblem);
        }
    }

    /**
     * Collects, for the given CAS, the meta features enabled in the tagger's
     * feature configuration together with an annotation iterator and the value
     * method name for each of them.
     *
     * <p>The three lists {@code activatedMetas}, {@code annotationIterators}
     * and {@code valueMethods} are built in lock-step so that the same index
     * refers to the same meta feature in all of them. The value method is
     * looked up per enabled meta rather than collected while enumerating the
     * property keys, because {@link Properties} enumerates keys in no
     * particular order and also lists value methods of disabled metas.
     *
     * @param aJCas the CAS whose annotation indexes are iterated
     * @throws AnalysisEngineProcessException if the annotation type of an enabled meta cannot be resolved
     */
    private void retrieveMetaInformation(JCas aJCas) throws AnalysisEngineProcessException {
        JFSIndexRepository indexes = aJCas.getJFSIndexRepository();
        this.featureConfig = this.tagger.getFeatureConfig();
        this.activatedMetas = new ArrayList<>();
        this.annotationIterators = new ArrayList<>();
        this.valueMethods = new ArrayList<>();

        // Pass 1: find all metas switched on via "<meta>_feat_enabled=true".
        Enumeration<Object> keys = this.featureConfig.keys();
        while (keys.hasMoreElements()) {
            String key = (String)keys.nextElement();
            if (key.matches("[A-Za-z]+_feat_enabled") && this.featureConfig.getProperty(key).matches("true")) {
                this.activatedMetas.add(key.substring(0, key.indexOf("_feat_enabled")));
            }
        }

        // Pass 2: per enabled meta, register its value method and its annotation iterator.
        for (String meta : this.activatedMetas) {
            // May be null if the property is missing; getMetas() then falls back to "no metas".
            this.valueMethods.add(this.featureConfig.getProperty(meta + "_feat_valMethod"));
            try {
                String typeClassName = this.featureConfig.getProperty(meta + "_feat_data");
                // Models may still reference the old "jules" package names.
                typeClassName = typeClassName.replaceAll("jules", "jcore");
                Annotation ann = JCoReAnnotationTools.getAnnotationByClassName(aJCas, typeClassName);
                this.annotationIterators.add(indexes.getAnnotationIndex(ann.getTypeIndexID()).iterator());
            }
            catch (Exception e) {
                throw new AnalysisEngineProcessException(e);
            }
        }
    }

    /**
     * Reads the mandatory "EntityTypes" parameter, an array of
     * {@code <label>=<type class name>} entries, and fills {@code entityMap}
     * and {@code entityMentionTypes} from it. If the tagger has a model, every
     * configured label must occur in the model's output alphabet.
     *
     * @param aContext the UIMA context holding the configuration parameters
     * @throws AnnotatorConfigurationException if the parameter is missing, an entry is not of the form
     *         {@code label=type}, or a label is unknown to the model
     */
    private void setEntityTypes(UimaContext aContext) throws ResourceInitializationException, AnnotatorContextException, AnnotatorConfigurationException {
        this.entityMentionTypes = new TreeSet<>();
        Object o = aContext.getConfigParameterValue("EntityTypes");
        if (o == null) {
            LOGGER.error("setEntityTypes() - descriptor incomplete, entity types not specified!");
            throw new AnnotatorConfigurationException();
        }
        String[] entityTypes = (String[])o;
        this.entityMap = new HashMap<>();
        List<String> configuredLabels = new ArrayList<>(entityTypes.length);
        for (String entityType : entityTypes) {
            String[] entityParts = entityType.split("=");
            if (entityParts.length < 2) {
                // Report a configuration error instead of failing with an ArrayIndexOutOfBoundsException.
                LOGGER.error("setEntityTypes() - malformed entity type entry \"{}\", expected \"<label>=<type class name>\".", entityType);
                throw new AnnotatorConfigurationException();
            }
            this.entityMap.put(entityParts[0], entityParts[1]);
            this.entityMentionTypes.add(entityParts[1]);
            configuredLabels.add(entityParts[0]);
        }

        CRF model = (CRF)this.tagger.getModel();
        if (model != null) {
            Alphabet alpha = model.getOutputAlphabet();
            Object[] modelLabels = alpha.toArray();
            for (String label : configuredLabels) {
                boolean entityFound = false;
                for (Object modelLabel : modelLabels) {
                    if (label.equals(modelLabel)) {
                        entityFound = true;
                        break;
                    }
                }
                if (!entityFound) {
                    LOGGER.error("setEntityTypes() - Could not find entity label \"{}\" from descriptor in the tagger's OutputAlphabet.", label);
                    throw new AnnotatorConfigurationException();
                }
            }
        }
        LOGGER.debug("Entity mention types: " + this.entityMentionTypes.toString());
    }

    /**
     * Creates the tagger and loads the model named by the mandatory
     * "ModelFilename" parameter. The name is first tried as a file system
     * path and, if no such file exists, as a classpath resource (a leading
     * "/" is added when missing).
     *
     * <p>The model stream is closed once the tagger has read it, and a model
     * that is found in neither location is reported explicitly.
     *
     * @param aContext the UIMA context holding the configuration parameters
     * @throws AnnotatorConfigurationException if no model file is configured
     * @throws AnnotatorInitializationException if the model cannot be found or read; carries the cause
     */
    private void setModel(UimaContext aContext) throws AnnotatorConfigurationException, AnnotatorContextException, AnnotatorInitializationException {
        Object o = aContext.getConfigParameterValue("ModelFilename");
        if (o == null) {
            LOGGER.error("setModel() - descriptor incomplete, no model file specified!");
            throw new AnnotatorConfigurationException();
        }
        String modelFilename = (String)o;
        this.tagger = new NETagger();

        File modelPath = new File(modelFilename);
        boolean fromFile = modelPath.exists();
        String cpResource = modelFilename.startsWith("/") ? modelFilename : "/" + modelFilename;
        try (InputStream is = fromFile ? new FileInputStream(modelPath) : this.getClass().getResourceAsStream(cpResource)) {
            if (is == null) {
                // getResourceAsStream() signals "not found" with null.
                throw new IOException("model \"" + modelFilename + "\" found neither as file nor as classpath resource " + cpResource);
            }
            if (fromFile) {
                LOGGER.info("Loading model from file {}", modelPath);
            } else {
                LOGGER.info("Loading model from classpath location {}", cpResource);
            }
            this.tagger.readModel(is);
        }
        catch (Exception e) {
            LOGGER.error("setModel() - Could not load JNET model: " + e.getMessage(), e);
            throw new AnnotatorInitializationException(e);
        }
    }

    /**
     * Loads the optional negative list named by the "NegativeList" parameter.
     * The value is first tried as a file system path and, if no such file
     * exists, as a classpath resource (a leading "/" is added when missing).
     * Without the parameter, no negative list is used.
     *
     * <p>The stream is closed after the list has been read, and a list that
     * is found in neither location is reported as a configuration error.
     *
     * @param aContext the UIMA context holding the configuration parameters
     * @throws AnnotatorConfigurationException if the configured negative list cannot be found or read
     */
    private void setNegativeList(UimaContext aContext) throws AnnotatorConfigurationException, AnnotatorContextException {
        Object o = aContext.getConfigParameterValue("NegativeList");
        if (o == null) {
            LOGGER.info("No negative list file given.");
            return;
        }
        String location = (String)o;
        File listFile = new File(location);
        boolean fromFile = listFile.exists();
        String cpResource = location.startsWith("/") ? location : "/" + location;
        try (InputStream is = fromFile ? new FileInputStream(listFile) : this.getClass().getResourceAsStream(cpResource)) {
            if (is == null) {
                // getResourceAsStream() signals "not found" with null.
                throw new IOException("negative list \"" + location + "\" found neither as file nor as classpath resource " + cpResource);
            }
            if (fromFile) {
                LOGGER.debug("setNegativeList() - using negative list from file: {}", listFile);
            } else {
                LOGGER.info("Read negative list from classpath location {}", cpResource);
            }
            this.negativeList = new NegativeList(is);
        }
        catch (IOException e) {
            LOGGER.error("setNegativeList() - specified negative list file cannot be read: " + e.getMessage());
            throw new AnnotatorConfigurationException(e);
        }
    }

    /**
     * Takes over the optional "ShowSegmentConfidence" parameter; the current
     * value is kept when the parameter is not set.
     *
     * @param aContext the UIMA context holding the configuration parameters
     */
    private void setShowSegmentConfidence(UimaContext aContext) throws AnnotatorContextException {
        Object configured = aContext.getConfigParameterValue("ShowSegmentConfidence");
        if (configured != null) {
            Boolean flag = (Boolean)configured;
            this.showSegmentConf = flag.booleanValue();
        }
        LOGGER.debug("setShowSegmentConfidence() - show segment confidence: " + this.showSegmentConf);
    }

    /**
     * Tags every sentence of the CAS with the JNET tagger and writes the
     * recognized entity mentions back to the CAS. When consistency
     * preservation is configured, it runs once after all sentences have been
     * processed.
     *
     * @param aJCas the CAS to process
     * @throws AnalysisEngineProcessException if meta information cannot be retrieved or prediction fails
     */
    @Override
    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        LOGGER.debug("process() - processing next document");
        JFSIndexRepository indexes = aJCas.getJFSIndexRepository();
        this.retrieveMetaInformation(aJCas);
        FSIterator<Annotation> sentenceIter = indexes.getAnnotationIndex(Sentence.type).iterator();
        JCoReCoverIndex<Token> tokenIndex = new JCoReCoverIndex<Token>(aJCas, Token.type);
        JCoReCoverIndex<Abbreviation> abbreviationIndex = new JCoReCoverIndex<Abbreviation>(aJCas, Abbreviation.type);
        while (sentenceIter.hasNext()) {
            Sentence sentence = (Sentence)sentenceIter.next();
            List<Token> tokenList = tokenIndex.search(sentence).collect(Collectors.toList());
            ArrayList<HashMap<String, String>> metaList = this.getMetaList(tokenList);
            if (tokenList.size() != metaList.size()) {
                LOGGER.error("process() - token list, and meta list for this sentence not of same size!");
                throw new AnalysisEngineProcessException();
            }
            de.julielab.jnet.tagger.Sentence unitSentence = this.createUnitSentence(tokenList, aJCas, metaList, abbreviationIndex, tokenIndex);
            // Parameterized logging: the (potentially long) strings are only built when debug is on.
            LOGGER.debug("process() - original sentence: {}", sentence.getCoveredText());
            LOGGER.debug("process() - sentence for prediction: {}", unitSentence);
            try {
                this.tagger.predict(unitSentence, this.showSegmentConf);
            }
            catch (IllegalStateException e) {
                LOGGER.error("process() - predicting with JNET failed: " + e.getMessage(), e);
                // Keep the cause so the failure can be diagnosed upstream.
                throw new AnalysisEngineProcessException(e);
            }
            if (this.expandAbbr) {
                // Expansion may have produced several units per token; collapse them again.
                unitSentence = this.removeDuplicatedTokens(unitSentence);
            }
            LOGGER.debug("process() - sentence with labels: {}", unitSentence);
            this.writeToCAS(unitSentence, aJCas, abbreviationIndex);
        }
        if (this.consistencyPreservation != null) {
            LOGGER.debug("process() - running consistency preservation");
            this.consistencyPreservation.stringMatch(aJCas, this.entityMentionTypes, this.confidenceThresholdForConsistencyPreservation);
            this.consistencyPreservation.acroMatch(aJCas, this.entityMentionTypes);
        }
    }

    /**
     * Collapses consecutive units that share the same begin/end offsets into
     * the first unit of each such run. If the units of a run do not all carry
     * the same label, the kept unit is set to the outside label.
     *
     * <p>The label reset is applied to the unit that is actually kept in the
     * returned sentence, so a disagreement that only shows up at the third or
     * a later unit of a run is not lost.
     *
     * @param unitSentence the tagged sentence, possibly containing several units per position
     * @return a new sentence with exactly one unit per position
     */
    protected de.julielab.jnet.tagger.Sentence removeDuplicatedTokens(de.julielab.jnet.tagger.Sentence unitSentence) {
        de.julielab.jnet.tagger.Sentence newUnitSentence = new de.julielab.jnet.tagger.Sentence();
        String lastPos = null;
        // The unit representing the current position in the result.
        Unit keptUnit = null;
        // Distinct labels seen so far at the current position.
        TreeSet<String> labelsAtPos = new TreeSet<String>();
        for (int k = 0; k < unitSentence.getUnits().size(); ++k) {
            Unit unit = unitSentence.get(k);
            String currPos = unit.begin + "@" + unit.end;
            if (currPos.equals(lastPos)) {
                labelsAtPos.add(unit.getLabel());
                if (labelsAtPos.size() > 1) {
                    keptUnit.setLabel(OUTSIDE_LABEL);
                }
            } else {
                newUnitSentence.add(unit);
                keptUnit = unit;
                labelsAtPos = new TreeSet<String>();
                labelsAtPos.add(unit.getLabel());
            }
            lastPos = currPos;
        }
        return newUnitSentence;
    }

    /**
     * Builds the tagger input for one sentence: one unit per token, or, with
     * abbreviation expansion enabled, the units of the abbreviation's full
     * form in place of the abbreviation token. All units created for a token
     * carry that token's offsets and meta information.
     *
     * <p>With expansion enabled, a token covered by an abbreviation that is
     * defined at this position is dropped, and empty bracket pairs left
     * behind are removed afterwards. An abbreviation without a text reference
     * is not expanded; its token is used as is.
     *
     * @param tokenList the tokens of the sentence
     * @param JCas2 the CAS the tokens belong to
     * @param metaList meta information per token, parallel to {@code tokenList}
     * @param abbreviationIndex index used to find the abbreviation covering a token
     * @param tokenIndex index used to find the tokens covered by a full form
     * @return the unit sentence to pass to the tagger
     */
    protected de.julielab.jnet.tagger.Sentence createUnitSentence(List<Token> tokenList, JCas JCas2, ArrayList<HashMap<String, String>> metaList, JCoReCoverIndex<Abbreviation> abbreviationIndex, JCoReCoverIndex<Token> tokenIndex) {
        de.julielab.jnet.tagger.Sentence unitSentence = new de.julielab.jnet.tagger.Sentence();
        ArrayList<Abbreviation> abbreviationList = this.getAbbreviationList(tokenList, JCas2, abbreviationIndex);
        for (int i = 0; i < tokenList.size(); ++i) {
            Token token = tokenList.get(i);
            HashMap<String, String> metas = metaList.get(i);
            Abbreviation abbreviation = abbreviationList.get(i);
            String tokenText = token.getCoveredText();

            boolean expand = this.expandAbbr && abbreviation != null;
            if (expand && abbreviation.getDefinedHere()) {
                // The abbreviation is introduced here, right next to its full form: drop it.
                continue;
            }
            if (expand && abbreviation.getTextReference() == null) {
                // No full form known; fall back to the plain token instead of failing.
                expand = false;
            }
            String tokenRepresentation = expand ? abbreviation.getTextReference().getCoveredText() : tokenText;
            if (tokenRepresentation == null) {
                continue;
            }
            if (tokenRepresentation.equals(tokenText)) {
                unitSentence.add(new Unit(token.getBegin(), token.getEnd(), tokenRepresentation, "", metas));
                continue;
            }

            List<Token> fullFormTokens = tokenIndex.search(abbreviation.getTextReference()).collect(Collectors.toList());
            if (tokenRepresentation.length() > 0 && fullFormTokens.isEmpty()) {
                // The full form has text but no token annotations: split it on whitespace.
                StringTokenizer st = new StringTokenizer(tokenRepresentation);
                while (st.hasMoreTokens()) {
                    unitSentence.add(new Unit(token.getBegin(), token.getEnd(), st.nextToken(), "", metas));
                }
                continue;
            }
            for (Token fullFormToken : fullFormTokens) {
                unitSentence.add(new Unit(token.getBegin(), token.getEnd(), fullFormToken.getCoveredText(), "", metas));
            }
        }
        if (this.expandAbbr) {
            unitSentence = this.removeConsecutiveBrackets(unitSentence);
        }
        return unitSentence;
    }

    /**
     * Returns a copy of the sentence without directly adjacent, empty bracket
     * pairs, i.e. "(" immediately followed by ")" or "[" immediately followed
     * by "]".
     *
     * @param unitSentence the sentence to clean
     * @return a new sentence containing all other units in their original order
     */
    private de.julielab.jnet.tagger.Sentence removeConsecutiveBrackets(de.julielab.jnet.tagger.Sentence unitSentence) {
        de.julielab.jnet.tagger.Sentence cleaned = new de.julielab.jnet.tagger.Sentence();
        List<Unit> units = unitSentence.getUnits();
        int pos = 0;
        while (pos < units.size()) {
            Unit current = units.get(pos);
            if (pos + 1 < units.size()) {
                String rep = current.getRep();
                String followingRep = units.get(pos + 1).getRep();
                boolean emptyRound = rep.equals("(") && followingRep.equals(")");
                boolean emptySquare = !emptyRound && rep.equals("[") && followingRep.equals("]");
                if (emptyRound || emptySquare) {
                    // Skip both the opening and the closing bracket.
                    pos += 2;
                    continue;
                }
            }
            cleaned.add(current);
            pos++;
        }
        return cleaned;
    }

    /**
     * Looks up, for every token, the first abbreviation the index returns for
     * it.
     *
     * @param tokenList the tokens to look up
     * @param JCas2 the CAS the tokens belong to (not used by the lookup)
     * @param abbreviationIndex the index to search
     * @return a list parallel to {@code tokenList}; an entry is {@code null} when no abbreviation was found
     */
    private ArrayList<Abbreviation> getAbbreviationList(List<Token> tokenList, JCas JCas2, JCoReCoverIndex<Abbreviation> abbreviationIndex) {
        ArrayList<Abbreviation> perToken = new ArrayList<Abbreviation>();
        for (Token token : tokenList) {
            List<Abbreviation> found = abbreviationIndex.search(token).collect(Collectors.toList());
            perToken.add(found.isEmpty() ? null : found.get(0));
        }
        return perToken;
    }

    /**
     * Computes the meta information for every token of a sentence.
     *
     * @param tokenList the tokens of the sentence, in document order
     * @return a list parallel to {@code tokenList} with one meta map per token
     */
    private ArrayList<HashMap<String, String>> getMetaList(List<Token> tokenList) {
        ArrayList<HashMap<String, String>> perToken = new ArrayList<HashMap<String, String>>();
        // One slot per activated meta, shared across the tokens of this sentence; all slots start out null.
        Interval[] pendingIntervals = new Interval[this.activatedMetas.size()];
        for (int idx = 0; idx < tokenList.size(); idx++) {
            perToken.add(this.getMetas(tokenList.get(idx), pendingIntervals));
        }
        return perToken;
    }

    /**
     * Determines the meta information for a single token.
     *
     * <p>The method advances the shared annotation iterators and mutates
     * {@code metaAnnotationValues}, so it relies on being called once per
     * token with the same array for consecutive tokens.
     *
     * @param token the token to collect meta information for
     * @param metaAnnotationValues per-meta slot holding the annotation interval currently being consumed,
     *        or {@code null} when the next annotation still has to be fetched
     * @return a map from meta unit name to value; empty when no feature configuration is available
     *         or when any step fails
     */
    private HashMap<String, String> getMetas(Token token, Interval[] metaAnnotationValues) {
        int i = 0;
        HashMap<String, String> metaInfos = new HashMap<String, String>();
        if (this.featureConfig == null) {
            return metaInfos;
        }
        try {
            // Step 1: for every meta without a pending interval, fetch the next annotation (if any)
            // and read its value by reflectively invoking the configured value method.
            for (i = 0; i < this.annotationIterators.size(); ++i) {
                if (!this.annotationIterators.get(i).hasNext() || metaAnnotationValues[i] != null) continue;
                Annotation ann = (Annotation)this.annotationIterators.get(i).next();
                String valueMethodName = this.valueMethods.get(i);
                Method valueMethod = ann.getClass().getMethod(valueMethodName, new Class[0]);
                metaAnnotationValues[i] = new Interval(ann.getBegin(), ann.getEnd(), "" + valueMethod.invoke((Object)ann, (Object[])null));
            }
            // Step 2: assign the pending values to this token where the interval check succeeds.
            for (i = 0; i < this.activatedMetas.size(); ++i) {
                Interval annotationInterval = metaAnnotationValues[i];
                String metaName = this.featureConfig.getProperty(this.activatedMetas.get(i) + "_feat_unit");
                // NOTE(review): isIn() presumably tests whether the token span lies within the interval - confirm in Interval.
                if (annotationInterval == null || !annotationInterval.isIn(token.getBegin(), token.getEnd())) continue;
                // With the begin flag set, the token that starts the annotation gets a "B_" prefix.
                if (this.featureConfig.getProperty(this.activatedMetas.get(i) + "_begin_flag").equals("true") && annotationInterval.getBegin() == token.getBegin()) {
                    metaInfos.put(metaName, "B_" + metaAnnotationValues[i].getAnnotation());
                } else {
                    metaInfos.put(metaName, metaAnnotationValues[i].getAnnotation());
                }
                // The slot is only released when the token ends exactly where the interval ends;
                // the next call then fetches the following annotation.
                if (annotationInterval.getEnd() != token.getEnd()) continue;
                metaAnnotationValues[i] = null;
            }
        }
        catch (Exception e) {
            // Best effort: any failure discards the metas collected so far for this token.
            LOGGER.warn("getMetas() - failed getting meta information for current token. No metas used!");
            metaInfos = new HashMap();
        }
        return metaInfos;
    }

    /**
     * Turns the labels of a tagged sentence into entity mentions. Consecutive
     * units with the same non-outside label form one mention; a mention ends
     * when the label changes or the sentence ends. The confidence passed on
     * for a mention is that of its last unit.
     *
     * @param unitSentence the tagged sentence
     * @param aJCas the CAS to add the mentions to
     * @param abbreviationIndex index handed through to the abbreviation filter
     */
    public void writeToCAS(de.julielab.jnet.tagger.Sentence unitSentence, JCas aJCas, JCoReCoverIndex<Abbreviation> abbreviationIndex) {
        String previousLabel = OUTSIDE_LABEL;
        int mentionBegin = 0;
        int mentionEnd = 0;
        double previousConfidence = -1.0;
        for (int idx = 0; idx < unitSentence.size(); idx++) {
            Unit current = unitSentence.get(idx);
            String currentLabel = current.getLabel();
            double currentConfidence = current.getConfidence();
            boolean wasInside = !previousLabel.equals(OUTSIDE_LABEL);
            boolean isInside = !currentLabel.equals(OUTSIDE_LABEL);

            if (!wasInside) {
                if (isInside) {
                    // A mention opens at this unit.
                    mentionBegin = current.begin;
                }
            } else if (!isInside || !previousLabel.equals(currentLabel)) {
                // The running mention ended at the previous unit.
                this.addAnnotation(aJCas, mentionBegin, mentionEnd, previousLabel, previousConfidence, abbreviationIndex);
                mentionBegin = current.begin;
            }

            previousLabel = currentLabel;
            mentionEnd = current.end;
            previousConfidence = currentConfidence;

            boolean lastUnit = idx == unitSentence.size() - 1;
            if (lastUnit && isInside) {
                // The sentence ends inside a mention: flush it.
                this.addAnnotation(aJCas, mentionBegin, mentionEnd, previousLabel, previousConfidence, abbreviationIndex);
            }
        }
    }

    /**
     * Adds one entity mention to the CAS unless it is filtered out. A mention
     * is skipped when the abbreviation filter rejects the span, when the
     * negative list contains the covered text for this label, or when no
     * entity type is configured for the label.
     *
     * @param aJCas the CAS to add the mention to
     * @param start begin offset of the mention
     * @param end end offset of the mention
     * @param label the tagger label of the mention
     * @param confidence the confidence to store when segment confidence is enabled
     * @param abbreviationIndex index handed through to the abbreviation filter
     */
    private void addAnnotation(JCas aJCas, int start, int end, String label, double confidence, JCoReCoverIndex<Abbreviation> abbreviationIndex) {
        String coveredText = aJCas.getDocumentText().substring(start, end);
        if (this.ignoreLabel(aJCas, start, end, abbreviationIndex)) {
            return;
        }
        boolean onNegativeList = this.negativeList != null && this.negativeList.contains(coveredText, label);
        if (onNegativeList) {
            LOGGER.debug("addAnnotation() - ignoring current entity mention as contained in negativeList");
            return;
        }
        String entityType = this.entityMap.get(label);
        if (entityType == null) {
            LOGGER.debug("addAnnotation() - ommitted entity mention for label: " + label);
            return;
        }
        try {
            EntityMention mention = (EntityMention)JCoReAnnotationTools.getAnnotationByClassName(aJCas, entityType);
            mention.setBegin(start);
            mention.setEnd(end);
            mention.setTextualRepresentation(coveredText);
            mention.setSpecificType(label);
            mention.setComponentId(COMPONENT_ID);
            if (this.showSegmentConf) {
                mention.setConfidence(String.valueOf(confidence));
            }
            mention.addToIndexes();
        }
        catch (Exception e) {
            // Best effort: a mention that cannot be created is logged and skipped.
            LOGGER.error("addAnnotation() - could not generate new EntityMention", e);
        }
    }

    /**
     * Decides whether an entity mention on the given span is to be dropped
     * because the span looks like an abbreviation that was never introduced.
     * The filter is only active when {@code abbrevPattern} is set.
     *
     * @param aJCas the CAS providing the document text
     * @param start begin offset of the span
     * @param end end offset of the span
     * @param abbreviationIndex index used to look up abbreviation annotations for the span
     * @return {@code true} if the span matches the abbreviation pattern and the index yields no
     *         abbreviation that has a text reference and itself matches the pattern;
     *         {@code false} otherwise
     */
    protected boolean ignoreLabel(JCas aJCas, int start, int end, JCoReCoverIndex<Abbreviation> abbreviationIndex) {
        String coveredText = aJCas.getDocumentText().substring(start, end);
        if (this.abbrevPattern == null || !this.abbrevPattern.matcher(coveredText).matches()) {
            return false;
        }
        // Typed list: iterating a raw List as Abbreviation does not compile.
        List<Abbreviation> abbreviations = abbreviationIndex.search(start, end).collect(Collectors.toList());
        if (!abbreviations.isEmpty()) {
            LOGGER.debug("ignoreLabel() - found JACRO-recognized abbreviations under this string: " + coveredText);
            for (Abbreviation abbreviation : abbreviations) {
                if (abbreviation.getTextReference() == null || !abbreviation.getCoveredText().matches(ABBREV_PATTERN)) {
                    continue;
                }
                LOGGER.debug("ignoreLabel() - abbreviation: " + abbreviation.getCoveredText() + " introduced for: " + abbreviation.getTextReference().getCoveredText());
                return false;
            }
        }
        LOGGER.debug("ignoreLabel() - ignoring annotations on " + coveredText + " because it is a not introduced abbreviation!");
        return true;
    }
}

