/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.ae.bionlpgenesmerger;

import de.julielab.jcore.types.Annotation;
import de.julielab.jcore.types.Gene;
import de.julielab.jcore.types.Protein;
import de.julielab.jcore.utility.index.JCoReOverlapAnnotationIndex;
import java.util.ArrayList;
import java.util.List;
import java.util.OptionalInt;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.TOP;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ResourceMetaData(name="JCoRe BioNLP Gold and Predicted Genes Merge AE", description="Given the gold BioNLP ST gene mentions and other gene mentions - possibly from gene recognizer - merges the two different sources of genes. For simplicity, this component employs two different types to represent genes. The BioNLP ST reader uses the de.julielab.jcore.types.Gene type. The other genes should be realized with de.julielab.jcore.types.Protein annotations.", vendor="JULIE Lab Jena, Germany")
@TypeCapability(inputs={"de.julielab.jcore.types.Gene", "de.julielab.jcore.types.Protein"}, outputs={"de.julielab.jcore.types.Gene"})
public class BioNLPGeneMerger
extends JCasAnnotator_ImplBase {
    private static final Logger log = LoggerFactory.getLogger(BioNLPGeneMerger.class);

    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
    }

    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        this.removeFamilyNameAnnotations(aJCas);
        OptionalInt maxGeneIdNumber = StreamSupport.stream(aJCas.getAnnotationIndex(Gene.type).spliterator(), false).map(Annotation::getId).mapToInt(id -> Integer.parseInt(id.substring(1))).max();
        this.handleTpsFns(aJCas);
        this.handleFps(aJCas, maxGeneIdNumber);
        this.removePredictions(aJCas);
    }

    private void removeFamilyNameAnnotations(JCas aJCas) {
        List<Protein> famProts = StreamSupport.stream(aJCas.getAnnotationIndex(Protein.type).spliterator(), false).filter(p -> p.getSpecificType() != null ? p.getSpecificType().equals("FamilyName") || p.getSpecificType().equals("protein_familiy_or_group") : false).collect(Collectors.toList());
        famProts.forEach(TOP::removeFromIndexes);
    }

    private void removePredictions(JCas aJCas) {
        List<Protein> predProts = StreamSupport.stream(aJCas.getAnnotationIndex(Protein.type).spliterator(), false).collect(Collectors.toList());
        predProts.forEach(TOP::removeFromIndexes);
    }

    private void handleFps(JCas aJCas, OptionalInt maxGeneIdNumber) {
        if (maxGeneIdNumber.isPresent()) {
            log.debug("maxTID: {}", (Object)maxGeneIdNumber.getAsInt());
            int newIdCounter = maxGeneIdNumber.getAsInt() + 1;
            JCoReOverlapAnnotationIndex geneIndex = new JCoReOverlapAnnotationIndex(aJCas, Gene.type);
            for (Protein predProt : aJCas.getAnnotationIndex(Protein.type)) {
                List genes4protein = geneIndex.search((org.apache.uima.jcas.tcas.Annotation)predProt);
                if (!genes4protein.isEmpty()) continue;
                Gene falsePositiveGene = new Gene(aJCas, predProt.getBegin(), predProt.getEnd());
                falsePositiveGene.setComponentId("[FP] " + predProt.getComponentId() + " / " + ((Object)((Object)this)).getClass().getSimpleName());
                falsePositiveGene.setId("T" + newIdCounter++);
                log.debug("[FP] {}: {}-{}", new Object[]{falsePositiveGene.getCoveredText(), falsePositiveGene.getBegin(), falsePositiveGene.getEnd()});
                log.debug(falsePositiveGene.getId() + " " + falsePositiveGene.getCoveredText());
                falsePositiveGene.setSpecificType("protein");
                falsePositiveGene.setResourceEntryList(predProt.getResourceEntryList());
                falsePositiveGene.addToIndexes();
            }
        }
    }

    private void handleTpsFns(JCas aJCas) {
        JCoReOverlapAnnotationIndex proteinIndex = new JCoReOverlapAnnotationIndex(aJCas, Protein.type);
        ArrayList<Gene> falseNegatives = new ArrayList<Gene>();
        for (Gene goldGene : aJCas.getAnnotationIndex(Gene.type)) {
            List proteins4gene = proteinIndex.search((org.apache.uima.jcas.tcas.Annotation)goldGene);
            if (!proteins4gene.isEmpty()) {
                for (Protein predProt : proteins4gene) {
                    goldGene.setComponentId("[TP] " + goldGene.getComponentId() + " / " + ((Object)((Object)this)).getClass().getSimpleName());
                    goldGene.setResourceEntryList(predProt.getResourceEntryList());
                    log.debug("[TP] {}: {}-{}", new Object[]{goldGene.getCoveredText(), goldGene.getBegin(), goldGene.getEnd()});
                }
                continue;
            }
            falseNegatives.add(goldGene);
            log.debug("[FN] {}: {}-{}", new Object[]{goldGene.getCoveredText(), goldGene.getBegin(), goldGene.getEnd()});
        }
        falseNegatives.forEach(TOP::removeFromIndexes);
    }
}

