/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.gene.candidateretrieval.scoring;

import cc.mallet.pipe.Pipe;
import cc.mallet.types.Alphabet;
import cc.mallet.types.Instance;
import cc.mallet.types.LabelAlphabet;
import de.julielab.gene.candidateretrieval.GeneRecordHit;
import de.julielab.gene.candidateretrieval.scoring.MaxEntScorerPairExtractor;
import de.julielab.geneexpbase.scoring.Scorer;
import de.julielab.geneexpbase.scoring.SimpleScorer;
import de.julielab.geneexpbase.scoring.TokenJaroSimilarity;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class MaxEntScorerFeaturePipe
extends Pipe
implements Serializable {
    public static long featureCreationTime = 0L;
    private static final long serialVersionUID = 1L;
    private static final Logger LOGGER = LoggerFactory.getLogger(MaxEntScorerFeaturePipe.class);
    private final String GREEK = "(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)";
    private final String GREEK_ALPHA = "alpha";
    private final String NUMBER = "[0-9]+";
    private final String ONE = "1";
    private final String CHAR = "[a-z]";
    private final String ALPHA = "[a-z]+";
    private final String MOL_WEIGHT = "p [0-9][0-9]?";
    private final boolean lexicalize = true;
    private final boolean debug = false;
    private final String MODIFIER = "(receptor|tranporter|regulator|inhibitor|activator|suppressor|enhancer|repressor|adaptor|interactor|modulator|mediator|inducer|effector|coactivator|supressor|integrator|facilitator|binder|terminator|acceptor|proactivator|exchanger|enhancer|adapter|responder|modifier|ligand|cofactor|tranporting|regulating|inhibiting|activating|suppressing|enhancing|repressing|adapting|interacting|modulating|mediating|inducing|effecting|coactivating|supressing|integrating|facilitating|binding|terminating|accepting|responding|proactivating|exchanging|enhancing|adapting|modifying|coreceptor|cotranporter|coregulator|coinhibitor|coactivator|cosuppressor|coenhancer|corepressor|coadaptor|cointeractor|comodulator|comediator|coinducer|coeffector|coactivator|cointegrator|cofacilitator|cobinder|coterminator|coacceptor|proactivator|coexchanger|coenhancer|coadapter|coresponder|comodifier|coligand|cofactor)";
    private final String NON_DESCRIPTIVE = "(fragment|antigen|precursor|protein|chain|domain|gene|homolog|homologue|isoform|isolog|isotype|motif|ortholog|precursor|precursors|product|sequence|subtype|subunit)";
    private final TokenJaroSimilarity jaroSim = new TokenJaroSimilarity();

    public MaxEntScorerFeaturePipe() {
        super(new Alphabet(), (Alphabet)new LabelAlphabet());
    }

    public MaxEntScorerFeaturePipe(Alphabet dataAlphabet) {
        super(dataAlphabet, null);
    }

    public Instance pipe(Instance carrier) {
        long time = 0L;
        if (Scorer.SCORING_TIME != null) {
            time = System.nanoTime();
        }
        String featurePrefix = "";
        Object[] pair = (Object[])carrier.getData();
        Map featureMap = (Map)pair[0];
        GeneRecordHit synHit = (GeneRecordHit)((Object)pair[1]);
        float label = ((Float)pair[2]).floatValue();
        featurePrefix = (String)pair[3];
        String geneText = synHit.getMappedGeneName().getText();
        this.setFeatures(geneText, synHit.getSynonym(), "", featureMap, 1);
        if (Scorer.SCORING_TIME != null) {
            featureCreationTime += System.nanoTime() - time;
        }
        return carrier;
    }

    public void setFeatures(String textGeneMention, String synhitSynonm, String featurePrefix, Map<String, Double> featureMap, int normConstant) {
        if (textGeneMention == null || textGeneMention.isBlank() || synhitSynonm == null || synhitSynonm.isBlank()) {
            return;
        }
        MaxEntScorerPairExtractor ext = new MaxEntScorerPairExtractor();
        String[][] results = ext.compareStrings(textGeneMention, synhitSynonm);
        String[] allBigramsTerm1 = this.allBigrams(textGeneMention);
        String[] allBigramsTerm2 = this.allBigrams(synhitSynonm);
        String[] diffBigrams = this.differentBigrams(textGeneMention, synhitSynonm);
        String[] commonBigrams = this.commonBigrams(textGeneMention, synhitSynonm);
        String[] diffTrigrams = this.differentTrigrams(textGeneMention, synhitSynonm);
        String[] commonTrigrams = this.commonTrigrams(textGeneMention, synhitSynonm);
        boolean term1HasMolWeight = false;
        boolean term2HasMolWeight = false;
        for (String bigram1 : allBigramsTerm1) {
            if (!bigram1.matches("p [0-9][0-9]?")) continue;
            term1HasMolWeight = true;
        }
        for (String bigram2 : allBigramsTerm2) {
            if (!bigram2.matches("p [0-9][0-9]?")) continue;
            term2HasMolWeight = true;
        }
        for (String bigram : diffBigrams) {
            if (!bigram.matches("p [0-9][0-9]?") || !term1HasMolWeight || !term2HasMolWeight) continue;
            featureMap.merge(featurePrefix + "DIFF_MOL_WEIGHT", 1.0 / (double)normConstant, Double::sum);
        }
        for (String bigram : commonBigrams) {
            featureMap.merge(featurePrefix + "COMMON_BIGRAM=" + bigram, 1.0 / (double)normConstant, Double::sum);
            if (!bigram.matches("p [0-9][0-9]?")) continue;
            featureMap.merge(featurePrefix + "SAME_MOL_WEIGHT", 1.0 / (double)normConstant, Double::sum);
        }
        for (String trigram : commonTrigrams) {
            featureMap.merge(featurePrefix + "COMMON_TRIGRAM=" + trigram, 1.0 / (double)normConstant, Double::sum);
        }
        double simpleScore = new SimpleScorer().getScore(textGeneMention, synhitSynonm);
        if (simpleScore == 1.0) {
            featureMap.merge(featurePrefix + "SIMPLESCORE=1", 1.0 / (double)normConstant, Double::sum);
        } else if (simpleScore >= 0.9) {
            featureMap.merge(featurePrefix + "SIMPLESCORE>=0.9", 1.0 / (double)normConstant, Double::sum);
        } else if (simpleScore >= 0.8) {
            featureMap.merge(featurePrefix + "SIMPLESCORE>=0.8", 1.0 / (double)normConstant, Double::sum);
        } else if (simpleScore >= 0.7) {
            featureMap.merge(featurePrefix + "SIMPLESCORE>=0.7", 1.0 / (double)normConstant, Double::sum);
        } else if (simpleScore >= 0.6) {
            featureMap.merge(featurePrefix + "SIMPLESCORE>=0.6", 1.0 / (double)normConstant, Double::sum);
        } else if (simpleScore >= 0.5) {
            featureMap.merge(featurePrefix + "SIMPLESCORE>=0.5", 1.0 / (double)normConstant, Double::sum);
        } else if (simpleScore >= 0.3) {
            featureMap.merge(featurePrefix + "SIMPLESCORE>=0.3", 1.0 / (double)normConstant, Double::sum);
        }
        if (textGeneMention.indexOf(synhitSynonm) > -1 || synhitSynonm.indexOf(textGeneMention) > -1) {
            featureMap.merge(featurePrefix + "SUBSTRING", 1.0 / (double)normConstant, Double::sum);
        }
        int transpositions = this.jaroSim.getTokenTranspositions(textGeneMention, synhitSynonm);
        featureMap.merge(featurePrefix + "TRANSPOSITIONS=" + transpositions, 1.0 / (double)normConstant, Double::sum);
        HashMap<String, Integer> sames = new HashMap<String, Integer>();
        for (int j = 0; j < results[0].length; ++j) {
            String sameToken = results[0][j];
            if (sameToken.matches("[0-9]+")) {
                this.add2HashMap(sames, "SAME_NUM");
                continue;
            }
            if (sameToken.matches("(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)")) {
                this.add2HashMap(sames, "SAME_GREEK");
                continue;
            }
            if (sameToken.matches("[a-z]")) continue;
            if (sameToken.matches("[a-z]+")) {
                this.add2HashMap(sames, "SAME_ALPHA");
                continue;
            }
            if (sameToken.matches("(receptor|tranporter|regulator|inhibitor|activator|suppressor|enhancer|repressor|adaptor|interactor|modulator|mediator|inducer|effector|coactivator|supressor|integrator|facilitator|binder|terminator|acceptor|proactivator|exchanger|enhancer|adapter|responder|modifier|ligand|cofactor|tranporting|regulating|inhibiting|activating|suppressing|enhancing|repressing|adapting|interacting|modulating|mediating|inducing|effecting|coactivating|supressing|integrating|facilitating|binding|terminating|accepting|responding|proactivating|exchanging|enhancing|adapting|modifying|coreceptor|cotranporter|coregulator|coinhibitor|coactivator|cosuppressor|coenhancer|corepressor|coadaptor|cointeractor|comodulator|comediator|coinducer|coeffector|coactivator|cointegrator|cofacilitator|cobinder|coterminator|coacceptor|proactivator|coexchanger|coenhancer|coadapter|coresponder|comodifier|coligand|cofactor)")) {
                this.add2HashMap(sames, "SAME_MODIFIER");
                continue;
            }
            if (sameToken.matches("(fragment|antigen|precursor|protein|chain|domain|gene|homolog|homologue|isoform|isolog|isotype|motif|ortholog|precursor|precursors|product|sequence|subtype|subunit)")) continue;
            sames.put("SAME_STRING=" + sameToken, 1);
        }
        for (String key : sames.keySet()) {
            int count = sames.get(key);
            featureMap.merge(featurePrefix + key + "=" + count, 1.0 / (double)normConstant, Double::sum);
        }
        int numOfSames = results[0].length;
        featureMap.merge(featurePrefix + "NUM_OF_SAMES=" + numOfSames, 1.0 / (double)normConstant, Double::sum);
        if (results[0].length == 1) {
            String onlySame = results[0][0];
            if (onlySame.matches("[0-9]+")) {
                featureMap.merge(featurePrefix + "ONLY_SAME_NUMBER", 1.0 / (double)normConstant, Double::sum);
            } else if (onlySame.matches("(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)")) {
                featureMap.merge(featurePrefix + "ONLY_SAME_GREEK", 1.0 / (double)normConstant, Double::sum);
            } else if (onlySame.matches("[a-z]")) {
                featureMap.merge(featurePrefix + "ONLY_SAME_CHAR", 1.0 / (double)normConstant, Double::sum);
            } else if (onlySame.matches("[a-z]+")) {
                featureMap.merge(featurePrefix + "ONLY_SAME_ALPHA", 1.0 / (double)normConstant, Double::sum);
            } else if (!onlySame.matches("(receptor|tranporter|regulator|inhibitor|activator|suppressor|enhancer|repressor|adaptor|interactor|modulator|mediator|inducer|effector|coactivator|supressor|integrator|facilitator|binder|terminator|acceptor|proactivator|exchanger|enhancer|adapter|responder|modifier|ligand|cofactor|tranporting|regulating|inhibiting|activating|suppressing|enhancing|repressing|adapting|interacting|modulating|mediating|inducing|effecting|coactivating|supressing|integrating|facilitating|binding|terminating|accepting|responding|proactivating|exchanging|enhancing|adapting|modifying|coreceptor|cotranporter|coregulator|coinhibitor|coactivator|cosuppressor|coenhancer|corepressor|coadaptor|cointeractor|comodulator|comediator|coinducer|coeffector|coactivator|cointegrator|cofacilitator|cobinder|coterminator|coacceptor|proactivator|coexchanger|coenhancer|coadapter|coresponder|comodifier|coligand|cofactor)") && !onlySame.matches("(fragment|antigen|precursor|protein|chain|domain|gene|homolog|homologue|isoform|isolog|isotype|motif|ortholog|precursor|precursors|product|sequence|subtype|subunit)")) {
                sames.put("ONLY_SAME_STRING=" + onlySame, 1);
            }
        }
        HashMap<String, Integer> diffs = new HashMap<String, Integer>();
        for (int j = 0; j < results[1].length; ++j) {
            String diffToken = results[1][j];
            if (diffToken.matches("[0-9]+")) {
                this.add2HashMap(diffs, "DIFF_NUM");
                continue;
            }
            if (diffToken.matches("(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)")) {
                this.add2HashMap(diffs, "DIFF_GREEK");
                continue;
            }
            if (diffToken.matches("[a-z]")) {
                this.add2HashMap(diffs, "DIFF_CHAR");
                continue;
            }
            if (diffToken.matches("[a-z]+")) {
                this.add2HashMap(diffs, "DIFF_ALPHA");
                continue;
            }
            if (diffToken.matches("(receptor|tranporter|regulator|inhibitor|activator|suppressor|enhancer|repressor|adaptor|interactor|modulator|mediator|inducer|effector|coactivator|supressor|integrator|facilitator|binder|terminator|acceptor|proactivator|exchanger|enhancer|adapter|responder|modifier|ligand|cofactor|tranporting|regulating|inhibiting|activating|suppressing|enhancing|repressing|adapting|interacting|modulating|mediating|inducing|effecting|coactivating|supressing|integrating|facilitating|binding|terminating|accepting|responding|proactivating|exchanging|enhancing|adapting|modifying|coreceptor|cotranporter|coregulator|coinhibitor|coactivator|cosuppressor|coenhancer|corepressor|coadaptor|cointeractor|comodulator|comediator|coinducer|coeffector|coactivator|cointegrator|cofacilitator|cobinder|coterminator|coacceptor|proactivator|coexchanger|coenhancer|coadapter|coresponder|comodifier|coligand|cofactor)")) {
                this.add2HashMap(diffs, "DIFF_MODIFIER");
                continue;
            }
            if (diffToken.matches("(fragment|antigen|precursor|protein|chain|domain|gene|homolog|homologue|isoform|isolog|isotype|motif|ortholog|precursor|precursors|product|sequence|subtype|subunit)")) continue;
            diffs.put("DIFF_STRING=" + diffToken, 1);
        }
        for (String key : diffs.keySet()) {
            int count = diffs.get(key);
            featureMap.merge(featurePrefix + key + "=" + count, 1.0 / (double)normConstant, Double::sum);
        }
        featureMap.merge(featurePrefix + "NUM_OF_DIFFS=" + results[1].length, 1.0 / (double)normConstant, Double::sum);
        if (results[1].length == 1) {
            String onlyDiff = results[1][0];
            if (onlyDiff.matches("1")) {
                featureMap.merge(featurePrefix + "ONLY_DIFF_ONE", 1.0 / (double)normConstant, Double::sum);
            } else if (onlyDiff.matches("[0-9]+")) {
                featureMap.merge(featurePrefix + "ONLY_DIFF_NUMBER", 1.0 / (double)normConstant, Double::sum);
            } else if (onlyDiff.matches("alpha")) {
                featureMap.merge(featurePrefix + "ONLY_DIFF_GREEK_ALPHA", 1.0 / (double)normConstant, Double::sum);
            } else if (onlyDiff.matches("(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)")) {
                featureMap.merge(featurePrefix + "ONLY_DIFF_GREEK", 1.0 / (double)normConstant, Double::sum);
            } else if (onlyDiff.matches("[a-z]+")) {
                featureMap.merge(featurePrefix + "ONLY_DIFF_ALPHA", 1.0 / (double)normConstant, Double::sum);
            } else if (onlyDiff.matches("(receptor|tranporter|regulator|inhibitor|activator|suppressor|enhancer|repressor|adaptor|interactor|modulator|mediator|inducer|effector|coactivator|supressor|integrator|facilitator|binder|terminator|acceptor|proactivator|exchanger|enhancer|adapter|responder|modifier|ligand|cofactor|tranporting|regulating|inhibiting|activating|suppressing|enhancing|repressing|adapting|interacting|modulating|mediating|inducing|effecting|coactivating|supressing|integrating|facilitating|binding|terminating|accepting|responding|proactivating|exchanging|enhancing|adapting|modifying|coreceptor|cotranporter|coregulator|coinhibitor|coactivator|cosuppressor|coenhancer|corepressor|coadaptor|cointeractor|comodulator|comediator|coinducer|coeffector|coactivator|cointegrator|cofacilitator|cobinder|coterminator|coacceptor|proactivator|coexchanger|coenhancer|coadapter|coresponder|comodifier|coligand|cofactor)")) {
                featureMap.merge(featurePrefix + "ONLY_DIFF_MODIFIER", 1.0 / (double)normConstant, Double::sum);
            } else if (onlyDiff.matches("(fragment|antigen|precursor|protein|chain|domain|gene|homolog|homologue|isoform|isolog|isotype|motif|ortholog|precursor|precursors|product|sequence|subtype|subunit)")) {
                featureMap.merge(featurePrefix + "ONLY_DIFF_NON_DESCRIPTIVE", 1.0 / (double)normConstant, Double::sum);
            } else {
                sames.put("ONLY_DIFF_STRING=" + onlyDiff, 1);
            }
        }
        int lenDiff = Math.abs(textGeneMention.split(" ").length - synhitSynonm.split(" ").length);
        featureMap.merge(featurePrefix + "LENGTHDIFF=" + lenDiff, 1.0 / (double)normConstant, Double::sum);
        int maxLen = Math.max(textGeneMention.split(" ").length, synhitSynonm.split(" ").length);
        double relLenDiff = 1.0 - (double)lenDiff / (double)maxLen;
        if (relLenDiff >= 0.9) {
            featureMap.merge(featurePrefix + "RELLENGTHDIFF>=0.9", 1.0 / (double)normConstant, Double::sum);
        } else if (relLenDiff >= 0.7) {
            featureMap.merge(featurePrefix + "RELLENGTHDIFF>=0.7", 1.0 / (double)normConstant, Double::sum);
        } else if (relLenDiff >= 0.5) {
            featureMap.merge(featurePrefix + "RELLENGTHDIFF>=0.5", 1.0 / (double)normConstant, Double::sum);
        } else {
            featureMap.merge(featurePrefix + "RELLENGTHDIFF<0.5", 1.0 / (double)normConstant, Double::sum);
        }
    }

    private ArrayList<String> makeBigrams(String term) {
        String[] split = term.split(" ");
        ArrayList<String> bigrams = new ArrayList<String>();
        for (int i = 1; i < split.length; ++i) {
            Object bigram = split[i - 1] + " " + split[i];
            bigram = ((String)bigram).trim();
            bigrams.add((String)bigram);
        }
        return bigrams;
    }

    private String[] allBigrams(String term) {
        ArrayList<String> bigrams = this.makeBigrams(term);
        String[] bigramArray = bigrams.toArray(new String[0]);
        return bigramArray;
    }

    private String[] commonBigrams(String term1, String term2) {
        ArrayList<String> commons = new ArrayList<String>();
        ArrayList<String> bigrams1 = this.makeBigrams(term1);
        String[] bigramList1 = bigrams1.toArray(new String[0]);
        ArrayList<String> bigrams2 = this.makeBigrams(term2);
        String[] bigramList2 = bigrams2.toArray(new String[0]);
        for (String bigram1 : bigramList1) {
            if (!bigrams2.contains(bigram1)) continue;
            commons.add(bigram1);
        }
        for (String bigram2 : bigramList2) {
            if (!bigrams1.contains(bigram2) || commons.contains(bigram2)) continue;
            commons.add(bigram2);
        }
        return commons.toArray(new String[0]);
    }

    private ArrayList<String> makeCharTrigrams(String term) {
        StringBuilder sb = new StringBuilder(term);
        ArrayList<String> trigrams = new ArrayList<String>();
        for (int i = 2; i < sb.length(); ++i) {
            String trigram = "" + sb.charAt(i - 2) + sb.charAt(i - 1) + sb.charAt(i);
            trigrams.add(trigram);
        }
        return trigrams;
    }

    private String[] commonCharTrigrams(String term1, String term2) {
        ArrayList<String> commons = new ArrayList<String>();
        ArrayList<String> trigrams1 = this.makeCharTrigrams(term1);
        String[] trigramList1 = trigrams1.toArray(new String[0]);
        ArrayList<String> trigrams2 = this.makeCharTrigrams(term2);
        String[] trigramList2 = trigrams2.toArray(new String[0]);
        for (String trigram1 : trigramList1) {
            if (!trigrams2.contains(trigram1)) continue;
            commons.add(trigram1);
        }
        for (String trigram2 : trigramList2) {
            if (!trigrams1.contains(trigram2) || commons.contains(trigram2)) continue;
            commons.add(trigram2);
        }
        return commons.toArray(new String[0]);
    }

    private ArrayList<String> makeTrigrams(String term) {
        String[] split = term.split(" ");
        ArrayList<String> trigrams = new ArrayList<String>();
        for (int i = 2; i < split.length; ++i) {
            Object trigram = split[i - 2] + " " + split[i - 1] + " " + split[i];
            trigram = ((String)trigram).trim();
            trigrams.add((String)trigram);
        }
        return trigrams;
    }

    private String[] commonTrigrams(String term1, String term2) {
        ArrayList<String> commons = new ArrayList<String>();
        ArrayList<String> trigrams1 = this.makeTrigrams(term1);
        String[] trigramList1 = trigrams1.toArray(new String[0]);
        ArrayList<String> trigrams2 = this.makeTrigrams(term2);
        String[] trigramList2 = trigrams2.toArray(new String[0]);
        for (String trigram1 : trigramList1) {
            if (!trigrams2.contains(trigram1)) continue;
            commons.add(trigram1);
        }
        for (String trigram2 : trigramList2) {
            if (!trigrams1.contains(trigram2) || commons.contains(trigram2)) continue;
            commons.add(trigram2);
        }
        return commons.toArray(new String[0]);
    }

    private String[] differentBigrams(String term1, String term2) {
        ArrayList<String> differents = new ArrayList<String>();
        ArrayList<String> bigrams1 = this.makeBigrams(term1);
        String[] bigramList1 = bigrams1.toArray(new String[0]);
        ArrayList<String> bigrams2 = this.makeBigrams(term2);
        String[] bigramList2 = bigrams2.toArray(new String[0]);
        for (String bigram1 : bigramList1) {
            if (bigrams2.contains(bigram1)) continue;
            differents.add(bigram1);
        }
        for (String bigram2 : bigramList2) {
            if (bigrams1.contains(bigram2) || differents.contains(bigram2)) continue;
            differents.add(bigram2);
        }
        return differents.toArray(new String[0]);
    }

    private String[] differentTrigrams(String term1, String term2) {
        ArrayList<String> differents = new ArrayList<String>();
        ArrayList<String> trigrams1 = this.makeTrigrams(term1);
        String[] trigramList1 = trigrams1.toArray(new String[0]);
        ArrayList<String> trigrams2 = this.makeTrigrams(term2);
        String[] trigramList2 = trigrams2.toArray(new String[0]);
        for (String trigram1 : trigramList1) {
            if (trigrams2.contains(trigram1)) continue;
            differents.add(trigram1);
        }
        for (String trigram2 : trigramList2) {
            if (trigrams1.contains(trigram2) || differents.contains(trigram2)) continue;
            differents.add(trigram2);
        }
        return differents.toArray(new String[0]);
    }

    private void add2HashMap(HashMap<String, Integer> map, String key) {
        int count = 0;
        if (map.containsKey(key)) {
            count = map.get(key);
        }
        map.put(key, ++count);
    }
}

