/*
 * Decompiled with CFR 0.152.
 */
package GNormPluslib;

import GNormPluslib.GNPProcessingData;
import GNormPluslib.GNormPlus;
import java.io.IOException;
import java.lang.invoke.CallSite;
import java.math.RoundingMode;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.stream.XMLStreamException;

public class GN {
    // Class-wide, publicly writable map (initialised with a raw HashMap, a decompiler artifact).
    // It is not referenced anywhere in the visible part of this class.
    // NOTE(review): confirm which callers read or write it before changing its type or visibility.
    public static HashMap<String, String> MatchedTokens_hash = new HashMap();
    // Processing state handed in through the constructor. The methods of this class use it to reach
    // the BioC document (PMIDs, passage contexts, annotations) and the PMID-keyed lookup maps.
    private GNPProcessingData data;

    /**
     * Creates a normaliser bound to the given processing data.
     *
     * @param data processing state that the methods of this instance read from and write to;
     *             stored as-is (no copy, no null check)
     */
    public GN(GNPProcessingData data) {
        this.data = data;
    }

    /** Homolog-style identifier "digits-digits"; group 1 captures the digits after the dash. */
    private static final Pattern SCORING_HOMOLOG_ID = Pattern.compile("[0-9]+\\-([0-9]+)");

    /**
     * Scores a candidate gene identifier against the mentions collected for a document.
     *
     * <p>The identifier is mapped to a key of {@code GNormPlus.GeneScoring_hash}: an id containing
     * {@code <digits>-<digits>} becomes {@code "Homo:" + <digits after the dash>}, anything else
     * becomes {@code "Gene:" + geneid}. The record stored under that key is tab-separated; column 0
     * is a comma-separated list of {@code token-count} pairs and column 2 is the divisor used to
     * turn a count into a term frequency. Other columns are not needed by the formula and are not
     * parsed.
     *
     * <p>Every lexicon token that also occurs in one of the normalised mentions contributes
     * {@code tf * idf * (1 / (1 - tf))}, with {@code idf} taken from
     * {@code GNormPlus.GeneScoringDF_hash}. Tokens without an entry in that map contribute nothing
     * (previously the unboxing of the missing value raised a NullPointerException). On top of that,
     * each occurrence of such a token in the normalised long form adds 1 to the score.
     *
     * @param geneid       candidate gene identifier, e.g. {@code "1234"} or {@code "12-3456"}
     * @param Mention_hash map whose keys are the mention strings of the document; values are ignored
     * @param LF           long form associated with the mention, or {@code ""} when there is none
     * @return the score, or {@code 0.0} when no scoring record exists for the identifier
     */
    private double ScoringFunction(String geneid, HashMap<String, String> Mention_hash, String LF) {
        String[] longFormTokens = normalizeScoringText(LF).split(" ");

        Matcher homolog = SCORING_HOMOLOG_ID.matcher(geneid);
        String scoringKey = homolog.find() ? "Homo:" + homolog.group(1) : "Gene:" + geneid;
        if (!GNormPlus.GeneScoring_hash.containsKey(scoringKey)) {
            return 0.0;
        }

        // Column 0: "token-count,token-count,..."; column 2: total used as the frequency divisor.
        String[] fields = GNormPlus.GeneScoring_hash.get(scoringKey).split("\t");
        HashMap<String, Double> lexiconCounts = new HashMap<String, Double>();
        for (String pair : fields[0].split(",")) {
            String[] tokenAndCount = pair.split("-");
            lexiconCounts.put(tokenAndCount[0], Double.parseDouble(tokenAndCount[1]));
        }
        double totalTokens = Double.parseDouble(fields[2]);

        // Keep only the lexicon tokens that appear in at least one mention of the document.
        HashMap<String, Double> matchedCounts = new HashMap<String, Double>();
        for (String mention : Mention_hash.keySet()) {
            for (String token : normalizeScoringText(mention).split(" ")) {
                Double count = lexiconCounts.get(token);
                if (count != null) {
                    matchedCounts.put(token, count);
                }
            }
        }

        double score = 0.0;
        int longFormMatches = 0;
        for (String token : matchedCounts.keySet()) {
            for (String longFormToken : longFormTokens) {
                if (longFormToken.equals(token)) {
                    ++longFormMatches;
                }
            }
            if (!GNormPlus.GeneScoringDF_hash.containsKey(token)) {
                // No document-frequency weight available for this token: it cannot be scored.
                continue;
            }
            double tf = matchedCounts.get(token) / totalTokens;
            double idf = GNormPlus.GeneScoringDF_hash.get(token);
            // NOTE(review): when tf == 1.0 the factor 1/(1-tf) is infinite; formula kept as-is.
            score = score + tf * idf * (1.0 / (1.0 - tf));
        }
        if (longFormMatches > 0) {
            score = score + (double) longFormMatches;
        }
        return score;
    }

    /**
     * Normalises text for token comparison in {@link #ScoringFunction}: lower-cases it, inserts a
     * space at digit/letter boundaries, turns every non-word character, '-' and '_' into a space
     * and collapses runs of spaces into one.
     */
    private static String normalizeScoringText(String text) {
        String normalized = text.toLowerCase();
        normalized = normalized.replaceAll("([0-9])([a-z])", "$1 $2");
        normalized = normalized.replaceAll("([a-z])([0-9])", "$1 $2");
        normalized = normalized.replaceAll("([\\W\\-\\_])", " ");
        return normalized.replaceAll("[ ]+", " ");
    }

    /** Trailing "p" (optionally preceded by spaces) after a digit or upper-case letter. */
    private static final Pattern PREPROC_TRAILING_P = Pattern.compile("^(.*[0-9A-Z])[ ]*p$");
    /** Trailing "nu". */
    private static final Pattern PREPROC_TRAILING_NU = Pattern.compile("^(.+)nu$");
    /** An occurrence of "alpha" (the last one, because the leading group is greedy). */
    private static final Pattern PREPROC_ALPHA = Pattern.compile("^(.*)alpha(.*)$");
    /** An occurrence of "beta" (the last one, because the leading group is greedy). */
    private static final Pattern PREPROC_BETA = Pattern.compile("^(.*)beta(.*)$");
    /** Trailing "a" directly after a digit. */
    private static final Pattern PREPROC_TRAILING_A = Pattern.compile("^(.+[0-9])a$");
    /** Trailing "b" directly after a digit. */
    private static final Pattern PREPROC_TRAILING_B = Pattern.compile("^(.+[0-9])b$");
    /** "II" followed by one final lower-case letter. */
    private static final Pattern PREPROC_ROMAN_TWO = Pattern.compile("^(.+)II([a-z])$");
    /** "III" followed by one final lower-case letter. */
    private static final Pattern PREPROC_ROMAN_THREE = Pattern.compile("^(.+)III([a-z])$");

    /**
     * Adds spelling variants to the mention field of every {@code Gene} annotation in the BioC
     * document held by {@code data}.
     *
     * <p>Each annotation is a tab-separated record {@code start, last, mentions, type[, id]} whose
     * mention field holds one or more '|'-separated mentions. When at least one variant is produced
     * the annotation is rewritten in place as {@code start \t last \t mentions|variants \t type \t id}
     * ({@code id} is the empty string when the record had no fifth column). Annotations of other
     * types, and Gene annotations that yield no variant, are left untouched.
     *
     * @param Filename     not used by this method
     * @param FilenameBioC not used by this method
     * @throws IOException        declared for callers; not thrown by the visible code
     * @throws XMLStreamException declared for callers; not thrown by the visible code
     */
    public void PreProcessing4GN(String Filename, String FilenameBioC) throws IOException, XMLStreamException {
        for (int i = 0; i < this.data.getBioCDocobj().Annotations.size(); ++i) {
            for (int j = 0; j < this.data.getBioCDocobj().Annotations.get(i).size(); ++j) {
                for (int k = 0; k < this.data.getBioCDocobj().Annotations.get(i).get(j).size(); ++k) {
                    String[] anno = this.data.getBioCDocobj().Annotations.get(i).get(j).get(k).split("\t");
                    String start = anno[0];
                    String last = anno[1];
                    String mentions = anno[2];
                    String type = anno[3];
                    String id = anno.length >= 5 ? anno[4] : "";
                    if (!type.equals("Gene")) {
                        continue;
                    }
                    String expanded = expandGeneMentionVariants(mentions);
                    // Every variant is appended with a leading '|', so equal length means "no variant".
                    if (expanded.length() == mentions.length()) {
                        continue;
                    }
                    this.data.getBioCDocobj().Annotations.get(i).get(j).set(k, start + "\t" + last + "\t" + expanded + "\t" + type + "\t" + id);
                }
            }
        }
    }

    /**
     * Returns {@code mentions} followed by "|variant" for every rewrite rule that applies to one of
     * its '|'-separated mentions. Only the original mentions are inspected (variants are not
     * re-expanded) and the rules are applied in a fixed order per mention.
     */
    private static String expandGeneMentionVariants(String mentions) {
        StringBuilder expanded = new StringBuilder(mentions);
        for (String mention : mentions.split("\\|")) {
            Matcher m = PREPROC_TRAILING_P.matcher(mention);
            if (m.find()) {
                expanded.append('|').append(m.group(1));
            }
            m = PREPROC_TRAILING_NU.matcher(mention);
            if (m.find()) {
                expanded.append('|').append(m.group(1));
            }
            m = PREPROC_ALPHA.matcher(mention);
            if (m.find()) {
                expanded.append('|').append(m.group(1)).append("a").append(m.group(2));
            }
            m = PREPROC_BETA.matcher(mention);
            if (m.find()) {
                expanded.append('|').append(m.group(1)).append("b").append(m.group(2));
            }
            m = PREPROC_TRAILING_A.matcher(mention);
            if (m.find()) {
                expanded.append('|').append(m.group(1)).append("alpha");
            }
            m = PREPROC_TRAILING_B.matcher(mention);
            if (m.find()) {
                expanded.append('|').append(m.group(1)).append("beta");
            }
            m = PREPROC_ROMAN_TWO.matcher(mention);
            if (m.find()) {
                expanded.append('|').append(m.group(1)).append("2").append(m.group(2));
            }
            m = PREPROC_ROMAN_THREE.matcher(mention);
            if (m.find()) {
                expanded.append('|').append(m.group(1)).append("3").append(m.group(2));
            }
        }
        return expanded.toString();
    }

    /**
     * Records, per document, the identifiers attached to chromosome-location hits.
     *
     * <p>Every passage text of every document is searched with
     * {@code GNormPlus.PT_GeneChromosome.SearchMentionLocation(text, "ChromosomeLocation")}. Each
     * hit is a tab-separated record whose fourth column lists identifiers separated by '|' or ','.
     * For each identifier the key {@code pmid + "\t" + identifier} is stored, with an empty value,
     * in the map returned by {@code data.getPmid2ChromosomeGene_hash()}.
     *
     * @param Filename     not used by this method
     * @param FilenameBioC not used by this method
     * @throws IOException        declared for callers; not thrown by the visible code
     * @throws XMLStreamException declared for callers; not thrown by the visible code
     */
    public void ChromosomeRecognition(String Filename, String FilenameBioC) throws IOException, XMLStreamException {
        for (int doc = 0; doc < this.data.getBioCDocobj().PMIDs.size(); ++doc) {
            String pmid = this.data.getBioCDocobj().PMIDs.get(doc);
            // The passage count is taken from PassageNames while the text comes from PassageContexts.
            for (int passage = 0; passage < this.data.getBioCDocobj().PassageNames.get(doc).size(); ++passage) {
                String text = this.data.getBioCDocobj().PassageContexts.get(doc).get(passage);
                ArrayList<String> hits = GNormPlus.PT_GeneChromosome.SearchMentionLocation(text, "ChromosomeLocation");
                for (String hit : hits) {
                    String idColumn = hit.split("\t")[3];
                    for (String geneId : idColumn.split("[\\|,]")) {
                        this.data.getPmid2ChromosomeGene_hash().put(pmid + "\t" + geneId, "");
                    }
                }
            }
        }
    }

    public void GeneNormalization(String Filename, String FilenameBioC, boolean GeneIDMatch) throws IOException, XMLStreamException {
        DecimalFormat df = new DecimalFormat("0.####");
        df.setRoundingMode(RoundingMode.HALF_UP);
        for (int i = 0; i < this.data.getBioCDocobj().Annotations.size(); ++i) {
            int j;
            int j2;
            String[] MT;
            Object GeneMentionTax2;
            Object mentions;
            String Pmid = this.data.getBioCDocobj().PMIDs.get(i);
            HashMap<String, String> Species_hash = new HashMap<String, String>();
            for (int j3 = 0; j3 < this.data.getBioCDocobj().Annotations.get(i).size(); ++j3) {
                for (int k = 0; k < this.data.getBioCDocobj().Annotations.get(i).get(j3).size(); ++k) {
                    String[] anno = this.data.getBioCDocobj().Annotations.get(i).get(j3).get(k).split("\t");
                    String mentions2 = anno[2];
                    String type = anno[3];
                    if (!type.matches("(Species|Genus|Strain|CellLine|Cell)")) continue;
                    Species_hash.put(mentions2, "");
                }
            }
            Object tiabs = "";
            for (int j4 = 0; j4 < this.data.getBioCDocobj().PassageContexts.get(i).size(); ++j4) {
                tiabs = (String)tiabs + this.data.getBioCDocobj().PassageContexts.get(i).get(j4).toLowerCase();
            }
            HashMap GeneMention_hash = new HashMap();
            HashMap<String, String> Mention_hash = new HashMap<String, String>();
            for (int j5 = 0; j5 < this.data.getBioCDocobj().Annotations.get(i).size(); ++j5) {
                for (int k = 0; k < this.data.getBioCDocobj().Annotations.get(i).get(j5).size(); ++k) {
                    String[] anno = this.data.getBioCDocobj().Annotations.get(i).get(j5).get(k).split("\t");
                    String start = anno[0];
                    Object last = anno[1];
                    mentions = anno[2];
                    Object type = anno[3];
                    Object taxids = "Tax:9606";
                    if (anno.length >= 5) {
                        taxids = anno[4];
                    }
                    String mentions_tmp = ((String)mentions).toLowerCase();
                    mentions_tmp = mentions_tmp.replaceAll("[\\W\\-\\_]", "");
                    mentions_tmp = mentions_tmp.replaceAll("[0-9]", "0");
                    if (((String)(taxids = ((String)taxids).replaceAll("(Focus|Right|Left|Prefix|Tax):", ""))).equals("")) {
                        taxids = "9606";
                    }
                    boolean found_filter = false;
                    if (this.data.getFiltering_hash().containsKey(mentions_tmp)) {
                        found_filter = true;
                    }
                    if (!found_filter) {
                        for (String f : GNormPlus.Filtering_WithLongForm_hash.keySet()) {
                            String lf;
                            if (!this.data.getBioCDocobj().Annotations.get(i).get(j5).get(k).matches(".*[\\t\\|]" + f + "\tGene.*") && !this.data.getBioCDocobj().Annotations.get(i).get(j5).get(k).matches(".*\\t" + f + "\\|[^\t]+\tGene.*") || !((String)tiabs).matches(".*" + (lf = GNormPlus.Filtering_WithLongForm_hash.get(f)) + ".*")) continue;
                            found_filter = true;
                            break;
                        }
                    }
                    if (!found_filter && (this.data.getBioCDocobj().Annotations.get(i).get(j5).get(k).matches(".*[\\t\\|][a-z]\tGene.*") || this.data.getBioCDocobj().Annotations.get(i).get(j5).get(k).matches(".*\\t[a-z]\\|[^\t]+\tGene.*"))) {
                        found_filter = true;
                    }
                    if (found_filter) continue;
                    if (((String)type).matches("Gene")) {
                        if (GeneMention_hash.containsKey((String)mentions + "\t" + (String)taxids)) {
                            ((HashMap)GeneMention_hash.get((String)mentions + "\t" + (String)taxids)).put(start + "\t" + (String)last, "");
                            continue;
                        }
                        HashMap<CallSite, String> offset_hash = new HashMap<CallSite, String>();
                        offset_hash.put((CallSite)((Object)(start + "\t" + (String)last)), "");
                        GeneMention_hash.put((CallSite)((Object)((String)mentions + "\t" + (String)taxids)), offset_hash);
                        ((HashMap)GeneMention_hash.get((String)mentions + "\t" + (String)taxids)).put("type", type);
                        Mention_hash.put((String)mentions, "Gene");
                        continue;
                    }
                    if (!((String)type).matches("(FamilyName|DomainMotif)")) continue;
                    String[] GMs = ((String)mentions).split("\\|");
                    for (int g = 0; g < GMs.length; ++g) {
                        String mention = GMs[g];
                        Mention_hash.put(mention, "FamilyDomain");
                    }
                }
            }
            HashMap<Object, String> GuaranteedGene2ID = new HashMap<Object, String>();
            HashMap<Object, String> MultiGene2ID = new HashMap<Object, String>();
            block8: for (Object GeneMentionTax2 : GeneMention_hash.keySet()) {
                String[] GT = ((String)GeneMentionTax2).split("\\t");
                mentions = GT[0];
                String taxids = GT[1];
                String[] GMs = ((String)mentions).split("\\|");
                HashMap<String, String> taxids_hash = new HashMap<String, String>();
                String[] taxids_arr = taxids.split(",");
                for (int t = 0; t < taxids_arr.length; ++t) {
                    taxids_hash.put(taxids_arr[t], "");
                }
                for (int ms = 0; ms < GMs.length; ++ms) {
                    String mention = GMs[ms];
                    String IDstr = GNormPlus.PT_Gene.MentionMatch(mention);
                    String[] IDs = IDstr.split("\\|");
                    for (int c = 0; c < IDs.length; ++c) {
                        String[] tax2ID = IDs[c].split(":");
                        if (!taxids_hash.containsKey(tax2ID[0])) continue;
                        String geneid = tax2ID[1];
                        String TargetTax = tax2ID[0];
                        ((HashMap)GeneMention_hash.get(GeneMentionTax2)).put("ID", geneid);
                        ((HashMap)GeneMention_hash.get(GeneMentionTax2)).put("TargetTax", TargetTax);
                        break;
                    }
                    if (((HashMap)GeneMention_hash.get(GeneMentionTax2)).containsKey("ID")) {
                        Pattern ptmp = Pattern.compile("\\*([0-9]+(\\-[0-9]+|))");
                        Matcher mtmp = ptmp.matcher((CharSequence)((HashMap)GeneMention_hash.get(GeneMentionTax2)).get("ID"));
                        if (mtmp.find()) {
                            ((HashMap)GeneMention_hash.get(GeneMentionTax2)).put("ID", mtmp.group(1));
                            GuaranteedGene2ID.put(GeneMentionTax2, mtmp.group(1));
                        } else if (((String)((HashMap)GeneMention_hash.get(GeneMentionTax2)).get("ID")).matches("[0-9]+(\\-[0-9]+|)")) {
                            GuaranteedGene2ID.put(GeneMentionTax2, (String)((HashMap)GeneMention_hash.get(GeneMentionTax2)).get("ID"));
                        } else {
                            String[] ID = ((String)((HashMap)GeneMention_hash.get(GeneMentionTax2)).get("ID")).split(",");
                            boolean FoundByChroLoca = false;
                            for (int idcount = 0; idcount < ID.length; ++idcount) {
                                if (!this.data.getPmid2ChromosomeGene_hash().containsKey(Pmid + "\t" + ID[idcount])) continue;
                                GuaranteedGene2ID.put(GeneMentionTax2, ID[idcount]);
                                FoundByChroLoca = true;
                                break;
                            }
                            if (!FoundByChroLoca) {
                                MultiGene2ID.put(GeneMentionTax2, (String)((HashMap)GeneMention_hash.get(GeneMentionTax2)).get("ID"));
                            }
                        }
                    }
                    if (GNormPlus.suffixprefix_orig2modified.containsKey(mention) && !IDstr.equals("-1") && !IDstr.equals("-2") && !IDstr.equals("-3")) continue block8;
                }
            }
            for (String GeneMentionTax_M : MultiGene2ID.keySet()) {
                for (String GeneMentionTax_G : GuaranteedGene2ID.keySet()) {
                    String[] MG = ((String)MultiGene2ID.get(GeneMentionTax_M)).split(",");
                    for (int m = 0; m < MG.length; ++m) {
                        if (!MG[m].equals(GuaranteedGene2ID.get(GeneMentionTax_G))) continue;
                        ((HashMap)GeneMention_hash.get(GeneMentionTax_M)).put("ID", MG[m]);
                    }
                }
            }
            for (Object GeneMentionTax2 : GeneMention_hash.keySet()) {
                String GeneMentionTax_Abb;
                MT = ((String)GeneMentionTax2).split("\\t");
                if (!this.data.getPmidLF2Abb_hash().containsKey(Pmid + "\t" + MT[0]) || !GeneMention_hash.containsKey(GeneMentionTax_Abb = this.data.getPmidLF2Abb_hash().get(Pmid + "\t" + MT[0]) + "\t" + MT[1]) || !((HashMap)GeneMention_hash.get(GeneMentionTax2)).containsKey("ID")) continue;
                ((HashMap)GeneMention_hash.get(GeneMentionTax_Abb)).put("ID", (String)((HashMap)GeneMention_hash.get(GeneMentionTax2)).get("ID"));
            }
            for (Object GeneMentionTax2 : GeneMention_hash.keySet()) {
                if (!((HashMap)GeneMention_hash.get(GeneMentionTax2)).containsKey("ID") || !((String)((HashMap)GeneMention_hash.get(GeneMentionTax2)).get("ID")).matches(".+,.+")) continue;
                String geneids = (String)((HashMap)GeneMention_hash.get(GeneMentionTax2)).get("ID");
                String[] geneid = geneids.split(",");
                String OutputStyle = "Top1";
                if (OutputStyle.equals("Top1")) {
                    double max_score = 0.0;
                    String target_geneid = "";
                    for (int g = 0; g < geneid.length; ++g) {
                        double score;
                        String[] MT2 = ((String)GeneMentionTax2).split("\\t");
                        String LF = "";
                        if (this.data.getPmidAbb2LF_hash().containsKey(Pmid + "\t" + MT2[0])) {
                            LF = this.data.getPmidAbb2LF_hash().get(Pmid + "\t" + MT2[0]);
                        }
                        if ((score = this.ScoringFunction(geneid[g], Mention_hash, LF)) > max_score) {
                            max_score = score;
                            target_geneid = geneid[g];
                            continue;
                        }
                        if (score != 0.0) continue;
                    }
                    ((HashMap)GeneMention_hash.get(GeneMentionTax2)).put("ID", target_geneid);
                    continue;
                }
                Object geneSTR = "";
                for (int g = 0; g < geneid.length; ++g) {
                    String[] MT3 = ((String)GeneMentionTax2).split("\\t");
                    String LF = "";
                    if (this.data.getPmidAbb2LF_hash().containsKey(Pmid + "\t" + MT3[0])) {
                        LF = this.data.getPmidAbb2LF_hash().get(Pmid + "\t" + MT3[0]);
                    }
                    double score = this.ScoringFunction(geneid[g], Mention_hash, LF);
                    String hoge = df.format(score);
                    score = Double.parseDouble(hoge);
                    geneSTR = ((String)geneSTR).equals("") ? geneid[g] + "-" + score : (String)geneSTR + "," + geneid[g] + "-" + score;
                }
                ((HashMap)GeneMention_hash.get(GeneMentionTax2)).put("ID", geneSTR);
            }
            for (Object GeneMentionTax2 : GeneMention_hash.keySet()) {
                String GeneMentionTax_LF;
                MT = ((String)GeneMentionTax2).split("\\t");
                if (!this.data.getPmidAbb2LF_hash().containsKey(Pmid + "\t" + MT[0]) || !GeneMention_hash.containsKey(GeneMentionTax_LF = this.data.getPmidAbb2LF_hash().get(Pmid + "\t" + MT[0]) + "\t" + MT[1]) || !((HashMap)GeneMention_hash.get(GeneMentionTax2)).containsKey("ID")) continue;
                ((HashMap)GeneMention_hash.get(GeneMentionTax_LF)).put("ID", (String)((HashMap)GeneMention_hash.get(GeneMentionTax2)).get("ID"));
            }
            ArrayList<Object> removeGMT = new ArrayList<Object>();
            GeneMentionTax2 = GeneMention_hash.keySet().iterator();
            while (GeneMentionTax2.hasNext()) {
                String GeneMentionTax3 = (String)GeneMentionTax2.next();
                String[] GT = GeneMentionTax3.split("\\t");
                String mentions3 = GT[0];
                String tax = GT[1];
                if (!((HashMap)GeneMention_hash.get(GeneMentionTax3)).containsKey("type") || !((String)((HashMap)GeneMention_hash.get(GeneMentionTax3)).get("type")).equals("Gene") || !((HashMap)GeneMention_hash.get(GeneMentionTax3)).containsKey("ID")) continue;
                String type = (String)((HashMap)GeneMention_hash.get(GeneMentionTax3)).get("type");
                String id = (String)((HashMap)GeneMention_hash.get(GeneMentionTax3)).get("ID");
                Object geneid = "";
                Pattern ptmp1 = Pattern.compile("^([0-9]+)\\-([0-9]+)$");
                Pattern ptmp2 = Pattern.compile("^([0-9]+)$");
                Matcher mtmp1 = ptmp1.matcher(id);
                Matcher mtmp2 = ptmp2.matcher(id);
                if (mtmp1.find()) {
                    geneid = "Homo:" + mtmp1.group(2);
                } else if (mtmp2.find()) {
                    geneid = "Gene:" + mtmp2.group(1);
                }
                boolean LongFormTknMatch = false;
                boolean LongFormExist = true;
                if (GNormPlus.GeneScoring_hash.containsKey(geneid)) {
                    if (this.data.getPmidAbb2LF_lc_hash().containsKey(Pmid + "\t" + mentions3.toLowerCase())) {
                        String[] l = GNormPlus.GeneScoring_hash.get(geneid).split("\t");
                        String[] tkns_Gene = l[0].split(",");
                        ArrayList<String> tkn_lexicon = new ArrayList<String>();
                        for (int ti = 0; ti < tkns_Gene.length; ++ti) {
                            String[] Tkn_Freq = tkns_Gene[ti].split("-");
                            tkn_lexicon.add(Tkn_Freq[0]);
                        }
                        String LF_lc = this.data.getPmidAbb2LF_lc_hash().get(Pmid + "\t" + mentions3.toLowerCase());
                        LF_lc = LF_lc.replaceAll("([0-9])([A-Za-z])", "$1 $2");
                        LF_lc = LF_lc.replaceAll("([A-Za-z])([0-9])", "$1 $2");
                        String[] tkn_mention = LF_lc.split("[\\W\\-\\_]");
                        for (int tl = 0; tl < tkn_lexicon.size(); ++tl) {
                            for (int tm = 0; tm < tkn_mention.length; ++tm) {
                                if (!((String)tkn_lexicon.get(tl)).equals(tkn_mention[tm]) || tkn_mention[tm].matches("[0-9]+")) continue;
                                LongFormTknMatch = true;
                            }
                        }
                    } else {
                        LongFormExist = false;
                    }
                } else {
                    LongFormTknMatch = true;
                }
                if (!LongFormTknMatch && LongFormExist) {
                    removeGMT.add(GeneMentionTax3);
                    removeGMT.add(this.data.getPmidAbb2LF_hash().get(Pmid + "\t" + mentions3) + "\t" + tax);
                    continue;
                }
                if (mentions3.length() > 2 || LongFormExist) continue;
                removeGMT.add(GeneMentionTax3);
            }
            for (int gmti = 0; gmti < removeGMT.size(); ++gmti) {
                GeneMention_hash.remove(removeGMT.get(gmti));
            }
            for (int j6 = 0; j6 < this.data.getBioCDocobj().Annotations.get(i).size(); ++j6) {
                for (int k = 0; k < this.data.getBioCDocobj().Annotations.get(i).get(j6).size(); ++k) {
                    String[] anno = this.data.getBioCDocobj().Annotations.get(i).get(j6).get(k).split("\t");
                    String start = anno[0];
                    String last = anno[1];
                    String mentions4 = anno[2];
                    String type = anno[3];
                    String taxid_org = "Tax:9606";
                    if (anno.length >= 5) {
                        taxid_org = anno[4];
                    }
                    String taxids = taxid_org.replaceAll("(Focus|Right|Left|Prefix|Tax):", "");
                    String[] GMs = mentions4.split("\\|");
                    if (GeneMention_hash.containsKey(mentions4 + "\t" + taxids) && ((HashMap)GeneMention_hash.get(mentions4 + "\t" + taxids)).containsKey("TargetTax")) {
                        String taxtype = taxid_org.replaceAll(":([0-9,]+)", "");
                        String taxid = (String)((HashMap)GeneMention_hash.get(mentions4 + "\t" + taxids)).get("TargetTax");
                        this.data.getBioCDocobj().Annotations.get(i).get(j6).set(k, start + "\t" + last + "\t" + mentions4 + "\t" + type + "\t" + taxtype + ":" + taxid);
                    }
                    if (!type.equals("Gene")) continue;
                    this.data.getBioCDocobj().Annotations.get(i).get(j6).set(k, this.data.getBioCDocobj().Annotations.get(i).get(j6).get(k) + "|");
                    if (GeneMention_hash.containsKey(mentions4 + "\t" + taxids) && ((HashMap)GeneMention_hash.get(mentions4 + "\t" + taxids)).containsKey("ID")) {
                        this.data.getBioCDocobj().Annotations.get(i).get(j6).set(k, this.data.getBioCDocobj().Annotations.get(i).get(j6).get(k) + (String)((HashMap)GeneMention_hash.get(mentions4 + "\t" + taxids)).get("ID") + ",");
                    }
                    this.data.getBioCDocobj().Annotations.get(i).get(j6).set(k, this.data.getBioCDocobj().Annotations.get(i).get(j6).get(k).substring(0, this.data.getBioCDocobj().Annotations.get(i).get(j6).get(k).length() - 1));
                }
            }
            HashMap<String, String> GeneMentions = new HashMap<String, String>();
            HashMap<CallSite, String> GeneMentionLocation = new HashMap<CallSite, String>();
            for (j2 = 0; j2 < this.data.getBioCDocobj().Annotations.get(i).size(); ++j2) {
                for (int k = 0; k < this.data.getBioCDocobj().Annotations.get(i).get(j2).size(); ++k) {
                    int s;
                    String[] anno = this.data.getBioCDocobj().Annotations.get(i).get(j2).get(k).split("\t");
                    int start = Integer.parseInt(anno[0]);
                    int last = Integer.parseInt(anno[1]);
                    String mentions5 = anno[2];
                    String type = anno[3];
                    String id = "Tax:9606";
                    if (anno.length >= 5) {
                        id = anno[4];
                    }
                    if (type.equals("Gene") && id.matches("(Focus|Right|Left|Prefix|Tax)\\:([0-9]+)\\|([0-9]+)\\-([0-9]+)")) {
                        GeneMentions.put(mentions5.toLowerCase(), id);
                        for (s = start; s <= last; ++s) {
                            GeneMentionLocation.put((CallSite)((Object)(j2 + "\t" + s)), "");
                        }
                        continue;
                    }
                    if (!type.equals("Gene") || !id.matches("(Focus|Right|Left|Prefix|Tax)\\:([0-9]+)\\|([0-9]+)")) continue;
                    GeneMentions.put(mentions5.toLowerCase(), id);
                    for (s = start; s <= last; ++s) {
                        GeneMentionLocation.put((CallSite)((Object)(j2 + "\t" + s)), "");
                    }
                }
            }
            for (j2 = 0; j2 < this.data.getBioCDocobj().Annotations.get(i).size(); ++j2) {
                if (this.data.getBioCDocobj().PassageContexts.size() <= i || this.data.getBioCDocobj().PassageContexts.get(i).size() <= j2) continue;
                String PassageContexts = " " + this.data.getBioCDocobj().PassageContexts.get(i).get(j2) + " ";
                Object PassageContexts_tmp = PassageContexts.toLowerCase();
                for (String gm : GeneMentions.keySet()) {
                    String id = (String)GeneMentions.get(gm);
                    if (gm.length() < 3) continue;
                    gm = gm.replaceAll("[ ]*[\\|]*$", "");
                    gm = gm.replaceAll("^[\\|]*[ ]*", "");
                    if ((gm = gm.replaceAll("[\\|][\\|]+", "\\|")).matches("[\\W\\-\\_]*")) continue;
                    gm = gm.replaceAll("([^A-Za-z0-9\\| ])", "\\\\$1");
                    Pattern ptmp = Pattern.compile("^(.*[\\W\\-\\_])(" + gm + ")([\\W\\-\\_].*)$");
                    Matcher mtmp = ptmp.matcher((CharSequence)PassageContexts_tmp);
                    while (mtmp.find()) {
                        String pre = mtmp.group(1);
                        String gmtmp = mtmp.group(2);
                        String post = mtmp.group(3);
                        int start = pre.length() - 1;
                        int last = start + gmtmp.length();
                        if (PassageContexts.length() >= last + 1) {
                            String mention = PassageContexts.substring(start + 1, last + 1);
                            if (!GeneMentionLocation.containsKey(j2 + "\t" + start) && !GeneMentionLocation.containsKey(j2 + "\t" + last)) {
                                this.data.getBioCDocobj().Annotations.get(i).get(j2).add(start + "\t" + last + "\t" + mention + "\tGene\t" + id);
                            }
                        }
                        gmtmp = gmtmp.replaceAll(".", "\\@");
                        PassageContexts_tmp = pre + gmtmp + post;
                        mtmp = ptmp.matcher((CharSequence)PassageContexts_tmp);
                    }
                }
            }
            HashMap<String, String> geneids = new HashMap<String, String>();
            for (j = 0; j < this.data.getBioCDocobj().Annotations.get(i).size(); ++j) {
                for (int k = 0; k < this.data.getBioCDocobj().Annotations.get(i).get(j).size(); ++k) {
                    String[] anno = this.data.getBioCDocobj().Annotations.get(i).get(j).get(k).split("\t");
                    String type = anno[3];
                    if (!type.equals("Gene")) continue;
                    String id = "Tax:9606";
                    if (anno.length >= 5) {
                        id = anno[4];
                    }
                    Pattern ptmp0 = Pattern.compile("^(Focus|Right|Left|Prefix|GeneID|Tax)\\:([0-9]+)\\|([0-9]+)$");
                    Matcher mtmp0 = ptmp0.matcher(id);
                    Pattern ptmp1 = Pattern.compile("^(Focus|Right|Left|Prefix|GeneID|Tax)\\:([0-9]+)\\|([0-9]+)\\-([0-9]+)$");
                    Matcher mtmp1 = ptmp1.matcher(id);
                    if (mtmp0.find()) {
                        geneids.put(mtmp0.group(3), "");
                    }
                    if (!mtmp1.find()) continue;
                    geneids.put(mtmp1.group(3), "");
                }
            }
            for (j = 0; j < this.data.getBioCDocobj().Annotations.get(i).size(); ++j) {
                for (int k = this.data.getBioCDocobj().Annotations.get(i).get(j).size() - 1; k >= 0; --k) {
                    String[] anno = this.data.getBioCDocobj().Annotations.get(i).get(j).get(k).split("\t");
                    String mention = anno[2];
                    String type = anno[3];
                    if (!type.matches("(FamilyName|DomainMotif)")) continue;
                    String id = "Tax:9606";
                    if (anno.length >= 5) {
                        id = anno[4];
                    }
                    String IDstrs = GNormPlus.PT_FamilyName.MentionMatch(mention);
                    String[] IDstr = IDstrs.split("\\|");
                    Object ids = "";
                    for (int id_i = 0; id_i < IDstr.length; ++id_i) {
                        if (!geneids.containsKey(IDstr[id_i])) continue;
                        ids = ((String)ids).equals("") ? IDstr[id_i] : (String)ids + ";" + IDstr[id_i];
                    }
                    if (!((String)ids).equals("")) {
                        if (type.equals("FamilyName")) {
                            type = "Gene";
                        }
                        String Annotation_k = anno[0] + "\t" + anno[1] + "\t" + anno[2] + "\t" + type + "\tTax:9606";
                        if (anno.length >= 5) {
                            Annotation_k = anno[0] + "\t" + anno[1] + "\t" + anno[2] + "\t" + type + "\t" + anno[4];
                        }
                        this.data.getBioCDocobj().Annotations.get(i).get(j).set(k, Annotation_k + "|" + (String)ids);
                        continue;
                    }
                    if (!type.equals("DomainMotif") && Integer.parseInt(anno[1]) - Integer.parseInt(anno[0]) > 0) continue;
                    this.data.getBioCDocobj().Annotations.get(i).get(j).remove(k);
                }
            }
            for (j = 0; j < this.data.getBioCDocobj().Annotations.get(i).size(); ++j) {
                for (int k = this.data.getBioCDocobj().Annotations.get(i).get(j).size() - 1; k >= 0; --k) {
                    String[] anno = this.data.getBioCDocobj().Annotations.get(i).get(j).get(k).split("\t");
                    String type = anno[3];
                    if (!type.equals("Species") && !type.equals("Genus") && !type.equals("Strain") && !type.equals("CellLine") && !type.equals("Cell")) continue;
                    String id = anno[4];
                    id = id.replaceAll("\\*", "");
                    id = id.replaceAll("\\(anti\\)", "");
                    String Annotation_k = anno[0] + "\t" + anno[1] + "\t" + anno[2] + "\t" + type + "\t" + id;
                    this.data.getBioCDocobj().Annotations.get(i).get(j).set(k, Annotation_k);
                }
            }
            for (j = 0; j < this.data.getBioCDocobj().Annotations.get(i).size(); ++j) {
                block43: for (int k = this.data.getBioCDocobj().Annotations.get(i).get(j).size() - 1; k >= 0; --k) {
                    String[] anno = this.data.getBioCDocobj().Annotations.get(i).get(j).get(k).split("\t");
                    int start = Integer.parseInt(anno[0]);
                    int last = Integer.parseInt(anno[1]);
                    String mention = anno[2];
                    String type = anno[3];
                    String id = anno[4];
                    if (type.equals("Gene") && Species_hash.containsKey(mention)) {
                        this.data.getBioCDocobj().Annotations.get(i).get(j).remove(k);
                        continue;
                    }
                    if (type.equals("Gene") && id.equals("")) {
                        this.data.getBioCDocobj().Annotations.get(i).get(j).remove(k);
                        continue;
                    }
                    for (int k1 = this.data.getBioCDocobj().Annotations.get(i).get(j).size() - 1; k1 >= 0; --k1) {
                        if (k1 == k) continue;
                        String[] anno1 = this.data.getBioCDocobj().Annotations.get(i).get(j).get(k1).split("\t");
                        int start1 = Integer.parseInt(anno1[0]);
                        int last1 = Integer.parseInt(anno1[1]);
                        if ((start1 >= start || last1 < last) && (start1 > start || last1 <= last)) continue;
                        this.data.getBioCDocobj().Annotations.get(i).get(j).remove(k);
                        continue block43;
                    }
                }
            }
        }
        if (!GeneIDMatch) {
            this.data.getBioCDocobj().BioCOutput(Filename, FilenameBioC, this.data.getBioCDocobj().Annotations, true, true);
        }
    }

    /**
     * Finds candidate gene-identifier tokens in a passage.
     *
     * <p>A candidate is a whitespace-free token that mixes letters and digits and is delimited by
     * non-alphanumeric characters. The text is matched with a greedy, fully anchored pattern, so
     * candidates are discovered from the end of the text towards the beginning; after a candidate
     * has been handled it is overwritten with {@code '@'} characters so the next search finds the
     * preceding one.
     *
     * <p>Each returned entry has the form {@code start \t last \t mention \t GeneID}, where
     * {@code start}/{@code last} are the offsets passed to {@code Doc.substring(start, last)}.
     * Candidates containing any of {@code ' ; [ ] + * \} are dropped. For candidates longer than
     * six characters that look like a numeric range ({@code AB001-005} or {@code AB001-AB005}),
     * one extra entry per identifier in the range is emitted (same offsets) before the entry for
     * the literal mention.
     *
     * <p>Each candidate is reported exactly once; earlier versions of this method repeated every
     * entry {@code mention.length() + 1} times because of a loop whose index was never used.
     *
     * @param Doc the passage text to scan
     * @return the tab-separated location entries, possibly empty
     */
    public ArrayList<String> SearchGeneIDLocation(String Doc) {
        ArrayList<String> location = new ArrayList<String>();
        // Patterns are compiled once per call rather than once per candidate.
        Pattern ptmp = Pattern.compile("^(.*[^A-Za-z0-9]+)([0-9]+\\S*[A-Za-z]+|[A-Za-z]+\\S*[0-9]+|[0-9]+\\S*[A-Za-z]+\\S*[0-9]+|[A-Za-z]+\\S*[0-9]+\\S*[A-Za-z]+)([^A-Za-z0-9]+.*)$");
        Pattern numericRange = Pattern.compile("^(.+[^0-9])([0-9]+)\\-([0-9]+)$");
        Pattern prefixedRange = Pattern.compile("^(.+[^0-9])([0-9]+)\\-(.+[^0-9])([0-9]+)$");
        // Pad with one blank on each side so tokens at the very start/end have delimiters.
        String Doc_tmp = " " + Doc + " ";
        Matcher mtmp = ptmp.matcher(Doc_tmp);
        while (mtmp.find()) {
            String str1 = mtmp.group(1);
            String str2 = mtmp.group(2);
            String str3 = mtmp.group(3);
            // "- 1" compensates for the blank prepended to Doc_tmp.
            int start = str1.length() - 1;
            int last = start + str2.length();
            String mention = Doc.substring(start, last);
            if (!mention.matches(".*[\\'\\;\\[\\]\\+\\*\\\\].*")) {
                if (last - start > 6 && (mention.matches(".*\\(.*\\).*") || mention.matches("[^\\(\\)]+"))) {
                    Matcher simple = numericRange.matcher(mention);
                    Matcher prefixed = prefixedRange.matcher(mention);
                    if (simple.find()) {
                        // Digit runs longer than six characters are not expanded.
                        if (simple.group(2).length() <= 6 && simple.group(3).length() <= 6) {
                            GN.addExpandedGeneIdRange(location, start, last, simple.group(1), simple.group(2), simple.group(3));
                        }
                    } else if (prefixed.find() && prefixed.group(2).length() <= 6 && prefixed.group(4).length() <= 6) {
                        // Only expand when both ends of the range share the same textual stem.
                        if (prefixed.group(1).equals(prefixed.group(3))) {
                            GN.addExpandedGeneIdRange(location, start, last, prefixed.group(1), prefixed.group(2), prefixed.group(4));
                        }
                    }
                }
                location.add(start + "\t" + last + "\t" + mention + "\tGeneID");
            }
            // Mask the handled token (same length, so offsets stay valid) and search again.
            StringBuilder masked = new StringBuilder(Doc_tmp.length());
            masked.append(str1);
            for (int m = 0; m < str2.length(); ++m) {
                masked.append('@');
            }
            masked.append(str3);
            Doc_tmp = masked.toString();
            mtmp = ptmp.matcher(Doc_tmp);
        }
        return location;
    }

    /**
     * Appends one location entry per identifier of a numeric range {@code stem + first .. stem + last}.
     *
     * <p>Nothing is added unless the range is ascending and spans at most 20 steps. Numbers are
     * zero-padded to the width of {@code firstDigits}, so leading zeros survive even when the
     * digit count grows (00098..00101 gives 00098, 00099, 00100, 00101). Identifiers shorter
     * than five characters are skipped.
     *
     * @param location    list receiving the entries
     * @param start       start offset written into every entry
     * @param last        end offset written into every entry
     * @param stem        text preceding the number
     * @param firstDigits digits of the first number, including any leading zeros
     * @param lastDigits  digits of the last number
     */
    private static void addExpandedGeneIdRange(ArrayList<String> location, int start, int last, String stem, String firstDigits, String lastDigits) {
        int first = Integer.parseInt(firstDigits);
        int end = Integer.parseInt(lastDigits);
        int span = end - first;
        if (span <= 0 || span > 20) {
            return;
        }
        int width = firstDigits.length();
        for (int n = first; n <= end; ++n) {
            String digits = Integer.toString(n);
            StringBuilder id = new StringBuilder(stem);
            for (int pad = digits.length(); pad < width; ++pad) {
                id.append('0');
            }
            id.append(digits);
            if (id.length() < 5) {
                continue;
            }
            location.add(start + "\t" + last + "\t" + id + "\tGeneID");
        }
    }

    /**
     * Annotates every passage of every document with the gene identifiers it mentions and
     * writes the result through {@code BioCOutput}.
     *
     * <p>For each passage the candidates returned by {@link #SearchGeneIDLocation(String)} are
     * normalised (lower-cased, with runs of non-word characters, hyphens and underscores removed)
     * and looked up in {@code GNormPlus.GeneIDs_hash}. Each hit is appended to the passage's
     * annotation list as the candidate entry followed by {@code \tGeneID:<mapped value>}.
     *
     * @param Filename     passed unchanged to {@code BioCOutput}
     * @param FilenameBioC passed unchanged to {@code BioCOutput}
     * @throws IOException        declared by this method; presumably raised by {@code BioCOutput}
     * @throws XMLStreamException declared by this method; presumably raised by {@code BioCOutput}
     */
    public void GeneIDRecognition(String Filename, String FilenameBioC) throws IOException, XMLStreamException {
        for (int doc = 0; doc < this.data.getBioCDocobj().PMIDs.size(); ++doc) {
            for (int passage = 0; passage < this.data.getBioCDocobj().PassageNames.get(doc).size(); ++passage) {
                String text = this.data.getBioCDocobj().PassageContexts.get(doc).get(passage);
                ArrayList<String> candidates = this.SearchGeneIDLocation(text);
                for (String candidate : candidates) {
                    String[] fields = candidate.split("\t");
                    // Lookup key: the mention in lower case with all separators stripped.
                    String key = fields[2].toLowerCase();
                    key = key.replaceAll("[\\W\\-\\_]+", "");
                    if (GNormPlus.GeneIDs_hash.containsKey(key)) {
                        this.data.getBioCDocobj().Annotations.get(doc).get(passage).add(candidate + "\tGeneID:" + GNormPlus.GeneIDs_hash.get(key));
                    }
                }
            }
        }
        this.data.getBioCDocobj().BioCOutput(Filename, FilenameBioC, this.data.getBioCDocobj().Annotations, true, true);
    }
}

