/*
 * Decompiled with CFR 0.152.
 */
package GNormPluslib;

import GNormPluslib.GNPProcessingData;
import GNormPluslib.GNormPlus;
import GNormPluslib.InconsistentDataException;
import GNormPluslib.PrefixTree;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.lang.invoke.CallSite;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.stream.XMLStreamException;

public class SR {
    private GNPProcessingData data;

    /**
     * Creates a recognizer bound to the given processing data.
     *
     * @param data container whose BioC document object and lookup tables
     *             (filtering hash, abbreviation hashes) the methods of this
     *             class read and update; stored as-is, not copied
     */
    public SR(GNPProcessingData data) {
        this.data = data;
    }

    /** Species mention followed by a " strain" qualifier; group 1 is the text before it. */
    private static final Pattern SR_STRAIN_QUALIFIER = Pattern.compile("^(.+?) [sS]train");

    /** Leading alphabetic word of a multi-word mention; used as the mention's genus name. */
    private static final Pattern SR_FIRST_WORD = Pattern.compile("^([A-Za-z]+) ");

    /** Digits at the start of an identifier, after any number of '*' markers. */
    private static final Pattern SR_LEADING_TAXID = Pattern.compile("^\\**([0-9]+)");

    /** Identifier made up only of optional '*' markers followed by digits. */
    private static final Pattern SR_WHOLE_TAXID = Pattern.compile("^\\**([0-9]+)$");

    /** The word "cell"/"cells"/"cell line(s)" delimited by non-word characters. */
    private static final Pattern SR_CELL_CONTEXT = Pattern.compile("[\\W\\-]cell([\\- ]*line|)[s]*[\\W\\-]");

    /** Regex alternation of words that mark an antibody context next to a species name. */
    private static final String SR_ANTIBODY_TERMS = "(anti|antibody|antibodies|serum|polyclonal|monoclonal|igg)";

    /** Regex for one or more separator characters between a mention and a context word. */
    private static final String SR_SEPARATORS = "[\\W\\-\\_]+";

    /** Mutable bookkeeping for a single document, shared by all recognition passes. */
    private static final class SpeciesDocState {
        /** Identifier of the document being processed. */
        final String pmid;
        /** Numeric taxonomy id -> first word of the species mention it came from ("" for single-word mentions). */
        final HashMap<String, String> taxonToGenus = new HashMap<>();
        /** Keys of the form {@code passageIndex + "\t" + charOffset} for every character already annotated. */
        final Set<String> targeted = new HashSet<>();
        /** Lower-cased species mention -> identifier string it was annotated with. */
        final HashMap<String, String> mentionToId = new HashMap<>();
        /** Every identifier string annotated so far, in annotation order. */
        final ArrayList<String> seenIds = new ArrayList<>();

        SpeciesDocState(String pmid) {
            this.pmid = pmid;
        }
    }

    /**
     * Annotates species, cell, genus and strain mentions in every passage of every
     * loaded document and then writes the result through {@code BioCOutput}.
     *
     * <p>Per document the passes run in this order: global species lexicon, global
     * cell lexicon, a genus lexicon built from the species found, a strain lexicon
     * read from {@code StrainFilename}, a lexicon of abbreviations / space-less
     * variants of the species found, and finally a pass that collapses
     * multi-identifier annotations to a single identifier. A character range that
     * an earlier pass annotated is never annotated again by a later pass.
     *
     * @param Filename       forwarded unchanged to {@code BioCOutput}
     * @param FilenameBioC   forwarded unchanged to {@code BioCOutput}
     * @param StrainFilename tab-separated file with at least three columns
     *                       (ancestor id, tax id, names); it is re-read for each document
     * @param FilterAntibody when equal to {@code "False"}, species mentions in an
     *                       antibody context are kept; any other value drops them
     * @throws IOException        if the strain file cannot be read
     * @throws XMLStreamException declared for {@code BioCOutput}
     */
    public void SpeciesRecognition(String Filename, String FilenameBioC, String StrainFilename, String FilterAntibody) throws IOException, XMLStreamException {
        for (int i = 0; i < this.data.getBioCDocobj().PMIDs.size(); ++i) {
            SpeciesDocState state = new SpeciesDocState(this.data.getBioCDocobj().PMIDs.get(i));
            HashMap<String, String> genusNames = new HashMap<>();

            int passageCount = this.data.getBioCDocobj().PassageNames.get(i).size();
            for (int j = 0; j < passageCount; ++j) {
                String passage = this.data.getBioCDocobj().PassageContexts.get(i).get(j);
                tagSpeciesMentions(i, j, passage, FilterAntibody, state);
                tagCellMentions(i, j, passage, state);
                // Deliberately refreshed after every passage: a genus name collected from an
                // earlier passage stays even if a later mention overwrites taxonToGenus.
                collectGenusNames(state, genusNames);
            }

            // Genus names that are always searched for, whatever was found in the text.
            genusNames.put("3702", "arabidopsis");
            genusNames.put("4932", "saccharomyces");
            genusNames.put("562", "escherichia");
            genusNames.put("7227", "drosophila");
            genusNames.put("8355", "xenopus");
            PrefixTree genusTree = new PrefixTree();
            genusTree.Hash2Tree(genusNames);
            tagGenusMentions(i, genusTree, state);

            PrefixTree strainTree = new PrefixTree();
            strainTree.Hash2Tree(loadStrainNames(StrainFilename, state.taxonToGenus));
            tagStrainMentions(i, strainTree, state);

            PrefixTree alternativeTree = new PrefixTree();
            alternativeTree.Hash2Tree(buildAlternativeNames(state));
            tagAlternativeNameMentions(i, alternativeTree, state);

            resolveAnnotationIds(i, state);
        }
        this.data.getBioCDocobj().BioCOutput(Filename, FilenameBioC, this.data.getBioCDocobj().Annotations, false, true);
    }

    /** Annotates matches of the global species lexicon in one passage, dropping implausible candidates. */
    private void tagSpeciesMentions(int i, int j, String passage, String FilterAntibody, SpeciesDocState state) {
        boolean hasSlot = hasAnnotationSlot(i, j);
        // Padding lets the left-context substring succeed when a reported end offset
        // runs slightly past the end of the passage.
        String padded = passage + "ZZZZZZZZZZZZZZZZZZZZZZZZZZZ";
        for (String location : GNormPlus.PT_Species.SearchMentionLocation(passage, "Species")) {
            String[] anno = location.split("\t");
            int start = Integer.parseInt(anno[0]);
            int last = Integer.parseInt(anno[1]);
            String forward;
            String backward;
            try {
                // Mention plus up to 21 characters before it / after it.
                forward = padded.substring(Math.max(0, start - 21), last);
                backward = passage.substring(start, Math.min(passage.length(), last + 21));
            }
            catch (IndexOutOfBoundsException e) {
                throw new RuntimeException("Exception in document " + state.pmid + " in paragraph with offset " + this.data.getBioCDocobj().PassageOffsets.get(i).get(j) + " and length " + passage.length() + " beginning with " + passage.substring(0, Math.min(passage.length(), 80)), e);
            }
            String mention = anno[2];
            String id = anno[3];
            String escaped = escapeMentionForRegex(mention);
            String forwardLc = forward.toLowerCase();
            String backwardLc = backward.toLowerCase();

            boolean nearAntibody = forwardLc.matches(".*" + SR_ANTIBODY_TERMS + SR_SEPARATORS + escaped)
                    || backwardLc.matches(escaped + SR_SEPARATORS + SR_ANTIBODY_TERMS + ".*")
                    || backwardLc.matches(escaped + SR_SEPARATORS + "[A-Za-z0-9]+" + SR_SEPARATORS + SR_ANTIBODY_TERMS + ".*");

            // A mention with an opening bracket that is directly followed by ')' absorbs it.
            if (mention.matches(".*[\\(\\[\\{].*") && backwardLc.matches(escaped + "\\).*")) {
                ++last;
                mention = mention + ")";
            }

            boolean rejected = backwardLc.matches(escaped + "[0-9].*")
                    || (mention.matches(".*[;:,].*") && mention.length() <= 10)
                    || mention.matches("to[\\W\\-\\_]+[0-9]+")
                    || (mention.matches("[a-z][\\)\\]\\}].*") && !mention.matches(".*[\\(\\[\\{].*") && mention.length() <= 10)
                    || (mention.matches(".*[\\(\\[\\{].*") && !mention.matches(".*[\\)\\]\\}].*") && mention.length() <= 10)
                    || id.equals("NA")
                    || !hasSlot
                    || mention.matches("^[A-Za-z] [A-Za-z0-9]+$")
                    || mention.length() < 3
                    || (!FilterAntibody.equals("False") && nearAntibody);
            if (rejected) {
                continue;
            }

            Matcher strainQualifier = SR_STRAIN_QUALIFIER.matcher(mention);
            if (strainQualifier.find()) {
                mention = strainQualifier.group(1);
                // NOTE(review): 7 == " strain".length(); this is only the right end offset
                // when " strain" is the very end of the mention - confirm against the lexicon.
                last -= 7;
            }

            recordMention(i, j, String.valueOf(start), String.valueOf(last), mention, "Species", id, state);
            state.mentionToId.put(mention.toLowerCase(), id);

            String genus = "";
            Matcher firstWord = SR_FIRST_WORD.matcher(mention);
            if (firstWord.find()) {
                genus = firstWord.group(1);
            }
            for (String singleId : id.split(";")) {
                Matcher taxon = SR_LEADING_TAXID.matcher(singleId);
                if (taxon.find()) {
                    state.taxonToGenus.put(taxon.group(1), genus);
                }
            }
        }
    }

    /** Annotates matches of the global cell lexicon that are followed by the word "cell" within 40 characters. */
    private void tagCellMentions(int i, int j, String passage, SpeciesDocState state) {
        boolean hasSlot = hasAnnotationSlot(i, j);
        for (String location : GNormPlus.PT_Cell.SearchMentionLocation(passage, "Cell")) {
            String[] anno = location.split("\t");
            int start = Integer.parseInt(anno[0]);
            int last = Integer.parseInt(anno[1]);
            if (last > passage.length()) {
                continue;
            }
            String mention = anno[2];
            String id = anno[3];
            if (!hasSlot || state.targeted.contains(j + "\t" + start)) {
                continue;
            }
            String forwardLc = passage.substring(Math.max(0, start - 21), last).toLowerCase();
            String backwardLc = passage.substring(start, Math.min(passage.length(), last + 21)).toLowerCase();
            String following = passage.substring(last, Math.min(passage.length(), last + 40)).toLowerCase();
            String escaped = escapeMentionForRegex(mention);
            if (escaped.matches(".*[\\[\\]\\(\\)\\{\\}].*")
                    || backwardLc.matches(escaped + "[0-9\\-\\_].*")
                    || forwardLc.matches(".*[0-9\\-\\_]" + escaped)
                    || !SR_CELL_CONTEXT.matcher(following).find()) {
                continue;
            }
            recordMention(i, j, String.valueOf(start), String.valueOf(last), mention, "Cell", starIfGeneTaxon(id), state);
        }
    }

    /** Adds a genus name for every taxon seen so far: the global one if known, overridden by a mention genus of 7+ characters. */
    private void collectGenusNames(SpeciesDocState state, HashMap<String, String> genusNames) {
        for (String taxon : state.taxonToGenus.keySet()) {
            if (GNormPlus.GenusID_hash.containsKey(taxon)) {
                genusNames.put(taxon, GNormPlus.GenusID_hash.get(taxon));
            }
            String mentionGenus = state.taxonToGenus.get(taxon);
            if (mentionGenus.length() >= 7) {
                genusNames.put(taxon, mentionGenus);
            }
        }
    }

    /** Annotates not-yet-covered matches of the document's genus lexicon in every passage. */
    private void tagGenusMentions(int i, PrefixTree genusTree, SpeciesDocState state) {
        int passageCount = this.data.getBioCDocobj().PassageNames.get(i).size();
        for (int j = 0; j < passageCount; ++j) {
            if (!hasPassageWithAnnotationSlot(i, j)) {
                continue;
            }
            String passage = this.data.getBioCDocobj().PassageContexts.get(i).get(j);
            for (String location : genusTree.SearchMentionLocation(passage, "Genus")) {
                String[] anno = location.split("\t");
                String start = anno[0];
                String last = anno[1];
                String mention = anno[2];
                String id = anno[3];
                if (state.targeted.contains(j + "\t" + start)) {
                    continue;
                }
                // Drop existing '*' markers so the marker is decided by taxid4gene alone.
                Matcher taxon = SR_WHOLE_TAXID.matcher(id);
                if (taxon.find()) {
                    id = taxon.group(1);
                }
                recordMention(i, j, start, last, mention, "Genus", starIfGeneTaxon(id), state);
            }
        }
    }

    /**
     * Reads the strain file and keeps the rows whose ancestor id or own tax id is
     * one of the taxa found in the current document.
     */
    private static HashMap<String, String> loadStrainNames(String StrainFilename, HashMap<String, String> taxonToGenus) throws IOException {
        HashMap<String, String> strainNames = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(StrainFilename))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\t");
                if (columns.length < 3) {
                    // Blank or truncated line: nothing usable, and indexing it would throw.
                    continue;
                }
                String ancestor = columns[0];
                String taxId = columns[1];
                if (taxonToGenus.containsKey(ancestor) || taxonToGenus.containsKey(taxId)) {
                    strainNames.put(taxId, columns[2]);
                }
            }
        }
        return strainNames;
    }

    /** Annotates not-yet-covered matches of the document's strain lexicon in every passage. */
    private void tagStrainMentions(int i, PrefixTree strainTree, SpeciesDocState state) {
        int passageCount = this.data.getBioCDocobj().PassageNames.get(i).size();
        for (int j = 0; j < passageCount; ++j) {
            if (!hasPassageWithAnnotationSlot(i, j)) {
                continue;
            }
            String passage = this.data.getBioCDocobj().PassageContexts.get(i).get(j);
            for (String location : strainTree.SearchMentionLocation(passage, "Strain")) {
                String[] anno = location.split("\t");
                String start = anno[0];
                String last = anno[1];
                String mention = anno[2];
                String id = anno[3];
                if (state.targeted.contains(j + "\t" + start)
                        || mention.matches(".*[;,\\{\\}\\(\\)\\[\\]].*")
                        || mention.matches("[a-z]{1,4} [0-9]{1,3}")) {
                    continue;
                }
                recordMention(i, j, start, last, mention, "Strain", starIfGeneTaxon(id), state);
            }
        }
    }

    /**
     * Builds identifier -> "|"-joined alternative names for the species found so far:
     * the document-specific abbreviation of each mention (if one is registered) and
     * the mention with its spaces removed.
     */
    private HashMap<String, String> buildAlternativeNames(SpeciesDocState state) {
        HashMap<String, String> alternatives = new HashMap<>();
        for (String mention : state.mentionToId.keySet()) {
            String id = state.mentionToId.get(mention);
            String longFormKey = state.pmid + "\t" + mention;
            if (this.data.getPmidLF2Abb_lc_hash().containsKey(longFormKey)) {
                appendAlternative(alternatives, id, this.data.getPmidLF2Abb_lc_hash().get(longFormKey));
            }
            appendAlternative(alternatives, id, mention.replaceAll(" ", ""));
        }
        return alternatives;
    }

    /** Appends {@code name} to the "|"-separated list stored under {@code id}, creating the entry if needed. */
    private static void appendAlternative(HashMap<String, String> alternatives, String id, String name) {
        if (alternatives.containsKey(id)) {
            alternatives.put(id, alternatives.get(id) + "|" + name);
        } else {
            alternatives.put(id, name);
        }
    }

    /** Annotates not-yet-covered matches of the abbreviation / space-less name lexicon as species. */
    private void tagAlternativeNameMentions(int i, PrefixTree alternativeTree, SpeciesDocState state) {
        int passageCount = this.data.getBioCDocobj().PassageNames.get(i).size();
        for (int j = 0; j < passageCount; ++j) {
            if (!hasPassageWithAnnotationSlot(i, j)) {
                continue;
            }
            String passage = this.data.getBioCDocobj().PassageContexts.get(i).get(j);
            for (String location : alternativeTree.SearchMentionLocation(passage, "Species")) {
                String[] anno = location.split("\t");
                String start = anno[0];
                String last = anno[1];
                String mention = anno[2];
                String id = anno[3];
                if (state.targeted.contains(j + "\t" + start)) {
                    continue;
                }
                id = starIfGeneTaxon(id);
                recordMention(i, j, start, last, mention, "Species", id, state);
                state.mentionToId.put(mention.toLowerCase(), id);
            }
        }
    }

    /**
     * Gives every annotation of the document a single identifier: an abbreviation
     * takes the identifier of its long form; a ";"-separated identifier list is
     * replaced by the first identifier already seen in the document, or otherwise
     * by the numerically smallest one (in which case the type becomes "Species").
     */
    private void resolveAnnotationIds(int i, SpeciesDocState state) {
        int passageCount = this.data.getBioCDocobj().PassageNames.get(i).size();
        for (int j = 0; j < passageCount; ++j) {
            if (!hasPassageWithAnnotationSlot(i, j)) {
                continue;
            }
            for (int a = 0; a < this.data.getBioCDocobj().Annotations.get(i).get(j).size(); ++a) {
                String[] fields = this.data.getBioCDocobj().Annotations.get(i).get(j).get(a).split("\t");
                String start = fields[0];
                String last = fields[1];
                String mention = fields[2];
                String type = fields[3];
                String prefix = start + "\t" + last + "\t" + mention + "\t";

                // The long form itself must be looked up in mentionToId; testing the boolean
                // result of containsKey can never match a String key and left this branch dead.
                String abbreviationKey = state.pmid + "\t" + mention.toLowerCase();
                if (this.data.getPmidAbb2LF_lc_hash().containsKey(abbreviationKey)
                        && state.mentionToId.containsKey(this.data.getPmidAbb2LF_lc_hash().get(abbreviationKey))) {
                    String longFormId = state.mentionToId.get(this.data.getPmidAbb2LF_lc_hash().get(abbreviationKey));
                    this.data.getBioCDocobj().Annotations.get(i).get(j).set(a, prefix + type + "\t" + longFormId);
                    registerFilterKey(mention);
                    continue;
                }

                if (fields.length <= 4) {
                    continue;
                }
                String[] candidates = fields[4].split(";");
                if (candidates.length < 2) {
                    continue;
                }

                String known = firstSeenCandidate(state.seenIds, candidates);
                if (known != null) {
                    this.data.getBioCDocobj().Annotations.get(i).get(j).set(a, prefix + type + "\t" + known);
                    registerFilterKey(mention);
                    continue;
                }

                String chosen = starIfGeneTaxon(smallestTaxonId(candidates));
                this.data.getBioCDocobj().Annotations.get(i).get(j).set(a, prefix + "Species\t" + chosen);
                registerFilterKey(mention);
            }
        }
    }

    /** Returns the first entry of {@code seenIds} that equals one of {@code candidates}, or {@code null} if none does. */
    private static String firstSeenCandidate(ArrayList<String> seenIds, String[] candidates) {
        for (String seen : seenIds) {
            for (String candidate : candidates) {
                if (candidate.equals(seen)) {
                    return seen;
                }
            }
        }
        return null;
    }

    /**
     * Returns the candidate with the numerically smallest identifier. The first
     * candidate is returned verbatim (including any '*' markers) when it wins; any
     * other winner is returned as bare digits.
     *
     * @throws NumberFormatException if a candidate does not start with digits after its '*' markers
     */
    private static String smallestTaxonId(String[] candidates) {
        String smallestId = candidates[0];
        int smallest = Integer.parseInt(stripMarkers(candidates[0]));
        for (int y = 1; y < candidates.length; ++y) {
            String bare = stripMarkers(candidates[y]);
            int value = Integer.parseInt(bare);
            if (value < smallest) {
                smallest = value;
                smallestId = bare;
            }
        }
        return smallestId;
    }

    /** Returns the leading digits of {@code id} after optional '*' markers, or {@code id} itself if there are none. */
    private static String stripMarkers(String id) {
        Matcher taxon = SR_LEADING_TAXID.matcher(id);
        return taxon.find() ? taxon.group(1) : id;
    }

    /**
     * Appends an annotation line to passage {@code j} of document {@code i},
     * registers the mention's filter key, remembers the identifier and marks every
     * character offset in {@code [start, last)} as covered.
     */
    private void recordMention(int i, int j, String start, String last, String mention, String type, String id, SpeciesDocState state) {
        this.data.getBioCDocobj().Annotations.get(i).get(j).add(start + "\t" + last + "\t" + mention + "\t" + type + "\t" + id);
        registerFilterKey(mention);
        state.seenIds.add(id);
        int from = Integer.parseInt(start);
        int to = Integer.parseInt(last);
        for (int s = from; s < to; ++s) {
            state.targeted.add(j + "\t" + s);
        }
    }

    /** Stores the normalized form of {@code mention} (lower case, non-word characters removed, digits as '0') in the filtering hash. */
    private void registerFilterKey(String mention) {
        String key = mention.toLowerCase().replaceAll("[\\W\\-\\_]", "").replaceAll("[0-9]", "0");
        this.data.getFiltering_hash().put(key, "");
    }

    /** Lower-cases {@code mention} and backslash-escapes every character other than ASCII letters, digits, '@' and space so it can be embedded in a regex. */
    private static String escapeMentionForRegex(String mention) {
        return mention.toLowerCase().replaceAll("([^A-Za-z0-9@ ])", "\\\\$1");
    }

    /** Prefixes {@code id} with '*' when it is listed in {@code GNormPlus.taxid4gene}. */
    private static String starIfGeneTaxon(String id) {
        return GNormPlus.taxid4gene.contains(id) ? "*" + id : id;
    }

    /** True when the annotation structure has a list for passage {@code j} of document {@code i}. */
    private boolean hasAnnotationSlot(int i, int j) {
        return this.data.getBioCDocobj().Annotations.size() > i
                && this.data.getBioCDocobj().Annotations.get(i).size() > j;
    }

    /** True when both the passage text and its annotation list exist for passage {@code j} of document {@code i}. */
    private boolean hasPassageWithAnnotationSlot(int i, int j) {
        return this.data.getBioCDocobj().PassageContexts.size() > i
                && this.data.getBioCDocobj().PassageContexts.get(i).size() > j
                && hasAnnotationSlot(i, j);
    }

    public void SpeciesAssignment(String Filename, String FilenameBioC) throws IOException, XMLStreamException {
        this.data.resetBioCDocobj();
        this.data.getBioCDocobj().BioCReaderWithAnnotation(Filename);
        BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US);
        for (int i = 0; i < this.data.getBioCDocobj().Annotations.size(); ++i) {
            Object patt;
            HashMap<String, String> PrefixIDTarget_hash = new HashMap<String, String>();
            PrefixIDTarget_hash.put("9606", "h");
            PrefixIDTarget_hash.put("10090", "m");
            PrefixIDTarget_hash.put("10116", "r");
            PrefixIDTarget_hash.put("4932", "y");
            PrefixIDTarget_hash.put("7227", "d");
            PrefixIDTarget_hash.put("7955", "z|zf|Zf|dr|Dr");
            PrefixIDTarget_hash.put("3702", "at|At");
            HashMap<String, Double> SP2Num_hash = new HashMap<String, Double>();
            for (int j = 0; j < this.data.getBioCDocobj().Annotations.get(i).size(); ++j) {
                for (int k = 0; k < this.data.getBioCDocobj().Annotations.get(i).get(j).size(); ++k) {
                    Pattern ptmp;
                    Matcher mtmp;
                    String[] anno = this.data.getBioCDocobj().Annotations.get(i).get(j).get(k).split("\t");
                    if (anno.length != 5 || !(mtmp = (ptmp = Pattern.compile((String)(patt = "^\\**([0-9]+)$"))).matcher(anno[4])).find()) continue;
                    String id = mtmp.group(1);
                    if (!PrefixIDTarget_hash.containsKey(id)) {
                        PrefixIDTarget_hash.put(id, GNormPlus.PrefixID_hash.get(id));
                    }
                    if (j == 0) {
                        if (SP2Num_hash.containsKey(id)) {
                            SP2Num_hash.put(id, (Double)SP2Num_hash.get(id) + 2.0);
                            continue;
                        }
                        if (GNormPlus.TaxFreq_hash.containsKey(id)) {
                            SP2Num_hash.put(id, GNormPlus.TaxFreq_hash.get(id) + 2.0);
                            continue;
                        }
                        SP2Num_hash.put(id, 2.0);
                        continue;
                    }
                    if (SP2Num_hash.containsKey(id)) {
                        SP2Num_hash.put(id, (Double)SP2Num_hash.get(id) + 1.0);
                        continue;
                    }
                    if (GNormPlus.TaxFreq_hash.containsKey(id)) {
                        SP2Num_hash.put(id, 1.0 + GNormPlus.TaxFreq_hash.get(id));
                        continue;
                    }
                    SP2Num_hash.put(id, 1.0);
                }
            }
            String MajorSP = "9606";
            double MaxSP = 0.0;
            patt = SP2Num_hash.keySet().iterator();
            while (patt.hasNext()) {
                String tid = (String)patt.next();
                if (!((Double)SP2Num_hash.get(tid) > MaxSP)) continue;
                MajorSP = tid;
                MaxSP = (Double)SP2Num_hash.get(tid);
            }
            for (int j = 0; j < this.data.getBioCDocobj().PassageContexts.get(i).size(); ++j) {
                int k;
                String PassageContext = this.data.getBioCDocobj().PassageContexts.get(i).get(j);
                iterator.setText(PassageContext);
                ArrayList<Integer> Sentence_offsets = new ArrayList<Integer>();
                int Sent_start = iterator.first();
                int Sent_last = iterator.next();
                while (Sent_last != -1) {
                    Sentence_offsets.add(Sent_start);
                    Sent_start = Sent_last;
                    Sent_last = iterator.next();
                }
                HashMap<Integer, String> Annotations_Gene_hash = new HashMap<Integer, String>();
                ArrayList<String> Annotations_Species = new ArrayList<String>();
                if (this.data.getBioCDocobj().Annotations.get(i).size() <= j) continue;
                for (int k2 = 0; k2 < this.data.getBioCDocobj().Annotations.get(i).get(j).size(); ++k2) {
                    String[] anno = this.data.getBioCDocobj().Annotations.get(i).get(j).get(k2).split("\t");
                    if (anno.length == 5) {
                        Annotations_Species.add(this.data.getBioCDocobj().Annotations.get(i).get(j).get(k2));
                        continue;
                    }
                    Annotations_Gene_hash.put(k2, this.data.getBioCDocobj().Annotations.get(i).get(j).get(k2));
                }
                HashMap mention2Location2Species_hash = new HashMap();
                HashMap<Integer, String> Location2Species_hash = new HashMap<Integer, String>();
                Iterator iterator2 = Annotations_Gene_hash.keySet().iterator();
                while (iterator2.hasNext()) {
                    String taxid;
                    Matcher mtmp;
                    Pattern ptmp;
                    String patt2;
                    String[] AnnoSp;
                    int sp;
                    k = (Integer)iterator2.next();
                    boolean SPfound = false;
                    String[] anno = ((String)Annotations_Gene_hash.get(k)).split("\t");
                    int G_Start = Integer.parseInt(anno[0]);
                    int G_Last = Integer.parseInt(anno[1]);
                    String G_mentions = anno[2];
                    int Target_Sentence = 0;
                    if (!SPfound) {
                        for (int s = 0; s < Sentence_offsets.size(); ++s) {
                            int Sentence_last = 1000000;
                            if (s < Sentence_offsets.size() - 1) {
                                Sentence_last = (Integer)Sentence_offsets.get(s + 1);
                            }
                            if (G_Start >= Sentence_last) continue;
                            Target_Sentence = s;
                            break;
                        }
                    }
                    int Sentence_Start = (Integer)Sentence_offsets.get(Target_Sentence);
                    int Sentence_Last = 1000000;
                    if (Sentence_offsets.size() > Target_Sentence + 1) {
                        Sentence_Last = (Integer)Sentence_offsets.get(Target_Sentence + 1);
                    }
                    if (!SPfound) {
                        int closet_Sp_Start = 0;
                        for (sp = 0; sp < Annotations_Species.size(); ++sp) {
                            AnnoSp = ((String)Annotations_Species.get(sp)).split("\t");
                            int Sp_Start = Integer.parseInt(AnnoSp[0]);
                            patt2 = "^\\**([0-9]+)$";
                            ptmp = Pattern.compile(patt2);
                            mtmp = ptmp.matcher(AnnoSp[4]);
                            if (!mtmp.find()) continue;
                            taxid = mtmp.group(1);
                            Location2Species_hash.put(Sp_Start, taxid);
                            if (Sp_Start > G_Start || Sp_Start < Sentence_Start || Sp_Start <= closet_Sp_Start) continue;
                            closet_Sp_Start = Sp_Start;
                            Location2Species_hash.put(Integer.parseInt(anno[0]), taxid);
                            if (mention2Location2Species_hash.containsKey(G_mentions.toLowerCase())) {
                                ((HashMap)mention2Location2Species_hash.get(G_mentions.toLowerCase())).put(Integer.parseInt(anno[0]), taxid);
                            } else {
                                mention2Location2Species_hash.put(G_mentions.toLowerCase(), Location2Species_hash);
                            }
                            SPfound = true;
                        }
                    }
                    if (SPfound) continue;
                    int closet_Sp_Last = 1000000;
                    for (sp = 0; sp < Annotations_Species.size(); ++sp) {
                        AnnoSp = ((String)Annotations_Species.get(sp)).split("\t");
                        int Sp_Last = Integer.parseInt(AnnoSp[1]);
                        patt2 = "^\\**([0-9]+)$";
                        ptmp = Pattern.compile(patt2);
                        mtmp = ptmp.matcher(AnnoSp[4]);
                        if (!mtmp.find()) continue;
                        taxid = mtmp.group(1);
                        if (Sp_Last < G_Last || Sp_Last > Sentence_Last || Sp_Last >= closet_Sp_Last) continue;
                        closet_Sp_Last = Sp_Last;
                        Location2Species_hash.put(Integer.parseInt(anno[0]), taxid);
                        if (mention2Location2Species_hash.containsKey(G_mentions.toLowerCase())) {
                            ((HashMap)mention2Location2Species_hash.get(G_mentions.toLowerCase())).put(Integer.parseInt(anno[0]), taxid);
                        } else {
                            mention2Location2Species_hash.put(G_mentions.toLowerCase(), Location2Species_hash);
                        }
                        SPfound = true;
                    }
                }
                iterator2 = Annotations_Gene_hash.keySet().iterator();
                while (iterator2.hasNext()) {
                    String taxid;
                    Matcher mtmp;
                    Pattern ptmp;
                    String patt3;
                    String[] AnnoSp;
                    k = (Integer)iterator2.next();
                    String[] anno = ((String)Annotations_Gene_hash.get(k)).split("\t");
                    int G_Start = Integer.parseInt(anno[0]);
                    int G_Last = Integer.parseInt(anno[1]);
                    String G_mentions = anno[2];
                    String G_type = anno[3];
                    String[] G_mention_list = G_mentions.split("\\|");
                    if (G_mention_list.length == 0) {
                        InconsistentDataException e = new InconsistentDataException("There is no gene mention but at least one was expected in document with ID " + this.data.getBioCDocobj().PMIDs.get(i) + " in paragraph with offset " + this.data.getBioCDocobj().PassageOffsets.get(i).get(j) + " and length " + PassageContext.length() + " beginning with " + PassageContext.substring(0, Math.min(PassageContext.length(), 80)));
                        e.setDocId(this.data.getBioCDocobj().PMIDs.get(i));
                        throw e;
                    }
                    String G_mention = G_mention_list[0];
                    boolean SPfound = false;
                    for (String taxid2 : PrefixIDTarget_hash.keySet()) {
                        Pattern ptmp2;
                        Matcher mtmp2;
                        if (GNormPlus.GeneWithoutSPPrefix_hash.containsKey(G_mention.toLowerCase()) || !(mtmp2 = (ptmp2 = Pattern.compile("^(" + (String)PrefixIDTarget_hash.get(taxid2) + ")([A-Z].*)$")).matcher(G_mention)).find()) continue;
                        String MentionWoPrefix = mtmp2.group(2);
                        this.data.getBioCDocobj().Annotations.get(i).get(j).set(k, anno[0] + "\t" + anno[1] + "\t" + anno[2] + "|" + MentionWoPrefix + "\t" + anno[3] + "\tPrefix:" + taxid2);
                        SPfound = true;
                        break;
                    }
                    int Target_Sentence = 0;
                    if (!SPfound) {
                        for (int s = 0; s < Sentence_offsets.size(); ++s) {
                            int Sentence_last = 1000000;
                            if (s < Sentence_offsets.size() - 1) {
                                Sentence_last = (Integer)Sentence_offsets.get(s + 1);
                            }
                            if (G_Start >= Sentence_last) continue;
                            Target_Sentence = s;
                            break;
                        }
                    }
                    int Sentence_Start = (Integer)Sentence_offsets.get(Target_Sentence);
                    int Sentence_Last = 1000000;
                    if (Sentence_offsets.size() > Target_Sentence + 1) {
                        Sentence_Last = (Integer)Sentence_offsets.get(Target_Sentence + 1);
                    }
                    if (!SPfound) {
                        int closet_Sp_Start = 0;
                        for (int sp = 0; sp < Annotations_Species.size(); ++sp) {
                            AnnoSp = ((String)Annotations_Species.get(sp)).split("\t");
                            int Sp_Start = Integer.parseInt(AnnoSp[0]);
                            patt3 = "^\\**([0-9]+)$";
                            ptmp = Pattern.compile(patt3);
                            mtmp = ptmp.matcher(AnnoSp[4]);
                            if (!mtmp.find()) continue;
                            taxid = mtmp.group(1);
                            if (Sp_Start > G_Start || Sp_Start < Sentence_Start || Sp_Start <= closet_Sp_Start) continue;
                            closet_Sp_Start = Sp_Start;
                            if (GNormPlus.SP_Virus2Human_hash.containsKey(taxid)) {
                                this.data.getBioCDocobj().Annotations.get(i).get(j).set(k, (String)Annotations_Gene_hash.get(k) + "\tLeft:" + taxid + "&9606");
                            } else {
                                this.data.getBioCDocobj().Annotations.get(i).get(j).set(k, (String)Annotations_Gene_hash.get(k) + "\tLeft:" + taxid);
                            }
                            SPfound = true;
                        }
                    }
                    if (!SPfound) {
                        int closet_Sp_Last = 1000000;
                        for (int sp = 0; sp < Annotations_Species.size(); ++sp) {
                            AnnoSp = ((String)Annotations_Species.get(sp)).split("\t");
                            int Sp_Last = Integer.parseInt(AnnoSp[1]);
                            patt3 = "^\\**([0-9]+)$";
                            ptmp = Pattern.compile(patt3);
                            mtmp = ptmp.matcher(AnnoSp[4]);
                            if (!mtmp.find()) continue;
                            taxid = mtmp.group(1);
                            if (Sp_Last < G_Last || Sp_Last > Sentence_Last || Sp_Last >= closet_Sp_Last) continue;
                            closet_Sp_Last = Sp_Last;
                            if (GNormPlus.SP_Virus2Human_hash.containsKey(taxid)) {
                                this.data.getBioCDocobj().Annotations.get(i).get(j).set(k, (String)Annotations_Gene_hash.get(k) + "\tRight:" + taxid + "&9606");
                            } else {
                                this.data.getBioCDocobj().Annotations.get(i).get(j).set(k, (String)Annotations_Gene_hash.get(k) + "\tRight:" + taxid);
                            }
                            SPfound = true;
                        }
                    }
                    if (SPfound) continue;
                    if (mention2Location2Species_hash.containsKey(G_mentions.toLowerCase())) {
                        int closed_loca = 0;
                        Iterator iterator3 = ((HashMap)mention2Location2Species_hash.get(G_mentions.toLowerCase())).keySet().iterator();
                        while (iterator3.hasNext()) {
                            int loca_start = (Integer)iterator3.next();
                            if (loca_start >= G_Start || loca_start <= closed_loca) continue;
                            closed_loca = loca_start;
                        }
                        if (closed_loca > 0) {
                            if (GNormPlus.SP_Virus2Human_hash.containsKey(Location2Species_hash.get(closed_loca))) {
                                this.data.getBioCDocobj().Annotations.get(i).get(j).set(k, (String)Annotations_Gene_hash.get(k) + "\tFocus:" + (String)Location2Species_hash.get(closed_loca) + "&9606");
                                continue;
                            }
                            this.data.getBioCDocobj().Annotations.get(i).get(j).set(k, (String)Annotations_Gene_hash.get(k) + "\tFocus:" + (String)Location2Species_hash.get(closed_loca));
                            continue;
                        }
                        if (GNormPlus.SP_Virus2Human_hash.containsKey(MajorSP)) {
                            this.data.getBioCDocobj().Annotations.get(i).get(j).set(k, (String)Annotations_Gene_hash.get(k) + "\tFocus:" + MajorSP + "&9606");
                            continue;
                        }
                        this.data.getBioCDocobj().Annotations.get(i).get(j).set(k, (String)Annotations_Gene_hash.get(k) + "\tFocus:" + MajorSP);
                        continue;
                    }
                    if (GNormPlus.SP_Virus2Human_hash.containsKey(MajorSP)) {
                        this.data.getBioCDocobj().Annotations.get(i).get(j).set(k, (String)Annotations_Gene_hash.get(k) + "\tFocus:" + MajorSP + "&9606");
                        continue;
                    }
                    this.data.getBioCDocobj().Annotations.get(i).get(j).set(k, (String)Annotations_Gene_hash.get(k) + "\tFocus:" + MajorSP);
                }
            }
        }
        this.data.getBioCDocobj().BioCOutput(Filename, FilenameBioC, this.data.getBioCDocobj().Annotations, false, true);
    }

    /**
     * Tags every annotation of every passage of every document with one
     * caller-supplied species and then writes the result via {@code BioCOutput}.
     *
     * <p>Each annotation is a tab-separated string and is rewritten in place:
     * <ul>
     *   <li>Exactly five fields: all {@code *} characters are removed from the
     *       fifth field; nothing is appended.</li>
     *   <li>Otherwise, if the third field (the mention) is not a key of
     *       {@code GNormPlus.GeneWithoutSPPrefix_hash} (looked up lower-cased)
     *       and starts with one of the prefixes configured for
     *       {@code FocusSpecies} followed by an upper-case letter: the mention
     *       without its prefix is appended to the third field after a {@code |},
     *       and {@code Prefix:<FocusSpecies>} is appended as a new field.</li>
     *   <li>Otherwise: {@code Focus:<FocusSpecies>} is appended as a new field.</li>
     * </ul>
     *
     * <p>If {@code GNormPlus.PrefixID_hash} has no entry for {@code FocusSpecies},
     * the prefix rule is skipped entirely. Previously the missing value was
     * concatenated into the regex as the literal text {@code null}.
     *
     * @param Filename     passed through unchanged to {@code BioCOutput}
     * @param FilenameBioC passed through unchanged to {@code BioCOutput}
     * @param FocusSpecies species identifier written into the {@code Prefix:} /
     *                     {@code Focus:} tags and used as the lookup key in
     *                     {@code GNormPlus.PrefixID_hash}
     * @throws IOException        presumably raised by {@code BioCOutput}; confirm against that method
     * @throws XMLStreamException presumably raised by {@code BioCOutput}; confirm against that method
     */
    public void SpeciesAssignment(String Filename, String FilenameBioC, String FocusSpecies) throws IOException, XMLStreamException {
        // The prefix regex depends only on FocusSpecies, so it is built once instead of once per annotation.
        // A missing entry yields no pattern at all rather than a pattern matching the literal text "null".
        Object prefixAlternatives = GNormPlus.PrefixID_hash.get(FocusSpecies);
        Pattern prefixPattern = prefixAlternatives == null ? null : Pattern.compile("^(" + prefixAlternatives + ")([A-Z].*)$");
        for (int i = 0; i < this.data.getBioCDocobj().Annotations.size(); ++i) {
            for (int j = 0; j < this.data.getBioCDocobj().Annotations.get(i).size(); ++j) {
                for (int k = 0; k < this.data.getBioCDocobj().Annotations.get(i).get(j).size(); ++k) {
                    String annotation = this.data.getBioCDocobj().Annotations.get(i).get(j).get(k);
                    String[] anno = annotation.split("\t");
                    if (anno.length == 5) {
                        // Five fields: only strip the '*' markers from the last field.
                        String id = anno[4].replaceAll("\\*", "");
                        this.data.getBioCDocobj().Annotations.get(i).get(j).set(k, anno[0] + "\t" + anno[1] + "\t" + anno[2] + "\t" + anno[3] + "\t" + id);
                        continue;
                    }
                    // The exclusion lookup runs first, as in the original, so a malformed annotation still fails fast.
                    if (!GNormPlus.GeneWithoutSPPrefix_hash.containsKey(anno[2].toLowerCase()) && prefixPattern != null) {
                        Matcher prefixMatcher = prefixPattern.matcher(anno[2]);
                        if (prefixMatcher.find()) {
                            String MentionWoPrefix = prefixMatcher.group(2);
                            this.data.getBioCDocobj().Annotations.get(i).get(j).set(k, anno[0] + "\t" + anno[1] + "\t" + anno[2] + "|" + MentionWoPrefix + "\t" + anno[3] + "\tPrefix:" + FocusSpecies);
                            continue;
                        }
                    }
                    // No usable species prefix on the mention: fall back to the focus species.
                    this.data.getBioCDocobj().Annotations.get(i).get(j).set(k, annotation + "\tFocus:" + FocusSpecies);
                }
            }
        }
        this.data.getBioCDocobj().BioCOutput(Filename, FilenameBioC, this.data.getBioCDocobj().Annotations, false, true);
    }
}

