/*
 * Decompiled with CFR 0.152.
 */
package GNormPluslib;

import GNormPluslib.GNPProcessingData;
import GNormPluslib.GNormPlus;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.lang.invoke.CallSite;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.stream.XMLStreamException;
import org.tartarus.snowball.ext.englishStemmer;

public class GNR {
    /**
     * Processing state this recognizer works on. The methods of this class read the
     * BioC document object from it and store detected abbreviations in its hash maps.
     */
    private GNPProcessingData data;

    /**
     * Creates a recognizer bound to the given processing data.
     *
     * @param data the processing data to read from and write to; the reference is
     *             stored as-is (no copy is made)
     */
    public GNR(GNPProcessingData data) {
        this.data = data;
    }

    /**
     * Runs the external abbreviation detector on the already loaded BioC document and
     * stores the detected short-form/long-form pairs in the processing data.
     *
     * <p>The method works in three steps:
     * <ol>
     *   <li>writes one record per document ({@code pmid}, text, blank line) to
     *       {@code FilenameAbb}; only passages containing an opening parenthesis are
     *       included, after parenthesised comma lists have been removed;</li>
     *   <li>runs {@code ./Ab3P} (or {@code java -jar bioadi.jar} on Windows) and copies
     *       its standard output to {@code FilenameAbb + ".out"};</li>
     *   <li>parses that file and fills the abbreviation maps of the processing data.
     *       The long-form to short-form map is only filled for pairs with a score of
     *       at least 0.9.</li>
     * </ol>
     *
     * @param Filename    unused by this method
     * @param FilenameAbb path of the abbreviation input file to create; the tool output
     *                    is written next to it with the suffix {@code .out}
     * @param TrainTest   unused by this method
     * @throws XMLStreamException declared for interface compatibility
     * @throws IOException        if a file cannot be written or read, or the external
     *                            command cannot be started
     */
    public void Ab3P(String Filename, String FilenameAbb, String TrainTest) throws XMLStreamException, IOException {
        // Step 1: write the abbreviation detector input.
        try (BufferedWriter abbInput = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(FilenameAbb), "UTF-8"))) {
            for (int i = 0; i < this.data.getBioCDocobj().PMIDs.size(); ++i) {
                String docId = this.data.getBioCDocobj().PMIDs.get(i);
                StringBuilder context = new StringBuilder();
                for (int j = 0; j < this.data.getBioCDocobj().PassageNames.get(i).size(); ++j) {
                    String passage = this.data.getBioCDocobj().PassageContexts.get(i).get(j);
                    if (passage.matches(".*\\([^\\(\\)]+,[^\\(\\)]+\\).*")) {
                        passage = passage.replaceAll("\\([^\\(\\)]+,[^\\(\\)]+\\)", "");
                    }
                    // String.contains is a literal (non-regex) test, so the parenthesis must
                    // not be escaped; the escaped form never matched ordinary text.
                    if (!passage.contains("(")) {
                        continue;
                    }
                    context.append(passage).append(' ');
                }
                abbInput.write(docId + "\n" + context + "\n\n");
            }
        }

        // Step 2: run the external tool and capture its standard output.
        String outputName = FilenameAbb + ".out";
        String os = System.getProperty("os.name").toLowerCase();
        String cmd = os.contains("windows")
                ? "java -jar bioadi.jar " + FilenameAbb
                : "./Ab3P " + FilenameAbb + " " + outputName;
        try (BufferedWriter toolOutput = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(outputName)), "UTF-8"))) {
            Process process = Runtime.getRuntime().exec(cmd);
            try (BufferedReader toolStdout = new BufferedReader(new InputStreamReader(process.getInputStream(), "UTF-8"))) {
                String outputLine;
                while ((outputLine = toolStdout.readLine()) != null) {
                    toolOutput.write(outputLine);
                    toolOutput.newLine();
                    toolOutput.flush();
                }
            }
        }

        // Step 3: parse "  SF|LF|score" lines; a line of digits starts a new document.
        Pattern pairPattern = Pattern.compile("^  (.+)\\|(.+)\\|([0-9\\.]+)$");
        try (BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(outputName), "UTF-8"))) {
            String pmid = "";
            String line;
            while ((line = inputfile.readLine()) != null) {
                if (line.matches("^[0-9]+$")) {
                    pmid = line;
                }
                Matcher pair = pairPattern.matcher(line);
                if (!pair.find()) {
                    continue;
                }
                String SF = pair.group(1);
                String LF = pair.group(2);
                double weight = Double.parseDouble(pair.group(3));
                this.data.getPmid2Abb_hash().put(pmid + "\t" + SF, "Abb:SF");
                this.data.getPmid2Abb_hash().put(pmid + "\t" + LF, "Abb:LF");
                this.data.getPmidLF2Abb_lc_hash().put(pmid + "\t" + LF.toLowerCase(), SF.toLowerCase());
                this.data.getPmidAbb2LF_lc_hash().put(pmid + "\t" + SF.toLowerCase(), LF.toLowerCase());
                this.data.getPmidAbb2LF_hash().put(pmid + "\t" + SF, LF);
                if (weight >= 0.9) {
                    this.data.getPmidLF2Abb_hash().put(pmid + "\t" + LF, SF);
                }
            }
        }
    }

    /**
     * Loads a BioC file, runs the external abbreviation detector on its text and stores
     * the detected short-form/long-form pairs in the processing data.
     *
     * <p>After reading {@code Filename} into the BioC document object, the method
     * <ol>
     *   <li>writes one record per document ({@code pmid}, {@code "Text:"} followed by
     *       the text, blank line) to {@code FilenameAbb}; only passages containing an
     *       opening parenthesis are included, after parenthesised comma lists have been
     *       removed;</li>
     *   <li>starts {@code ./Ab3P} (or {@code java -jar bioadi.jar} on Windows) and
     *       copies its standard output to {@code FilenameAbb + ".out"} on a background
     *       thread, waiting at most ten minutes for the command to finish;</li>
     *   <li>parses that file and fills the abbreviation maps of the processing data.
     *       The long-form to short-form map is only filled for pairs with a score of
     *       at least 0.9.</li>
     * </ol>
     *
     * <p>If the command does not finish in time, or the calling thread is interrupted
     * while waiting, the command is terminated and whatever output was captured so far
     * is parsed. An interruption is preserved on the calling thread.
     *
     * @param Filename    path of the BioC input file
     * @param FilenameAbb path of the abbreviation input file to create; the tool output
     *                    is written next to it with the suffix {@code .out}
     * @param TrainTest   unused by this method
     * @throws XMLStreamException if the BioC reader reports one
     * @throws IOException        if a file cannot be written or read, or the external
     *                            command cannot be started
     */
    public void LoadInputFile(String Filename, String FilenameAbb, String TrainTest) throws XMLStreamException, IOException {
        this.data.getBioCDocobj().BioCReaderWithAnnotation(Filename);

        // Step 1: write the abbreviation detector input.
        try (BufferedWriter abbInput = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(FilenameAbb), "UTF-8"))) {
            for (int i = 0; i < this.data.getBioCDocobj().PMIDs.size(); ++i) {
                String docId = this.data.getBioCDocobj().PMIDs.get(i);
                StringBuilder context = new StringBuilder("Text:");
                for (int j = 0; j < this.data.getBioCDocobj().PassageNames.get(i).size(); ++j) {
                    String passage = this.data.getBioCDocobj().PassageContexts.get(i).get(j);
                    if (passage.matches(".*\\([^\\(\\)]+,[^\\(\\)]+\\).*")) {
                        passage = passage.replaceAll("\\([^\\(\\)]+,[^\\(\\)]+\\)", "");
                    }
                    if (!passage.contains("(")) {
                        continue;
                    }
                    context.append(passage).append(' ');
                }
                abbInput.write(docId + "\n" + context + "\n\n");
            }
        }

        // Step 2: start the external tool; its stdout is pumped into the .out file.
        final String outputName = FilenameAbb + ".out";
        String os = System.getProperty("os.name").toLowerCase();
        final String finalCmd = os.contains("windows")
                ? "java -jar bioadi.jar " + FilenameAbb
                : "./Ab3P " + FilenameAbb + " " + outputName;
        final BufferedWriter toolOutput = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(outputName)), "UTF-8"));
        final Process process;
        try {
            process = Runtime.getRuntime().exec(finalCmd);
        }
        catch (IOException e) {
            // The pump thread that normally closes the writer is never started.
            try {
                toolOutput.close();
            }
            catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        Thread pump = new Thread("GNP Ab3P Runner"){

            @Override
            public void run() {
                System.out.println("Starting to find abbreviations with command " + finalCmd);
                try (BufferedWriter out = toolOutput;
                     BufferedReader toolStdout = new BufferedReader(new InputStreamReader(process.getInputStream(), "UTF-8"))) {
                    String outputLine;
                    while ((outputLine = toolStdout.readLine()) != null) {
                        out.write(outputLine);
                        out.newLine();
                        out.flush();
                    }
                }
                catch (IOException e) {
                    System.err.println("Error in Thread to run cmd " + finalCmd);
                    e.printStackTrace();
                }
            }
        };
        pump.start();
        try {
            // waitFor(timeout) signals a timeout through its return value, not an exception.
            if (!process.waitFor(10L, TimeUnit.MINUTES)) {
                System.err.println("Command " + finalCmd + " was interrupted because it took too long.");
                process.destroy();
            }
            // Let the pump finish flushing before the output file is parsed. The wait is
            // bounded so a stream that is kept open cannot block this method forever.
            pump.join(TimeUnit.MINUTES.toMillis(1L));
        }
        catch (InterruptedException e) {
            System.err.println("Waiting for command " + finalCmd + " was interrupted.");
            process.destroy();
            Thread.currentThread().interrupt();
        }

        // Step 3: parse "  SF|LF|score" lines; a line of digits starts a new document.
        Pattern pairPattern = Pattern.compile("^  (.+)\\|(.+)\\|([0-9\\.]+)$");
        try (BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(outputName), "UTF-8"))) {
            String pmid = "";
            String line;
            while ((line = inputfile.readLine()) != null) {
                if (line.matches("^[0-9]+$")) {
                    pmid = line;
                }
                Matcher pair = pairPattern.matcher(line);
                if (!pair.find()) {
                    continue;
                }
                String SF = pair.group(1);
                String LF = pair.group(2);
                double weight = Double.parseDouble(pair.group(3));
                this.data.getPmid2Abb_hash().put(pmid + "\t" + SF, "Abb:SF");
                this.data.getPmid2Abb_hash().put(pmid + "\t" + LF, "Abb:LF");
                this.data.getPmidLF2Abb_lc_hash().put(pmid + "\t" + LF.toLowerCase(), SF.toLowerCase());
                this.data.getPmidAbb2LF_lc_hash().put(pmid + "\t" + SF.toLowerCase(), LF.toLowerCase());
                this.data.getPmidAbb2LF_hash().put(pmid + "\t" + SF, LF);
                if (weight >= 0.9) {
                    this.data.getPmidLF2Abb_hash().put(pmid + "\t" + LF, SF);
                }
            }
        }
    }

    /**
     * Tokenizes every passage of the loaded BioC document and writes one feature line
     * per token for the CRF tool, together with a parallel token location file.
     *
     * <p>For each passage the method
     * <ol>
     *   <li>marks the character positions covered by abbreviations known for the
     *       document (longest abbreviation first, masking each hit with {@code @} so
     *       shorter ones cannot match inside it);</li>
     *   <li>marks the positions of {@code CTDGene} and {@code FamilyName} dictionary
     *       hits and of the existing annotations with {@code _B}/{@code _I}/{@code _E}
     *       states;</li>
     *   <li>splits the text at letter/digit boundaries and around every non-word
     *       character, then walks the tokens in parallel with the original text to
     *       recover their offsets;</li>
     *   <li>writes, per token, a tab-separated line to {@code FilenameLoca} (document
     *       id, passage name, passage index, token, 1-based start, end, annotation
     *       state) and a space-separated feature line to {@code FilenameData}.</li>
     * </ol>
     * Each passage is terminated by an empty line in both files.
     *
     * <p>An {@link IOException} is reported on the console and otherwise swallowed, as
     * before; the files are closed in every case.
     *
     * @param FilenameData path of the CRF feature file to create
     * @param FilenameLoca path of the token location file to create
     * @param TrainTest    {@code "Train"} appends the annotation state as the last
     *                     column of each feature line; any other value omits it
     * @throws XMLStreamException declared for interface compatibility
     */
    public void FeatureExtraction(String FilenameData, String FilenameLoca, String TrainTest) throws XMLStreamException {
        try (BufferedWriter FileLocation = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(FilenameLoca), "UTF-8"));
             BufferedWriter FileData = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(FilenameData), "UTF-8"))) {
            englishStemmer stemmer = new englishStemmer();
            for (int i = 0; i < this.data.getBioCDocobj().PMIDs.size(); ++i) {
                String Pmid = this.data.getBioCDocobj().PMIDs.get(i);
                for (int j = 0; j < this.data.getBioCDocobj().PassageNames.get(i).size(); ++j) {
                    String PassageName = this.data.getBioCDocobj().PassageNames.get(i).get(j);
                    int PassageOffset = this.data.getBioCDocobj().PassageOffsets.get(i).get(j);
                    String PassageContext = this.data.getBioCDocobj().PassageContexts.get(i).get(j);
                    ArrayList<String> Annotation = this.data.getBioCDocobj().Annotations.get(i).get(j);
                    HashMap<Integer, String> CTDGene_hash = new HashMap<Integer, String>();
                    HashMap<Integer, String> FamilyName_hash = new HashMap<Integer, String>();
                    // Values are plain state strings such as "Gene_B".
                    HashMap<Integer, String> character_hash = new HashMap<Integer, String>();
                    HashMap<Integer, String> Abbreviation_hash = new HashMap<Integer, String>();

                    // --- Abbreviation positions (passage-relative offsets) ---
                    ArrayList<String> abbreviations = new ArrayList<String>();
                    for (String key : this.data.getPmid2Abb_hash().keySet()) {
                        String[] pmid2abb = key.split("\t");
                        if (!Pmid.equals(pmid2abb[0])) {
                            continue;
                        }
                        abbreviations.add(pmid2abb[1]);
                    }
                    // Stable sort by length; walking it backwards visits the longest form
                    // first and, among equal lengths, the most recently collected one.
                    Collections.sort(abbreviations, (a, b) -> Integer.compare(a.length(), b.length()));
                    // Padded with one blank on each side so a hit at either end still has
                    // a non-alphanumeric neighbour; index m in here is m - 1 in the passage.
                    String maskedContext = " " + PassageContext + " ";
                    for (int l = abbreviations.size() - 1; l >= 0; --l) {
                        String abbreviation = abbreviations.get(l);
                        String abbState = this.data.getPmid2Abb_hash().get(Pmid + "\t" + abbreviation);
                        String AbbLF = abbreviation.replaceAll("([^A-Za-z0-9@ ])", "\\\\$1");
                        AbbLF = AbbLF.replaceAll(" ", "\\[ \\]\\+");
                        Pattern ptmp = Pattern.compile("^(.*[^A-Za-z0-9]+)(" + AbbLF + ")([^A-Za-z0-9]+.*)$");
                        Matcher mtmp = ptmp.matcher(maskedContext);
                        while (mtmp.find()) {
                            String str1 = mtmp.group(1);
                            String str2 = mtmp.group(2);
                            String str3 = mtmp.group(3);
                            for (int m = str1.length(); m <= str1.length() + str2.length(); ++m) {
                                Abbreviation_hash.put(m - 1, abbState);
                            }
                            StringBuilder mask = new StringBuilder(str2.length());
                            for (int m = 0; m < str2.length(); ++m) {
                                mask.append('@');
                            }
                            String masked = str1 + mask + str3;
                            if (masked.equals(maskedContext)) {
                                // The hit consisted of '@' only; masking cannot make progress.
                                break;
                            }
                            maskedContext = masked;
                            mtmp = ptmp.matcher(maskedContext);
                        }
                    }

                    // --- Dictionary hits (document-relative offsets) ---
                    ArrayList<String> locations = GNormPlus.PT_CTDGene.SearchMentionLocation(PassageContext, "CTDGene");
                    for (int k = 0; k < locations.size(); ++k) {
                        String[] anno = locations.get(k).split("\t");
                        int start = Integer.parseInt(anno[0]) + PassageOffset;
                        int last = Integer.parseInt(anno[1]) + PassageOffset;
                        CTDGene_hash.put(start, "CTDGene_B");
                        CTDGene_hash.put(last, "CTDGene_E");
                        for (int s = start + 1; s < last; ++s) {
                            CTDGene_hash.put(s, "CTDGene_I");
                        }
                    }
                    ArrayList<String> locations_Fname = GNormPlus.PT_FamilyName.SearchMentionLocation(PassageContext, "FamilyName");
                    for (int k = 0; k < locations_Fname.size(); ++k) {
                        String[] anno = locations_Fname.get(k).split("\t");
                        int start = Integer.parseInt(anno[0]) + PassageOffset;
                        int last = Integer.parseInt(anno[1]) + PassageOffset;
                        // A family name starting where a CTD gene hit is recorded is ignored.
                        if (CTDGene_hash.containsKey(start)) {
                            continue;
                        }
                        FamilyName_hash.put(start, "famplex_B");
                        FamilyName_hash.put(last, "famplex_E");
                        for (int s = start + 1; s < last; ++s) {
                            FamilyName_hash.put(s, "famplex_I");
                        }
                    }

                    // --- Existing annotations (offsets used as given) ---
                    for (int k = 0; k < Annotation.size(); ++k) {
                        String[] anno = Annotation.get(k).split("\t");
                        int start = Integer.parseInt(anno[0]);
                        int last = Integer.parseInt(anno[1]);
                        String type = anno[3];
                        character_hash.put(start, type + "_B");
                        character_hash.put(last, type + "_E");
                        for (int s = start + 1; s < last; ++s) {
                            character_hash.put(s, type + "_I");
                        }
                    }

                    // --- Tokenization ---
                    String PassageContext_rev = PassageContext;
                    PassageContext_rev = PassageContext_rev.replaceAll("([A-Z][A-Z])([A-Z][0-9][0-9]+[A-Z][\\W\\-\\_])", "$1 $2");
                    PassageContext_rev = PassageContext_rev.replaceAll("([0-9])([A-Za-z])", "$1 $2");
                    PassageContext_rev = PassageContext_rev.replaceAll("([A-Za-z])([0-9])", "$1 $2");
                    PassageContext_rev = PassageContext_rev.replaceAll("([\\W])", " $1 ");
                    PassageContext_rev = PassageContext_rev.replaceAll("[ ]+", " ");
                    String[] tokens = PassageContext_rev.split(" ");

                    // "remaining" is the not yet consumed tail of the original text and
                    // "Offset" its position, so token offsets refer to the original text.
                    String remaining = PassageContext;
                    int Offset = 0;
                    for (int p = 0; p < tokens.length; ++p) {
                        String token = tokens[p];
                        String WSB = "WSB:NoGap";
                        String WSF = "WSF:NoGap";
                        while (remaining.startsWith(" ")) {
                            remaining = remaining.substring(1);
                            ++Offset;
                            WSB = "WSB:Gap";
                        }
                        if (remaining.length() > token.length() && remaining.charAt(token.length()) == ' ') {
                            WSF = "WSF:Gap";
                        }
                        if (p == 0) {
                            WSB = "WSB:1st";
                        } else if (p == tokens.length - 1) {
                            WSF = "WSF:last";
                        }
                        // Skip empty tokens and tokens that do not line up with the text.
                        if (token.isEmpty() || !remaining.startsWith(token)) {
                            continue;
                        }
                        boolean hasNext = p < tokens.length - 1;

                        // Passage-relative lookups: annotation and abbreviation state.
                        int start = Offset;
                        int last = Offset + token.length();
                        String State2 = boundaryState(character_hash, start, last, "");
                        if (!token.equals("\t")) {
                            FileLocation.write(Pmid + "\t" + PassageName + "\t" + j + "\t" + token + "\t" + (Offset + 1) + "\t" + (Offset + token.length()) + "\t" + State2 + "\n");
                        }
                        String Abb_State = "__nil__";
                        if (Abbreviation_hash.containsKey(start) && Abbreviation_hash.containsKey(last)) {
                            Abb_State = Abbreviation_hash.get(start);
                        }

                        // Document-relative lookups: dictionary state, gene before family.
                        start = PassageOffset + Offset;
                        last = PassageOffset + Offset + token.length();
                        String CTDGene_State = boundaryState(CTDGene_hash, start, last, "__nil__");
                        if (CTDGene_State.equals("__nil__")) {
                            CTDGene_State = boundaryState(FamilyName_hash, start, last, "__nil__");
                        }

                        stemmer.setCurrent(token.toLowerCase());
                        stemmer.stem();
                        String stem = stemmer.getCurrent();

                        // Character class counts, capped at "4+".
                        String Num_num = countBucket("N:", token.replaceAll("[^0-9]", "").length());
                        String Num_Uc = countBucket("U:", token.replaceAll("[^A-Z]", "").length());
                        String Num_lc = countBucket("L:", token.replaceAll("[^a-z]", "").length());
                        String Num_All = countBucket("A:", token.length());

                        String SpecificC = "__nil__";
                        if (token.equals(";") || token.equals(":") || token.equals(",") || token.equals(".") || token.equals("-") || token.equals(">") || token.equals("+") || token.equals("_")) {
                            SpecificC = "-SpecificC1-";
                        } else if (token.equals("(") || token.equals(")")) {
                            SpecificC = "-SpecificC2-";
                        } else if (token.equals("{") || token.equals("}")) {
                            SpecificC = "-SpecificC3-";
                        } else if (token.equals("[") || token.equals("]")) {
                            SpecificC = "-SpecificC4-";
                        } else if (token.equals("\\") || token.equals("/")) {
                            SpecificC = "-SpecificC5-";
                        }

                        String ChemPreSuf = "__nil__";
                        if (token.matches(".*(yl|ylidyne|oyl|sulfonyl)")) {
                            ChemPreSuf = "-CHEMinlineSuffix-";
                        } else if (token.matches("(meth|eth|prop|tetracos).*")) {
                            ChemPreSuf = "-CHEMalkaneStem-";
                        } else if (token.matches("(di|tri|tetra).*")) {
                            ChemPreSuf = "-CHEMsimpleMultiplier-";
                        } else if (token.matches("(benzen|pyridin|toluen).*")) {
                            ChemPreSuf = "-CHEMtrivialRing-";
                        } else if (token.matches(".*(one|ol|carboxylic|amide|ate|acid|ium|ylium|ide|uide|iran|olan|inan|pyrid|acrid|amid|keten|formazan|fydrazin)(s|)")) {
                            ChemPreSuf = "-CHEMsuffix-";
                        }

                        String MentionType = "__nil__";
                        if (token.matches("(ytochrome|cytochrome)")) {
                            MentionType = "-Type_cytochrome-";
                        } else if (token.matches(".*target")) {
                            MentionType = "-Type_target-";
                        } else if (token.matches(".*(irradiation|hybrid|fusion|experiment|gst|est|gap|antigen)")) {
                            MentionType = "-Type_ExperimentNoun-";
                        } else if (token.matches(".*(disease|disorder|dystrophy|deficiency|syndrome|dysgenesis|cancer|injury|neoplasm|diabetes|diabete)")) {
                            MentionType = "-Type_Disease-";
                        } else if (token.matches(".*(motif|domain|omain|binding|site|region|sequence|frameshift|finger|box).*")) {
                            MentionType = "-Type_DomainMotif-";
                        } else if (token.equals("-") && hasNext && tokens[p + 1].matches(".*(motif|domain|omain|binding|site|region|sequence|frameshift|finger|box).*")) {
                            MentionType = "-Type_DomainMotif-";
                        } else if (token.matches("[rmc]") && hasNext && (tokens[p + 1].equals("DNA") || tokens[p + 1].equals("RNA"))) {
                            MentionType = "-Type_DomainMotif-";
                        } else if (token.matches(".*(famil|complex|cluster|proteins|genes|factors|transporter|proteinase|membrane|ligand|enzyme|channels|tors$|ase$|ases$)")) {
                            MentionType = "-Type_Family-";
                        } else if (token.toLowerCase().matches("^marker")) {
                            MentionType = "-Type_Marker-";
                        // NOTE(review): equals() compares against the literal text ".*cell.*";
                        // matches() was probably intended. Left unchanged because altering a
                        // feature may not agree with previously trained models - confirm.
                        } else if (token.equals(".*cell.*") || (hasNext && tokens[p + 1].equals("cell") && token.matches("^(T|B|monocytic|cancer|tumor|myeloma|epithelial|crypt)$"))) {
                            MentionType = "-Type_Cell-";
                        // NOTE(review): same literal comparison as above - confirm intent.
                        } else if (token.equals(".*chromosome.*")) {
                            MentionType = "-Type_Chromosome-";
                        } else if (token.matches("[pq]") && ((hasNext && tokens[p + 1].matches("^[0-9]+$")) || (p > 0 && tokens[p - 1].matches("^[0-9]+$")))) {
                            MentionType = "-Type_ChromosomeStrain-";
                        } else if (token.matches(".*(related|regulated|associated|correlated|reactive).*")) {
                            MentionType = "-Type_relation-";
                        } else if (token.toLowerCase().matches(".*(polymorphism|mutation|deletion|insertion|duplication|genotype|genotypes).*")) {
                            MentionType = "-Type_VariationTerms-";
                        } else if (token.matches(".*(oxidase|transferase|transferases|kinase|kinese|subunit|unit|receptor|adrenoceptor|transporter|regulator|transcription|antigen|protein|gene|factor|member|molecule|channel|deaminase|spectrin).*")) {
                            MentionType = "-Type_suffix-";
                        } else if (token.matches("[\\(\\-\\_]") && hasNext && tokens[p + 1].toLowerCase().matches(".*(alpha|beta|gamma|delta|theta|kappa|zeta|sigma|omega|i|ii|iii|iv|v|vi|[abcdefgyr])")) {
                            MentionType = "-Type_strain-";
                        } else if (token.matches("(alpha|beta|gamma|delta|theta|kappa|zeta|sigma|omega|i|ii|iii|iv|v|vi|[abcdefgyr])")) {
                            MentionType = "-Type_strain-";
                        }

                        String ProteinSym = "__nil__";
                        if (token.matches(".*(glutamine|glutamic|leucine|valine|isoleucine|lysine|alanine|glycine|aspartate|methionine|threonine|histidine|aspartic|asparticacid|arginine|asparagine|tryptophan|proline|phenylalanine|cysteine|serine|glutamate|tyrosine|stop|frameshift).*")) {
                            ProteinSym = "-ProteinSymFull-";
                        } else if (token.matches("(cys|ile|ser|gln|met|asn|pro|lys|asp|thr|phe|ala|gly|his|leu|arg|trp|val|glu|tyr|fs|fsx)")) {
                            ProteinSym = "-ProteinSymTri-";
                        } else if (token.matches("[CISQMNPKDTFAGHLRWVEYX]")) {
                            ProteinSym = "-ProteinSymChar-";
                        }

                        String prefix = affixFeatures(token, true);
                        String suffix = affixFeatures(token, false);

                        if (State2.equals("")) {
                            State2 = "O";
                        }
                        if (!token.equals("\t")) {
                            String features = token + " " + stem + " " + WSB + " " + WSF + " " + Num_num + " " + Num_Uc + " " + Num_lc + " " + Num_All + " " + SpecificC + " " + ChemPreSuf + " " + MentionType + " " + ProteinSym + " " + prefix + " " + suffix + " " + CTDGene_State + " " + Abb_State;
                            if (TrainTest.equals("Train")) {
                                features = features + " " + State2;
                            }
                            FileData.write(features + "\n");
                        }
                        remaining = remaining.substring(token.length());
                        Offset += token.length();
                    }
                    if (tokens.length <= 0) {
                        continue;
                    }
                    FileLocation.write("\n");
                    FileData.write("\n");
                }
            }
        }
        catch (IOException e1) {
            System.out.println("[MR]: Input file is not exist.");
            e1.printStackTrace();
        }
    }

    /**
     * Returns the state recorded for a token span: the state at {@code start} if it
     * ends in {@code B}, else the state at {@code last} if it ends in {@code E}, else
     * the state at {@code start} if it ends in {@code I}. Returns {@code fallback}
     * when either position is unmarked or none of the three cases applies.
     */
    private static String boundaryState(HashMap<Integer, String> states, int start, int last, String fallback) {
        if (!states.containsKey(start) || !states.containsKey(last)) {
            return fallback;
        }
        String atStart = states.get(start);
        if (atStart.matches(".*B$")) {
            return atStart;
        }
        String atLast = states.get(last);
        if (atLast.matches(".*E$")) {
            return atLast;
        }
        if (atStart.matches(".*I$")) {
            return atStart;
        }
        return fallback;
    }

    /** Formats a count feature: the label followed by the count, or by {@code 4+} above three. */
    private static String countBucket(String label, int count) {
        return count > 3 ? label + "4+" : label + count;
    }

    /**
     * Builds the five space-separated affix features of lengths one to five, taken from
     * the start or the end of the token; lengths the token cannot supply are
     * {@code __nil__}.
     */
    private static String affixFeatures(String token, boolean fromStart) {
        StringBuilder features = new StringBuilder();
        for (int n = 1; n <= 5; ++n) {
            if (n > 1) {
                features.append(' ');
            }
            if (token.length() < n) {
                features.append("__nil__");
            } else if (fromStart) {
                features.append(token.substring(0, n));
            } else {
                features.append(token.substring(token.length() - n));
            }
        }
        return features.toString();
    }

    /**
     * Runs the external {@code crf_test} tool on a feature file.
     *
     * <p>The command is {@code ./CRF/crf_test -m model -o FilenameOutput FilenameData}
     * ({@code CRF/crf_test ...} on Windows). {@code FilenameOutput} is created or
     * truncated before the tool starts, and every line the tool prints on its standard
     * output is additionally appended to it.
     *
     * <p>A failure to start the tool or to copy its output is reported to the caller as
     * an {@link IOException} instead of terminating the JVM with a success status.
     *
     * @param model          path of the CRF model
     * @param FilenameData   path of the feature file to label
     * @param FilenameOutput path of the result file
     * @throws IOException if the result file cannot be opened, the tool cannot be
     *                     started, or its output cannot be copied
     */
    public void CRF_test(String model, String FilenameData, String FilenameOutput) throws IOException {
        String os = System.getProperty("os.name").toLowerCase();
        String executable = os.contains("windows") ? "CRF/crf_test" : "./CRF/crf_test";
        String cmd = executable + " -m " + model + " -o " + FilenameOutput + " " + FilenameData;
        try (BufferedWriter result = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(FilenameOutput)), "UTF-8"))) {
            try {
                Process process = Runtime.getRuntime().exec(cmd);
                try (BufferedReader toolStdout = new BufferedReader(new InputStreamReader(process.getInputStream(), "UTF-8"))) {
                    String outputLine;
                    while ((outputLine = toolStdout.readLine()) != null) {
                        result.write(outputLine);
                        result.newLine();
                        result.flush();
                    }
                }
            }
            catch (IOException e) {
                throw new IOException("Failed to run command: " + cmd, e);
            }
        }
    }

    /**
     * Runs the external {@code crf_test} tool on a feature file with the option
     * {@code -n 3}.
     *
     * <p>The command is
     * {@code ./CRF/crf_test -n 3 -m model -o FilenameOutput FilenameData}
     * ({@code CRF/crf_test ...} on Windows). {@code FilenameOutput} is created or
     * truncated before the tool starts, and every line the tool prints on its standard
     * output is additionally appended to it.
     *
     * <p>A failure to start the tool or to copy its output is reported to the caller as
     * an {@link IOException} instead of terminating the JVM with a success status.
     *
     * @param model          path of the CRF model
     * @param FilenameData   path of the feature file to label
     * @param FilenameOutput path of the result file
     * @param top3           not read; its presence only selects this overload
     * @throws IOException if the result file cannot be opened, the tool cannot be
     *                     started, or its output cannot be copied
     */
    public void CRF_test(String model, String FilenameData, String FilenameOutput, String top3) throws IOException {
        String os = System.getProperty("os.name").toLowerCase();
        String executable = os.contains("windows") ? "CRF/crf_test" : "./CRF/crf_test";
        String cmd = executable + " -n 3 -m " + model + " -o " + FilenameOutput + " " + FilenameData;
        try (BufferedWriter result = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(FilenameOutput)), "UTF-8"))) {
            try {
                Process process = Runtime.getRuntime().exec(cmd);
                try (BufferedReader toolStdout = new BufferedReader(new InputStreamReader(process.getInputStream(), "UTF-8"))) {
                    String outputLine;
                    while ((outputLine = toolStdout.readLine()) != null) {
                        result.write(outputLine);
                        result.newLine();
                        result.flush();
                    }
                }
            }
            catch (IOException e) {
                throw new IOException("Failed to run command: " + cmd, e);
            }
        }
    }

    /**
     * Trains a CRF model by running the external {@code crf_learn} binary.
     *
     * <p>The binary is invoked with {@code -f 3 -c 4.0 CRF/template_UB FilenameData model}; each
     * line it prints on standard output is echoed to {@code System.out}.
     *
     * <p>The command is passed as an explicit argument vector, so paths containing spaces reach
     * the binary as single arguments. The child's standard error is inherited from this JVM so an
     * unread error pipe cannot stall the child.
     *
     * @param model        path the trained model is written to (last command argument)
     * @param FilenameData path of the training feature file
     * @throws IOException declared for interface compatibility; I/O failures are handled inside
     */
    public void CRF_learn(String model, String FilenameData) throws IOException {
        String OS = System.getProperty("os.name").toLowerCase();
        // The binary is resolved relative to the working directory; only the "./" prefix differs.
        String executable = OS.contains("windows") ? "CRF/crf_learn" : "./CRF/crf_learn";
        try {
            ProcessBuilder builder = new ProcessBuilder(executable, "-f", "3", "-c", "4.0", "CRF/template_UB", FilenameData, model);
            builder.redirectError(ProcessBuilder.Redirect.INHERIT);
            Process process = builder.start();
            try (BufferedReader br = new BufferedReader(new InputStreamReader(process.getInputStream(), "UTF-8"))) {
                String line;
                while ((line = br.readLine()) != null) {
                    System.out.println(line);
                    System.out.flush();
                }
            }
        }
        catch (IOException e) {
            // NOTE(review): failures terminate the JVM with status 0, as the original did;
            // callers cannot distinguish this from success. Kept for compatibility.
            System.out.println(e);
            Runtime.getRuntime().exit(0);
        }
    }

    /**
     * Builds mention annotations from a single-best CRF output file.
     *
     * <p>{@code FilenameOutput} and {@code FilenameLoca} are read as parallel lists of
     * tab-separated lines: line {@code i} of the CRF output carries the predicted tag in its last
     * column, and line {@code i} of the location file carries the document id (column 0), the
     * passage number (column 2) and the token's 1-based start and end offsets (columns 4 and 5).
     * A token tagged {@code *_B} followed by tokens tagged {@code *_I}/{@code *_E} is merged into
     * one mention whose text is cut out of the matching passage context. Mentions that look like
     * figure/table references or malformed short tokens are dropped. The result replaces
     * {@code Annotations} on the BioC document object as document -> passage -> list of
     * {@code start \t last \t mention \t type} strings.
     *
     * <p>Only the lines present in both files are processed.
     *
     * @param Filename       not read by this method
     * @param FilenameLoca   path of the token location file
     * @param FilenameOutput path of the CRF output file
     * @param FilenameBioC   not read by this method
     * @throws XMLStreamException declared for interface compatibility
     * @throws IOException        if either input file cannot be read
     */
    public void ReadCRFresult(String Filename, String FilenameLoca, String FilenameOutput, String FilenameBioC) throws XMLStreamException, IOException {
        String line;
        ArrayList<String> outputArr = new ArrayList<String>();
        try (BufferedReader inputfile = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(FilenameOutput), "UTF-8"))) {
            while ((line = inputfile.readLine()) != null) {
                outputArr.add(line);
            }
        }
        ArrayList<String> locationArr = new ArrayList<String>();
        try (BufferedReader inputfile = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(FilenameLoca), "UTF-8"))) {
            while ((line = inputfile.readLine()) != null) {
                locationArr.add(line);
            }
        }
        String pmid_last = "";
        String paragraph_num_last = "";
        String pmid = "";
        String paragraph_num = "";
        Pattern pat_B = Pattern.compile("((FamilyName|DomainMotif|Gene)_[B])$");
        Pattern pat_IE = Pattern.compile("((FamilyName|DomainMotif|Gene)_[IE])$");
        ArrayList AnnotationInPMID = new ArrayList();
        // Annotation rows are plain strings; the decompiled CallSite element type made every add
        // fail with a ClassCastException.
        ArrayList<String> AnnotationInPassage = new ArrayList<String>();
        this.data.getBioCDocobj().Annotations = new ArrayList();
        int countPMID = 0;
        int countPassage = 0;
        // The two files are indexed in lockstep, so never read past the shorter one.
        int size_Arr = Math.min(outputArr.size(), locationArr.size());
        for (int i = 0; i < size_Arr; ++i) {
            String[] outputsRow = outputArr.get(i).split("\\t");
            String[] locationRow = locationArr.get(i).split("\\t");
            // Sentinel: replaced by the first token's start as long as offsets stay below 100000.
            int start = 100000;
            int last = 0;
            String MentionType = "";
            if (locationRow.length > 3) {
                pmid = locationRow[0];
                paragraph_num = locationRow[2];
            }
            // A change of passage number closes the current passage list.
            if (!paragraph_num_last.equals("") && !paragraph_num.equals(paragraph_num_last)) {
                AnnotationInPMID.add(AnnotationInPassage);
                AnnotationInPassage = new ArrayList<String>();
                ++countPassage;
            }
            // A change of document id closes the current document list.
            if (!pmid_last.equals("") && !pmid.equals(pmid_last)) {
                this.data.getBioCDocobj().Annotations.add(AnnotationInPMID);
                AnnotationInPMID = new ArrayList();
                ++countPMID;
                countPassage = 0;
            }
            boolean F = false;
            if (locationRow.length > 2) {
                Matcher mat = pat_B.matcher(outputsRow[outputsRow.length - 1]);
                // First iteration requires a *_B tag, later iterations a *_I or *_E tag.
                while (mat.find() && locationRow.length == 6) {
                    MentionType = mat.group(2);
                    pmid = locationRow[0];
                    paragraph_num = locationRow[2];
                    int start_tmp = Integer.parseInt(locationRow[4]) - 1;
                    int last_tmp = Integer.parseInt(locationRow[5]);
                    if (start_tmp < start) {
                        start = start_tmp;
                    }
                    if (last_tmp > last) {
                        last = last_tmp;
                    }
                    F = true;
                    // Stop at the end of the data or at an empty location line.
                    if (++i >= size_Arr || locationArr.get(i).length() <= 0) break;
                    outputsRow = outputArr.get(i).split("\\t");
                    locationRow = locationArr.get(i).split("\\t");
                    mat = pat_IE.matcher(outputsRow[outputsRow.length - 1]);
                }
            }
            if (F) {
                String PassageContext = this.data.getBioCDocobj().PassageContexts.get(countPMID).get(countPassage);
                String Mention = PassageContext.substring(start, last);
                String Mention_nospace = Mention.replaceAll("[\\W\\-\\_]", "");
                boolean shortWithDigit = Mention_nospace.length() <= 3 && Mention.matches(".*[0-9].*");
                boolean hasOpenParen = Mention.matches(".*[\\(].*");
                boolean hasCloseParen = Mention.matches(".*[\\)].*");
                boolean hasOpenBracket = Mention.matches(".*[\\[].*");
                boolean hasCloseBracket = Mention.matches(".*[\\]].*");
                boolean rejected = Mention.toLowerCase().matches("(figure|tables|fig|tab|exp\\. [0-9]+).*")
                        || Mention.matches("[A-Z][A-Z]s")
                        || Mention.matches(".*\\|.*")
                        // Short, digit-bearing mentions are dropped when they contain list or
                        // path punctuation or an unbalanced bracket.
                        || shortWithDigit && (Mention.matches(".*[\\;\\,\\'\\/\\\\].*")
                                || hasOpenParen && !hasCloseParen
                                || hasOpenBracket && !hasCloseBracket
                                || hasCloseParen && !hasOpenParen
                                || hasCloseBracket && !hasOpenBracket);
                if (!rejected) {
                    AnnotationInPassage.add(start + "\t" + last + "\t" + Mention + "\t" + MentionType);
                }
                // i now points at the first line after the mention; step back so the loop
                // increment revisits it.
                --i;
            }
            paragraph_num_last = paragraph_num;
            pmid_last = pmid;
        }
        AnnotationInPMID.add(AnnotationInPassage);
        this.data.getBioCDocobj().Annotations.add(AnnotationInPMID);
    }

    /**
     * Builds mention annotations from a 3-best CRF output file.
     *
     * <p>{@code FilenameOutput} is split on header lines of the form {@code # <rank> <score>}
     * (rank 0-2) into three line lists, each line remembering the score of its header.
     * {@code FilenameLoca} is read as a parallel list of tab-separated lines carrying the document
     * id (column 0), the passage number (column 2) and the token's 1-based start and end offsets
     * (columns 4 and 5).
     *
     * <p>Pass 1 walks the rank-0 list, merges {@code *_B} + {@code *_I}/{@code *_E} token runs
     * into mentions, filters them and rebuilds {@code Annotations} on the BioC document object as
     * document -> passage -> list of {@code start \t last \t mention \t type} strings. Passes 2
     * and 3 walk the rank-1 and rank-2 lists: a mention whose sequence score exceeds
     * {@code threshold} is added when it overlaps no existing annotation, and a {@code Gene}
     * mention whose score exceeds {@code threshold_GeneType} overwrites an identical span that
     * is currently typed {@code FamilyName} or {@code DomainMotif}.
     *
     * @param Filename           not read by this method
     * @param FilenameLoca       path of the token location file
     * @param FilenameOutput     path of the n-best CRF output file
     * @param FilenameBioC       not read by this method
     * @param threshold          minimum (exclusive) score for accepting a rank-1/rank-2 mention
     * @param threshold_GeneType minimum (exclusive) score for retyping an existing span to Gene
     * @throws XMLStreamException declared for interface compatibility
     * @throws IOException        if either input file cannot be read
     */
    public void ReadCRFresult(String Filename, String FilenameLoca, String FilenameOutput, String FilenameBioC, double threshold, double threshold_GeneType) throws XMLStreamException, IOException {
        String Mention_tmp;
        int lastj;
        int startj;
        String[] GetData;
        int j;
        boolean overlap;
        String PassageContext;
        Matcher mat;
        boolean F;
        String MentionType;
        int last;
        int start;
        String[] locationRow;
        String[] outputsRow;
        int i;
        int scoreIdx;
        String line;
        ArrayList<String> outputArr1 = new ArrayList<String>();
        ArrayList<String> outputArr2 = new ArrayList<String>();
        ArrayList<String> outputArr3 = new ArrayList<String>();
        ArrayList<String> outputArr1_score = new ArrayList<String>();
        ArrayList<String> outputArr2_score = new ArrayList<String>();
        ArrayList<String> outputArr3_score = new ArrayList<String>();
        int rank = 0;
        String score = "";
        Pattern pat_Rank = Pattern.compile("^# ([0-2]) ([0-9\\.]+)$");
        try (BufferedReader inputfile = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(FilenameOutput), "UTF-8"))) {
            while ((line = inputfile.readLine()) != null) {
                Matcher mat2 = pat_Rank.matcher(line);
                if (mat2.find()) {
                    // Header line: switches the target list and score for the lines that follow.
                    rank = Integer.parseInt(mat2.group(1));
                    score = mat2.group(2);
                    continue;
                }
                if (rank == 0) {
                    outputArr1.add(line);
                    outputArr1_score.add(score);
                    continue;
                }
                if (rank == 1) {
                    outputArr2.add(line);
                    outputArr2_score.add(score);
                    continue;
                }
                if (rank != 2) continue;
                outputArr3.add(line);
                outputArr3_score.add(score);
            }
        }
        ArrayList<String> locationArr = new ArrayList<String>();
        try (BufferedReader inputfile = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(FilenameLoca), "UTF-8"))) {
            while ((line = inputfile.readLine()) != null) {
                locationArr.add(line);
            }
        }
        String pmid_last = "";
        String paragraph_num_last = "";
        String pmid = "";
        String paragraph_num = "";
        Pattern pat_B = Pattern.compile("((FamilyName|DomainMotif|Gene)_[B])$");
        Pattern pat_IE = Pattern.compile("((FamilyName|DomainMotif|Gene)_[IE])$");
        ArrayList AnnotationInPMID = new ArrayList();
        // Annotation rows are plain strings; the decompiled CallSite element type made every add
        // fail with a ClassCastException.
        ArrayList<String> AnnotationInPassage = new ArrayList<String>();
        this.data.getBioCDocobj().Annotations = new ArrayList();
        int countPMID = 0;
        int countPassage = 0;

        // ---- Pass 1: rank-0 labelling builds the annotation lists. ----
        int size_Arr = outputArr1.size();
        if (locationArr.size() < outputArr1.size()) {
            size_Arr = locationArr.size();
        }
        for (i = 0; i < size_Arr; ++i) {
            outputsRow = outputArr1.get(i).split("\\t");
            locationRow = locationArr.get(i).split("\\t");
            // Sentinel: replaced by the first token's start as long as offsets stay below 100000.
            start = 100000;
            last = 0;
            MentionType = "";
            if (locationRow.length > 3) {
                pmid = locationRow[0];
                paragraph_num = locationRow[2];
            }
            F = false;
            if (outputsRow.length >= 1) {
                mat = pat_B.matcher(outputsRow[outputsRow.length - 1]);
                // First iteration requires a *_B tag, later iterations a *_I or *_E tag.
                while (mat.find() && locationRow.length == 6) {
                    MentionType = mat.group(2);
                    pmid = locationRow[0];
                    int start_tmp = Integer.parseInt(locationRow[4]) - 1;
                    int last_tmp = Integer.parseInt(locationRow[5]);
                    if (start_tmp < start) {
                        start = start_tmp;
                    }
                    if (last_tmp > last) {
                        last = last_tmp;
                    }
                    F = true;
                    // A mention ending on the last shared line must not read past the lists.
                    if (++i >= size_Arr) break;
                    outputsRow = outputArr1.get(i).split("\\t");
                    locationRow = locationArr.get(i).split("\\t");
                    mat = pat_IE.matcher(outputsRow[outputsRow.length - 1]);
                }
            }
            if (!paragraph_num_last.equals("") && !paragraph_num.equals(paragraph_num_last)) {
                AnnotationInPMID.add(AnnotationInPassage);
                AnnotationInPassage = new ArrayList<String>();
                ++countPassage;
            }
            if (!pmid.equals(pmid_last) && paragraph_num.equals("0") && paragraph_num_last.equals("0")) {
                // Document changed while the passage number stayed "0": the passage check above
                // did not fire, so close the passage here as well.
                AnnotationInPMID.add(AnnotationInPassage);
                AnnotationInPassage = new ArrayList<String>();
                this.data.getBioCDocobj().Annotations.add(AnnotationInPMID);
                AnnotationInPMID = new ArrayList();
                ++countPMID;
                countPassage = 0;
            } else if (!pmid_last.equals("") && !pmid.equals(pmid_last)) {
                this.data.getBioCDocobj().Annotations.add(AnnotationInPMID);
                AnnotationInPMID = new ArrayList();
                ++countPMID;
                countPassage = 0;
            }
            if (F) {
                if (this.data.getBioCDocobj().PassageContexts.size() > countPMID && this.data.getBioCDocobj().PassageContexts.get(countPMID).size() > countPassage && this.data.getBioCDocobj().PassageContexts.get(countPMID).get(countPassage).length() >= last && last - start < 1000) {
                    PassageContext = this.data.getBioCDocobj().PassageContexts.get(countPMID).get(countPassage);
                    String Mention = PassageContext.substring(start, last);
                    String Mention_nospace = Mention.replaceAll("[\\W\\-\\_]", "");
                    if (!(Mention.toLowerCase().matches("(figure|tables|fig|tab|exp\\. [0-9]+).*") || Mention.matches("[A-Z][A-Z]s") || Mention.matches(".*\\|.*") || Mention_nospace.length() <= 3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\;\\,\\'\\/\\\\].*") || Mention_nospace.length() <= 3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\(].*") && !Mention.matches(".*[\\)].*") || Mention_nospace.length() <= 3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\[].*") && !Mention.matches(".*[\\]].*") || Mention_nospace.length() <= 3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\)].*") && !Mention.matches(".*[\\(].*") || Mention_nospace.length() <= 3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\]].*") && !Mention.matches(".*[\\[].*") || GNormPlus.Abb2Longformtok_hash.containsKey(Mention_nospace.toLowerCase()) && PassageContext.toLowerCase().matches(".*[\\W\\-\\-](" + GNormPlus.Abb2Longformtok_hash.get(Mention_nospace.toLowerCase()) + ")[\\W\\-\\-].*"))) {
                        AnnotationInPassage.add(start + "\t" + last + "\t" + Mention + "\t" + MentionType);
                    }
                }
                // i points at the first line after the mention; step back so the loop increment
                // revisits it.
                --i;
            }
            paragraph_num_last = paragraph_num;
            pmid_last = pmid;
        }
        AnnotationInPMID.add(AnnotationInPassage);
        this.data.getBioCDocobj().Annotations.add(AnnotationInPMID);

        // ---- Pass 2: rank-1 labelling adds or retypes mentions in the lists built above. ----
        pmid_last = "";
        paragraph_num_last = "";
        pmid = "";
        paragraph_num = "";
        countPMID = 0;
        countPassage = 0;
        size_Arr = outputArr2.size();
        if (locationArr.size() < outputArr2.size()) {
            size_Arr = locationArr.size();
        }
        for (i = 0; i < size_Arr; ++i) {
            outputsRow = outputArr2.get(i).split("\\t");
            locationRow = locationArr.get(i).split("\\t");
            start = 100000;
            last = 0;
            MentionType = "";
            if (locationRow.length > 2) {
                pmid = locationRow[0];
                paragraph_num = locationRow[2];
            }
            F = false;
            if (outputsRow.length >= 1) {
                mat = pat_B.matcher(outputsRow[outputsRow.length - 1]);
                while (mat.find() && locationRow.length == 6) {
                    MentionType = mat.group(2);
                    pmid = locationRow[0];
                    int start_tmp = Integer.parseInt(locationRow[4]) - 1;
                    int last_tmp = Integer.parseInt(locationRow[5]);
                    if (start_tmp < start) {
                        start = start_tmp;
                    }
                    if (last_tmp > last) {
                        last = last_tmp;
                    }
                    F = true;
                    // A mention ending on the last shared line must not read past the lists.
                    if (++i >= size_Arr) break;
                    outputsRow = outputArr2.get(i).split("\\t");
                    locationRow = locationArr.get(i).split("\\t");
                    mat = pat_IE.matcher(outputsRow[outputsRow.length - 1]);
                }
            }
            if (!paragraph_num_last.equals("") && !paragraph_num.equals(paragraph_num_last)) {
                ++countPassage;
            }
            if (!pmid.equals(pmid_last) && paragraph_num.equals("0") && paragraph_num_last.equals("0")) {
                ++countPMID;
                countPassage = 0;
            } else if (!pmid_last.equals("") && !pmid.equals(pmid_last)) {
                ++countPMID;
                countPassage = 0;
            }
            if (F) {
                // The score is looked up at the line following the mention, as before; when the
                // mention ends on the very last line, fall back to the last existing entry.
                scoreIdx = Math.min(i, outputArr2_score.size() - 1);
                if (this.data.getBioCDocobj().PassageContexts.size() > countPMID && this.data.getBioCDocobj().PassageContexts.get(countPMID).size() > countPassage && this.data.getBioCDocobj().PassageContexts.get(countPMID).get(countPassage).length() >= last && last - start < 1000) {
                    PassageContext = this.data.getBioCDocobj().PassageContexts.get(countPMID).get(countPassage);
                    String Mention = PassageContext.substring(start, last);
                    String Mention_nospace = Mention.replaceAll("[\\W\\-\\_]", "");
                    if (!(Mention.toLowerCase().matches("(figure|tables|fig|tab|exp\\. [0-9]+).*") || Mention.matches("[A-Z][A-Z]s") || Mention.matches(".*\\|.*") || Mention_nospace.length() <= 3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\;\\,\\'\\/\\\\].*") || Mention_nospace.length() <= 3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\(].*") && !Mention.matches(".*[\\)].*") || Mention_nospace.length() <= 3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\[].*") && !Mention.matches(".*[\\]].*") || Mention_nospace.length() <= 3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\)].*") && !Mention.matches(".*[\\(].*") || Mention_nospace.length() <= 3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\]].*") && !Mention.matches(".*[\\[].*") || GNormPlus.Abb2Longformtok_hash.containsKey(Mention_nospace.toLowerCase()) && PassageContext.toLowerCase().matches(".*[\\W\\-\\-](" + GNormPlus.Abb2Longformtok_hash.get(Mention_nospace.toLowerCase()) + ")[\\W\\-\\-].*") || !(Double.parseDouble(outputArr2_score.get(scoreIdx)) > threshold))) {
                        overlap = false;
                        for (j = 0; j < this.data.getBioCDocobj().Annotations.get(countPMID).get(countPassage).size(); ++j) {
                            GetData = this.data.getBioCDocobj().Annotations.get(countPMID).get(countPassage).get(j).split("\t");
                            startj = Integer.parseInt(GetData[0]);
                            lastj = Integer.parseInt(GetData[1]);
                            // Escape regex metacharacters so the mention is matched literally.
                            Mention_tmp = Mention.replaceAll("([^A-Za-z0-9@ ])", "\\\\$1");
                            if (MentionType.equals("Gene") && Double.parseDouble(outputArr2_score.get(scoreIdx)) > threshold_GeneType && this.data.getBioCDocobj().Annotations.get(countPMID).get(countPassage).get(j).matches(start + "\t" + last + "\t" + Mention_tmp + "\t(FamilyName|DomainMotif)")) {
                                this.data.getBioCDocobj().Annotations.get(countPMID).get(countPassage).set(j, start + "\t" + last + "\t" + Mention + "\t" + MentionType);
                                continue;
                            }
                            if ((start < startj || start >= lastj) && (last <= startj || last > lastj)) continue;
                            overlap = true;
                        }
                        if (!overlap) {
                            this.data.getBioCDocobj().Annotations.get(countPMID).get(countPassage).add(start + "\t" + last + "\t" + Mention + "\t" + MentionType);
                        }
                    }
                }
                --i;
            }
            paragraph_num_last = paragraph_num;
            pmid_last = pmid;
        }

        // ---- Pass 3: rank-2 labelling, same merge rules as pass 2. ----
        pmid_last = "";
        paragraph_num_last = "";
        pmid = "";
        paragraph_num = "";
        countPMID = 0;
        countPassage = 0;
        size_Arr = outputArr3.size();
        if (locationArr.size() < outputArr3.size()) {
            size_Arr = locationArr.size();
        }
        for (i = 0; i < size_Arr; ++i) {
            outputsRow = outputArr3.get(i).split("\\t");
            locationRow = locationArr.get(i).split("\\t");
            start = 100000;
            last = 0;
            MentionType = "";
            if (locationRow.length > 2) {
                pmid = locationRow[0];
                paragraph_num = locationRow[2];
            }
            F = false;
            if (outputsRow.length >= 1) {
                mat = pat_B.matcher(outputsRow[outputsRow.length - 1]);
                while (mat.find() && locationRow.length == 6) {
                    MentionType = mat.group(2);
                    pmid = locationRow[0];
                    // Unlike passes 1 and 2, this pass also refreshes the passage number here.
                    paragraph_num = locationRow[2];
                    int start_tmp = Integer.parseInt(locationRow[4]) - 1;
                    int last_tmp = Integer.parseInt(locationRow[5]);
                    if (start_tmp < start) {
                        start = start_tmp;
                    }
                    if (last_tmp > last) {
                        last = last_tmp;
                    }
                    F = true;
                    // A mention ending on the last shared line must not read past the lists.
                    if (++i >= size_Arr) break;
                    outputsRow = outputArr3.get(i).split("\\t");
                    locationRow = locationArr.get(i).split("\\t");
                    mat = pat_IE.matcher(outputsRow[outputsRow.length - 1]);
                }
            }
            if (!paragraph_num_last.equals("") && !paragraph_num.equals(paragraph_num_last)) {
                ++countPassage;
            }
            if (!pmid.equals(pmid_last) && paragraph_num.equals("0") && paragraph_num_last.equals("0")) {
                ++countPMID;
                countPassage = 0;
            } else if (!pmid_last.equals("") && !pmid.equals(pmid_last)) {
                ++countPMID;
                countPassage = 0;
            }
            if (F) {
                // The score is looked up at the line following the mention, as before; when the
                // mention ends on the very last line, fall back to the last existing entry.
                scoreIdx = Math.min(i, outputArr3_score.size() - 1);
                if (this.data.getBioCDocobj().PassageContexts.size() > countPMID && this.data.getBioCDocobj().PassageContexts.get(countPMID).size() > countPassage && this.data.getBioCDocobj().PassageContexts.get(countPMID).get(countPassage).length() >= last && last - start < 1000) {
                    PassageContext = this.data.getBioCDocobj().PassageContexts.get(countPMID).get(countPassage);
                    String Mention = PassageContext.substring(start, last);
                    String Mention_nospace = Mention.replaceAll("[\\W\\-\\_]", "");
                    if (!(Mention.toLowerCase().matches("(figure|tables|fig|tab|exp\\. [0-9]+).*") || Mention.matches("[A-Z][A-Z]s") || Mention.matches(".*\\|.*") || Mention_nospace.length() <= 3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\;\\,\\'\\/\\\\].*") || Mention_nospace.length() <= 3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\(].*") && !Mention.matches(".*[\\)].*") || Mention_nospace.length() <= 3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\[].*") && !Mention.matches(".*[\\]].*") || Mention_nospace.length() <= 3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\)].*") && !Mention.matches(".*[\\(].*") || Mention_nospace.length() <= 3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\]].*") && !Mention.matches(".*[\\[].*") || GNormPlus.Abb2Longformtok_hash.containsKey(Mention_nospace.toLowerCase()) && PassageContext.toLowerCase().matches(".*[\\W\\-\\-](" + GNormPlus.Abb2Longformtok_hash.get(Mention_nospace.toLowerCase()) + ")[\\W\\-\\-].*") || !(Double.parseDouble(outputArr3_score.get(scoreIdx)) > threshold))) {
                        overlap = false;
                        for (j = 0; j < this.data.getBioCDocobj().Annotations.get(countPMID).get(countPassage).size(); ++j) {
                            GetData = this.data.getBioCDocobj().Annotations.get(countPMID).get(countPassage).get(j).split("\t");
                            startj = Integer.parseInt(GetData[0]);
                            lastj = Integer.parseInt(GetData[1]);
                            // Escape regex metacharacters so the mention is matched literally.
                            Mention_tmp = Mention.replaceAll("([^A-Za-z0-9@ ])", "\\\\$1");
                            if (MentionType.equals("Gene") && Double.parseDouble(outputArr3_score.get(scoreIdx)) > threshold_GeneType && this.data.getBioCDocobj().Annotations.get(countPMID).get(countPassage).get(j).matches(start + "\t" + last + "\t" + Mention_tmp + "\t(FamilyName|DomainMotif)")) {
                                this.data.getBioCDocobj().Annotations.get(countPMID).get(countPassage).set(j, start + "\t" + last + "\t" + Mention + "\t" + MentionType);
                                continue;
                            }
                            if ((start < startj || start >= lastj) && (last <= startj || last > lastj)) continue;
                            overlap = true;
                        }
                        if (!overlap) {
                            this.data.getBioCDocobj().Annotations.get(countPMID).get(countPassage).add(start + "\t" + last + "\t" + Mention + "\t" + MentionType);
                        }
                    }
                }
                --i;
            }
            paragraph_num_last = paragraph_num;
            pmid_last = pmid;
        }
    }

    public void PostProcessing(String Filename, String FilenameBioC) throws XMLStreamException, IOException {
        String Disease_Suffix = "disease|diseases|syndrome|syndromes|tumor|tumour|deficiency|dysgenesis|atrophy|frame|dystrophy";
        String Cell_Suffix = "cell|cells";
        String FamilyName_Suffix = "disease|diseases|syndrome|syndromes|tumor|tumour|deficiency|dysgenesis|atrophy|frame|dystrophy|frame|factors|family|families|superfamily|superfamilies|subfamily|subfamilies|complex|genes|proteins";
        String DomainMotif_Suffix = "domain|motif|domains|motifs|sequences";
        String Strain_Suffix = "alpha|beta|gamma|kappa|theta|delta|[A-Ga-g0-9]";
        ArrayList Translate2Family = new ArrayList();
        for (int i = 0; i < this.data.getBioCDocobj().Annotations.size(); ++i) {
            String mention;
            int start;
            String post;
            String gmtmp;
            Pattern ptmp;
            Object PassageContexts_tmp;
            int j;
            String type;
            int j2;
            HashMap<String, String> Mention2Type_Hash = new HashMap<String, String>();
            ArrayList<String> GeneMentionPattern = new ArrayList<String>();
            HashMap<CallSite, Integer> MentionType2Num = new HashMap<CallSite, Integer>();
            if (this.data.getBioCDocobj().PMIDs.size() < i) continue;
            String pmid = this.data.getBioCDocobj().PMIDs.get(i);
            for (j2 = 0; j2 < this.data.getBioCDocobj().Annotations.get(i).size(); ++j2) {
                for (int k = 0; k < this.data.getBioCDocobj().Annotations.get(i).get(j2).size(); ++k) {
                    String mentmp;
                    String[] Anno = this.data.getBioCDocobj().Annotations.get(i).get(j2).get(k).split("\\t");
                    String start2 = Anno[0];
                    String last = Anno[1];
                    String mention2 = Anno[2];
                    String type2 = Anno[3];
                    Mention2Type_Hash.put(mention2.toLowerCase(), type2);
                    if (MentionType2Num.containsKey(mention2 + "\t" + type2)) {
                        MentionType2Num.put((CallSite)((Object)(mention2.toLowerCase() + "\t" + type2)), (Integer)MentionType2Num.get(mention2 + "\t" + type2) + 1);
                        if (this.data.getPmidLF2Abb_lc_hash().containsKey(pmid + "\t" + mention2.toLowerCase())) {
                            MentionType2Num.put((CallSite)((Object)(this.data.getPmidLF2Abb_lc_hash().get(pmid + "\t" + mention2.toLowerCase()) + "\t" + type2)), (Integer)MentionType2Num.get(mention2 + "\t" + type2) + 1);
                        } else {
                            MentionType2Num.put((CallSite)((Object)(this.data.getPmidLF2Abb_lc_hash().get(pmid + "\t" + mention2.toLowerCase()) + "\t" + type2)), 1);
                        }
                        if (this.data.getPmidAbb2LF_lc_hash().containsKey(pmid + "\t" + mention2.toLowerCase())) {
                            MentionType2Num.put((CallSite)((Object)(this.data.getPmidAbb2LF_lc_hash().get(pmid + "\t" + mention2.toLowerCase()) + "\t" + type2)), (Integer)MentionType2Num.get(mention2 + "\t" + type2) + 1);
                        } else {
                            MentionType2Num.put((CallSite)((Object)(this.data.getPmidAbb2LF_lc_hash().get(pmid + "\t" + mention2.toLowerCase()) + "\t" + type2)), 1);
                        }
                    } else {
                        MentionType2Num.put((CallSite)((Object)(mention2.toLowerCase() + "\t" + type2)), 1);
                        if (this.data.getPmidLF2Abb_lc_hash().containsKey(pmid + "\t" + mention2.toLowerCase())) {
                            MentionType2Num.put((CallSite)((Object)(this.data.getPmidLF2Abb_lc_hash().get(pmid + "\t" + mention2.toLowerCase()) + "\t" + type2)), 1);
                        }
                        if (this.data.getPmidAbb2LF_lc_hash().containsKey(pmid + "\t" + mention2.toLowerCase())) {
                            MentionType2Num.put((CallSite)((Object)(this.data.getPmidAbb2LF_lc_hash().get(pmid + "\t" + mention2.toLowerCase()) + "\t" + type2)), 1);
                        }
                    }
                    if (!Anno[3].equals("Gene") || !(mentmp = mention2.toLowerCase()).matches(".*[0-9].*") && !mentmp.matches(".*(alpha|beta|gamma|theta|zeta|delta).*") || mentmp.matches(".*\\{(alpha|beta|gamma|theta|zeta|delta)\\}.*")) continue;
                    mentmp = mentmp.replaceAll("([^A-Za-z0-9\\| ])", "\\\\$1");
                    mentmp = mentmp.replaceAll("[0-9]", "[0-9]");
                    if (GeneMentionPattern.contains(mentmp = mentmp.replaceAll("(alpha|beta|gamma|theta|zeta|delta)", "(alpha\\|beta\\|gamma\\|theta\\|zeta\\|delta)"))) continue;
                    GeneMentionPattern.add(mentmp);
                }
            }
            for (j2 = 0; j2 < this.data.getBioCDocobj().Annotations.get(i).size(); ++j2) {
                ArrayList RemoveList = new ArrayList();
                for (int k = 0; k < this.data.getBioCDocobj().Annotations.get(i).get(j2).size(); ++k) {
                    String[] Anno = this.data.getBioCDocobj().Annotations.get(i).get(j2).get(k).split("\\t");
                    String start3 = Anno[0];
                    String last = Anno[1];
                    String mention3 = Anno[2];
                    type = Anno[3];
                    String mention_tmp = mention3.toLowerCase().replaceAll("([^A-Za-z0-9@ ])", "\\\\$1");
                    boolean SubSt = false;
                    if (SubSt) continue;
                    int BoundaryLen = 15;
                    if (this.data.getBioCDocobj().PassageContexts.get(i).get(j2).length() < Integer.parseInt(last) + 15) {
                        BoundaryLen = this.data.getBioCDocobj().PassageContexts.get(i).get(j2).length() - Integer.parseInt(last);
                    }
                    String SurroundingString = "";
                    if (BoundaryLen > 0) {
                        if (this.data.getBioCDocobj().PassageContexts.get(i).get(j2).length() < Integer.parseInt(last) + BoundaryLen) {
                            BoundaryLen = this.data.getBioCDocobj().PassageContexts.get(i).get(j2).length() - Integer.parseInt(last) - 1;
                            SurroundingString = this.data.getBioCDocobj().PassageContexts.get(i).get(j2).substring(Integer.parseInt(last), Integer.parseInt(last) + BoundaryLen).toLowerCase();
                        } else {
                            SurroundingString = this.data.getBioCDocobj().PassageContexts.get(i).get(j2).substring(Integer.parseInt(last), Integer.parseInt(last) + BoundaryLen).toLowerCase();
                        }
                    }
                    if (mention3.toLowerCase().matches(".*(" + Cell_Suffix + ")") || SurroundingString.matches("(" + Cell_Suffix + ")")) {
                        type = "Cell";
                        this.data.getBioCDocobj().Annotations.get(i).get(j2).set(k, start3 + "\t" + last + "\t" + mention3 + "\t" + type);
                    } else if (mention3.toLowerCase().matches(".*(" + FamilyName_Suffix + ")") || SurroundingString.matches("(" + FamilyName_Suffix + ")")) {
                        type = "FamilyName";
                        this.data.getBioCDocobj().Annotations.get(i).get(j2).set(k, start3 + "\t" + last + "\t" + mention3 + "\t" + type);
                    } else if (mention3.toLowerCase().matches(".*(" + DomainMotif_Suffix + ")") || SurroundingString.matches("(" + DomainMotif_Suffix + ")")) {
                        type = "DomainMotif";
                        this.data.getBioCDocobj().Annotations.get(i).get(j2).set(k, start3 + "\t" + last + "\t" + mention3 + "\t" + type);
                    } else if (!type.equals("Gene")) {
                        double Num_FDC = 0.0;
                        double Num_Gene = 0.0;
                        if (MentionType2Num.containsKey(mention3.toLowerCase() + "\tFamilyName")) {
                            Num_FDC += (double)((Integer)MentionType2Num.get(mention3.toLowerCase() + "\tFamilyName")).intValue();
                        }
                        if (MentionType2Num.containsKey(mention3.toLowerCase() + "\tDomainMotif")) {
                            Num_FDC += (double)((Integer)MentionType2Num.get(mention3.toLowerCase() + "\tDomainMotif")).intValue();
                        }
                        if (MentionType2Num.containsKey(mention3.toLowerCase() + "\tCell")) {
                            Num_FDC += (double)((Integer)MentionType2Num.get(mention3.toLowerCase() + "\tCell")).intValue();
                        }
                        if (MentionType2Num.containsKey(mention3.toLowerCase() + "\tGene")) {
                            Num_Gene += (double)((Integer)MentionType2Num.get(mention3.toLowerCase() + "\tGene")).intValue();
                        }
                        if (Num_Gene / (Num_FDC + Num_Gene) >= 0.5) {
                            this.data.getBioCDocobj().Annotations.get(i).get(j2).set(k, start3 + "\t" + last + "\t" + mention3 + "\tGene");
                        }
                        for (int p = 0; p < GeneMentionPattern.size(); ++p) {
                            if (!mention3.toLowerCase().matches((String)GeneMentionPattern.get(p))) continue;
                            this.data.getBioCDocobj().Annotations.get(i).get(j2).set(k, start3 + "\t" + last + "\t" + mention3 + "\tGene");
                        }
                    }
                    String lc_ment = mention3.toLowerCase();
                    if (!this.data.getPmidAbb2LF_lc_hash().containsKey(pmid + "\t" + lc_ment) || this.data.getPmidAbb2LF_lc_hash().get(pmid + "\t" + lc_ment).matches(".*(" + Disease_Suffix + ")") || this.data.getPmidAbb2LF_lc_hash().get(pmid + "\t" + lc_ment).matches(".*(" + Cell_Suffix + ")")) continue;
                    if (this.data.getPmidAbb2LF_lc_hash().get(pmid + "\t" + lc_ment).matches(".*(" + FamilyName_Suffix + ")") && !lc_ment.matches(".+[a-z][0-9][a-z]")) {
                        this.data.getBioCDocobj().Annotations.get(i).get(j2).set(k, start3 + "\t" + last + "\t" + mention3 + "\tFamilyName");
                        continue;
                    }
                    if (this.data.getPmidAbb2LF_lc_hash().get(pmid + "\t" + lc_ment).matches(".*(" + DomainMotif_Suffix + ")")) {
                        this.data.getBioCDocobj().Annotations.get(i).get(j2).set(k, start3 + "\t" + last + "\t" + mention3 + "\tDomainMotif");
                        continue;
                    }
                    if (!Mention2Type_Hash.containsKey(this.data.getPmidAbb2LF_lc_hash().get(pmid + "\t" + lc_ment)) || !((String)Mention2Type_Hash.get(this.data.getPmidAbb2LF_lc_hash().get(pmid + "\t" + lc_ment))).equals("Gene") || type.equals("Gene")) continue;
                    this.data.getBioCDocobj().Annotations.get(i).get(j2).set(k, start3 + "\t" + last + "\t" + mention3 + "\tGene");
                }
            }
            for (j2 = 0; j2 < this.data.getBioCDocobj().Annotations.get(i).size(); ++j2) {
                for (int k = 0; k < this.data.getBioCDocobj().Annotations.get(i).get(j2).size(); ++k) {
                    String[] Anno = this.data.getBioCDocobj().Annotations.get(i).get(j2).get(k).split("\\t");
                    if (!Translate2Family.contains(Anno[2].toLowerCase())) continue;
                    this.data.getBioCDocobj().Annotations.get(i).get(j2).set(k, Anno[0] + "\t" + Anno[1] + "\t" + Anno[2] + "\tFamilyName");
                }
            }
            HashMap<String, String> GeneMentions = new HashMap<String, String>();
            HashMap<CallSite, String> GeneMentionLocationGNR = new HashMap<CallSite, String>();
            for (j = 0; j < this.data.getBioCDocobj().Annotations.get(i).size(); ++j) {
                for (int k = 0; k < this.data.getBioCDocobj().Annotations.get(i).get(j).size(); ++k) {
                    String[] Anno = this.data.getBioCDocobj().Annotations.get(i).get(j).get(k).split("\\t");
                    int start4 = Integer.parseInt(Anno[0]);
                    int last = Integer.parseInt(Anno[1]);
                    String mention4 = Anno[2];
                    String type3 = Anno[3];
                    for (int s = start4; s <= last; ++s) {
                        GeneMentionLocationGNR.put((CallSite)((Object)(j + "\t" + s)), type3);
                    }
                    GeneMentions.put(mention4.toLowerCase(), type3);
                }
            }
            for (j = 0; j < this.data.getBioCDocobj().Annotations.get(i).size(); ++j) {
                if (this.data.getBioCDocobj().PassageContexts.size() <= i || this.data.getBioCDocobj().PassageContexts.get(i).size() <= j) continue;
                String PassageContexts = " " + this.data.getBioCDocobj().PassageContexts.get(i).get(j) + " ";
                PassageContexts_tmp = PassageContexts.toLowerCase();
                for (String gm : GeneMentions.keySet()) {
                    type = (String)GeneMentions.get(gm);
                    if (!type.equals("Gene")) continue;
                    gm = gm.replaceAll("([\\W\\-\\_])", "\\\\$1");
                    gm = gm.replaceAll("[0-9]", "\\[0\\-9\\]");
                    gm = gm.replaceAll("(alpha|beta|gamma|theta|zeta|delta)", "(alpha\\|beta\\|gamma\\|theta\\|zeta\\|delta)");
                    gm = gm.replaceAll("\\-[a-z]$", "\\-\\[a\\-z\\]");
                    ptmp = Pattern.compile("^(.*[\\W\\-\\_])(" + gm + ")([\\W\\-\\_].*)$");
                    Matcher mtmp = ptmp.matcher((CharSequence)PassageContexts_tmp);
                    while (mtmp.find()) {
                        String pre = mtmp.group(1);
                        gmtmp = mtmp.group(2);
                        post = mtmp.group(3);
                        start = pre.length() - 1;
                        int last = start + gmtmp.length();
                        if (PassageContexts.length() <= last) continue;
                        mention = PassageContexts.substring(start + 1, last + 1);
                        if (!GeneMentionLocationGNR.containsKey(j + "\t" + start) && !GeneMentionLocationGNR.containsKey(j + "\t" + last)) {
                            if (this.data.getBioCDocobj().Annotations.get(i).get(j).contains(start + "\t" + last + "\t" + mention + "\tFamilyName")) {
                                this.data.getBioCDocobj().Annotations.get(i).get(j).remove(start + "\t" + last + "\t" + mention + "\tFamilyName");
                            } else if (this.data.getBioCDocobj().Annotations.get(i).get(j).contains(start + "\t" + last + "\t" + mention + "\tDomainMotif")) {
                                this.data.getBioCDocobj().Annotations.get(i).get(j).remove(start + "\t" + last + "\t" + mention + "\tDomainMotif");
                            }
                            this.data.getBioCDocobj().Annotations.get(i).get(j).add(start + "\t" + last + "\t" + mention + "\tGene");
                        }
                        gmtmp = gmtmp.replaceAll(".", "X");
                        PassageContexts_tmp = pre + gmtmp + post;
                        mtmp = ptmp.matcher((CharSequence)PassageContexts_tmp);
                    }
                }
            }
            for (j = 0; j < this.data.getBioCDocobj().Annotations.get(i).size(); ++j) {
                if (this.data.getBioCDocobj().PassageContexts.size() <= i || this.data.getBioCDocobj().PassageContexts.get(i).size() <= j) continue;
                String PassageContexts = " " + this.data.getBioCDocobj().PassageContexts.get(i).get(j) + " ";
                PassageContexts_tmp = PassageContexts.toLowerCase();
                for (String gm : GeneMentions.keySet()) {
                    type = (String)GeneMentions.get(gm);
                    if (!type.matches("(FamilyName|DomainMotif)")) continue;
                    gm = gm.replaceAll("([\\W\\-\\_])", "\\\\$1");
                    gm = gm.replaceAll("s$", "(s\\|)");
                    ptmp = Pattern.compile("^(.*[\\W\\-\\_])(" + gm + ")([\\W\\-\\_].*)$");
                    Matcher mtmp = ptmp.matcher((CharSequence)PassageContexts_tmp);
                    while (mtmp.find()) {
                        String pre = mtmp.group(1);
                        gmtmp = mtmp.group(2);
                        post = mtmp.group(3);
                        start = pre.length() - 1;
                        int last = start + gmtmp.length();
                        if (PassageContexts.length() <= last) continue;
                        mention = PassageContexts.substring(start + 1, last + 1);
                        if (!(mention.isBlank() || GeneMentionLocationGNR.containsKey(j + "\t" + start) || GeneMentionLocationGNR.containsKey(j + "\t" + last) || this.data.getBioCDocobj().Annotations.get(i).get(j).contains(start + "\t" + last + "\t" + mention + "\tGene"))) {
                            this.data.getBioCDocobj().Annotations.get(i).get(j).add(start + "\t" + last + "\t" + mention + "\t" + type);
                        }
                        gmtmp = gmtmp.replaceAll(".", "X");
                        PassageContexts_tmp = pre + gmtmp + post;
                        mtmp = ptmp.matcher((CharSequence)PassageContexts_tmp);
                    }
                }
            }
        }
        this.data.getBioCDocobj().BioCOutput(Filename, FilenameBioC, this.data.getBioCDocobj().Annotations, false, false);
    }
}

