/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.gene.candidateretrieval.scoring;

import de.julielab.geneexpbase.CandidateFilter;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.TreeSet;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Builds lists of labelled string pairs from tab-separated input files.
 *
 * <p>Pairs are represented as {@code String[]} rows. Generated negative pairs carry the
 * label {@code "FALSE"} in their third column, generated positive pairs the label
 * {@code "TRUE"}. Whether two strings qualify as a pair is decided by the
 * {@code addPair*} methods, which delegate the word overlap computation to
 * {@link CandidateFilter}.
 *
 * <p>I/O failures in the list-building methods are logged and result in a partial or
 * empty result rather than an exception; only {@link #storePairs} propagates
 * {@link IOException}.
 */
public class MaxEntScorerPairExtractor {
    private static final Logger LOGGER = LoggerFactory.getLogger(MaxEntScorerPairExtractor.class);

    /** Maximum number of negative pairs {@link #findFalse} creates for a single anchor row. */
    private static final int RELATED_PAIRS = 10;

    /** Minimum overlap ratio passed to the {@code addPair*} checks by the list builders. */
    private static final double OVERLAP_RATIO = 0.5;

    /** Maximum number of space-separated tokens passed to the {@code addPair*} checks. */
    private static final int MAXSYN_LENGTH = 5;

    /** Matches a token that is either an all-digit number or one lower-case ASCII letter. */
    private static final Pattern NUMBER_OR_SINGLE_LETTER = Pattern.compile("([0-9]+|[a-z])");

    /**
     * Reads up to {@code amountTrue} rows from {@code trueList} and appends negative
     * pairs generated from {@code completeList} by {@link #findFalse}.
     *
     * @param trueList     tab-separated file whose rows are taken over as they are
     * @param completeList tab-separated file handed to {@link #findFalse}
     * @param amountTrue   maximum number of rows to read from {@code trueList}
     * @param ratioFalse   factor applied to the number of rows actually read to obtain the
     *                     number of negative pairs requested (truncated to an int)
     * @return the rows read followed by the negative pairs; if {@code trueList} cannot be
     *         read, the rows read so far without any negative pairs
     */
    public ArrayList<String[]> getPairs(File trueList, File completeList, int amountTrue, float ratioFalse) {
        ArrayList<String[]> pairs = new ArrayList<String[]>();
        LOGGER.debug("getPairs() - getting positive training examples ...");
        try (BufferedReader fileIn = new BufferedReader(new FileReader(trueList))) {
            String text;
            while (pairs.size() < amountTrue && (text = fileIn.readLine()) != null) {
                pairs.add(text.split("\t"));
            }
        } catch (IOException io) {
            LOGGER.error("getPairs() - could not read positive training examples from " + trueList, io);
            return pairs;
        }
        LOGGER.debug("getPairs() - number of positive training examples read: {}", pairs.size());
        if (amountTrue != pairs.size()) {
            LOGGER.debug("getPairs() - number of positive training examples: {}", amountTrue);
            // Fewer rows than requested: scale the negatives to what is really there.
            amountTrue = pairs.size();
            LOGGER.warn("getPairs() - Only {} entries available!", amountTrue);
        }
        int fillUp = (int) ((float) amountTrue * ratioFalse);
        pairs.addAll(this.findFalse(completeList, fillUp));
        LOGGER.debug("getPairs() - overall number of training examples: {}", pairs.size());
        return pairs;
    }

    /**
     * Prints every pair to standard output, one per line, with the columns of each row
     * in reverse order and each column followed by {@code " \t"}.
     *
     * @param pairs rows to print
     */
    public void showPairs(ArrayList<String[]> pairs) {
        LOGGER.debug("all pairs: ");
        for (String[] row : pairs) {
            StringBuilder line = new StringBuilder();
            for (int j = row.length - 1; j >= 0; --j) {
                line.append(row[j]).append(" \t");
            }
            System.out.println(line);
        }
    }

    /**
     * Writes every pair to {@code filename}, one per line, each column followed by a tab.
     * An existing file is overwritten.
     *
     * @param pairs    rows to write
     * @param filename destination file
     * @throws IOException if the file cannot be opened or written
     */
    public void storePairs(ArrayList<String[]> pairs, File filename) throws IOException {
        LOGGER.debug("storePairs()");
        try (FileWriter fw = new FileWriter(filename)) {
            for (String[] row : pairs) {
                StringBuilder line = new StringBuilder();
                for (String column : row) {
                    line.append(column).append('\t');
                }
                line.append('\n');
                fw.write(line.toString());
            }
        }
    }

    /**
     * Generates up to {@code amount} negative pairs from a tab-separated file.
     *
     * <p>For every row, its column 0 is paired with column 0 of later rows whose column 1
     * differs and for which {@link #addPair(String, String, double, int)} accepts the two
     * strings. At most {@link #RELATED_PAIRS} pairs are created per anchor row. Lines with
     * fewer than two columns are skipped.
     *
     * @param entities tab-separated input file
     * @param amount   maximum number of pairs to create
     * @return rows of the form {@code {first, second, "FALSE"}}; empty if the file cannot
     *         be read
     */
    ArrayList<String[]> findFalse(File entities, int amount) {
        ArrayList<String[]> pairs = new ArrayList<String[]>();
        LOGGER.debug("findFalse() - getting negative training examples ...");
        ArrayList<String[]> rows = new ArrayList<String[]>();
        try {
            readRowsInto(entities, 2, rows);
        } catch (IOException io) {
            LOGGER.error("findFalse() - could not read " + entities, io);
            return pairs;
        }
        LOGGER.debug("rows: {}", rows.size());
        LOGGER.debug("findFalse() - number of negative training examples: {}", amount);

        int counter = 0;
        long start = System.currentTimeMillis();
        for (int i = 0; i < rows.size() && counter < amount; ++i) {
            String[] anchor = rows.get(i);
            String first = anchor[0];
            int pairsForAnchor = 0;
            for (int j = i + 1; j < rows.size() && counter < amount && pairsForAnchor < RELATED_PAIRS; ++j) {
                String[] other = rows.get(j);
                if (anchor[1].equals(other[1])) {
                    continue;
                }
                String second = other[0];
                if (!this.addPair(first, second, OVERLAP_RATIO, MAXSYN_LENGTH)) {
                    continue;
                }
                pairs.add(new String[]{first, second, "FALSE"});
                ++pairsForAnchor;
                if (++counter % 50 == 0) {
                    System.out.println("made " + counter + "/" + amount);
                }
            }
        }
        long duration = System.currentTimeMillis() - start;
        LOGGER.debug("findFalse() - result: duration: " + duration + " ms; pairs: " + pairs.size() + "; counter: " + counter);
        return pairs;
    }

    /**
     * Writes all negative pairs that can be formed from a tab-separated file to
     * {@code storeList}, one {@code first \t second \t FALSE} line per pair.
     *
     * <p>Unlike {@link #findFalse}, the paired strings come from column 1 and rows are
     * treated as unrelated when their column 0 differs. Rows whose column 1 has more than
     * {@link #MAXSYN_LENGTH} space-separated tokens, and lines with fewer than two columns,
     * are ignored. Pairs are accepted by
     * {@link #addPairSpecialRules(String, String, double, int)}.
     *
     * <p>An existing {@code storeList} is deleted first. The file is re-opened in append
     * mode for every anchor row, so the output written so far is on disk after each row.
     * I/O errors are logged and end the run.
     *
     * @param entities  tab-separated input file
     * @param storeList output file
     */
    void makeFalseList(File entities, File storeList) {
        LOGGER.debug("makeFalseList() - getting negative training examples ...");
        try {
            LOGGER.debug("reading complete list...");
            ArrayList<String[]> allRows = new ArrayList<String[]>();
            readRowsInto(entities, 2, allRows);
            ArrayList<String[]> rows = new ArrayList<String[]>();
            for (String[] row : allRows) {
                if (row[1].split(" ").length <= MAXSYN_LENGTH) {
                    rows.add(row);
                }
            }
            if (storeList.isFile() && !storeList.delete()) {
                LOGGER.warn("makeFalseList() - could not delete existing file {}", storeList);
            }
            LOGGER.debug("starting to make pairs for rows: {}", rows.size());
            long start = System.currentTimeMillis();
            for (int i = 0; i < rows.size(); ++i) {
                if (i % 10 == 0) {
                    System.out.println(i + "/" + rows.size());
                }
                String[] anchor = rows.get(i);
                String first = anchor[1];
                try (FileWriter out = new FileWriter(storeList, true)) {
                    for (int j = i + 1; j < rows.size(); ++j) {
                        String[] other = rows.get(j);
                        if (anchor[0].equals(other[0])) {
                            continue;
                        }
                        String second = other[1];
                        if (this.addPairSpecialRules(first, second, OVERLAP_RATIO, MAXSYN_LENGTH)) {
                            out.write(first + "\t" + second + "\tFALSE\n");
                        }
                    }
                }
            }
            long duration = System.currentTimeMillis() - start;
            LOGGER.debug("makeFalseList() - result: duration: {} ms", duration);
        } catch (IOException io) {
            LOGGER.error("makeFalseList() - failed while processing " + entities + " -> " + storeList, io);
        }
    }

    /**
     * Tells whether two different strings share at least one word.
     *
     * @param first  first string, split on single spaces
     * @param second second string, split on single spaces
     * @return {@code false} if the strings are equal or
     *         {@link CandidateFilter#getCommonWords} returns no word, {@code true} otherwise
     */
    public boolean addPair(String first, String second) {
        if (first.equals(second)) {
            return false;
        }
        TreeSet<String> intersection = CandidateFilter.getCommonWords(first.split(" "), second.split(" "));
        return !intersection.isEmpty();
    }

    /**
     * Tells whether two strings overlap enough and are both short enough to form a pair.
     *
     * @param first        first string
     * @param second       second string
     * @param overlapRatio minimum value {@link CandidateFilter#getOverlapRatio} must return
     * @param maxSynLength maximum number of space-separated tokens allowed in each string
     * @return {@code true} if the overlap ratio is at least {@code overlapRatio} and neither
     *         string has more than {@code maxSynLength} tokens
     */
    public boolean addPair(String first, String second, double overlapRatio, int maxSynLength) {
        double termOverlap = CandidateFilter.getOverlapRatio(first, second);
        return termOverlap >= overlapRatio
                && first.split(" ").length <= maxSynLength
                && second.split(" ").length <= maxSynLength;
    }

    /**
     * Stricter variant of the pair check that rejects pairs whose shared or differing
     * words consist solely of numbers and single lower-case letters.
     *
     * @param first        first string, split on single spaces
     * @param second       second string, split on single spaces
     * @param overlapRatio minimum value {@link CandidateFilter#getOverlapRatio} must return
     * @param maxSynLength accepted for signature parity with
     *                     {@link #addPair(String, String, double, int)}; this method does
     *                     not apply a length limit
     * @return {@code true} if the overlap ratio is reached and both the set returned by
     *         {@link CandidateFilter#getCommonWords} and the set returned by
     *         {@link CandidateFilter#getDifferentWords} contain at least one word that is
     *         neither an all-digit number nor a single lower-case letter; an empty set
     *         therefore leads to {@code false}
     */
    public boolean addPairSpecialRules(String first, String second, double overlapRatio, int maxSynLength) {
        double termOverlap = CandidateFilter.getOverlapRatio(first, second);
        if (termOverlap < overlapRatio) {
            return false;
        }
        String[] firstArray = first.split(" ");
        String[] secondArray = second.split(" ");
        TreeSet<String> common = CandidateFilter.getCommonWords(firstArray, secondArray);
        TreeSet<String> different = CandidateFilter.getDifferentWords(firstArray, secondArray);
        return !hasOnlyNumbersOrSingleLetters(common) && !hasOnlyNumbersOrSingleLetters(different);
    }

    /**
     * Writes all positive pairs that can be formed from a tab-separated file to
     * {@code storeList}, one {@code first \t second \t TRUE} line per pair.
     *
     * <p>Column 0 of a row is paired with column 0 of the directly following rows for as
     * long as their column 1 equals the row's own column 1; the scan for a row stops at the
     * first following row with a different column 1, so equal values must be adjacent in
     * the file to be paired. Pairs must be accepted by
     * {@link #addPair(String, String, double, int)}. Lines with fewer than two columns are
     * skipped. I/O errors are logged and end the run.
     *
     * @param completeList tab-separated input file
     * @param storeList    output file; overwritten once the input has been read
     */
    void makeTrueList(File completeList, File storeList) {
        LOGGER.debug("makeTrueList() - started ...");
        try {
            ArrayList<String[]> rows = new ArrayList<String[]>();
            readRowsInto(completeList, 2, rows);
            long start = System.currentTimeMillis();
            try (FileWriter outTrue = new FileWriter(storeList)) {
                for (int i = 0; i < rows.size(); ++i) {
                    String[] anchor = rows.get(i);
                    String first = anchor[0];
                    for (int j = i + 1; j < rows.size() && anchor[1].equals(rows.get(j)[1]); ++j) {
                        String second = rows.get(j)[0];
                        if (this.addPair(first, second, OVERLAP_RATIO, MAXSYN_LENGTH)) {
                            outTrue.write(first + "\t" + second + "\tTRUE\n");
                        }
                    }
                }
            }
            long duration = System.currentTimeMillis() - start;
            LOGGER.debug("makeTrueList() - took: {}", duration);
        } catch (IOException io) {
            LOGGER.error("makeTrueList() - failed while processing " + completeList + " -> " + storeList, io);
        }
    }

    /**
     * Reads a tab-separated file into a list of rows.
     *
     * @param listFile file to read
     * @return one {@code String[]} per line, split on tabs; if reading fails, the rows read
     *         up to that point
     */
    ArrayList<String[]> readList(File listFile) {
        ArrayList<String[]> list = new ArrayList<String[]>();
        try {
            readRowsInto(listFile, 0, list);
        } catch (IOException io) {
            LOGGER.error("readList() - could not read " + listFile, io);
        }
        return list;
    }

    /**
     * Splits both strings on single spaces and reports which words they share and which
     * occur in only one of them.
     *
     * @param S1 first string
     * @param S2 second string
     * @return a two-element array: index 0 holds the words present in both strings, index 1
     *         the words present in exactly one of them; both are duplicate-free and in
     *         natural {@link String} order
     */
    public String[][] compareStrings(String S1, String S2) {
        TreeSet<String> firstWords = new TreeSet<String>(Arrays.asList(S1.split(" ")));
        TreeSet<String> secondWords = new TreeSet<String>(Arrays.asList(S2.split(" ")));

        TreeSet<String> sameWords = new TreeSet<String>(firstWords);
        sameWords.retainAll(secondWords);

        // Symmetric difference: everything from either side minus the shared words.
        TreeSet<String> diffWords = new TreeSet<String>(firstWords);
        diffWords.addAll(secondWords);
        diffWords.removeAll(sameWords);

        return new String[][]{sameWords.toArray(new String[0]), diffWords.toArray(new String[0])};
    }

    /**
     * Appends the tab-split lines of {@code source} to {@code target}, skipping (with a
     * warning) every line that has fewer than {@code minColumns} columns. Rows are added as
     * they are read, so {@code target} keeps the rows read so far if an exception is thrown.
     */
    private static void readRowsInto(File source, int minColumns, ArrayList<String[]> target) throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(source))) {
            String line;
            int lineNumber = 0;
            while ((line = reader.readLine()) != null) {
                ++lineNumber;
                String[] columns = line.split("\t");
                if (columns.length < minColumns) {
                    LOGGER.warn("skipping line {} of {}: too few tab-separated columns", lineNumber, source);
                    continue;
                }
                target.add(columns);
            }
        }
    }

    /** Returns {@code true} if every word (vacuously, if there is none) is a number or a single lower-case letter. */
    private static boolean hasOnlyNumbersOrSingleLetters(TreeSet<String> words) {
        for (String word : words) {
            if (!NUMBER_OR_SINGLE_LETTER.matcher(word).matches()) {
                return false;
            }
        }
        return true;
    }
}

