/*
 * Decompiled with CFR 0.152.
 */
package org.allenai.scienceparse;

import com.gs.collections.api.tuple.Pair;
import com.gs.collections.impl.tuple.Tuples;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.stream.Collectors;
import org.allenai.scienceparse.LabeledData;
import org.allenai.scienceparse.PaperToken;
import org.allenai.scienceparse.StringUtils;
import org.allenai.scienceparse.pdfapi.PDFDoc;
import org.allenai.scienceparse.pdfapi.PDFLine;
import org.allenai.scienceparse.pdfapi.PDFPage;
import org.allenai.scienceparse.pdfapi.PDFToken;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static helpers that flatten a {@link PDFDoc} into a sequence of {@link PaperToken}s and
 * attach labels to the tokens that match a known title or author name.
 *
 * <p>Labels written by {@link #findAndLabelWith} are {@code W_x} for a single-token match and
 * {@code B_x} / {@code I_x} / {@code E_x} for the first / inner / last token of a longer match,
 * where {@code x} is the label suffix supplied by the caller. Unmatched tokens are labelled
 * {@code O} by {@link #labelMetadata}.
 */
public class PDFToCRFInput {
    private static final Logger log = LoggerFactory.getLogger(PDFToCRFInput.class);

    /** Characters removed from an author name before it is turned into regular expressions. */
    private static final Pattern STRIPPED_AUTHOR_CHARS = Pattern.compile("[()?*+^]");

    /** Case-sensitive test for a name token that is a bare initial such as {@code J} or {@code J.}. */
    private static final Pattern INITIAL_TOKEN = Pattern.compile("[A-Z](\\.)?");

    /** Case-insensitive initial inserted as an optional middle element for two-token names. */
    private static final Pattern OPTIONAL_MIDDLE_INITIAL =
            Pattern.compile("[A-Z](\\.)?", Pattern.CASE_INSENSITIVE);

    /** Suffix appended to the last name token: any run of non-word characters or digits. */
    private static final String TRAILING_MARKS = "((\\W)|[0-9])*";

    /**
     * Tries to match the patterns {@code list2[n2..]} against the tokens {@code list[n..]}.
     *
     * <p>Each pattern must match one whole (normalized) token. A pattern whose flag is
     * {@code true} is optional and may be skipped without consuming a token. Consuming a token
     * is tried first; if the rest of the pattern then fails, skipping an optional pattern is
     * tried as a fallback, so an optional element never prevents a match that exists without it.
     *
     * @param list  the token strings to search in
     * @param list2 pattern/optional-flag pairs; an empty list is treated as an error
     * @param n     index in {@code list} at which matching starts
     * @param n2    index in {@code list2} of the first pattern still to be matched
     * @return the index one past the last consumed token, or {@code -1} if there is no match
     */
    public static int findPatternEnd(List<String> list, List<Pair<Pattern, Boolean>> list2, int n, int n2) {
        // An empty pattern is an error; test this first so it cannot be mistaken for "exhausted".
        if (list2.isEmpty()) {
            return -1;
        }
        // Every pattern has been satisfied: the match ends at the current token index.
        if (n2 == list2.size()) {
            return n;
        }
        Pair<Pattern, Boolean> current = list2.get(n2);
        if (n < list.size()) {
            String token = StringUtils.normalize(list.get(n));
            if (current.getOne().matcher(token).matches()) {
                int end = PDFToCRFInput.findPatternEnd(list, list2, n + 1, n2 + 1);
                if (end >= 0) {
                    return end;
                }
                // Consuming the token led nowhere; fall through and try skipping if allowed.
            }
        }
        if (current.getTwo()) {
            return PDFToCRFInput.findPatternEnd(list, list2, n, n2 + 1);
        }
        return -1;
    }

    /** Joins the tokens with single spaces; used only for debug logging. */
    private static String seqToString(List<String> list) {
        return String.join(" ", list);
    }

    /**
     * Renders a pattern sequence for debug logging: regexes separated by single spaces, with
     * optional ones wrapped in square brackets. An empty sequence yields the empty string.
     */
    private static String patternToString(List<Pair<Pattern, Boolean>> list) {
        return list.stream()
                .map(entry -> {
                    String regex = entry.getOne().pattern();
                    return entry.getTwo() ? "[" + regex + "]" : regex;
                })
                .collect(Collectors.joining(" "));
    }

    /**
     * Finds the first position in {@code list} at which the pattern sequence matches.
     *
     * @param list  the token strings to search in
     * @param list2 pattern/optional-flag pairs, as accepted by {@link #findPatternEnd}
     * @return a pair of (start index, end index exclusive), or {@code null} if nothing matches
     */
    public static Pair<Integer, Integer> findPatternSequence(List<String> list, List<Pair<Pattern, Boolean>> list2) {
        if (log.isDebugEnabled()) {
            log.debug("Finding {}\nin {}", PDFToCRFInput.patternToString(list2), PDFToCRFInput.seqToString(list));
        }
        for (int start = 0; start < list.size(); ++start) {
            int end = PDFToCRFInput.findPatternEnd(list, list2, start, 0);
            if (end >= 0) {
                return Tuples.pair(start, end);
            }
        }
        return null;
    }

    /**
     * Finds the first run of consecutive tokens that equals, ignoring case, the space-separated
     * words of {@code string}. Both the query and every token are normalized before comparing.
     *
     * @param list   the token strings to search in
     * @param string the text to look for
     * @return a pair of (start index, end index exclusive), or {@code null} if not found or if
     *         either input is empty
     */
    public static Pair<Integer, Integer> findString(List<String> list, String string) {
        string = StringUtils.normalize(string);
        if (list.isEmpty() || string.isEmpty()) {
            return null;
        }
        String[] words = string.split(" ");
        if (words.length == 0) {
            return null;
        }
        int matched = 0;
        for (int i = 0; i < list.size(); ++i) {
            String token = StringUtils.normalize(list.get(i));
            if (words[matched].equalsIgnoreCase(token)) {
                ++matched;
            } else {
                // Rewind to the start of the partial match; the loop increment then retries
                // from the token right after that start.
                i -= matched;
                matched = 0;
            }
            if (matched == words.length) {
                return Tuples.pair(i + 1 - words.length, i + 1);
            }
        }
        return null;
    }

    /**
     * Converts an author name into a sequence of case-insensitive token patterns.
     *
     * <p>The characters {@code ( ) ? * + ^} are removed, the name is normalized and split on
     * spaces. The first and last tokens are required, tokens in between are optional. Every
     * token except the last also matches its bare initial (optionally followed by a dot); a
     * token that is itself an initial also matches any lower-case completion. The last token may
     * be followed by non-word characters or digits. For a name with exactly two tokens an
     * optional middle initial is inserted between them.
     *
     * <p>A token whose regular expression does not compile is logged and left out; in that case
     * the optional middle initial is not inserted either.
     *
     * @param string the author name
     * @return the pattern/optional-flag pairs, in token order
     */
    public static List<Pair<Pattern, Boolean>> authorToPatternOptPair(String string) {
        List<Pair<Pattern, Boolean>> patterns = new ArrayList<Pair<Pattern, Boolean>>();
        string = STRIPPED_AUTHOR_CHARS.matcher(string).replaceAll("");
        string = StringUtils.normalize(string);
        String[] nameTokens = string.split(" ");
        int lastIndex = nameTokens.length - 1;
        for (int i = 0; i < nameTokens.length; ++i) {
            String nameToken = nameTokens[i];
            boolean optional = i != 0 && i != lastIndex;
            String regex;
            if (i == lastIndex) {
                regex = nameToken + TRAILING_MARKS;
            } else if (INITIAL_TOKEN.matcher(nameToken).matches()) {
                regex = nameToken.substring(0, 1) + "(((\\.)?)|([a-z]+))";
            } else if (nameToken.length() > 1) {
                regex = nameToken.substring(0, 1) + "(((\\.)?)|(" + nameToken.substring(1) + "))";
            } else {
                regex = nameToken;
            }
            try {
                Pair<Pattern, Boolean> entry =
                        Tuples.pair(Pattern.compile(regex, Pattern.CASE_INSENSITIVE), optional);
                patterns.add(entry);
            } catch (PatternSyntaxException e) {
                // The name still contains regex syntax that was not stripped; skip this token.
                log.info("error in author pattern {}", regex);
            }
        }
        // Only insert the middle initial when both name tokens actually produced a pattern;
        // otherwise index 1 is not the gap between first and last name.
        if (nameTokens.length == 2 && patterns.size() == 2) {
            Pair<Pattern, Boolean> middleInitial = Tuples.pair(OPTIONAL_MIDDLE_INITIAL, true);
            patterns.add(1, middleInitial);
        }
        return patterns;
    }

    /**
     * Finds the first occurrence of an author name in the token sequence, using the patterns
     * produced by {@link #authorToPatternOptPair}.
     *
     * @param list   the token strings to search in
     * @param string the author name
     * @return a pair of (start index, end index exclusive), or {@code null} if not found
     */
    public static Pair<Integer, Integer> findAuthor(List<String> list, String string) {
        return PDFToCRFInput.findPatternSequence(list, PDFToCRFInput.authorToPatternOptPair(string));
    }

    /**
     * Appends one {@link PaperToken} per PDF token to {@code list}, tagging each with the
     * zero-based index of its line within {@code list2} and with the value {@code n}.
     */
    private static void addLineTokens(List<PaperToken> list, List<PDFLine> list2, int n) {
        int nextLine = 0;
        for (PDFLine line : list2) {
            // Lambdas can only capture effectively final locals, hence the per-line copy.
            final int lineIndex = nextLine++;
            line.tokens.forEach(token -> list.add(new PaperToken((PDFToken) token, lineIndex, n)));
        }
    }

    /**
     * Returns element 1 of the line's bounds when {@code bl} is true, otherwise element 3
     * (presumably the two vertical edges of the bounding box; confirm against PDFLine).
     */
    public static float getY(PDFLine pDFLine, boolean bl) {
        return bl ? pDFLine.bounds().get(1) : pDFLine.bounds().get(3);
    }

    /**
     * Returns bounds element 0 of {@code pDFToken2} minus bounds element 2 of {@code pDFToken}
     * (presumably the horizontal gap between the first token's end and the second's start).
     */
    public static float getXGap(PDFToken pDFToken, PDFToken pDFToken2) {
        return PDFToCRFInput.getX(pDFToken2, true) - PDFToCRFInput.getX(pDFToken, false);
    }

    /** Returns element 0 of the line's bounds when {@code bl} is true, otherwise element 2. */
    public static float getX(PDFLine pDFLine, boolean bl) {
        return bl ? pDFLine.bounds().get(0) : pDFLine.bounds().get(2);
    }

    /** Returns element 0 of the token's bounds when {@code bl} is true, otherwise element 2. */
    public static float getX(PDFToken pDFToken, boolean bl) {
        return bl ? pDFToken.getBounds().get(0) : pDFToken.getBounds().get(2);
    }

    /** Returns element 1 of the token's bounds when {@code bl} is true, otherwise element 3. */
    public static float getY(PDFToken pDFToken, boolean bl) {
        return bl ? pDFToken.getBounds().get(1) : pDFToken.getBounds().get(3);
    }

    /**
     * Returns the line height, computed as bounds element 3 minus bounds element 1. A negative
     * result is logged at debug level and replaced by a fixed guess of 5.
     */
    public static float getH(PDFLine pDFLine) {
        float height = pDFLine.bounds().get(3) - pDFLine.bounds().get(1);
        if (height < 0.0f) {
            log.debug("Negative height? Guessing a height of 5.");
            return 5.0f;
        }
        return height;
    }

    /**
     * Flattens a document into one token sequence, visiting pages in order and, within each
     * page, lines and tokens in order. Each token records its line index and page index.
     */
    public static List<PaperToken> getSequence(PDFDoc pDFDoc) {
        List<PaperToken> sequence = new ArrayList<PaperToken>();
        List<PDFPage> pages = pDFDoc.getPages();
        for (int page = 0; page < pages.size(); ++page) {
            PDFToCRFInput.addLineTokens(sequence, pages.get(page).lines, page);
        }
        return sequence;
    }

    /** Returns a new list holding {@code list} with a start/stop token added at both ends. */
    public static List<PaperToken> padSequence(List<PaperToken> list) {
        List<PaperToken> padded = new ArrayList<PaperToken>(list.size() + 2);
        padded.add(PaperToken.generateStartStopToken());
        padded.addAll(list);
        padded.add(PaperToken.generateStartStopToken());
        return padded;
    }

    /** Returns a new list holding {@code list} wrapped in {@code <S>} and {@code </S>} tags. */
    public static List<String> padTagSequence(List<String> list) {
        List<String> padded = new ArrayList<String>(list.size() + 2);
        padded.add("<S>");
        padded.addAll(list);
        padded.add("</S>");
        return padded;
    }

    /**
     * Looks for {@code string2} among the tokens and, if found, overwrites the labels of the
     * matching positions in {@code list2}.
     *
     * @param string  identifier used only in the "could not find" debug message
     * @param list    the paper's tokens
     * @param list2   token/label pairs, index-aligned with {@code list}; modified in place
     * @param string2 the text to find
     * @param string3 label suffix, appended to {@code W_}, {@code B_}, {@code I_} or {@code E_}
     * @param bl      {@code true} to match as an author name ({@link #findAuthor}),
     *                {@code false} to match as literal text ({@link #findString})
     * @return {@code true} if the text was found, {@code false} otherwise
     */
    public static boolean findAndLabelWith(String string, List<PaperToken> list, List<Pair<PaperToken, String>> list2, String string2, String string3, boolean bl) {
        List<String> tokenStrings = PDFToCRFInput.asStringList(list);
        Pair<Integer, Integer> span = bl
                ? PDFToCRFInput.findAuthor(tokenStrings, string2)
                : PDFToCRFInput.findString(tokenStrings, string2);
        if (span == null) {
            log.debug("{}: could not find {} string {} in paper.", string, string3, string2);
            return false;
        }
        int start = span.getOne();
        int end = span.getTwo();
        if (start == end - 1) {
            // Single-token match.
            Pair<PaperToken, String> relabelled = Tuples.pair(list2.get(start).getOne(), "W_" + string3);
            list2.set(start, relabelled);
            return true;
        }
        for (int i = start; i < end; ++i) {
            String prefix;
            if (i == start) {
                prefix = "B_";
            } else if (i == end - 1) {
                prefix = "E_";
            } else {
                prefix = "I_";
            }
            Pair<PaperToken, String> relabelled = Tuples.pair(list2.get(i).getOne(), prefix + string3);
            list2.set(i, relabelled);
        }
        return true;
    }

    /**
     * Labels the tokens of a paper using its known author names and title.
     *
     * <p>All tokens start as {@code O}. Each author name is labelled with suffix {@code A}
     * (authors that cannot be found are ignored), then the title with suffix {@code T}. The
     * result is wrapped in start/stop tokens labelled {@code <S>} and {@code </S>}.
     *
     * @param string      identifier passed through to {@link #findAndLabelWith} for logging
     * @param list        the paper's tokens
     * @param labeledData source of the expected author names and title
     * @return the padded token/label sequence, or {@code null} if the labeled data has no
     *         author names or no title, or if the title cannot be found among the tokens
     */
    public static List<Pair<PaperToken, String>> labelMetadata(String string, List<PaperToken> list, LabeledData labeledData) {
        Optional<Collection<String>> authorNames = labeledData.javaAuthorNames();
        if (!authorNames.isPresent()) {
            return null;
        }
        Optional<String> title = labeledData.javaTitle();
        if (!title.isPresent()) {
            return null;
        }
        List<Pair<PaperToken, String>> labelled = new ArrayList<Pair<PaperToken, String>>(list.size());
        for (PaperToken token : list) {
            Pair<PaperToken, String> outside = Tuples.pair(token, "O");
            labelled.add(outside);
        }
        for (String author : authorNames.get()) {
            PDFToCRFInput.findAndLabelWith(string, list, labelled, author, "A", true);
        }
        if (!PDFToCRFInput.findAndLabelWith(string, list, labelled, title.get(), "T", false)) {
            return null;
        }
        List<Pair<PaperToken, String>> padded = new ArrayList<Pair<PaperToken, String>>(labelled.size() + 2);
        Pair<PaperToken, String> startMarker = Tuples.pair(PaperToken.generateStartStopToken(), "<S>");
        Pair<PaperToken, String> stopMarker = Tuples.pair(PaperToken.generateStartStopToken(), "</S>");
        padded.add(startMarker);
        padded.addAll(labelled);
        padded.add(stopMarker);
        return padded;
    }

    /** Returns the text of each token's underlying PDF token, in order. */
    public static List<String> asStringList(List<PaperToken> list) {
        return list.stream()
                .map(paperToken -> paperToken.getPdfToken().token)
                .collect(Collectors.toList());
    }

    /**
     * Returns the space-joined text of the tokens in the half-open index range {@code pair}.
     * A token whose line is {@code -1} is rendered as {@code <S>}.
     */
    public static String stringAt(List<PaperToken> list, Pair<Integer, Integer> pair) {
        List<String> words = list.subList(pair.getOne(), pair.getTwo()).stream()
                .map(paperToken -> paperToken.getLine() == -1 ? "<S>" : paperToken.getPdfToken().token)
                .collect(Collectors.toList());
        return PDFToCRFInput.appendStringList(words).trim();
    }

    /** Returns the space-joined strings in the half-open index range {@code pair}. */
    public static String stringAtForStringList(List<String> list, Pair<Integer, Integer> pair) {
        List<String> words = list.subList(pair.getOne(), pair.getTwo());
        return PDFToCRFInput.appendStringList(words).trim();
    }

    /**
     * Concatenates the strings, appending a single space after every element, including the
     * last one; callers that do not want the trailing space must trim the result.
     */
    public static String appendStringList(List<String> list) {
        StringBuilder joined = new StringBuilder();
        for (String word : list) {
            joined.append(word).append(" ");
        }
        return joined.toString();
    }

    /** Returns the labels of the sequence formatted as a list, e.g. {@code [<S>, O, </S>]}. */
    public static String getLabelString(List<Pair<PaperToken, String>> list) {
        return list.stream().map(Pair::getTwo).collect(Collectors.toList()).toString();
    }
}

