/*
 * Decompiled with CFR 0.152.
 */
package org.allenai.scienceparse;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.gs.collections.api.tuple.Pair;
import com.gs.collections.impl.tuple.Tuples;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.allenai.ml.sequences.crf.CRFModel;
import org.allenai.scienceparse.BibRecord;
import org.allenai.scienceparse.ExtractReferences;
import org.allenai.scienceparse.ExtractedMetadata;
import org.allenai.scienceparse.PDFToCRFInput;
import org.allenai.scienceparse.Parser;
import org.allenai.scienceparse.ReferencesPredicateExtractor;
import org.allenai.scienceparse.RegexWithTimeout;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class CRFBibRecordParser
implements ExtractReferences.BibRecordParser {
    private static final Logger log = LoggerFactory.getLogger(CRFBibRecordParser.class);
    private CRFModel<String, String, String> model;
    public static final String DATA_VERSION = "0.1";
    private final LoadingCache<ArrayList<String>, List<String>> bestGuessCache = CacheBuilder.newBuilder().maximumSize(10240L).expireAfterAccess(10L, TimeUnit.MINUTES).build((CacheLoader)new CacheLoader<ArrayList<String>, List<String>>(){

        public List<String> load(ArrayList<String> arrayList) throws Exception {
            return CRFBibRecordParser.this.model.bestGuess(arrayList);
        }
    });

    public CRFBibRecordParser(CRFModel<String, String, String> cRFModel) {
        this.model = cRFModel;
    }

    public static List<Pair<String, String>> getLabeledLineUMass(String string) {
        String[] stringArray = new String[]{"address", "authors", "booktitle", "editor", "institution", "journal", "pages", "publisher", "tech", "thesis", "title", "volume", "year"};
        String[] stringArray2 = new String[]{"address", "A", "V", "editor", "institution", "V", "pages", "publisher", "tech", "V", "T", "volume", "Y"};
        string.replaceAll("<ref-marker>.*</ref-marker>", "");
        return CRFBibRecordParser.labelAccordingToTags(string, stringArray, stringArray2);
    }

    public static List<Pair<String, String>> labelAccordingToTags(String string, String[] stringArray, String[] stringArray2) {
        List<String> list = CRFBibRecordParser.tokenize(string);
        ArrayList<Pair<String, String>> arrayList = new ArrayList<Pair<String, String>>();
        arrayList.add(Tuples.pair((Object)"<S>", (Object)"<S>"));
        boolean bl = false;
        int n = -1;
        for (int i = 0; i < list.size(); ++i) {
            String string2;
            String string3 = list.get(i);
            if (string3.endsWith(">")) {
                string2 = string3.replaceAll("<", "").replaceAll("/", "").replaceAll(">", "");
                int n2 = Arrays.binarySearch(stringArray, string2);
                if (n2 < 0) continue;
                if (string3.startsWith("</")) {
                    n = -1;
                    continue;
                }
                n = n2;
                bl = true;
                continue;
            }
            string2 = "O";
            if (n >= 0) {
                string2 = stringArray2[n];
                if (i < list.size() - 1) {
                    string2 = list.get(i + 1).equals("</" + stringArray[n] + ">") ? (bl ? "W_" + string2 : "E_" + string2) : (bl ? "B_" + string2 : "I_" + string2);
                }
            }
            arrayList.add((Pair<String, String>)Tuples.pair((Object)string3, (Object)string2));
            bl = false;
        }
        arrayList.add(Tuples.pair((Object)"</S>", (Object)"</S>"));
        return arrayList;
    }

    public static List<Pair<String, String>> getLabeledLineCora(String string) {
        String[] stringArray = new String[]{"author", "booktitle", "date", "editor", "institution", "journal", "location", "note", "pages", "publisher", "tech", "title", "volume"};
        String[] stringArray2 = new String[]{"A", "V", "Y", "editor", "institution", "V", "location", "note", "pages", "publisher", "V", "T", "volume"};
        return CRFBibRecordParser.labelAccordingToTags(string, stringArray, stringArray2);
    }

    public static List<Pair<String, String>> getLabeledLineKermit(String string) {
        String[] stringArray = new String[]{"author", "booktitle", "date", "editor", "note", "pages", "publisher", "pubPlace", "title", "volume"};
        String[] stringArray2 = new String[]{"A", "V", "Y", "editor", "note", "pages", "publisher", "location", "T", "volume"};
        string = string.replaceAll("<title level=\"j\"> ([^<]+) </title>", "<booktitle> $1 </booktitle>");
        string = string.replaceAll("<title level=\"m\"> ([^<]+) </title>", "<booktitle> $1 </booktitle>");
        string = string.replaceAll("<title level=\"a\"> ([^<]+) </title>", "<title> $1 </title>");
        string = string.replaceAll("biblScope type=\"vol\" ([^<]+) </biblScope>", "<vol> $1 </vol>");
        string = string.replaceAll("biblScope type=\"pp\" ([^<]+) </biblScope>", "<pages> $1 </pages>");
        string = string.replaceAll("biblScope type=\"issue\" ([^<]+) </biblScope>", "<issue> $1 </issue>");
        return CRFBibRecordParser.labelAccordingToTags(string, stringArray, stringArray2);
    }

    public static List<List<Pair<String, String>>> labelFromCoraFile(File file) throws IOException {
        return CRFBibRecordParser.labelFromFile(file, string -> CRFBibRecordParser.getLabeledLineCora(string));
    }

    public static List<List<Pair<String, String>>> labelFromUMassFile(File file) throws IOException {
        return CRFBibRecordParser.labelFromFile(file, string -> CRFBibRecordParser.getLabeledLineUMass(string));
    }

    public static List<List<Pair<String, String>>> labelFromKermitFile(File file) throws IOException {
        return CRFBibRecordParser.labelFromFile(file, string -> CRFBibRecordParser.getLabeledLineKermit(string));
    }

    public static List<List<Pair<String, String>>> labelFromFile(File file, Function<String, List<Pair<String, String>>> function) throws IOException {
        String string;
        ArrayList<List<Pair<String, String>>> arrayList = new ArrayList<List<Pair<String, String>>>();
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(file), "UTF-8"));
        while ((string = bufferedReader.readLine()) != null) {
            List<Pair<String, String>> list = function.apply(string);
            arrayList.add(list);
        }
        bufferedReader.close();
        return arrayList;
    }

    private static List<String> tokenize(String string) {
        String[] stringArray = string.split(" ");
        return Arrays.asList(stringArray);
    }

    @Override
    public BibRecord parseRecord(String string) {
        Object object;
        List<String> list;
        if (Thread.interrupted()) {
            throw new Parser.ParsingTimeout();
        }
        if ((string = string.trim()).isEmpty() || string.length() > 2000) {
            return null;
        }
        Matcher matcher = RegexWithTimeout.matcher(ExtractReferences.pBracket, string);
        String string2 = null;
        String string3 = null;
        if (matcher.matches()) {
            string3 = string2 = matcher.group(1);
            string = matcher.group(2);
        } else {
            matcher = RegexWithTimeout.matcher(ExtractReferences.pDot, string);
            if (matcher.matches()) {
                string3 = string2 = matcher.group(1);
                string = matcher.group(2);
            }
        }
        string = string.trim();
        if (string.isEmpty()) {
            return null;
        }
        ArrayList<String> arrayList = new ArrayList<String>();
        arrayList.add("<S>");
        arrayList.addAll(CRFBibRecordParser.tokenize(string));
        arrayList.add("</S>");
        try {
            list = (List<String>)this.bestGuessCache.get(arrayList);
        }
        catch (Exception exception) {
            return null;
        }
        list = PDFToCRFInput.padTagSequence(list);
        List<ExtractedMetadata.LabelSpan> list2 = ExtractedMetadata.getSpans(list);
        String string4 = null;
        String string5 = null;
        String string6 = null;
        String string7 = null;
        for (ExtractedMetadata.LabelSpan labelSpan : list2) {
            if (string4 == null && labelSpan.tag.equals("T")) {
                string4 = PDFToCRFInput.stringAtForStringList(arrayList, labelSpan.loc);
                continue;
            }
            if (string5 == null && labelSpan.tag.equals("A")) {
                string5 = PDFToCRFInput.stringAtForStringList(arrayList, labelSpan.loc);
                continue;
            }
            if (string6 == null && labelSpan.tag.equals("V")) {
                string6 = PDFToCRFInput.stringAtForStringList(arrayList, labelSpan.loc);
                continue;
            }
            if (string7 != null || !labelSpan.tag.equals("Y")) continue;
            string7 = PDFToCRFInput.stringAtForStringList(arrayList, labelSpan.loc);
        }
        List<String> list3 = string5 == null ? null : ExtractReferences.authorStringToList(string5);
        int n = -1;
        if (string7 == null) {
            object = RegexWithTimeout.matcher(ReferencesPredicateExtractor.yearPattern, string);
            while (((Matcher)object).find()) {
                string7 = ((Matcher)object).group(1);
            }
        }
        if (string7 != null) {
            n = ExtractReferences.extractRefYear(string7);
        }
        if (string2 == null && string7 != null) {
            string3 = ExtractReferences.getCiteAuthorFromAuthors(list3);
            string2 = string3 + ",? " + (n > 0 ? Pattern.quote(n + "") : "");
        }
        if (string2 == null || string3 == null || string4 == null || list3 == null || string7 == null) {
            return null;
        }
        object = null;
        try {
            object = new BibRecord(CRFBibRecordParser.cleanTitle(string4), Parser.trimAuthors(list3), Parser.cleanTitle(string6), Pattern.compile(string2), Pattern.compile(string3), n);
        }
        catch (NumberFormatException numberFormatException) {
            return null;
        }
        if (n == 0) {
            return null;
        }
        return object;
    }

    public static String cleanTitle(String string) {
        return string.replaceAll("^\\p{Pi}", "").replaceAll("(\\p{Pe}|,|\\.)$", "");
    }
}

