/*
 * Decompiled with CFR 0.152.
 */
package org.allenai.scienceparse;

import com.gs.collections.api.map.primitive.ObjectDoubleMap;
import com.gs.collections.impl.map.mutable.primitive.ObjectDoubleHashMap;
import com.medallia.word2vec.Searcher;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import org.allenai.datastore.Datastore;
import org.allenai.ml.sequences.crf.CRFPredicateExtractor;
import org.allenai.scienceparse.ExtractedMetadata;
import org.allenai.scienceparse.GazetteerFeatures;
import org.allenai.scienceparse.PDFPredicateExtractor;
import org.allenai.scienceparse.Parser;
import org.allenai.scienceparse.ParserLMFeatures;
import org.allenai.scienceparse.RegexWithTimeout;
import org.allenai.scienceparse.WordVectorCache;

public class ReferencesPredicateExtractor
implements CRFPredicateExtractor<String, String> {
    /**
     * Language-model bag-of-words statistics supplied at construction time.
     * May be {@code null}; {@code nodePredicates} then skips the frequency and word-embedding features.
     */
    private ParserLMFeatures lmFeats;
    /** Word-vector searcher, initialised once in the constructor and queried per token in {@code nodePredicates}. */
    private final Searcher word2vecSearcher;
    /** Matches a four-digit number beginning with 19 or 20 (captured in group 1). */
    public static final Pattern yearPattern = Pattern.compile("((?:19|20)[0-9][0-9])");
    /** Optional gazetteer; remains {@code null} until {@code setGf} is called, in which case no gazetteer features are added. */
    private GazetteerFeatures gf;

    public ReferencesPredicateExtractor() {
        this(null);
    }

    public ReferencesPredicateExtractor(ParserLMFeatures parserLMFeatures) {
        try {
            Path path = Datastore.apply().filePath("org.allenai.scienceparse", "Word2VecModel.bin", 1);
            this.word2vecSearcher = WordVectorCache.searcherForPath(path);
        }
        catch (IOException iOException) {
            throw new RuntimeException(iOException);
        }
        this.lmFeats = parserLMFeatures;
    }

    /**
     * Maps a position within a sequence onto a coarse location bin.
     * For {@code 0 <= n < n2} the result lies in {@code 0..11}.
     *
     * @param n  position within the sequence
     * @param n2 sequence length; a value of zero raises {@link ArithmeticException}
     * @return {@code 12 * n / n2}, using integer division
     */
    public int locationBin(int n, int n2) {
        final int scaledPosition = 12 * n;
        return scaledPosition / n2;
    }

    /**
     * Punctuation probes used by {@code addPunctuationFeatures}, compiled once.
     * Index-aligned with {@code PUNCTUATION_FEATURE_NAMES}.
     */
    private static final Pattern[] PUNCTUATION_PATTERNS = {
        Pattern.compile("\\p{Pi}"),
        Pattern.compile("\\p{Pf}"),
        Pattern.compile("\\p{Pd}\\p{Pd}"),
        Pattern.compile(",$"),
        Pattern.compile(":$"),
        Pattern.compile(";$"),
        Pattern.compile("(\\.$|\\p{Pf}$)"),
        Pattern.compile("\\p{Ps}.*\\p{Pe}"),
    };

    /** Feature key emitted when the pattern at the same index is found in the token. */
    private static final String[] PUNCTUATION_FEATURE_NAMES = {
        "%pInitialQuote",
        "%pEndQuote",
        "%pDoubleDash",
        "%pComma",
        "%pColon",
        "%pSemicolon",
        "%pEnding",
        "%pPairedBraces",
    };

    /**
     * Adds binary punctuation features for a token.
     * Each pattern is searched for with {@code find()}, and a hit sets the matching feature to 1.0.
     *
     * @param string              the token to inspect
     * @param objectDoubleHashMap feature map to add to
     * @return the number of punctuation features that fired
     */
    public int addPunctuationFeatures(String string, ObjectDoubleHashMap<String> objectDoubleHashMap) {
        int fired = 0;
        for (int k = 0; k < PUNCTUATION_PATTERNS.length; ++k) {
            if (RegexWithTimeout.matcher(PUNCTUATION_PATTERNS[k], string).find()) {
                objectDoubleHashMap.put(PUNCTUATION_FEATURE_NAMES[k], 1.0);
                ++fired;
            }
        }
        return fired;
    }

    /** Whole token is a numeric range such as {@code 12-34}, optionally followed by one punctuation mark. */
    private static final Pattern NUMBER_RANGE_PATTERN = Pattern.compile("[0-9]+-[0-9]+\\p{P}?");
    /**
     * Whole token looks like a volume, e.g. {@code 7(12)}.
     * NOTE(review): only a single leading digit is accepted; confirm this is intended.
     */
    private static final Pattern NUMBER_VOLUME_PATTERN = Pattern.compile("[0-9](\\([0-9]+\\))?\\p{P}?");
    /** One or more digits, optionally followed by one punctuation mark. */
    private static final Pattern NUMBER_DIGITS_PATTERN = Pattern.compile("[0-9]+\\p{P}?");
    /** Digits followed by an English ordinal suffix. */
    private static final Pattern NUMBER_ORDINAL_PATTERN = Pattern.compile("[0-9]+(st|nd|rd|th)\\p{P}?");

    /**
     * Adds number-related features for a token: range, year, volume, digit count,
     * digit presence or absence, and ordinal.
     *
     * @param string              the token to inspect
     * @param objectDoubleHashMap feature map to add to
     * @return how many of the {@code %pRange} and {@code %pVolume} features fired;
     *         the other features are not counted
     */
    public int addNumberFeatures(String string, ObjectDoubleHashMap<String> objectDoubleHashMap) {
        int counted = 0;
        if (RegexWithTimeout.matcher(NUMBER_RANGE_PATTERN, string).matches()) {
            objectDoubleHashMap.put("%pRange", 1.0);
            ++counted;
        }
        if (RegexWithTimeout.matcher(yearPattern, string).find()) {
            objectDoubleHashMap.put("%hasYear", 1.0);
        }
        if (RegexWithTimeout.matcher(NUMBER_VOLUME_PATTERN, string).matches()) {
            objectDoubleHashMap.put("%pVolume", 1.0);
            ++counted;
        }
        if (RegexWithTimeout.matcher(NUMBER_DIGITS_PATTERN, string).matches()) {
            // Bucket by token length (including any trailing punctuation), capped at 5.
            int lengthBucket = Math.min(string.length(), 5);
            objectDoubleHashMap.put("%digs" + lengthBucket, 1.0);
        }
        // find() on the digits pattern succeeds whenever the token contains any digit.
        if (RegexWithTimeout.matcher(NUMBER_DIGITS_PATTERN, string).find()) {
            objectDoubleHashMap.put("%hasDigit", 1.0);
        } else {
            objectDoubleHashMap.put("%noDigit", 1.0);
        }
        if (RegexWithTimeout.matcher(NUMBER_ORDINAL_PATTERN, string).find()) {
            objectDoubleHashMap.put("%ordinal", 1.0);
        }
        return counted;
    }

    /**
     * Reports whether any token, after removing commas, is an editor marker:
     * {@code eds.}, {@code ed.}, {@code editor} or {@code editors} (case-insensitive).
     *
     * @param list tokens to scan
     * @return {@code true} if at least one token is an editor marker
     */
    public static boolean containsEditor(List<String> list) {
        for (String token : list) {
            final String bare = token.replace(",", "");
            final boolean isEditorMarker = bare.equalsIgnoreCase("eds.")
                || bare.equalsIgnoreCase("ed.")
                || bare.equalsIgnoreCase("editor")
                || bare.equalsIgnoreCase("editors");
            if (isEditorMarker) {
                return true;
            }
        }
        return false;
    }

    /**
     * Month names recognised by {@code isMonth}: full English names plus dotted abbreviations.
     * "May" has no abbreviated form, so it appears only once.
     */
    private static final List<String> MONTH_NAMES = Arrays.asList(
        "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December",
        "Jan.", "Feb.", "Mar.", "Apr.", "Jun.", "Jul.", "Aug.", "Sept.", "Oct.", "Nov.", "Dec.");

    /**
     * Reports whether a token is an English month name or dotted abbreviation.
     * Commas are removed before the comparison; matching is case-sensitive.
     *
     * @param string the token to test
     * @return {@code true} if the comma-stripped token is a known month
     */
    public static boolean isMonth(String string) {
        return MONTH_NAMES.contains(string.replace(",", ""));
    }

    /**
     * Adds lexical identity features for a token: the lower-cased token with
     * punctuation removed, its first (up to) four characters, and its last (up to) four characters.
     *
     * @param string              the token to describe
     * @param objectDoubleHashMap feature map to add to
     * @return always {@code true}
     */
    public static boolean addTokenFeatures(String string, ObjectDoubleHashMap<String> objectDoubleHashMap) {
        // Locale.ROOT keeps the feature key stable regardless of the JVM's default locale.
        final String lowered = string.toLowerCase(java.util.Locale.ROOT);
        objectDoubleHashMap.put("%rawnopunct=" + lowered.replaceAll("\\p{P}", ""), 1.0);

        final int length = string.length();
        final String prefix = string.substring(0, Math.min(length, 4));
        final String suffix = string.substring(Math.max(length - 4, 0));
        objectDoubleHashMap.put("%prefix=" + prefix, 1.0);
        objectDoubleHashMap.put("%suffix=" + suffix, 1.0);
        return true;
    }

    /**
     * Marks the tokens covered by a gazetteer span in the per-token feature maps.
     * The span covers indices from {@code loc.getOne()} up to, but not including, {@code loc.getTwo()}.
     * A single-token span gets {@code %gaz_W_<tag>}; otherwise the first token gets
     * {@code %gaz_B_<tag>}, interior tokens {@code %gaz_I_<tag>} and the last token {@code %gaz_E_<tag>}.
     *
     * @param list      per-token feature maps; each element is cast to {@code ObjectDoubleHashMap} so it can be written
     * @param labelSpan the span location and tag
     */
    public void addGazetteerSpan(List<ObjectDoubleMap<String>> list, ExtractedMetadata.LabelSpan labelSpan) {
        final int first = (Integer) labelSpan.loc.getOne();
        final int last = ((Integer) labelSpan.loc.getTwo()) - 1;

        if (first == last) {
            ((ObjectDoubleHashMap<String>) list.get(first)).put("%gaz_W_" + labelSpan.tag, 1.0);
            return;
        }

        ((ObjectDoubleHashMap<String>) list.get(first)).put("%gaz_B_" + labelSpan.tag, 1.0);
        for (int inner = first + 1; inner < last; ++inner) {
            ((ObjectDoubleHashMap<String>) list.get(inner)).put("%gaz_I_" + labelSpan.tag, 1.0);
        }
        ((ObjectDoubleHashMap<String>) list.get(last)).put("%gaz_E_" + labelSpan.tag, 1.0);
    }

    /**
     * Adds gazetteer features for every span the configured gazetteer finds in the tokens.
     * Does nothing when no gazetteer has been set.
     *
     * @param list  the tokens to look up
     * @param list2 per-token feature maps to add to
     */
    public void addGazetteerPredicates(List<String> list, List<ObjectDoubleMap<String>> list2) {
        if (this.gf == null) {
            return;
        }
        for (ExtractedMetadata.LabelSpan span : this.gf.getSpans(list)) {
            addGazetteerSpan(list2, span);
        }
    }

    /**
     * Builds one feature map per token.
     *
     * <p>Each token gets case-mask, stop-word, length, location-bin, number, token and
     * punctuation features. When language-model statistics are available, it also gets
     * smoothed frequency features and word-embedding components. If any token in the
     * sequence is an editor marker, every token gets {@code %editor}. Gazetteer
     * features are added last, over the whole sequence.
     *
     * @param list the tokens of one reference string
     * @return a list of feature maps, index-aligned with {@code list}
     */
    public List<ObjectDoubleMap<String>> nodePredicates(List<String> list) {
        final List<ObjectDoubleMap<String>> perToken = new ArrayList<ObjectDoubleMap<String>>();
        final boolean hasEditor = ReferencesPredicateExtractor.containsEditor(list);

        for (int i = 0; i < list.size(); ++i) {
            final String token = list.get(i);
            final ObjectDoubleHashMap<String> feats = new ObjectDoubleHashMap<String>();

            PDFPredicateExtractor.getCaseMasks(token).forEach(mask -> feats.put(mask, 1.0));
            if (PDFPredicateExtractor.isStopWord(token)) {
                feats.put("%stop", 1.0);
                if (feats.containsKey("%XXX") || feats.containsKey("%Xxx")) {
                    feats.put("%startCapStop", 1.0);
                }
            } else if (feats.containsKey("%xxx")) {
                feats.put("%uncapns", 1.0);
            }

            // Token length capped at 10 and scaled to [0, 1], plus a squared term centred on 0.5.
            final double adjLen = Math.min((double) token.length(), 10.0) / 10.0;
            final double adjLenSq = (adjLen - 0.5) * (adjLen - 0.5);
            feats.put("%adjLen", adjLen);
            feats.put("%adjLenSq", adjLenSq);

            if (this.lmFeats != null) {
                // Computed once per token; presumably a pure normalisation helper.
                final String authorToken = Parser.fixupAuthors(token);
                feats.put("%tfreq", PDFPredicateExtractor.smoothFreq(token, this.lmFeats.titleBow));
                feats.put("%tffreq", PDFPredicateExtractor.smoothFreq(token, this.lmFeats.titleFirstBow));
                feats.put("%tlfreq", PDFPredicateExtractor.smoothFreq(token, this.lmFeats.titleLastBow));
                feats.put("%afreq", PDFPredicateExtractor.smoothFreq(authorToken, this.lmFeats.authorBow));
                feats.put("%affreq", PDFPredicateExtractor.smoothFreq(authorToken, this.lmFeats.authorFirstBow));
                feats.put("%alfreq", PDFPredicateExtractor.smoothFreq(authorToken, this.lmFeats.authorLastBow));
                feats.put("%vfreq", PDFPredicateExtractor.smoothFreq(token, this.lmFeats.venueBow));
                feats.put("%vffreq", PDFPredicateExtractor.smoothFreq(token, this.lmFeats.venueFirstBow));
                feats.put("%vlfreq", PDFPredicateExtractor.smoothFreq(token, this.lmFeats.venueLastBow));
                feats.put("%bfreq", PDFPredicateExtractor.smoothFreq(token, this.lmFeats.backgroundBow));
                feats.put("%bafreq", PDFPredicateExtractor.smoothFreq(authorToken, this.lmFeats.backgroundBow));
                try {
                    int dimension = 0;
                    for (Double component : this.word2vecSearcher.getRawVector(token)) {
                        final double value = component;
                        feats.put(PDFPredicateExtractor.wordEmbeddingFeatureNames[dimension], value);
                        ++dimension;
                    }
                } catch (Searcher.UnknownWordException ignored) {
                    // Deliberate: a token with no word vector simply gets no embedding features.
                }
            }

            feats.put("%locbin" + this.locationBin(i, list.size()), 1.0);
            this.addNumberFeatures(token, feats);
            if (hasEditor) {
                feats.put("%editor", 1.0);
            }
            ReferencesPredicateExtractor.addTokenFeatures(token, feats);
            this.addPunctuationFeatures(token, feats);
            perToken.add(feats);
        }

        this.addGazetteerPredicates(list, perToken);
        return perToken;
    }

    /**
     * Builds the edge feature maps: one per adjacent token pair, each holding only
     * the constant feature {@code "B"} with value 1.0.
     *
     * @param list the tokens of one reference string
     * @return {@code list.size() - 1} feature maps, or an empty list when there are fewer than two tokens
     */
    public List<ObjectDoubleMap<String>> edgePredicates(List<String> list) {
        final ArrayList<ObjectDoubleMap<String>> edges = new ArrayList<ObjectDoubleMap<String>>();
        final int edgeCount = list.size() - 1;
        for (int edge = 0; edge < edgeCount; ++edge) {
            final ObjectDoubleHashMap<String> bias = new ObjectDoubleHashMap<String>();
            bias.put("B", 1.0);
            edges.add(bias);
        }
        return edges;
    }

    /**
     * Sets the gazetteer used to add gazetteer span features.
     *
     * @param gazetteerFeatures the gazetteer to use; {@code null} disables gazetteer features
     */
    public void setGf(GazetteerFeatures gazetteerFeatures) {
        gf = gazetteerFeatures;
    }
}

