/*
 * Decompiled with CFR 0.152.
 */
package org.allenai.scienceparse;

import com.gs.collections.api.tuple.Pair;
import com.gs.collections.impl.set.mutable.primitive.LongHashSet;
import com.gs.collections.impl.tuple.Tuples;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import org.allenai.scienceparse.ExtractedMetadata;
import org.allenai.scienceparse.StringLongHash;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Membership lookups against a collection of gazetteers (word/phrase lists).
 *
 * <p>Each file in a directory is read as one gazetteer. Every accepted line is
 * normalized (lower-cased, punctuation runs replaced by a space, repeated spaces
 * collapsed, trimmed) and stored only as a 64-bit hash from {@link StringLongHash},
 * so lookups compare hashes rather than the original strings.
 */
public class GazetteerFeatures
implements Serializable {
    private static final Logger log = LoggerFactory.getLogger(GazetteerFeatures.class);
    private static final long serialVersionUID = 1L;

    /** Length limit used both when filtering gazetteer lines and when enumerating token spans. */
    private static final int MAXLENGTH = 7;

    /** One or more consecutive punctuation characters. */
    private static final Pattern PUNCTUATION_RUN = Pattern.compile("\\p{P}+");

    /** Two or more consecutive space characters. */
    private static final Pattern SPACE_RUN = Pattern.compile("  +");

    /** Hashes of the normalized entries, one set per gazetteer file. */
    private LongHashSet[] hashSets;

    /** File name of each gazetteer, index-aligned with {@link #hashSets}. */
    private String[] hashNames;

    /**
     * Loads every entry of the given directory as a gazetteer.
     *
     * <p>Gazetteer indices follow the order returned by {@link File#listFiles()}.
     * NOTE(review): that order is not guaranteed by the JDK, so indices may differ
     * between runs or platforms — confirm callers resolve indices via
     * {@link #gazetteerNumber(String)} rather than hard-coding them.
     *
     * @param inDir path of the directory holding the gazetteer files
     * @throws IOException if the directory cannot be listed or a file cannot be read
     */
    public GazetteerFeatures(String inDir) throws IOException {
        File[] gazetteerFiles = new File(inDir).listFiles();
        if (gazetteerFiles == null) {
            // listFiles() yields null (not an empty array) for a missing path or a non-directory.
            throw new IOException("Cannot list gazetteer directory: " + inDir);
        }
        this.hashSets = new LongHashSet[gazetteerFiles.length];
        this.hashNames = new String[gazetteerFiles.length];
        for (int i = 0; i < gazetteerFiles.length; ++i) {
            this.hashSets[i] = this.readGazetteer(gazetteerFiles[i]);
            this.hashNames[i] = gazetteerFiles[i].getName();
        }
    }

    /**
     * Normalizes a string for hashing: lower-case, punctuation runs become a single
     * space, runs of spaces collapse to one, and surrounding whitespace is trimmed.
     */
    private String t(String s) {
        // Locale.ROOT keeps the hashes independent of the JVM's default locale.
        String lowered = s.toLowerCase(Locale.ROOT);
        String noPunctuation = PUNCTUATION_RUN.matcher(lowered).replaceAll(" ");
        return SPACE_RUN.matcher(noPunctuation).replaceAll(" ").trim();
    }

    /**
     * Tells whether a string is short enough to be kept as a gazetteer entry.
     *
     * @param s the raw string; it is trimmed before counting
     * @return {@code false} if the trimmed string contains {@code MAXLENGTH} or more
     *         space characters, {@code true} otherwise
     */
    public static boolean withinLength(String s) {
        String trimmed = s.trim();
        int spaces = 0;
        for (int pos = trimmed.indexOf(' '); pos >= 0; pos = trimmed.indexOf(' ', pos + 1)) {
            if (++spaces == MAXLENGTH) {
                return false;
            }
        }
        return true;
    }

    /**
     * Reads one gazetteer file (UTF-8) into a set of hashes.
     *
     * <p>Lines starting with {@code #}, blank lines, and lines rejected by
     * {@link #withinLength(String)} are skipped; every other line is normalized and hashed.
     *
     * @param file the gazetteer file
     * @return hashes of the accepted, normalized lines
     * @throws IOException if the file cannot be opened or read
     */
    private LongHashSet readGazetteer(File file) throws IOException {
        LongHashSet hashes = new LongHashSet();
        // try-with-resources closes the reader even when readLine() fails part-way through.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader((InputStream) new FileInputStream(file), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                boolean isComment = line.startsWith("#");
                boolean isBlank = line.trim().length() == 0;
                if (isComment || isBlank || !GazetteerFeatures.withinLength(line)) {
                    continue;
                }
                hashes.add(StringLongHash.hash(this.t(line)));
            }
        }
        return hashes;
    }

    /** @return the number of gazetteers loaded */
    public int size() {
        return this.hashSets.length;
    }

    /**
     * @param gazNum gazetteer index
     * @return the number of distinct hashes stored for that gazetteer
     */
    public int sizeOfSet(int gazNum) {
        return this.hashSets[gazNum].size();
    }

    /**
     * Tests whether the normalized form of a string hashes to an entry of one gazetteer.
     *
     * @param query the string to look up
     * @param gazNum gazetteer index
     * @return {@code true} if the hash of the normalized query is in that gazetteer's set
     */
    public boolean inSet(String query, int gazNum) {
        long hash = StringLongHash.hash(this.t(query));
        return this.hashSets[gazNum].contains(hash);
    }

    /**
     * Tests the normalized form of a string against every gazetteer.
     *
     * @param query the string to look up
     * @return an array, indexed by gazetteer number, whose element is {@code true}
     *         where the hash of the normalized query is present
     */
    public boolean[] inSet(String query) {
        long hash = StringLongHash.hash(this.t(query));
        // A new boolean[] is already all-false, so only hits need to be written.
        boolean[] hits = new boolean[this.hashSets.length];
        for (int i = 0; i < this.hashSets.length; ++i) {
            hits[i] = this.hashSets[i].contains(hash);
        }
        return hits;
    }

    /**
     * Finds the index of the gazetteer that was loaded from the given file name.
     *
     * @param name gazetteer file name
     * @return the gazetteer index, or {@code -1} if no gazetteer has that name
     */
    public int gazetteerNumber(String name) {
        // Strictly less-than: indexing hashNames[length] would throw instead of reaching -1.
        for (int i = 0; i < this.hashNames.length; ++i) {
            if (this.hashNames[i].equals(name)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Joins a run of tokens with single spaces.
     *
     * @param ws the token list
     * @param start index of the first token
     * @param length number of tokens to join; {@code 0} yields the empty string
     * @return the joined tokens, trimmed
     */
    public String getStringSpan(List<String> ws, int start, int length) {
        StringBuilder joined = new StringBuilder();
        int end = start + length;
        for (int i = start; i < end; ++i) {
            joined.append(ws.get(i)).append(' ');
        }
        return joined.toString().trim();
    }

    /**
     * Finds all token spans that match an entry of one gazetteer.
     *
     * <p>For each start index, spans of length {@code 0} up to
     * {@code min(MAXLENGTH, remaining + 1) - 1} tokens are probed; the empty span is
     * therefore also probed. NOTE(review): spans probed here have at most
     * {@code MAXLENGTH - 1} tokens, while {@link #withinLength(String)} admits entries
     * with up to {@code MAXLENGTH} single-space-separated tokens — confirm whether the
     * difference is intended.
     *
     * @param ws the token list
     * @param gn gazetteer index
     * @return one label span per match, labelled with the gazetteer's file name and
     *         carrying the pair (start, start + length)
     */
    public List<ExtractedMetadata.LabelSpan> getSpansForGaz(List<String> ws, int gn) {
        List<ExtractedMetadata.LabelSpan> spans = new ArrayList<ExtractedMetadata.LabelSpan>();
        int tokenCount = ws.size();
        for (int start = 0; start < tokenCount; ++start) {
            int lengthLimit = Math.min(MAXLENGTH, tokenCount + 1 - start);
            for (int length = 0; length < lengthLimit; ++length) {
                String candidate = this.getStringSpan(ws, start, length);
                if (!this.inSet(candidate, gn)) {
                    continue;
                }
                Pair<Integer, Integer> bounds = Tuples.<Integer, Integer>pair(start, start + length);
                spans.add(new ExtractedMetadata.LabelSpan(this.hashNames[gn], bounds));
            }
        }
        return spans;
    }

    /**
     * Finds all token spans that match an entry of any gazetteer.
     *
     * @param ws the token list
     * @return the matches of every gazetteer, concatenated in gazetteer-index order
     */
    public List<ExtractedMetadata.LabelSpan> getSpans(List<String> ws) {
        List<ExtractedMetadata.LabelSpan> spans = new ArrayList<ExtractedMetadata.LabelSpan>();
        for (int gn = 0; gn < this.hashSets.length; ++gn) {
            spans.addAll(this.getSpansForGaz(ws, gn));
        }
        return spans;
    }
}

