/*
 * Decompiled with CFR 0.152.
 */
package epic.dense;

import breeze.linalg.Counter;
import breeze.linalg.Counter$;
import breeze.storage.Zero;
import epic.dense.Word2VecUtils;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.Serializable;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import scala.Array$;
import scala.Function0;
import scala.Function1;
import scala.Function2;
import scala.Predef$;
import scala.collection.GenTraversableOnce;
import scala.collection.IterableLike;
import scala.collection.Iterator;
import scala.collection.MapLike;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableOnce;
import scala.collection.immutable.IndexedSeq$;
import scala.collection.immutable.Range;
import scala.collection.immutable.Set;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayBuilder;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.HashMap;
import scala.io.Codec$;
import scala.io.Source$;
import scala.math.Numeric;
import scala.math.Ordering;
import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.runtime.RichInt$;
import scala.util.Random;

public final class Word2Vec$ {
    /** Singleton instance of this object; assigned by the private constructor. */
    public static Word2Vec$ MODULE$;
    /** Compiled hyphenated-word regex exposed through {@code hyphenPattern()}; assigned by the private constructor. */
    private final Pattern hyphenPattern;

    // Create the singleton when the class is initialized; the constructor stores it in MODULE$.
    static {
        new Word2Vec$();
    }

    public HashMap<String, float[]> smartLoadVectorsForVocabulary(Seq<String> word2vecPaths, Set<String> voc, Counter<String, Object> vocCounts, int maxVectorLen, boolean inputVectorBias, boolean randomizeUnks) {
        Seq vectorsEachSource = (Seq)word2vecPaths.map((Function1 & Serializable & scala.Serializable)word2vecPath -> {
            if (word2vecPath.endsWith("bin")) {
                return this.readWord2Vec((String)word2vecPath, voc, false);
            }
            if (word2vecPath.endsWith(".txt")) {
                return this.readBansalEmbeddings((String)word2vecPath, voc, false);
            }
            throw new RuntimeException("Unrecognized vectors: " + word2vecPath);
        }, Seq$.MODULE$.canBuildFrom());
        Seq dimsEachSource = (Seq)vectorsEachSource.map((Function1 & Serializable & scala.Serializable)x$1 -> BoxesRunTime.boxToInteger((int)Word2Vec$.$anonfun$smartLoadVectorsForVocabulary$2(x$1)), Seq$.MODULE$.canBuildFrom());
        int finalVectorDim = Math.min(maxVectorLen, BoxesRunTime.unboxToInt((Object)dimsEachSource.sum((Numeric)Numeric.IntIsIntegral$.MODULE$)) + (inputVectorBias ? 1 : 0));
        HashMap finalVectors = new HashMap();
        Random rng = new Random(0);
        Counter mostCommonMisses = Counter$.MODULE$.apply((Zero)Zero.DoubleZero$.MODULE$);
        IntRef numRand = IntRef.create((int)0);
        voc.foreach((Function1 & Serializable & scala.Serializable)word -> {
            void require_requirement;
            boolean bl;
            float[] fArray;
            if (BoxesRunTime.unboxToBoolean((Object)((TraversableOnce)vectorsEachSource.map((Function1 & Serializable & scala.Serializable)x$2 -> BoxesRunTime.boxToBoolean((boolean)Word2Vec$.$anonfun$smartLoadVectorsForVocabulary$4(word, x$2)), Seq$.MODULE$.canBuildFrom())).reduce((Function2 & Serializable & scala.Serializable)(x$3, x$4) -> BoxesRunTime.boxToBoolean((boolean)Word2Vec$.$anonfun$smartLoadVectorsForVocabulary$5(BoxesRunTime.unboxToBoolean((Object)x$3), BoxesRunTime.unboxToBoolean((Object)x$4)))))) {
                float[] finalVector = (float[])((TraversableOnce)vectorsEachSource.indices().map((Function1 & Serializable & scala.Serializable)i -> Word2Vec$.$anonfun$smartLoadVectorsForVocabulary$6(vectorsEachSource, dimsEachSource, word, BoxesRunTime.unboxToInt((Object)i)), IndexedSeq$.MODULE$.canBuildFrom())).reduce((Function2 & Serializable & scala.Serializable)(x$5, x$6) -> (float[])new ArrayOps.ofFloat(Predef$.MODULE$.floatArrayOps(x$5)).$plus$plus((GenTraversableOnce)new ArrayOps.ofFloat(Predef$.MODULE$.floatArrayOps(x$6)), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.Float())));
                if (inputVectorBias) {
                    finalVector = (float[])new ArrayOps.ofFloat(Predef$.MODULE$.floatArrayOps(finalVector)).$plus$plus((GenTraversableOnce)new ArrayOps.ofFloat(Predef$.MODULE$.floatArrayOps(new float[]{1.0f})), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.Float()));
                }
                fArray = finalVector;
            } else {
                mostCommonMisses.update(word, vocCounts.apply(word));
                ++numRand$1.elem;
                if (randomizeUnks) {
                    void tabulate_evidence$14;
                    Array$ array$ = Array$.MODULE$;
                    ClassTag classTag = ClassTag$.MODULE$.Float();
                    if (array$ == null) {
                        throw null;
                    }
                    ArrayBuilder tabulate_b = array$.newBuilder((ClassTag)tabulate_evidence$14);
                    tabulate_b.sizeHint(finalVectorDim);
                    for (int tabulate_i = 0; tabulate_i < finalVectorDim; ++tabulate_i) {
                        tabulate_b.$plus$eq((Object)BoxesRunTime.boxToFloat((float)Word2Vec$.$anonfun$smartLoadVectorsForVocabulary$10(inputVectorBias, finalVectorDim, rng, tabulate_i)));
                    }
                    fArray = (float[])tabulate_b.result();
                } else {
                    void tabulate_evidence$14;
                    Array$ array$ = Array$.MODULE$;
                    ClassTag classTag = ClassTag$.MODULE$.Float();
                    if (array$ == null) {
                        throw null;
                    }
                    ArrayBuilder tabulate_b = array$.newBuilder((ClassTag)tabulate_evidence$14);
                    tabulate_b.sizeHint(finalVectorDim);
                    for (int tabulate_i = 0; tabulate_i < finalVectorDim; ++tabulate_i) {
                        tabulate_b.$plus$eq((Object)BoxesRunTime.boxToFloat((float)Word2Vec$.$anonfun$smartLoadVectorsForVocabulary$11(inputVectorBias, finalVectorDim, tabulate_i)));
                    }
                    fArray = (float[])tabulate_b.result();
                }
            }
            float[] vector = fArray;
            float[] vectorTrimmed = vector.length > finalVectorDim ? (float[])new ArrayOps.ofFloat(Predef$.MODULE$.floatArrayOps(vector)).slice(0, finalVectorDim) : vector;
            boolean bl2 = bl = vectorTrimmed.length == finalVectorDim;
            if (Predef$.MODULE$ == null) {
                throw null;
            }
            if (require_requirement == false) {
                throw new IllegalArgumentException("requirement failed: " + Word2Vec$.$anonfun$smartLoadVectorsForVocabulary$12(finalVectorDim, vector, vectorTrimmed));
            }
            return finalVectors.put(word, (Object)vectorTrimmed);
        });
        Predef$.MODULE$.println((Object)("Read embeddings for " + voc.size() + " words from " + word2vecPaths.size() + " sources, " + "total embedding size = " + finalVectorDim + ", " + numRand.elem + " present in no source"));
        Predef$.MODULE$.println((Object)("Fifty most common misses: " + mostCommonMisses.argtopk(50, (Ordering)Ordering.Double$.MODULE$).map((Function1 & Serializable & scala.Serializable)word -> word + ": " + mostCommonMisses.apply(word), IndexedSeq$.MODULE$.canBuildFrom())));
        return finalVectors;
    }

    /**
     * Default value for the third parameter ({@code vocCounts}) of
     * {@code smartLoadVectorsForVocabulary}: a new counter obtained from {@code Counter$.apply}
     * with the double {@code Zero} instance.
     */
    public Counter<String, Object> smartLoadVectorsForVocabulary$default$3() {
        Zero doubleZero = (Zero)Zero.DoubleZero$.MODULE$;
        return Counter$.MODULE$.apply(doubleZero);
    }

    /**
     * Default value for the fourth parameter ({@code maxVectorLen}) of
     * {@code smartLoadVectorsForVocabulary}: the largest {@code int}, i.e. no effective cap.
     */
    public int smartLoadVectorsForVocabulary$default$4() {
        final int noCap = Integer.MAX_VALUE;
        return noCap;
    }

    /**
     * Default value for the sixth parameter ({@code randomizeUnks}) of
     * {@code smartLoadVectorsForVocabulary}: {@code true}.
     */
    public boolean smartLoadVectorsForVocabulary$default$6() {
        final boolean randomizeByDefault = true;
        return randomizeByDefault;
    }

    public HashMap<String, float[]> makeRandomVectorsForVocabulary(Set<String> voc, int dim, boolean inputVectorBias) {
        HashMap finalVectors = new HashMap();
        int finalVectorDim = dim + (inputVectorBias ? 1 : 0);
        Random rng = new Random(0);
        voc.foreach((Function1 & Serializable & scala.Serializable)word -> {
            void tabulate_evidence$14;
            Array$ array$ = Array$.MODULE$;
            ClassTag classTag = ClassTag$.MODULE$.Float();
            if (array$ == null) {
                throw null;
            }
            ArrayBuilder tabulate_b = array$.newBuilder((ClassTag)tabulate_evidence$14);
            tabulate_b.sizeHint(finalVectorDim);
            for (int tabulate_i = 0; tabulate_i < finalVectorDim; ++tabulate_i) {
                tabulate_b.$plus$eq((Object)BoxesRunTime.boxToFloat((float)Word2Vec$.$anonfun$makeRandomVectorsForVocabulary$2(inputVectorBias, finalVectorDim, rng, tabulate_i)));
            }
            float[] vec = (float[])tabulate_b.result();
            return finalVectors.put(word, (Object)vec);
        });
        return finalVectors;
    }

    /**
     * Reads word2vec vectors for {@code voc} from {@code word2vecPath} and then fills in vectors
     * for the vocabulary words the file did not provide.
     *
     * @param word2vecPath    path handed to {@code readWord2Vec}
     * @param voc             vocabulary to cover
     * @param inputVectorBias forwarded to both the reader and the augmentation step
     * @return the map returned by {@code augmentVectorsToCompleteVocabulary}
     * @throws RuntimeException if the reader returned an empty map
     */
    public HashMap<String, float[]> loadVectorsForVocabulary(String word2vecPath, Set<String> voc, boolean inputVectorBias) {
        HashMap<String, float[]> loaded = this.readWord2Vec(word2vecPath, voc, inputVectorBias);
        boolean anythingLoaded = !loaded.isEmpty();
        if (anythingLoaded) {
            return this.augmentVectorsToCompleteVocabulary(loaded, voc, inputVectorBias);
        }
        throw new RuntimeException("No word2vec vectors loaded");
    }

    /**
     * Reads text-format ("Bansal") embeddings for {@code voc} from {@code word2vecPath} and then
     * fills in vectors for the vocabulary words the file did not provide.
     *
     * @param word2vecPath    path handed to {@code readBansalEmbeddings}
     * @param voc             vocabulary to cover
     * @param inputVectorBias forwarded to both the reader and the augmentation step
     * @return the map returned by {@code augmentVectorsToCompleteVocabulary}
     * @throws RuntimeException if the reader returned an empty map
     */
    public HashMap<String, float[]> loadBansalVectorsForVocabulary(String word2vecPath, Set<String> voc, boolean inputVectorBias) {
        HashMap<String, float[]> loaded = this.readBansalEmbeddings(word2vecPath, voc, inputVectorBias);
        boolean anythingLoaded = !loaded.isEmpty();
        if (anythingLoaded) {
            return this.augmentVectorsToCompleteVocabulary(loaded, voc, inputVectorBias);
        }
        throw new RuntimeException("No Bansal vectors loaded");
    }

    private HashMap<String, float[]> augmentVectorsToCompleteVocabulary(HashMap<String, float[]> word2vecMap, Set<String> voc, boolean inputVectorBias) {
        int word2vecDim = ((float[])word2vecMap.values().head()).length;
        Random rng = new Random(0);
        ((IterableLike)voc.$minus$minus((GenTraversableOnce)word2vecMap.keySet())).foreach((Function1 & Serializable & scala.Serializable)unkWord -> {
            void tabulate_evidence$14;
            Array$ array$ = Array$.MODULE$;
            ClassTag classTag = ClassTag$.MODULE$.Float();
            if (array$ == null) {
                throw null;
            }
            ArrayBuilder tabulate_b = array$.newBuilder((ClassTag)tabulate_evidence$14);
            tabulate_b.sizeHint(word2vecDim);
            for (int tabulate_i = 0; tabulate_i < word2vecDim; ++tabulate_i) {
                tabulate_b.$plus$eq((Object)BoxesRunTime.boxToFloat((float)Word2Vec$.$anonfun$augmentVectorsToCompleteVocabulary$2(inputVectorBias, word2vecDim, rng, tabulate_i)));
            }
            return word2vecMap.put(unkWord, tabulate_b.result());
        });
        return word2vecMap;
    }

    /**
     * Reads a binary word2vec file and returns the vectors of the requested words.
     *
     * <p>The file starts with two strings read through {@code Word2VecUtils.readString}: the
     * number of entries and the vector dimension. Each entry is then decoded by
     * {@code $anonfun$readWord2Vec$1}, which keeps the vector when {@code words} is empty or
     * contains the entry's word. A summary line is printed before returning.
     *
     * <p>The input stream is closed before the method returns, including when reading fails.
     *
     * @param word2VecPath    path of the binary word2vec file
     * @param words           words to keep; an empty set keeps every word in the file
     * @param inputVectorBias whether each kept vector gets an extra trailing {@code 1.0f}
     * @return a map from each kept word to its vector
     */
    public HashMap<String, float[]> readWord2Vec(String word2VecPath, Set<String> words, boolean inputVectorBias) {
        HashMap word2Vec = new HashMap();
        // try-with-resources closes the whole stream chain; the original never closed the file.
        try (DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(word2VecPath)))) {
            int vocSize = new StringOps(Word2VecUtils.readString(dis)).toInt();
            int dim = new StringOps(Word2VecUtils.readString(dis)).toInt();
            for (int entry = 0; entry < vocSize; ++entry) {
                Word2Vec$.$anonfun$readWord2Vec$1(words, inputVectorBias, dis, word2Vec, dim, entry);
            }
        }
        Predef$.MODULE$.println((Object)("Loaded " + word2Vec.size() + " word2vec representations out of " + words.size() + " attempted words"));
        return word2Vec;
    }

    /** Returns the compiled hyphenated-word pattern held by this object. */
    public Pattern hyphenPattern() {
        final Pattern compiled = this.hyphenPattern;
        return compiled;
    }

    /*
     * WARNING - void declaration
     */
    public String convertWord(String str, boolean lowercase) {
        void var3_3;
        String strRep = str.replace("-LRB-", "(");
        strRep = strRep.replace("-RRB-", ")");
        strRep = strRep.replace("-LSB-", "[");
        strRep = strRep.replace("-RSB-", "]");
        strRep = strRep.replace("-LCB-", "{");
        strRep = strRep.replace("-RCB-", "}");
        strRep = strRep.replaceAll("^-?[0-9,.]{2,15}$", "fifteen");
        Matcher m = this.hyphenPattern().matcher(str);
        String string = strRep = m.find() ? m.group(2) : strRep;
        if (lowercase) {
            strRep = strRep.toLowerCase();
        }
        return var3_3;
    }

    /** Default value for the second parameter ({@code lowercase}) of {@code convertWord}: {@code false}. */
    public boolean convertWord$default$2() {
        final boolean lowercaseByDefault = false;
        return lowercaseByDefault;
    }

    /*
     * WARNING - void declaration
     */
    public HashMap<String, float[]> readBansalEmbeddings(String embeddingsPath, Set<String> words, boolean inputVectorBias) {
        Iterator inFile = Source$.MODULE$.fromFile(new File(embeddingsPath), Codec$.MODULE$.fallbackSystemCodec()).getLines();
        HashMap word2Vec = new HashMap();
        boolean firstLine = true;
        while (inFile.hasNext()) {
            String line = (String)inFile.next();
            if (firstLine) {
                if (new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])line.split("\\s+"))).size() == 2) {
                    Predef$.MODULE$.println((Object)("Skipping first line: " + line));
                } else {
                    Predef$.MODULE$.println((Object)("Not skipping first line: " + line));
                    firstLine = false;
                }
            }
            if (!firstLine) {
                if (line.contains("\t")) {
                    String word = line.substring(0, line.indexOf("\t"));
                    if (words.isEmpty() || words.contains((Object)word)) {
                        int tabulate_n;
                        void tabulate_evidence$14;
                        String[] entries = line.substring(line.indexOf("\t") + 1).split(" ");
                        Array$ array$ = Array$.MODULE$;
                        ClassTag classTag = ClassTag$.MODULE$.Float();
                        int n = inputVectorBias ? new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])entries)).size() + 1 : new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])entries)).size();
                        if (array$ == null) {
                            throw null;
                        }
                        ArrayBuilder tabulate_b = array$.newBuilder((ClassTag)tabulate_evidence$14);
                        tabulate_b.sizeHint(tabulate_n);
                        for (int tabulate_i = 0; tabulate_i < tabulate_n; ++tabulate_i) {
                            tabulate_b.$plus$eq((Object)BoxesRunTime.boxToFloat((float)Word2Vec$.$anonfun$readBansalEmbeddings$1(inputVectorBias, entries, tabulate_i)));
                        }
                        float[] arr = (float[])tabulate_b.result();
                        word2Vec.put((Object)word, (Object)arr);
                    }
                } else {
                    String word = line.substring(0, line.indexOf(" "));
                    if (words.isEmpty() || words.contains((Object)word)) {
                        int tabulate_n;
                        void tabulate_evidence$14;
                        String[] entries = line.substring(line.indexOf(" ") + 1).split(" ");
                        Array$ array$ = Array$.MODULE$;
                        ClassTag classTag = ClassTag$.MODULE$.Float();
                        int n = inputVectorBias ? new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])entries)).size() + 1 : new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])entries)).size();
                        if (array$ == null) {
                            throw null;
                        }
                        ArrayBuilder tabulate_b = array$.newBuilder((ClassTag)tabulate_evidence$14);
                        tabulate_b.sizeHint(tabulate_n);
                        for (int tabulate_i = 0; tabulate_i < tabulate_n; ++tabulate_i) {
                            tabulate_b.$plus$eq((Object)BoxesRunTime.boxToFloat((float)Word2Vec$.$anonfun$readBansalEmbeddings$2(inputVectorBias, entries, tabulate_i)));
                        }
                        float[] arr = (float[])tabulate_b.result();
                        word2Vec.put((Object)word, (Object)arr);
                    }
                }
            }
            firstLine = false;
        }
        Predef$.MODULE$.println((Object)("Loaded " + word2Vec.size() + " Bansal representations out of " + words.size() + " attempted words"));
        return word2Vec;
    }

    /** Returns the length of the first vector among the values of {@code x$1}. */
    public static final /* synthetic */ int $anonfun$smartLoadVectorsForVocabulary$2(HashMap x$1) {
        float[] firstVector = (float[])x$1.values().head();
        return firstVector.length;
    }

    /** Tells whether {@code word$1} is one of the keys of the map {@code x$2}. */
    public static final /* synthetic */ boolean $anonfun$smartLoadVectorsForVocabulary$4(String word$1, HashMap x$2) {
        boolean present = x$2.keySet().contains((Object)word$1);
        return present;
    }

    /** Logical OR of the two flags. */
    public static final /* synthetic */ boolean $anonfun$smartLoadVectorsForVocabulary$5(boolean x$3, boolean x$4) {
        if (x$3) {
            return true;
        }
        return x$4;
    }

    /** Element generator for an all-zero vector: returns {@code 0.0f} regardless of the index {@code j}. */
    public static final /* synthetic */ float $anonfun$smartLoadVectorsForVocabulary$8(int j) {
        final float zero = 0.0f;
        return zero;
    }

    public static final /* synthetic */ float[] $anonfun$smartLoadVectorsForVocabulary$6(Seq vectorsEachSource$1, Seq dimsEachSource$1, String word$1, int i) {
        return (float[])((MapLike)vectorsEachSource$1.apply(i)).getOrElse((Object)word$1, (Function0 & Serializable & scala.Serializable)() -> {
            int tabulate_n;
            void tabulate_evidence$14;
            Array$ array$ = Array$.MODULE$;
            ClassTag classTag = ClassTag$.MODULE$.Float();
            int n = BoxesRunTime.unboxToInt((Object)dimsEachSource$1.apply(i));
            if (array$ == null) {
                throw null;
            }
            ArrayBuilder tabulate_b = array$.newBuilder((ClassTag)tabulate_evidence$14);
            tabulate_b.sizeHint(tabulate_n);
            for (int tabulate_i = 0; tabulate_i < tabulate_n; ++tabulate_i) {
                tabulate_b.$plus$eq((Object)BoxesRunTime.boxToFloat((float)Word2Vec$.$anonfun$smartLoadVectorsForVocabulary$8(tabulate_i)));
            }
            return (float[])tabulate_b.result();
        });
    }

    /**
     * Element generator for a random unknown-word vector: {@code 1.0f} in the last position when
     * a bias is requested, otherwise {@code (nextDouble() - 0.5) * 0.5} drawn from {@code rng$1}.
     */
    public static final /* synthetic */ float $anonfun$smartLoadVectorsForVocabulary$10(boolean inputVectorBias$1, int finalVectorDim$1, Random rng$1, int i) {
        boolean isBiasSlot = inputVectorBias$1 && i == finalVectorDim$1 - 1;
        // The generator is only advanced for non-bias positions.
        return isBiasSlot ? 1.0f : (float)((rng$1.nextDouble() - 0.5) * 0.5);
    }

    /**
     * Element generator for a zero unknown-word vector: {@code 1.0f} in the last position when a
     * bias is requested, {@code 0.0f} everywhere else.
     */
    public static final /* synthetic */ float $anonfun$smartLoadVectorsForVocabulary$11(boolean inputVectorBias$1, int finalVectorDim$1, int i) {
        boolean isBiasSlot = inputVectorBias$1 && i == finalVectorDim$1 - 1;
        return isBiasSlot ? 1.0f : 0.0f;
    }

    /** Builds the failure message reporting the expected dimension and the raw and trimmed vector lengths. */
    public static final /* synthetic */ String $anonfun$smartLoadVectorsForVocabulary$12(int finalVectorDim$1, float[] vector$1, float[] vectorTrimmed$1) {
        StringBuilder message = new StringBuilder();
        message.append("Mismatched sizes, expected dimension ").append(finalVectorDim$1);
        message.append(" but got ").append(vector$1.length);
        message.append(" clipped to ").append(vectorTrimmed$1.length);
        return message.toString();
    }

    /**
     * Element generator for {@code makeRandomVectorsForVocabulary}: {@code 1.0f} in the last
     * position when a bias is requested, otherwise {@code (nextDouble() - 0.5) * 0.5} drawn from
     * {@code rng$2}.
     */
    public static final /* synthetic */ float $anonfun$makeRandomVectorsForVocabulary$2(boolean inputVectorBias$4, int finalVectorDim$2, Random rng$2, int i) {
        boolean isBiasSlot = inputVectorBias$4 && i == finalVectorDim$2 - 1;
        // The generator is only advanced for non-bias positions.
        return isBiasSlot ? 1.0f : (float)((rng$2.nextDouble() - 0.5) * 0.5);
    }

    /**
     * Element generator for {@code augmentVectorsToCompleteVocabulary}: {@code 1.0f} in the last
     * position when a bias is requested, otherwise {@code (nextDouble() - 0.5) * 0.5} drawn from
     * {@code rng$3}.
     */
    public static final /* synthetic */ float $anonfun$augmentVectorsToCompleteVocabulary$2(boolean inputVectorBias$5, int word2vecDim$1, Random rng$3, int i) {
        boolean isBiasSlot = inputVectorBias$5 && i == word2vecDim$1 - 1;
        // The generator is only advanced for non-bias positions.
        return isBiasSlot ? 1.0f : (float)((rng$3.nextDouble() - 0.5) * 0.5);
    }

    /**
     * Reads one entry (a word followed by {@code dim$1} floats) from the binary stream and stores
     * it in {@code word2Vec$1} when the word is wanted.
     *
     * <p>The entry is always consumed from the stream, even when the word is not kept. A progress
     * line is printed whenever {@code i} is a multiple of 1,000,000.
     *
     * @param words$1           words to keep; an empty set keeps every word
     * @param inputVectorBias$2 whether the vector gets an extra trailing {@code 1.0f}
     * @param dis$1             stream positioned at the start of the entry
     * @param word2Vec$1        destination map, updated in place
     * @param dim$1             number of floats stored per entry
     * @param i                 zero-based index of the entry, used only for progress output
     * @return the result of the map's {@code put} when the word is kept, otherwise {@code BoxedUnit.UNIT}
     */
    public static final /* synthetic */ Object $anonfun$readWord2Vec$1(Set words$1, boolean inputVectorBias$2, DataInputStream dis$1, HashMap word2Vec$1, int dim$1, int i) {
        if (i % 1000000 == 0) {
            Predef$.MODULE$.println((Object)("On line " + i));
        }
        String word = Word2VecUtils.readString(dis$1);
        float[] vector = new float[inputVectorBias$2 ? dim$1 + 1 : dim$1];
        for (int j = 0; j < dim$1; ++j) {
            vector[j] = Word2VecUtils.readFloat(dis$1);
        }
        if (inputVectorBias$2) {
            // The bias occupies the extra slot right after the dim$1 stored values.
            vector[dim$1] = 1.0f;
        }
        if (words$1.isEmpty() || words$1.contains((Object)word)) {
            return word2Vec$1.put((Object)word, (Object)vector);
        }
        return BoxedUnit.UNIT;
    }

    /**
     * Element generator for a text-format embedding: parses {@code entries$1[i]} as a float,
     * except that the position one past the last entry yields the bias value {@code 1.0f} when
     * {@code inputVectorBias$3} is set.
     */
    public static final /* synthetic */ float $anonfun$readBansalEmbeddings$1(boolean inputVectorBias$3, String[] entries$1, int i) {
        if (!inputVectorBias$3 || i != new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])entries$1)).size()) {
            String raw = entries$1[i];
            if (Predef$.MODULE$ == null) {
                throw null;
            }
            return new StringOps(raw).toFloat();
        }
        return 1.0f;
    }

    /**
     * Element generator for a text-format embedding: parses {@code entries$2[i]} as a float,
     * except that the position one past the last entry yields the bias value {@code 1.0f} when
     * {@code inputVectorBias$3} is set.
     */
    public static final /* synthetic */ float $anonfun$readBansalEmbeddings$2(boolean inputVectorBias$3, String[] entries$2, int i) {
        if (!inputVectorBias$3 || i != new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])entries$2)).size()) {
            String raw = entries$2[i];
            if (Predef$.MODULE$ == null) {
                throw null;
            }
            return new StringOps(raw).toFloat();
        }
        return 1.0f;
    }

    /**
     * Private constructor for the singleton: publishes the new instance through {@code MODULE$}
     * and compiles the hyphenated-word pattern.
     */
    private Word2Vec$() {
        MODULE$ = this;
        // One or more "word-" prefixes followed by a final word; group 2 captures that final word.
        this.hyphenPattern = Pattern.compile("(\\w+-)+(\\w+)");
    }

    /** Boxed-index adapter: unboxes {@code i} to an {@code int} and delegates to {@code $anonfun$readWord2Vec$1}. */
    public static final /* synthetic */ Object $anonfun$readWord2Vec$1$adapted(Set words$1, boolean inputVectorBias$2, DataInputStream dis$1, HashMap word2Vec$1, int dim$1, Object i) {
        int entryIndex = BoxesRunTime.unboxToInt((Object)i);
        return Word2Vec$.$anonfun$readWord2Vec$1(words$1, inputVectorBias$2, dis$1, word2Vec$1, dim$1, entryIndex);
    }
}

