/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.ml.odkl.texts;

import org.apache.spark.ml.Estimator;
import org.apache.spark.ml.feature.CountVectorizer;
import org.apache.spark.ml.feature.CountVectorizerModel;
import org.apache.spark.ml.odkl.texts.OdklCountVectorizerModel;
import org.apache.spark.ml.odkl.texts.OdklCountVectorizerParams;
import org.apache.spark.ml.odkl.texts.OdklCountVectorizerParams$class;
import org.apache.spark.ml.param.BooleanParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.Params;
import org.apache.spark.ml.util.Identifiable;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.util.collection.OpenHashMap;
import scala.Function0;
import scala.Function1;
import scala.Function2;
import scala.MatchError;
import scala.Option;
import scala.Predef$;
import scala.Serializable;
import scala.Some;
import scala.Tuple2;
import scala.collection.Iterable;
import scala.collection.Iterable$;
import scala.collection.Map;
import scala.collection.Seq;
import scala.collection.mutable.Queue;
import scala.math.Ordering;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;

@ScalaSignature(bytes="\u0006\u0001M3A!\u0001\u0002\u0001\u001f\t\u0019r\nZ6m\u0007>,h\u000e\u001e,fGR|'/\u001b>fe*\u00111\u0001B\u0001\u0006i\u0016DHo\u001d\u0006\u0003\u000b\u0019\tAa\u001c3lY*\u0011q\u0001C\u0001\u0003[2T!!\u0003\u0006\u0002\u000bM\u0004\u0018M]6\u000b\u0005-a\u0011AB1qC\u000eDWMC\u0001\u000e\u0003\ry'oZ\u0002\u0001'\r\u0001\u0001C\u0006\t\u0003#Qi\u0011A\u0005\u0006\u0003'\u0019\tqAZ3biV\u0014X-\u0003\u0002\u0016%\ty1i\\;oiZ+7\r^8sSj,'\u000f\u0005\u0002\u001815\t!!\u0003\u0002\u001a\u0005\tIr\nZ6m\u0007>,h\u000e\u001e,fGR|'/\u001b>feB\u000b'/Y7t\u0011!Y\u0002A!b\u0001\n\u0003b\u0012aA;jIV\tQ\u0004\u0005\u0002\u001fI9\u0011qDI\u0007\u0002A)\t\u0011%A\u0003tG\u0006d\u0017-\u0003\u0002$A\u00051\u0001K]3eK\u001aL!!\n\u0014\u0003\rM#(/\u001b8h\u0015\t\u0019\u0003\u0005C\u0005)\u0001\t\u0005\t\u0015!\u0003\u001eS\u0005!Q/\u001b3!\u0013\tYB\u0003C\u0003,\u0001\u0011\u0005A&\u0001\u0004=S:LGO\u0010\u000b\u0003[9\u0002\"a\u0006\u0001\t\u000bmQ\u0003\u0019A\u000f\t\u000b-\u0002A\u0011\u0001\u0019\u0015\u00035BqA\r\u0001C\u0002\u0013\u00051'A\nj]\",'/\u001b;fIZ{7-\u00192vY\u0006\u0014\u00180F\u00015!\r)\u0004HO\u0007\u0002m)\u0011qGB\u0001\u0006a\u0006\u0014\u0018-\\\u0005\u0003sY\u0012Q\u0001U1sC6\u0004Ba\u000f \u001e\u00016\tAH\u0003\u0002>A\u0005Q1m\u001c7mK\u000e$\u0018n\u001c8\n\u0005}b$aA'baB\u0011q$Q\u0005\u0003\u0005\u0002\u00121!\u00138u\u0011\u0019!\u0005\u0001)A\u0005i\u0005!\u0012N\u001c5fe&$X\r\u001a,pG\u0006\u0014W\u000f\\1ss\u0002BQA\u0012\u0001\u0005B\u001d\u000b1AZ5u)\tA5\n\u0005\u0002\u0012\u0013&\u0011!J\u0005\u0002\u0015\u0007>,h\u000e\u001e,fGR|'/\u001b>fe6{G-\u001a7\t\u000b1+\u0005\u0019A'\u0002\u000f\u0011\fG/Y:fiB\u0011a*U\u0007\u0002\u001f*\u0011\u0001\u000bC\u0001\u0004gFd\u0017B\u0001*P\u0005%!\u0015\r^1Ge\u0006lW\r")
public class OdklCountVectorizer
extends CountVectorizer
implements OdklCountVectorizerParams {
    /**
     * Parameter holding an optional word-to-index map taken over from an earlier fit.
     * Assigned exactly once, in the {@code OdklCountVectorizer(String)} constructor.
     */
    private final Param<Map<String, Object>> inheritedVocabulary;

    /*
     * The two parameters below are not assigned in any constructor of this class; they are
     * written by the generated trait setter methods
     * (org$apache$spark$ml$odkl$texts$OdklCountVectorizerParams$_setter_$..._$eq).
     * Java source does not allow a final instance field to be assigned from an ordinary
     * method, so they are intentionally non-final here. They are presumably set once while
     * the trait initialiser runs - TODO confirm against OdklCountVectorizerParams$class.
     */

    /** Parameter object exposed through {@link #vocabAttrGroupName()}. */
    private Param<String> vocabAttrGroupName;

    /** Parameter object exposed through {@link #storeVocabInMetadata()}. */
    private BooleanParam storeVocabInMetadata;

    /**
     * Accessor for the {@code vocabAttrGroupName} parameter object.
     *
     * @return the value currently stored in the {@code vocabAttrGroupName} field
     */
    @Override
    public Param<String> vocabAttrGroupName() {
        final Param<String> groupNameParam = this.vocabAttrGroupName;
        return groupNameParam;
    }

    /**
     * Accessor for the {@code storeVocabInMetadata} parameter object.
     *
     * @return the value currently stored in the {@code storeVocabInMetadata} field
     */
    @Override
    public BooleanParam storeVocabInMetadata() {
        final BooleanParam storeFlagParam = this.storeVocabInMetadata;
        return storeFlagParam;
    }

    /**
     * Generated trait setter: stores the supplied parameter object in the
     * {@code vocabAttrGroupName} field.
     *
     * @param x$1 the parameter object to store
     */
    @Override
    public void org$apache$spark$ml$odkl$texts$OdklCountVectorizerParams$_setter_$vocabAttrGroupName_$eq(Param x$1) {
        final Param groupNameParam = x$1;
        this.vocabAttrGroupName = groupNameParam;
    }

    /**
     * Generated trait setter: stores the supplied parameter object in the
     * {@code storeVocabInMetadata} field.
     *
     * @param x$1 the parameter object to store
     */
    @Override
    public void org$apache$spark$ml$odkl$texts$OdklCountVectorizerParams$_setter_$storeVocabInMetadata_$eq(BooleanParam x$1) {
        final BooleanParam storeFlagParam = x$1;
        this.storeVocabInMetadata = storeFlagParam;
    }

    /**
     * Forwards to the trait implementation
     * {@code OdklCountVectorizerParams$class.getVocabAttrGroupName}, passing this instance.
     *
     * @return the string produced by the trait implementation
     */
    @Override
    public String getVocabAttrGroupName() {
        final String groupName = OdklCountVectorizerParams$class.getVocabAttrGroupName(this);
        return groupName;
    }

    /**
     * Forwards to the trait implementation
     * {@code OdklCountVectorizerParams$class.setVocabAttrGroupName}, passing this instance
     * and the requested value.
     *
     * @param value the value handed to the trait implementation
     * @return whatever the trait implementation returns (presumably this instance, for
     *         call chaining - confirm against the trait)
     */
    @Override
    public OdklCountVectorizerParams setVocabAttrGroupName(String value) {
        final OdklCountVectorizerParams result = OdklCountVectorizerParams$class.setVocabAttrGroupName(this, value);
        return result;
    }

    /**
     * Forwards to the trait implementation
     * {@code OdklCountVectorizerParams$class.getStoreVocabInMetadata}, passing this instance.
     *
     * @return the boolean produced by the trait implementation
     */
    @Override
    public boolean getStoreVocabInMetadata() {
        final boolean storeFlag = OdklCountVectorizerParams$class.getStoreVocabInMetadata(this);
        return storeFlag;
    }

    /**
     * Forwards to the trait implementation
     * {@code OdklCountVectorizerParams$class.setStoreVocabInMetadata}, passing this instance
     * and the requested value.
     *
     * @param value the value handed to the trait implementation
     * @return whatever the trait implementation returns (presumably this instance, for
     *         call chaining - confirm against the trait)
     */
    @Override
    public OdklCountVectorizerParams setStoreVocabInMetadata(boolean value) {
        final OdklCountVectorizerParams result = OdklCountVectorizerParams$class.setStoreVocabInMetadata(this, value);
        return result;
    }

    /**
     * Returns the identifier of this estimator as reported by the superclass.
     *
     * @return the result of {@code super.uid()}
     */
    public String uid() {
        final String identifier = super.uid();
        return identifier;
    }

    /**
     * Accessor for the {@code inheritedVocabulary} parameter object created in the
     * constructor.
     *
     * @return the value stored in the {@code inheritedVocabulary} field
     */
    public Param<Map<String, Object>> inheritedVocabulary() {
        final Param<Map<String, Object>> inherited = this.inheritedVocabulary;
        return inherited;
    }

    /**
     * Builds a vocabulary from the input column of {@code dataset} and wraps it in an
     * {@link OdklCountVectorizerModel}.
     *
     * <p>Steps performed:
     * <ol>
     *   <li>Each row's input column is read as a sequence of words.</li>
     *   <li>For every word the total number of occurrences and the number of documents
     *       containing it are accumulated.</li>
     *   <li>Words whose document count is below the {@code minDF} threshold are dropped. A
     *       {@code minDF} value {@code >= 1.0} is used as is; a smaller value is multiplied
     *       by the number of input rows.</li>
     *   <li>The remaining words are ordered by descending total count and at most
     *       {@code vocabSize} of them are kept.</li>
     *   <li>If {@code inheritedVocabulary} is defined, words found in it are placed at
     *       their inherited index; all other words fill the remaining positions in
     *       frequency order.</li>
     * </ol>
     *
     * <p>A word whose inherited index does not fit into the new vocabulary (negative, not
     * smaller than the new vocabulary size, or already taken by another word) is treated
     * like a word without an inherited index instead of failing the whole fit.
     *
     * @param dataset data frame containing the column named by {@code inputCol}
     * @return the fitted model, with this estimator set as parent and parameter values copied
     * @throws IllegalArgumentException if no word survives the filtering (raised by
     *         {@code Predef.require})
     */
    public CountVectorizerModel fit(DataFrame dataset) {
        String[] stringArray;
        this.transformSchema(dataset.schema(), true);
        int vocSize = BoxesRunTime.unboxToInt((Object)this.$((Param)this.vocabSize()));
        // One Seq<String> per row: the (only) selected column is the input column.
        RDD input = dataset.select((String)this.$(this.inputCol()), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).map((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final Seq<String> apply(Row x$1) {
                return (Seq)x$1.getAs(0);
            }
        }, ClassTag$.MODULE$.apply(Seq.class));
        // minDF >= 1.0 is an absolute document count; otherwise it is a fraction of the row
        // count (input.count() is only evaluated in the fractional case).
        double minDf = BoxesRunTime.unboxToDouble((Object)this.$((Param)this.minDF())) >= 1.0 ? BoxesRunTime.unboxToDouble((Object)this.$((Param)this.minDF())) : BoxesRunTime.unboxToDouble((Object)this.$((Param)this.minDF())) * (double)input.count();
        // Per document: word -> (occurrences in this document, 1). Reduced per word into
        // (total occurrences, document frequency), filtered by minDf, then reduced to
        // (word, total occurrences).
        RDD wordCounts = RDD$.MODULE$.rddToPairRDDFunctions(input.flatMap((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final Iterable<Tuple2<String, Tuple2<Object, Object>>> apply(Seq<String> x0$1) {
                Seq<String> seq = x0$1;
                OpenHashMap.mcJ.sp wc = new OpenHashMap.mcJ.sp(ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.Long());
                seq.foreach((Function1)new Serializable(this, (OpenHashMap)wc){
                    public static final long serialVersionUID = 0L;
                    private final OpenHashMap wc$1;

                    public final long apply(String w) {
                        // First occurrence stores 1, every further occurrence increments.
                        return this.wc$1.changeValue$mcJ$sp((Object)w, (Function0)new Serializable(this){
                            public static final long serialVersionUID = 0L;

                            public final long apply() {
                                return this.apply$mcJ$sp();
                            }

                            public long apply$mcJ$sp() {
                                return 1L;
                            }
                        }, (Function1)new Serializable(this){
                            public static final long serialVersionUID = 0L;

                            public final long apply(long x$2) {
                                return this.apply$mcJJ$sp(x$2);
                            }

                            public long apply$mcJJ$sp(long x$2) {
                                return x$2 + 1L;
                            }
                        });
                    }
                    {
                        this.wc$1 = wc$1;
                    }
                });
                Iterable iterable = (Iterable)wc.map((Function1)new Serializable(this){
                    public static final long serialVersionUID = 0L;

                    public final Tuple2<String, Tuple2<Object, Object>> apply(Tuple2<String, Object> x0$2) {
                        Tuple2<String, Object> tuple2 = x0$2;
                        if (tuple2 != null) {
                            String word = (String)tuple2._1();
                            long count = tuple2._2$mcJ$sp();
                            // The constant 1 is this document's contribution to the document frequency.
                            Tuple2 tuple22 = new Tuple2((Object)word, (Object)new Tuple2.mcJI.sp(count, 1));
                            return tuple22;
                        }
                        throw new MatchError(tuple2);
                    }
                }, Iterable$.MODULE$.canBuildFrom());
                return iterable;
            }
        }, ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Tuple2.class), (Ordering)Ordering.String$.MODULE$).reduceByKey((Function2)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final Tuple2<Object, Object> apply(Tuple2<Object, Object> x0$3, Tuple2<Object, Object> x1$1) {
                Tuple2 tuple2 = new Tuple2(x0$3, x1$1);
                if (tuple2 != null) {
                    Tuple2 tuple22 = (Tuple2)tuple2._1();
                    Tuple2 tuple23 = (Tuple2)tuple2._2();
                    if (tuple22 != null) {
                        long wc1 = tuple22._1$mcJ$sp();
                        int df1 = tuple22._2$mcI$sp();
                        if (tuple23 != null) {
                            long wc2 = tuple23._1$mcJ$sp();
                            int df2 = tuple23._2$mcI$sp();
                            Tuple2.mcJI.sp sp2 = new Tuple2.mcJI.sp(wc1 + wc2, df1 + df2);
                            return sp2;
                        }
                    }
                }
                throw new MatchError((Object)tuple2);
            }
        }).filter((Function1)new Serializable(this, minDf){
            public static final long serialVersionUID = 0L;
            private final double minDf$1;

            public final boolean apply(Tuple2<String, Tuple2<Object, Object>> x0$4) {
                Tuple2 tuple2;
                Tuple2<String, Tuple2<Object, Object>> tuple22 = x0$4;
                if (tuple22 != null && (tuple2 = (Tuple2)tuple22._2()) != null) {
                    int df = tuple2._2$mcI$sp();
                    boolean bl = (double)df >= this.minDf$1;
                    return bl;
                }
                throw new MatchError(tuple22);
            }
            {
                this.minDf$1 = minDf$1;
            }
        }).map((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final Tuple2<String, Object> apply(Tuple2<String, Tuple2<Object, Object>> x0$5) {
                Tuple2<String, Tuple2<Object, Object>> tuple2 = x0$5;
                if (tuple2 != null) {
                    String word = (String)tuple2._1();
                    Tuple2 tuple22 = (Tuple2)tuple2._2();
                    if (tuple22 != null) {
                        long count = tuple22._1$mcJ$sp();
                        Tuple2 tuple23 = new Tuple2((Object)word, (Object)BoxesRunTime.boxToLong((long)count));
                        return tuple23;
                    }
                }
                throw new MatchError(tuple2);
            }
        }, ClassTag$.MODULE$.apply(Tuple2.class));
        // Order by total count, descending (ascending flag is false), in a single partition,
        // and keep at most vocSize words.
        String[] vocab = (String[])wordCounts.sortBy((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final long apply(Tuple2<String, Object> x$3) {
                return x$3._2$mcJ$sp();
            }
        }, false, 1, (Ordering)Ordering.Long$.MODULE$, ClassTag$.MODULE$.Long()).map((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final String apply(Tuple2<String, Object> x$4) {
                return (String)x$4._1();
            }
        }, ClassTag$.MODULE$.apply(String.class)).take(vocSize);
        Predef$.MODULE$.require(vocab.length > 0, (Function0)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final String apply() {
                return "The vocabulary size should be > 0. Lower minDF as necessary.";
            }
        });
        if (this.isDefined(this.inheritedVocabulary())) {
            String[] newVocabulary = new String[vocab.length];
            Map previousMap = (Map)this.$(this.inheritedVocabulary());
            // Words that cannot be placed at an inherited index, in frequency order.
            Queue remainder = new Queue();
            Predef$.MODULE$.refArrayOps((Object[])vocab).foreach((Function1)new Serializable(this, newVocabulary, previousMap, remainder){
                public static final long serialVersionUID = 0L;
                private final String[] newVocabulary$1;
                private final Map previousMap$1;
                private final Queue remainder$1;

                public final void apply(String word) {
                    Option option = this.previousMap$1.get((Object)word);
                    if (option instanceof Some) {
                        int index = BoxesRunTime.unboxToInt((Object)((Some)option).get());
                        // The inherited index is only usable when it lies inside the new
                        // vocabulary and no other word has claimed it. Writing blindly would
                        // throw on an out-of-range index, and a duplicate index would leave
                        // more empty slots than queued words.
                        boolean usable = index >= 0 && index < this.newVocabulary$1.length && this.newVocabulary$1[index] == null;
                        if (usable) {
                            this.newVocabulary$1[index] = word;
                            return;
                        }
                    }
                    this.remainder$1.enqueue((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{word}));
                }
                {
                    this.newVocabulary$1 = newVocabulary$1;
                    this.previousMap$1 = previousMap$1;
                    this.remainder$1 = remainder$1;
                }
            });
            // Every word was either placed or queued, so the number of empty slots equals
            // the queue size and dequeue() cannot run dry here.
            Predef$.MODULE$.refArrayOps((Object[])newVocabulary).indices().foreach$mVc$sp((Function1)new Serializable(this, newVocabulary, remainder){
                public static final long serialVersionUID = 0L;
                private final String[] newVocabulary$1;
                private final Queue remainder$1;

                public final void apply(int i) {
                    this.apply$mcVI$sp(i);
                }

                public void apply$mcVI$sp(int i) {
                    if (this.newVocabulary$1[i] == null) {
                        this.newVocabulary$1[i] = (String)this.remainder$1.dequeue();
                    }
                }
                {
                    this.newVocabulary$1 = newVocabulary$1;
                    this.remainder$1 = remainder$1;
                }
            });
            stringArray = newVocabulary;
        } else {
            stringArray = vocab;
        }
        String[] mayBeMerged = stringArray;
        // The model gets a fresh random UID, this estimator as parent, and a copy of the parameter values.
        return (CountVectorizerModel)this.copyValues((Params)new OdklCountVectorizerModel(Identifiable$.MODULE$.randomUID("odklCountVectorizerModel"), mayBeMerged).setParent((Estimator)this), this.copyValues$default$2());
    }

    public OdklCountVectorizer(String uid) {
        super(uid);
        OdklCountVectorizerParams$class.$init$(this);
        this.inheritedVocabulary = new Param((Identifiable)this, "inheritedVocabulary", "Dictionary inherited from the previous epoche. Can be used to try to preserve word indices.");
    }

    /**
     * Creates an estimator whose identifier is generated by
     * {@code Identifiable$.MODULE$.randomUID("odklCountVectorizer")} and delegates to
     * the single-argument constructor.
     */
    public OdklCountVectorizer() {
        this(Identifiable$.MODULE$.randomUID("odklCountVectorizer"));
    }
}

