/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.ml.odkl.texts;

import org.apache.spark.annotation.DeveloperApi;
import org.apache.spark.ml.Transformer;
import org.apache.spark.ml.odkl.texts.FreqStatsTransformer$;
import org.apache.spark.ml.param.BooleanParam;
import org.apache.spark.ml.param.DoubleArrayParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.ml.param.ParamPair;
import org.apache.spark.ml.param.ParamValidators$;
import org.apache.spark.ml.param.Params;
import org.apache.spark.ml.param.shared.HasInputCol;
import org.apache.spark.ml.util.Identifiable;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.UserDefinedFunction;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DoubleType$;
import org.apache.spark.sql.types.LongType$;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructType;
import scala.Function1;
import scala.Predef$;
import scala.Serializable;
import scala.collection.Seq;
import scala.collection.immutable.List;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.StringBuilder;
import scala.reflect.ScalaSignature;
import scala.reflect.api.JavaMirrors;
import scala.reflect.api.JavaUniverse;
import scala.reflect.api.Mirror;
import scala.reflect.api.Symbols;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.TypeTags;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.reflect.runtime.package$;
import scala.runtime.BoxesRunTime;

@ScalaSignature(bytes="\u0006\u0001\u0005\rf\u0001B\u0001\u0003\u0001=\u0011AC\u0012:fcN#\u0018\r^:Ue\u0006t7OZ8s[\u0016\u0014(BA\u0002\u0005\u0003\u0015!X\r\u001f;t\u0015\t)a!\u0001\u0003pI.d'BA\u0004\t\u0003\tiGN\u0003\u0002\n\u0015\u0005)1\u000f]1sW*\u00111\u0002D\u0001\u0007CB\f7\r[3\u000b\u00035\t1a\u001c:h\u0007\u0001\u0019B\u0001\u0001\t\u00155A\u0011\u0011CE\u0007\u0002\r%\u00111C\u0002\u0002\f)J\fgn\u001d4pe6,'\u000f\u0005\u0002\u001615\taC\u0003\u0002\u0018\r\u0005)\u0001/\u0019:b[&\u0011\u0011D\u0006\u0002\u0007!\u0006\u0014\u0018-\\:\u0011\u0005mqR\"\u0001\u000f\u000b\u0005u1\u0012AB:iCJ,G-\u0003\u0002 9\tY\u0001*Y:J]B,HoQ8m\u0011!\t\u0003A!b\u0001\n\u0003\u0012\u0013aA;jIV\t1\u0005\u0005\u0002%U9\u0011Q\u0005K\u0007\u0002M)\tq%A\u0003tG\u0006d\u0017-\u0003\u0002*M\u00051\u0001K]3eK\u001aL!a\u000b\u0017\u0003\rM#(/\u001b8h\u0015\tIc\u0005\u0003\u0005/\u0001\t\u0005\t\u0015!\u0003$\u0003\u0011)\u0018\u000e\u001a\u0011\t\u000bA\u0002A\u0011A\u0019\u0002\rqJg.\u001b;?)\t\u0011D\u0007\u0005\u00024\u00015\t!\u0001C\u0003\"_\u0001\u00071\u0005C\u00047\u0001\t\u0007I\u0011A\u001c\u0002\u0011\u0011L7\u000f^5oGR,\u0012\u0001\u000f\t\u0003sqj\u0011A\u000f\u0006\u0003w!\t1a]9m\u0013\ti$HA\nVg\u0016\u0014H)\u001a4j]\u0016$g)\u001e8di&|g\u000e\u0003\u0004@\u0001\u0001\u0006I\u0001O\u0001\nI&\u001cH/\u001b8di\u0002Bq!\u0011\u0001C\u0002\u0013\u0005!)A\u0005d_J\u0004Xo\u001d'f]V\t1\tE\u0002\u0016\t\u001aK!!\u0012\f\u0003\u000bA\u000b'/Y7\u0011\u0005\u0015:\u0015B\u0001%'\u0005\u0011auN\\4\t\r)\u0003\u0001\u0015!\u0003D\u0003)\u0019wN\u001d9vg2+g\u000e\t\u0005\b\u0019\u0002\u0011\r\u0011\"\u0001N\u00035yW\u000f\u001e9vi\u000e{G\u000eV3s[V\ta\nE\u0002\u0016\t\u000eBa\u0001\u0015\u0001!\u0002\u0013q\u0015AD8viB,HoQ8m)\u0016\u0014X\u000e\t\u0005\b%\u0002\u0011\r\u0011\"\u0001N\u00035yW\u000f\u001e9vi\u000e{GN\u0012:fc\"1A\u000b\u0001Q\u0001\n9\u000bab\\;uaV$8i\u001c7Ge\u0016\f\b\u0005C\u0004W\u0001\t\u0007I\u0011A,\u0002\u0019\u0019\u0014X-\u001d+sKND\u0017I\u001d:\u0016\u
0003a\u0003\"!F-\n\u0005i3\"\u0001\u0005#pk\ndW-\u0011:sCf\u0004\u0016M]1n\u0011\u0019a\u0006\u0001)A\u00051\u0006iaM]3r)J,7\u000f[!se\u0002BqA\u0018\u0001C\u0002\u0013\u0005Q*A\u0005eK2LW.\u001a;fe\"1\u0001\r\u0001Q\u0001\n9\u000b!\u0002Z3mS6,G/\u001a:!\u0011\u001d\u0011\u0007A1A\u0005\u0002\r\fQb^5uQRKW.Z:uC6\u0004X#\u00013\u0011\u0005U)\u0017B\u00014\u0017\u00051\u0011un\u001c7fC:\u0004\u0016M]1n\u0011\u0019A\u0007\u0001)A\u0005I\u0006qq/\u001b;i)&lWm\u001d;b[B\u0004\u0003b\u00026\u0001\u0005\u0004%\t!T\u0001\u0015i&lW\r^:uC6\u00048i\u001c7v[:t\u0015-\\3\t\r1\u0004\u0001\u0015!\u0003O\u0003U!\u0018.\\3ugR\fW\u000e]\"pYVlgNT1nK\u0002BQ\u0001\r\u0001\u0005\u00029$\u0012A\r\u0005\u0006a\u0002!\t%]\u0001\u0005G>\u0004\u0018\u0010\u0006\u0002\u0011e\")1o\u001ca\u0001i\u0006)Q\r\u001f;sCB\u0011Q#^\u0005\u0003mZ\u0011\u0001\u0002U1sC6l\u0015\r\u001d\u0005\u0006q\u0002!\t%_\u0001\niJ\fgn\u001d4pe6$\"A_?\u0011\u0005eZ\u0018B\u0001?;\u0005%!\u0015\r^1Ge\u0006lW\rC\u0003\u007fo\u0002\u0007!0A\u0004eCR\f7/\u001a;\t\u000f\u0005\u0005\u0001\u0001\"\u0001\u0002\u0004\u0005iA-Y=U_R+'/\\*uCR$2A_A\u0003\u0011\u0019\t9a a\u0001u\u0006Y\u0001O]3qe>\u001cW\r\u001a#G\u0011\u001d\tY\u0001\u0001C\u0001\u0003\u001b\tq![:WC2LG\r\u0006\u0004\u0002\u0010\u0005U\u0011\u0011\u0004\t\u0004s\u0005E\u0011bAA\nu\t11i\u001c7v[:D\u0001\"a\u0006\u0002\n\u0001\u0007\u0011qB\u0001\u0005i\u0016\u0014X\u000e\u0003\u0005\u0002\u001c\u0005%\u0001\u0019AA\b\u0003\u00111'/Z9\t\u000f\u0005}\u0001\u0001\"\u0011\u0002\"\u0005yAO]1og\u001a|'/\\*dQ\u0016l\u0017\r\u0006\u0003\u0002$\u0005=\u0002\u0003BA\u0013\u0003Wi!!a\n\u000b\u0007\u0005%\"(A\u0003usB,7/\u0003\u0003\u0002.\u0005\u001d\"AC*ueV\u001cG\u000fV=qK\"A\u0011\u0011GA\u000f\u0001\u0004\t\u0019#\u0001\u0004tG\",W.\u0019\u0015\u0005\u0003;\t)\u0004\u0005\u0003\u00028\u0005uRBAA\u001d\u0015\r\tY\u0004C\u0001\u000bC:tw\u000e^1uS>t\u0017\u0002BA 
\u0003s\u0011A\u0002R3wK2|\u0007/\u001a:Ba&Da!a\u0011\u0001\t\u0003\u0011\u0013aD4fi&s\u0007/\u001e;ECR\f7i\u001c7\t\u000f\u0005\u001d\u0003\u0001\"\u0001\u0002J\u0005y1/\u001a;J]B,H\u000fR1uC\u000e{G\u000e\u0006\u0003\u0002L\u00055S\"\u0001\u0001\t\u000f\u0005=\u0013Q\ta\u0001G\u0005)a/\u00197vK\"1\u00111\u000b\u0001\u0005\u0002\t\n\u0001cZ3u\u001fV$\b/\u001e;D_2$VM]7\t\u000f\u0005]\u0003\u0001\"\u0001\u0002Z\u0005q1/\u001a;Ue\u0016\u001c\bn\u001c7e\u0003J\u0014H\u0003BA&\u00037B\u0001\"a\u0014\u0002V\u0001\u0007\u0011Q\f\t\u0006K\u0005}\u00131M\u0005\u0004\u0003C2#!B!se\u0006L\bcA\u0013\u0002f%\u0019\u0011q\r\u0014\u0003\r\u0011{WO\u00197f\u0011\u001d\tY\u0007\u0001C\u0001\u0003[\nAb]3u\t\u0016d\u0017.\\5uKJ$B!a\u0013\u0002p!9\u0011qJA5\u0001\u0004\u0019\u0003bBA:\u0001\u0011\u0005\u0011QO\u0001\u0011g\u0016$x*\u001e;qkR\u001cu\u000e\u001c+fe6$B!a\u0013\u0002x!9\u0011qJA9\u0001\u0004\u0019\u0003BBA>\u0001\u0011\u0005!%\u0001\thKR|U\u000f\u001e9vi\u000e{GN\u0012:fc\"9\u0011q\u0010\u0001\u0005\u0002\u0005\u0005\u0015\u0001E:fi>+H\u000f];u\u0007>dgI]3r)\u0011\tY%a!\t\u000f\u0005=\u0013Q\u0010a\u0001G!9\u0011q\u0011\u0001\u0005\u0002\u0005%\u0015aD:fi\u000e{'\u000f];t\u0019\u0016tw\r\u001e5\u0015\t\u0005-\u00131\u0012\u0005\b\u0003\u001f\n)\t1\u0001G\u0011\u001d\ty\t\u0001C\u0001\u0003#\u000b\u0001c]3u/&$\b\u000eV5nKN$\u0018-\u001c9\u0015\t\u0005-\u00131\u0013\u0005\t\u0003\u001f\ni\t1\u0001\u0002\u0016B\u0019Q%a&\n\u0007\u0005eeEA\u0004C_>dW-\u00198\t\u000f\u0005u\u0005\u0001\"\u0001\u0002 \u000612/\u001a;US6,7\u000f^1na\u000e{G.^7o\u001d\u0006lW\r\u0006\u0003\u0002L\u0005\u0005\u0006bBA(\u00037\u0003\ra\t")
public class FreqStatsTransformer
extends Transformer
implements HasInputCol {
    // Identifier of this transformer instance; assigned from the constructor argument.
    private final String uid;
    // UDF applied to the input column before explode() in dayToTermStat.
    // NOTE(review): built from $anonfun$2, whose body is not visible here — presumably de-duplicates the terms of one row; confirm.
    private final UserDefinedFunction distinct;
    // Param "CorpusLength": denominator of the relative frequency. When unset, dayToTermStat uses the row count of the exploded term frame instead.
    private final Param<Object> corpusLen;
    // Param "outputColTerm": name of the output column holding the term (String).
    private final Param<String> outputColTerm;
    // Param "outputColFreq": name of the output column holding the relative frequency (Double).
    private final Param<String> outputColFreq;
    // Param "FreqTresholdArray": frequency thresholds read by isValid (default {1.0E-8, 1.0E-6}).
    private final DoubleArrayParam freqTreshArr;
    // Param "Delimiter": n-gram delimiter read by isValid (default " ").
    private final Param<String> delimeter;
    // Param "WithTimestamp": when true, the output also carries max(timestamp) per term (default false).
    private final BooleanParam withTimestamp;
    // Param "TimestampColumnName": name of both the input timestamp column and the output max-timestamp column (default "timestamp").
    private final Param<String> timetstampColumnName;
    // Input-column param; assigned through the generated HasInputCol setter rather than directly in the constructor.
    private final Param<String> inputCol;

    /**
     * Accessor for the input-column param declared by {@code HasInputCol}.
     *
     * @return the param object stored in the {@code inputCol} field
     */
    public final Param<String> inputCol() {
        return inputCol;
    }

    /**
     * Compiler-generated trait setter that stores the {@code inputCol} param.
     * NOTE(review): presumably invoked from the {@code HasInputCol} initializer
     * called in the constructor — confirm.
     *
     * @param x$1 the param instance to store
     */
    public final void org$apache$spark$ml$param$shared$HasInputCol$_setter_$inputCol_$eq(Param x$1) {
        inputCol = x$1;
    }

    /**
     * Delegates to the {@code HasInputCol} trait implementation of {@code getInputCol}.
     *
     * @return whatever the trait implementation returns for this instance
     */
    public final String getInputCol() {
        final HasInputCol self = (HasInputCol)this;
        return HasInputCol.class.getInputCol(self);
    }

    /**
     * @return the identifier this transformer was constructed with
     */
    public String uid() {
        return uid;
    }

    /**
     * @return the UDF that {@code dayToTermStat} applies to the input column before exploding it
     */
    public UserDefinedFunction distinct() {
        return distinct;
    }

    /**
     * @return the "CorpusLength" param (denominator used for relative frequencies when set)
     */
    public Param<Object> corpusLen() {
        return corpusLen;
    }

    /**
     * @return the "outputColTerm" param (name of the output term column)
     */
    public Param<String> outputColTerm() {
        return outputColTerm;
    }

    /**
     * @return the "outputColFreq" param (name of the output frequency column)
     */
    public Param<String> outputColFreq() {
        return outputColFreq;
    }

    /**
     * @return the "FreqTresholdArray" param holding the frequency thresholds read by {@code isValid}
     */
    public DoubleArrayParam freqTreshArr() {
        return freqTreshArr;
    }

    /**
     * @return the "Delimiter" param (n-gram delimiter read by {@code isValid})
     */
    public Param<String> delimeter() {
        return delimeter;
    }

    /**
     * @return the "WithTimestamp" param controlling whether a max-timestamp column is produced
     */
    public BooleanParam withTimestamp() {
        return withTimestamp;
    }

    /**
     * @return the "TimestampColumnName" param (name of the timestamp column read and written)
     */
    public Param<String> timetstampColumnName() {
        return timetstampColumnName;
    }

    /**
     * Copies this transformer via {@code defaultCopy}.
     *
     * @param extra param map forwarded unchanged to {@code defaultCopy}
     * @return the result of {@code defaultCopy}, cast to {@code Transformer}
     */
    public Transformer copy(ParamMap extra) {
        Object duplicate = this.defaultCopy(extra);
        return (Transformer)duplicate;
    }

    /**
     * Transformer entry point; all work is done by {@link #dayToTermStat(DataFrame)}.
     *
     * @param dataset frame to compute term statistics for
     * @return the frame returned by {@code dayToTermStat}
     */
    public DataFrame transform(DataFrame dataset) {
        DataFrame termStats = this.dayToTermStat(dataset);
        return termStats;
    }

    /**
     * Computes a relative frequency for every term found in the input column.
     *
     * <ol>
     *   <li>Applies the {@code distinct} UDF to the input column and explodes the result into one
     *       row per term, keeping the timestamp column as well when {@code withTimestamp} is true.</li>
     *   <li>Takes the corpus length from the {@code corpusLen} param if it is set; otherwise counts
     *       the rows of the exploded frame (this triggers a Spark job).</li>
     *   <li>Groups by term, counts rows per term into a temporary column, and — when
     *       {@code withTimestamp} is true — also keeps the maximum timestamp per term.</li>
     *   <li>Adds the frequency column as {@code count / corpusLength}, keeps only rows accepted by
     *       {@link #isValid(Column, Column)}, and drops the temporary count column.</li>
     * </ol>
     *
     * @param preprocedDF frame containing the input column (and the timestamp column if enabled)
     * @return frame with the term column, the optional timestamp column and the frequency column
     */
    public DataFrame dayToTermStat(DataFrame preprocedDF) {
        boolean keepTimestamp = BoxesRunTime.unboxToBoolean((Object)this.$((Param)this.withTimestamp()));

        // Step 1: one row per element returned by the distinct UDF.
        Column inputTerms = preprocedDF.apply((String)this.$(this.inputCol()));
        Column distinctTerms = this.distinct().apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{inputTerms}));
        Column explodedTerm = functions$.MODULE$.explode(distinctTerms).as((String)this.$(this.outputColTerm()));
        DataFrame termRows;
        if (keepTimestamp) {
            Column timestamp = preprocedDF.apply((String)this.$(this.timetstampColumnName()));
            termRows = preprocedDF.select((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{explodedTerm, timestamp}));
        } else {
            termRows = preprocedDF.select((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{explodedTerm}));
        }

        // Step 2: explicit corpus length wins; otherwise fall back to counting exploded rows.
        long corpusLength;
        if (this.isSet(this.corpusLen())) {
            corpusLength = BoxesRunTime.unboxToLong((Object)this.$(this.corpusLen()));
        } else {
            corpusLength = termRows.count();
        }
        UserDefinedFunction toRelativeFreq = functions$.MODULE$.udf((Function1)new Serializable(this, corpusLength){
            public static final long serialVersionUID = 0L;
            private final long corpusLength$1;

            public final double apply(long count) {
                return this.apply$mcDJ$sp(count);
            }

            public double apply$mcDJ$sp(long count) {
                return (double)count / (double)this.corpusLength$1;
            }
            {
                this.corpusLength$1 = corpusLength$1;
            }
        }, ((TypeTags)package$.MODULE$.universe()).TypeTag().Double(), ((TypeTags)package$.MODULE$.universe()).TypeTag().Long());

        // The uid suffix keeps the temporary count column distinct per transformer instance.
        String termCountColName = "termCount" + this.uid();

        // Step 3: per-term row count, plus the latest timestamp when requested.
        String termCol = (String)this.$(this.outputColTerm());
        Column termCount = functions$.MODULE$.count(termCol).alias(termCountColName);
        DataFrame counted;
        if (keepTimestamp) {
            String timestampCol = (String)this.$(this.timetstampColumnName());
            Column lastSeen = functions$.MODULE$.max(timestampCol).as(timestampCol);
            counted = nGramGroup(termRows, termCol).agg(termCount, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{lastSeen}));
        } else {
            counted = nGramGroup(termRows, termCol).agg(termCount, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[0]));
        }

        // Step 4: derive the frequency, filter by threshold, discard the helper column.
        String freqCol = (String)this.$(this.outputColFreq());
        Column relativeFreq = toRelativeFreq.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col(termCountColName)}));
        DataFrame withFreq = counted.withColumn(freqCol, relativeFreq);
        Column accepted = this.isValid(functions$.MODULE$.col(termCol), functions$.MODULE$.col(freqCol));
        return withFreq.where(accepted).drop(termCountColName);
    }

    // Groups the exploded term rows by the term column only (no additional grouping columns).
    private static org.apache.spark.sql.GroupedData nGramGroup(DataFrame termRows, String termCol) {
        return termRows.groupBy(termCol, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0]));
    }

    /**
     * Builds the row filter {@code freq > threshold(term)}.
     *
     * <p>The threshold for a term is picked from the {@code freqTreshArr} param by n-gram order:
     * the index is the number of delimiter occurrences in the term (0 for a unigram, 1 for a
     * bigram, ...), clamped to the last index of the array so that longer n-grams reuse the last
     * threshold. The threshold array must not be empty.
     *
     * <p>Fix: the previous implementation compiled the <em>term</em> as a regular expression and
     * searched for it inside the <em>delimiter</em>, i.e. with the arguments reversed. As a result
     * the n-gram order was not derived from the delimiters in the term (it came out as 0 for
     * ordinary terms, so only the first threshold was used), and terms containing regex
     * metacharacters could fail with a pattern syntax error. The delimiter is now counted
     * literally inside the term.
     *
     * @param term column holding the term text
     * @param freq column holding the relative frequency of the term
     * @return a boolean column that is true when {@code freq} is strictly greater than the
     *         threshold selected for {@code term}
     */
    public Column isValid(Column term, Column freq) {
        double[] arrTresholds = (double[])this.$((Param)this.freqTreshArr());
        String delim = (String)this.$(this.delimeter());
        JavaUniverse $u = package$.MODULE$.universe();
        JavaMirrors.JavaMirror $m = package$.MODULE$.universe().runtimeMirror(FreqStatsTransformer.class.getClassLoader());
        public final class Org_apache_spark_ml_odkl_texts_FreqStatsTransformer$$typecreator3$1
        extends TypeCreator {
            public <U extends Universe> Types.TypeApi apply(Mirror<U> $m$untyped) {
                Universe $u = $m$untyped.universe();
                Mirror<U> $m = $m$untyped;
                return $u.TypeRef().apply($u.SingleType().apply($u.ThisType().apply(((Symbols.ModuleSymbolApi)((Symbols.ModuleSymbolApi)$m.staticPackage("scala")).asModule()).moduleClass()), $m.staticModule("scala.Predef")), $u.build().selectType(((Symbols.ModuleSymbolApi)((Symbols.ModuleSymbolApi)$m.staticModule("scala.Predef")).asModule()).moduleClass(), "String"), (List)Nil$.MODULE$);
            }

            public Org_apache_spark_ml_odkl_texts_FreqStatsTransformer$$typecreator3$1(FreqStatsTransformer $outer) {
            }
        }
        UserDefinedFunction udfCreateFreqTreshForTerm = functions$.MODULE$.udf((Function1)new Serializable(this, arrTresholds, delim){
            public static final long serialVersionUID = 0L;
            private final double[] arrTresholds$1;
            private final String delim$1;

            public final double apply(String term) {
                // Number of delimiters in the term == n-gram order minus one == threshold index.
                int nGramN = FreqStatsTransformer.countOccurrences(term, this.delim$1);
                // Clamp: n-grams longer than the array covers share the last threshold.
                int lastIndex = this.arrTresholds$1.length - 1;
                return this.arrTresholds$1[nGramN > lastIndex ? lastIndex : nGramN];
            }
            {
                this.arrTresholds$1 = arrTresholds$1;
                this.delim$1 = delim$1;
            }
        }, ((TypeTags)package$.MODULE$.universe()).TypeTag().Double(), ((TypeTags)$u).TypeTag().apply((Mirror)$m, (TypeCreator)new Org_apache_spark_ml_odkl_texts_FreqStatsTransformer$$typecreator3$1(this)));
        Column freqTresh = udfCreateFreqTreshForTerm.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{term}));
        return freq.$greater((Object)freqTresh);
    }

    /**
     * Counts non-overlapping literal occurrences of {@code token} in {@code text}.
     * An empty token never matches, so the result is 0 in that case.
     */
    private static int countOccurrences(String text, String token) {
        if (token.isEmpty()) {
            return 0;
        }
        int hits = 0;
        int from = text.indexOf(token);
        while (from >= 0) {
            ++hits;
            // Skip past the whole match so occurrences are not counted twice.
            from = text.indexOf(token, from + token.length());
        }
        return hits;
    }

    /**
     * Describes the schema produced by {@link #transform(DataFrame)}.
     *
     * <p>The result is built from scratch and does not depend on the incoming schema: the term
     * column (String), then the timestamp column (Long) when {@code withTimestamp} is true, then
     * the frequency column (Double).
     *
     * <p>Fix: the timestamp field used to be appended after the frequency field, but
     * {@code dayToTermStat} appends the frequency column with {@code withColumn} after the
     * aggregation that already contains the timestamp, so the real output order is
     * term, timestamp, frequency. The declared order now matches it.
     *
     * <p>NOTE(review): the timestamp field is declared as {@code LongType} regardless of the type
     * of that column in the input — confirm that inputs always carry a long timestamp.
     *
     * @param schema input schema (not consulted)
     * @return the output schema described above
     */
    @DeveloperApi
    public StructType transformSchema(StructType schema) {
        StructType result = new StructType().add((String)this.$(this.outputColTerm()), (DataType)StringType$.MODULE$);
        if (BoxesRunTime.unboxToBoolean((Object)this.$((Param)this.withTimestamp()))) {
            result = result.add((String)this.$(this.timetstampColumnName()), (DataType)LongType$.MODULE$);
        }
        return result.add((String)this.$(this.outputColFreq()), (DataType)DoubleType$.MODULE$);
    }

    /**
     * @return the current value of the {@code inputCol} param
     */
    public String getInputDataCol() {
        Object current = this.$(this.inputCol());
        return (String)current;
    }

    /**
     * Sets the {@code inputCol} param.
     *
     * @param value name of the input column
     * @return the value returned by {@code set}, cast to this type (allows call chaining)
     */
    public FreqStatsTransformer setInputDataCol(String value) {
        Object updated = this.set(this.inputCol(), value);
        return (FreqStatsTransformer)updated;
    }

    /**
     * @return the current value of the {@code outputColTerm} param
     */
    public String getOutputColTerm() {
        Object current = this.$(this.outputColTerm());
        return (String)current;
    }

    /**
     * Sets the {@code freqTreshArr} param (frequency thresholds read by {@code isValid}).
     *
     * @param value threshold array
     * @return the value returned by {@code set}, cast to this type (allows call chaining)
     */
    public FreqStatsTransformer setTresholdArr(double[] value) {
        Object updated = this.set((Param)this.freqTreshArr(), value);
        return (FreqStatsTransformer)updated;
    }

    /**
     * Sets the {@code delimeter} param (n-gram delimiter read by {@code isValid}).
     *
     * @param value delimiter string
     * @return the value returned by {@code set}, cast to this type (allows call chaining)
     */
    public FreqStatsTransformer setDelimiter(String value) {
        Object updated = this.set(this.delimeter(), value);
        return (FreqStatsTransformer)updated;
    }

    /**
     * Sets the {@code outputColTerm} param.
     *
     * @param value name of the output term column
     * @return the value returned by {@code set}, cast to this type (allows call chaining)
     */
    public FreqStatsTransformer setOutputColTerm(String value) {
        Object updated = this.set(this.outputColTerm(), value);
        return (FreqStatsTransformer)updated;
    }

    /**
     * @return the current value of the {@code outputColFreq} param
     */
    public String getOutputColFreq() {
        Object current = this.$(this.outputColFreq());
        return (String)current;
    }

    /**
     * Sets the {@code outputColFreq} param.
     *
     * @param value name of the output frequency column
     * @return the value returned by {@code set}, cast to this type (allows call chaining)
     */
    public FreqStatsTransformer setOutputColFreq(String value) {
        Object updated = this.set(this.outputColFreq(), value);
        return (FreqStatsTransformer)updated;
    }

    /**
     * Sets the {@code corpusLen} param; when set, {@code dayToTermStat} uses it as the
     * frequency denominator instead of counting rows.
     *
     * @param value corpus length, stored boxed
     * @return the value returned by {@code set}, cast to this type (allows call chaining)
     */
    public FreqStatsTransformer setCorpusLength(long value) {
        Object boxedLength = BoxesRunTime.boxToLong((long)value);
        Object updated = this.set(this.corpusLen(), boxedLength);
        return (FreqStatsTransformer)updated;
    }

    /**
     * Sets the {@code withTimestamp} param.
     *
     * @param value true to also produce the per-term maximum timestamp column
     * @return the value returned by {@code set}, cast to this type (allows call chaining)
     */
    public FreqStatsTransformer setWithTimestamp(boolean value) {
        Object boxedFlag = BoxesRunTime.boxToBoolean((boolean)value);
        Object updated = this.set((Param)this.withTimestamp(), boxedFlag);
        return (FreqStatsTransformer)updated;
    }

    /**
     * Sets the {@code timetstampColumnName} param.
     *
     * @param value name of the timestamp column
     * @return the value returned by {@code set}, cast to this type (allows call chaining)
     */
    public FreqStatsTransformer setTimestampColumnName(String value) {
        Object updated = this.set(this.timetstampColumnName(), value);
        return (FreqStatsTransformer)updated;
    }

    /**
     * Creates a transformer with the given identifier, builds the {@code distinct} UDF,
     * declares all params and registers their defaults.
     *
     * <p>Defaults registered here: thresholds {1.0E-8, 1.0E-6}, delimiter " ",
     * withTimestamp false, timestamp column name "timestamp". No default is registered for
     * the corpus length or the output column names.
     *
     * @param uid identifier stored in the {@code uid} field
     */
    public FreqStatsTransformer(String uid) {
        this.uid = uid;
        // Trait initializer of HasInputCol.
        HasInputCol.class.$init$((HasInputCol)this);
        // Reflection universes/mirrors used to build the TypeTags of the distinct UDF below.
        JavaUniverse $u = package$.MODULE$.universe();
        JavaMirrors.JavaMirror $m = package$.MODULE$.universe().runtimeMirror(FreqStatsTransformer.class.getClassLoader());
        JavaUniverse $u2 = package$.MODULE$.universe();
        JavaMirrors.JavaMirror $m2 = package$.MODULE$.universe().runtimeMirror(FreqStatsTransformer.class.getClassLoader());
        // NOTE(review): $anonfun$2 and the two type creators are synthetic classes not visible here;
        // the UDF presumably de-duplicates the terms of a row — confirm against the Scala source.
        this.distinct = functions$.MODULE$.udf((Function1)new $anonfun$2(this), ((TypeTags)$u).TypeTag().apply((Mirror)$m, (TypeCreator)new $typecreator1$1(this)), ((TypeTags)$u2).TypeTag().apply((Mirror)$m2, (TypeCreator)new $typecreator2$1(this)));
        // Corpus length is validated with a gt(0.0) validator.
        this.corpusLen = new Param((Identifiable)this, "CorpusLength", "corpus length if not set - count corpus length by $(outputColTerm)", ParamValidators$.MODULE$.gt(0.0));
        this.outputColTerm = new Param((Identifiable)this, "outputColTerm", "output Col with Term[String] name");
        this.outputColFreq = new Param((Identifiable)this, "outputColFreq", "output Col with Freq[Double] name");
        this.freqTreshArr = new DoubleArrayParam((Params)this, "FreqTresholdArray", "Array(UniTreshold,BiGramTreshold,TriGramTreshold and etc)");
        this.delimeter = new Param((Identifiable)this, "Delimiter", "Delimiter for nGrams");
        this.withTimestamp = new BooleanParam((Identifiable)this, "WithTimestamp", "should it create max(timestamp) column?");
        this.timetstampColumnName = new Param((Identifiable)this, "TimestampColumnName", "column with message timestamp and name for column with term's last seen timestamp");
        // Defaults are registered last, after every param they refer to has been created.
        this.setDefault((Seq)Predef$.MODULE$.wrapRefArray((Object[])new ParamPair[]{new ParamPair((Param)this.freqTreshArr(), (Object)new double[]{1.0E-8, 1.0E-6}), new ParamPair(this.delimeter(), (Object)" "), new ParamPair((Param)this.withTimestamp(), (Object)BoxesRunTime.boxToBoolean((boolean)false)), new ParamPair(this.timetstampColumnName(), (Object)"timestamp")}));
    }

    /**
     * Creates a transformer whose identifier is generated by
     * {@code Identifiable.randomUID} with the prefix "freqStatsTransformer".
     */
    public FreqStatsTransformer() {
        this(Identifiable$.MODULE$.randomUID("freqStatsTransformer"));
    }
}

