package com.github.catalystcode.fortis.spark.streaming.html;

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import java.net.URL;
import java.util.concurrent.TimeUnit;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import scala.Predef$;
import scala.Serializable;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.GenSeq;
import scala.collection.GenSeq$;
import scala.collection.GenTraversableLike;
import scala.collection.JavaConversions$;
import scala.collection.Parallelizable;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableLike;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.parallel.ParIterableLike;
import scala.collection.parallel.ParSeq$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;
import scala.sys.package$;
import scala.util.matching.Regex;

/* compiled from: HTMLSource.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005ef!B\u0001\u0003\u0001\t\u0001\"A\u0003%U\u001b2\u001bv.\u001e:dK*\u00111\u0001B\u0001\u0005QRlGN\u0003\u0002\u0006\r\u0005I1\u000f\u001e:fC6Lgn\u001a\u0006\u0003\u000f!\tQa\u001d9be.T!!\u0003\u0006\u0002\r\u0019|'\u000f^5t\u0015\tYA\"\u0001\u0007dCR\fG._:uG>$WM\u0003\u0002\u000e\u001d\u00051q-\u001b;ik\nT\u0011aD\u0001\u0004G>l7c\u0001\u0001\u0012/A\u0011!#F\u0007\u0002')\tA#A\u0003tG\u0006d\u0017-\u0003\u0002\u0017'\t1\u0011I\\=SK\u001a\u0004\"A\u0005\r\n\u0005e\u0019\"\u0001D*fe&\fG.\u001b>bE2,\u0007\u0002C\u000e\u0001\u0005\u0003\u0005\u000b\u0011B\u000f\u0002\u000fMLG/Z+S\u0019\u000e\u0001\u0001C\u0001\u0010$\u001b\u0005y\"B\u0001\u0011\"\u0003\rqW\r\u001e\u0006\u0002E\u0005!!.\u0019<b\u0013\t!sDA\u0002V%2C\u0001B\n\u0001\u0003\u0002\u0003\u0006IaJ\u0001\t[\u0006DH)\u001a9uQB\u0011!\u0003K\u0005\u0003SM\u00111!\u00138u\u0011!Y\u0003A!A!\u0002\u0013a\u0013A\u0004:fcV,7\u000f\u001e%fC\u0012,'o\u001d\t\u0005[A\u001a4G\u0004\u0002\u0013]%\u0011qfE\u0001\u0007!J,G-\u001a4\n\u0005E\u0012$aA'ba*\u0011qf\u0005\t\u0003[QJ!!\u000e\u001a\u0003\rM#(/\u001b8h\u0011!9\u0004A!A!\u0002\u0013A\u0014AG2bG\",W\tZ5u\t&\u001cH/\u00198dKRC'/Z:i_2$\u0007C\u0001\n:\u0013\tQ4C\u0001\u0004E_V\u0014G.\u001a\u0005\u0006y\u0001!\t!P\u0001\u0007y%t\u0017\u000e\u001e 
\u0015\u000by\u0002\u0015IQ\"\u0011\u0005}\u0002Q\"\u0001\u0002\t\u000bmY\u0004\u0019A\u000f\t\u000f\u0019Z\u0004\u0013!a\u0001O!91f\u000fI\u0001\u0002\u0004a\u0003bB\u001c<!\u0003\u0005\r\u0001\u000f\u0005\b\u000b\u0002\u0011\r\u0011\"\u0003G\u0003Q\u0019wN\u001c8fGR$\u0016.\\3pkRl\u0015\u000e\u001c7jgV\tq\u0005\u0003\u0004I\u0001\u0001\u0006IaJ\u0001\u0016G>tg.Z2u)&lWm\\;u\u001b&dG.[:!\u0011\u001dQ\u0005A1A\u0005\n\u0019\u000b\u0001cY1dQ\u0016$\u0016.\\3NS:,H/Z:\t\r1\u0003\u0001\u0015!\u0003(\u0003E\u0019\u0017m\u00195f)&lW-T5okR,7\u000f\t\u0005\b\u001d\u0002\u0011\r\u0011\"\u0003P\u0003\u0015\u0019\u0017m\u00195f+\u0005\u0001\u0006\u0003B)X;ek\u0011A\u0015\u0006\u0003\u001dNS!\u0001V+\u0002\r\r|W.\\8o\u0015\t1f\"\u0001\u0004h_><G.Z\u0005\u00031J\u0013QaQ1dQ\u0016\u0004\"AW1\u000e\u0003mS!\u0001X/\u0002\u000b9|G-Z:\u000b\u0005y{\u0016!\u00026t_V\u0004(\"\u00011\u0002\u0007=\u0014x-\u0003\u0002c7\nAAi\\2v[\u0016tG\u000f\u0003\u0004e\u0001\u0001\u0006I\u0001U\u0001\u0007G\u0006\u001c\u0007.\u001a\u0011\t\u0011\u0019\u0004\u0001\u0019!C\u0001\u0005\u001d\f\u0011bY8o]\u0016\u001cGo\u001c:\u0016\u0003!\u0004\"!\u001b6\u000e\u0003\u00011Qa\u001b\u0001\u0001\u00051\u0014Q\u0002\u0013+N\u0019\u000e{gN\\3di>\u00148C\u00016\u0012\u0011\u0015a$\u000e\"\u0001o)\u0005A\u0007\"\u00029k\t\u0003\t\u0018aB2p]:,7\r\u001e\u000b\u0003eZ\u0004\"a\u001d;\u000e\u0003uK!!^/\u0003\u0015\r{gN\\3di&|g\u000eC\u0003x_\u0002\u0007Q$A\u0002ve2D\u0001\"\u001f\u0001A\u0002\u0013\u0005!A_\u0001\u000eG>tg.Z2u_J|F%Z9\u0015\u0005mt\bC\u0001\n}\u0013\ti8C\u0001\u0003V]&$\bbB@y\u0003\u0003\u0005\r\u0001[\u0001\u0004q\u0012\n\u0004bBA\u0002\u0001\u0001\u0006K\u0001[\u0001\u000bG>tg.Z2u_J\u0004\u0003bBA\u0004\u0001\u0011\u0005\u0011\u0011B\u0001\u0006e\u0016\u001cX\r\u001e\u000b\u0002w\"9\u0011Q\u0002\u0001\u0005\u0002\u0005=\u0011!\u00024fi\u000eDGCAA\t!\u0019\t\u0019\"a\t\u0002*9!\u0011QCA\u0010\u001d\u0011\t9\"!\b\u000e\u0005\u0005e!bAA\u000e9\u00051AH]8pizJ\u0011\u0001F\u0005\u0004\u0003C\u0019\u0012
a\u00029bG.\fw-Z\u0005\u0005\u0003K\t9CA\u0002TKFT1!!\t\u0014!\ry\u00141F\u0005\u0004\u0003[\u0011!\u0001\u0003%U\u001b2\u0003\u0016mZ3\t\u0013\u0005E\u0002A1A\u0005\n\u0005M\u0012AC;sYB\u000bG\u000f^3s]V\u0011\u0011Q\u0007\t\u0005\u0003o\t\t%\u0004\u0002\u0002:)!\u00111HA\u001f\u0003!i\u0017\r^2iS:<'bAA '\u0005!Q\u000f^5m\u0013\u0011\t\u0019%!\u000f\u0003\u000bI+w-\u001a=\t\u0011\u0005\u001d\u0003\u0001)A\u0005\u0003k\t1\"\u001e:m!\u0006$H/\u001a:oA!I\u00111\n\u0001C\u0002\u0013%\u00111G\u0001\u0010e>|G\u000fU1uQB\u000bG\u000f^3s]\"A\u0011q\n\u0001!\u0002\u0013\t)$\u0001\ts_>$\b+\u0019;i!\u0006$H/\u001a:oA!I\u00111\u000b\u0001C\u0002\u0013%\u00111G\u0001\u0014C\n\u001cx\u000e\\;uKB\u000bG\u000f\u001b)biR,'O\u001c\u0005\t\u0003/\u0002\u0001\u0015!\u0003\u00026\u0005!\u0012MY:pYV$X\rU1uQB\u000bG\u000f^3s]\u0002B\u0011\"a\u0017\u0001\u0005\u0004%I!a\r\u0002\u0019\td\u0017M\\6QCR$XM\u001d8\t\u0011\u0005}\u0003\u0001)A\u0005\u0003k\tQB\u00197b].\u0004\u0016\r\u001e;fe:\u0004\u0003\u0002CA2\u0001\u0011\u0005!!!\u001a\u0002'Utg-\u001b7uKJ,G\rR8dk6,g\u000e^:\u0015\u0005\u0005\u001d\u0004CBA\n\u0003G\tI\u0007E\u0003\u0013\u0003Wj\u0012,C\u0002\u0002nM\u0011a\u0001V;qY\u0016\u0014tACA9\u0005\u0005\u0005\t\u0012\u0001\u0002\u0002t\u0005Q\u0001\nV'M'>,(oY3\u0011\u0007}\n)HB\u0005\u0002\u0005\u0005\u0005\t\u0012\u0001\u0002\u0002xM!\u0011QO\t\u0018\u0011\u001da\u0014Q\u000fC\u0001\u0003w\"\"!a\u001d\t\u0015\u0005}\u0014QOI\u0001\n\u0003\t\t)A\u000e%Y\u0016\u001c8/\u001b8ji\u0012:'/Z1uKJ$C-\u001a4bk2$HEM\u000b\u0003\u0003\u0007S3aJACW\t\t9\t\u0005\u0003\u0002\n\u0006MUBAAF\u0015\u0011\ti)a$\u0002\u0013Ut7\r[3dW\u0016$'bAAI'\u0005Q\u0011M\u001c8pi\u0006$\u0018n\u001c8\n\t\u0005U\u00151\u0012\u0002\u0012k:\u001c\u0007.Z2lK\u00124\u0016M]5b]\u000e,\u0007BCAM\u0003k\n\n\u0011\"\u0001\u0002\u001c\u0006YB\u0005\\3tg&t\u0017\u000e\u001e\u0013he\u0016\fG/\u001a:%I\u00164\u0017-\u001e7uIM*\"!!(+\u00071\n)\t\u0003\u0006\u0002\"\u0006U\u0014\u0013!C\u0001\u0003G\u000b1\u0004\n7fgNLg.\u001b
;%OJ,\u0017\r^3sI\u0011,g-Y;mi\u0012\"TCAASU\rA\u0014Q\u0011\u0005\u000b\u0003S\u000b)(!A\u0005\n\u0005-\u0016a\u0003:fC\u0012\u0014Vm]8mm\u0016$\"!!,\u0011\t\u0005=\u0016QW\u0007\u0003\u0003cS1!a-\"\u0003\u0011a\u0017M\\4\n\t\u0005]\u0016\u0011\u0017\u0002\u0007\u001f\nTWm\u0019;")
/* loaded from: input_file:com/github/catalystcode/fortis/spark/streaming/html/HTMLSource.class */
/*
 * Fetches the HTML document at a site URL and, when maxDepth is at least 1, the documents
 * reached through the "a[href]" links found on that page.
 *
 * This is decompiled Scala. The long "com$github$...$$name" members are compiler-mangled
 * names that the generated closure classes (HTMLSource$$anonfun$N, not part of this file)
 * use to reach private state, so their names, visibility and signatures must not change.
 */
public class HTMLSource implements Serializable {
    /** Root URL of the site; assigned from the constructor's {@code url} argument. */
    public final URL com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$siteURL;

    /** Link-following limit; values below 1 make {@link #unfilteredDocuments()} return the root page only. */
    private final int maxDepth;

    /** Headers applied to every connection created by {@link HTMLConnector#connect(URL)}. */
    public final Map<String, String> com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$requestHeaders;

    /**
     * Threshold passed to the constructor. It is not read anywhere in this file.
     * NOTE(review): presumably consumed by the closures used in {@link #fetch()} - confirm.
     */
    public final double com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$cacheEditDistanceThreshold;

    /**
     * Connection timeout, parsed as an int from the HTML_SOURCE_CONNECT_TIMEOUT_MILLIS environment
     * variable. The fallback value is supplied by HTMLSource$$anonfun$1, which is not visible here.
     */
    private final int com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$connectTimeoutMillis = new StringOps(Predef$.MODULE$.augmentString((String) package$.MODULE$.env().getOrElse("HTML_SOURCE_CONNECT_TIMEOUT_MILLIS", new HTMLSource$$anonfun$1(this)))).toInt();

    /**
     * Cache entry lifetime in minutes, parsed as an int from the HTML_SOURCE_CACHE_TIME_MINUTES
     * environment variable. The fallback value is supplied by HTMLSource$$anonfun$2, not visible here.
     */
    private final int cacheTimeMinutes = new StringOps(Predef$.MODULE$.augmentString((String) package$.MODULE$.env().getOrElse("HTML_SOURCE_CACHE_TIME_MINUTES", new HTMLSource$$anonfun$2(this)))).toInt();

    /** Documents keyed by URL; entries expire {@code cacheTimeMinutes} minutes after being written. */
    private final Cache<URL, Document> com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$cache = CacheBuilder.newBuilder().expireAfterWrite(cacheTimeMinutes(), TimeUnit.MINUTES).build();

    /** Connection factory; replaceable through {@link #connector_$eq(HTMLConnector)}. */
    private HTMLConnector connector = new HTMLConnector(this);

    /**
     * Matches absolute http and https URLs.
     * The previous pattern "http[s]://.+" made the 's' mandatory, so plain http links never matched.
     */
    private final Regex com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$urlPattern = new StringOps(Predef$.MODULE$.augmentString(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"https?://.+"})).raw(Nil$.MODULE$))).r();

    /** Matches a string made only of one or more '/' characters. */
    private final Regex com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$rootPathPattern = new StringOps(Predef$.MODULE$.augmentString(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"[/]+"})).raw(Nil$.MODULE$))).r();

    /** Matches a '/' followed by at least one more character. */
    private final Regex com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$absolutePathPattern = new StringOps(Predef$.MODULE$.augmentString(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"[/].+"})).raw(Nil$.MODULE$))).r();

    /**
     * Matches one or more whitespace characters.
     * The previous literal carried a doubled backslash inside a raw interpolated string, which
     * produced a regex for a literal backslash followed by 's' characters and so never matched blanks.
     */
    private final Regex com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$blankPattern = new StringOps(Predef$.MODULE$.augmentString(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"\\s+"})).raw(Nil$.MODULE$))).r();

    /**
     * Creates jsoup connections configured with the enclosing source's timeout and request headers.
     * Kept as a non-static inner class because the compiled Scala exposes the {@code $outer} link.
     */
    public class HTMLConnector {
        public final /* synthetic */ HTMLSource $outer;

        /**
         * Builds a jsoup connection for the given URL.
         *
         * @param url address to connect to; its string form is handed to {@code Jsoup.connect}
         * @return a connection with the enclosing source's connect timeout and request headers applied
         */
        public Connection connect(URL url) {
            return Jsoup.connect(url.toString()).timeout(com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$HTMLConnector$$$outer().com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$connectTimeoutMillis()).headers(JavaConversions$.MODULE$.mapAsJavaMap(com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$HTMLConnector$$$outer().com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$requestHeaders));
        }

        /** @return the enclosing {@link HTMLSource} instance */
        public /* synthetic */ HTMLSource com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$HTMLConnector$$$outer() {
            return this.$outer;
        }

        /**
         * @param hTMLSource enclosing source; must not be null
         * @throws NullPointerException if {@code hTMLSource} is null
         */
        public HTMLConnector(HTMLSource hTMLSource) {
            if (hTMLSource == null) {
                // Same exception type the decompiled "throw null" raised, now with a message.
                throw new NullPointerException("hTMLSource");
            }
            this.$outer = hTMLSource;
        }
    }

    /** @return the connect timeout in milliseconds read at construction time */
    public int com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$connectTimeoutMillis() {
        return this.com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$connectTimeoutMillis;
    }

    /** @return the cache entry lifetime in minutes read at construction time */
    private int cacheTimeMinutes() {
        return this.cacheTimeMinutes;
    }

    /** @return the URL-to-document cache */
    public Cache<URL, Document> com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$cache() {
        return this.com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$cache;
    }

    /** @return the connector currently used to open connections */
    public HTMLConnector connector() {
        return this.connector;
    }

    /**
     * Replaces the connector (Scala setter for the {@code connector} var).
     *
     * @param hTMLConnector connector to use for subsequent fetches
     */
    public void connector_$eq(HTMLConnector hTMLConnector) {
        this.connector = hTMLConnector;
    }

    /**
     * Runs the cache's pending maintenance.
     * NOTE(review): Guava documents cleanUp() as maintenance only; it does not discard live
     * entries. If "reset" is meant to empty the cache, invalidateAll() is required - confirm
     * the intent with callers before changing.
     */
    public void reset() {
        com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$cache().cleanUp();
    }

    /**
     * Returns the pages built from {@link #unfilteredDocuments()} after filtering with
     * HTMLSource$$anonfun$fetch$1 and mapping with HTMLSource$$anonfun$fetch$2 (both closures are
     * defined outside this file).
     *
     * @return the resulting sequence of {@link HTMLPage}
     */
    public Seq<HTMLPage> fetch() {
        return (Seq) ((TraversableLike) unfilteredDocuments().filter(new HTMLSource$$anonfun$fetch$1(this))).map(new HTMLSource$$anonfun$fetch$2(this), Seq$.MODULE$.canBuildFrom());
    }

    /** @return the pattern for absolute http(s) URLs */
    public Regex com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$urlPattern() {
        return this.com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$urlPattern;
    }

    /** @return the pattern for strings consisting only of '/' characters */
    public Regex com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$rootPathPattern() {
        return this.com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$rootPathPattern;
    }

    /** @return the pattern for paths starting with '/' */
    public Regex com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$absolutePathPattern() {
        return this.com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$absolutePathPattern;
    }

    /** @return the pattern for whitespace-only strings */
    public Regex com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$blankPattern() {
        return this.com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$blankPattern;
    }

    /**
     * Downloads the site's root document and, when {@code maxDepth >= 1}, appends the results of
     * processing every {@code a[href]} element found on it.
     *
     * The per-link steps are implemented by the generated closures HTMLSource$$anonfun$3 to
     * HTMLSource$$anonfun$9, which are not part of this file; steps 3 to 6 run on a parallel
     * collection. NOTE(review): presumably they resolve each href to a URL on the same host and
     * download it - confirm against the closure classes.
     *
     * @return the root (URL, document) pair, followed by the pairs produced from its links
     */
    public Seq<Tuple2<URL, Document>> unfilteredDocuments() {
        String host = this.com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$siteURL.getHost();
        // URL.getPort() returns -1 when the URL carries no explicit port; in that case no suffix is used.
        int port = this.com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$siteURL.getPort();
        String portSuffix = port == -1 ? "" : ":" + port;
        Document rootDocument = connector().connect(this.com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$siteURL).get();
        if (this.maxDepth < 1) {
            return Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{new Tuple2(this.com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$siteURL, rootDocument)}));
        }
        Elements anchors = rootDocument.select("a[href]");
        // Root pair ++ (anchors -> filter$3 -> par -> map$4 -> filter$5 -> map$6 -> map$7 -> filter$8 -> map$9).
        return (Seq) Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{new Tuple2(this.com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$siteURL, rootDocument)})).$plus$plus(
                (GenSeq) ((GenTraversableLike) ((GenTraversableLike) (anchors == null
                        ? (GenSeq) Seq$.MODULE$.apply(Nil$.MODULE$)
                        : (GenSeq) ((ParIterableLike) ((Parallelizable) JavaConversions$.MODULE$.asScalaIterator(anchors.iterator()).toSeq()
                                .filter(new HTMLSource$$anonfun$3(this))).par()
                                .map(new HTMLSource$$anonfun$4(this, host, portSuffix), ParSeq$.MODULE$.canBuildFrom()))
                                .filter(new HTMLSource$$anonfun$5(this))
                                .map(new HTMLSource$$anonfun$6(this), ParSeq$.MODULE$.canBuildFrom()))
                        .map(new HTMLSource$$anonfun$7(this, host), GenSeq$.MODULE$.canBuildFrom()))
                        .filter(new HTMLSource$$anonfun$8(this)))
                        .map(new HTMLSource$$anonfun$9(this), GenSeq$.MODULE$.canBuildFrom()),
                Seq$.MODULE$.canBuildFrom());
    }

    /**
     * @param url site root URL
     * @param i   maximum link depth; values below 1 restrict fetching to the root page
     * @param map request headers sent with every connection
     * @param d   cache edit-distance threshold (stored; not read in this file)
     */
    public HTMLSource(URL url, int i, Map<String, String> map, double d) {
        this.com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$siteURL = url;
        this.maxDepth = i;
        this.com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$requestHeaders = map;
        this.com$github$catalystcode$fortis$spark$streaming$html$HTMLSource$$cacheEditDistanceThreshold = d;
    }
}
