/*
 * Decompiled with CFR 0.152.
 */
package org.fit.layout.storage.example;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Collects the URLs of pages reachable from a seed URL by following
 * {@code <a href>} links, never leaving the seed URL's prefix.
 *
 * <p>Pages are visited in first-in-first-out order. Typical use:
 * {@link #setSeed(String)}, optionally {@link #setLinksLimit(int)}, then
 * {@link #start()}, then read the result with {@link #getUrls()}.
 *
 * <p>This class performs no synchronization.
 */
public class LinkCrawler {
    /** Limit used until {@link #setLinksLimit(int)} is called. */
    private static final int DEFAULT_LINK_LIMIT = 100;

    /**
     * When {@code false}, a link is recorded in the result as soon as it is
     * discovered; when {@code true}, only after its page has been downloaded.
     */
    private static final boolean ADD_ONLY_VALID_URLS = false;

    /** Charset used when the server does not announce one. */
    private static final String DEFAULT_CHARSET = "ISO-8859-1";

    /** Extracts the charset parameter (optionally quoted) from a Content-Type header. */
    private static final Pattern CHARSET_PATTERN =
            Pattern.compile("charset\\s*=\\s*[\"']?([^\\s;\"']+)", Pattern.CASE_INSENSITIVE);

    /** Links ending in one of these extensions (optionally followed by a query) are never crawled. */
    private static final Pattern BINARY_FILES_EXTENSIONS = Pattern.compile(
            ".*\\.(bmp|gif|jpe?g|png|tiff?|pdf|ico|xaml|pict|rif|pptx?|ps|mid|mp2|mp3|mp4|wav|wma|au|aiff|flac|ogg|3gp|aac|amr|au|vox|avi|mov|mpe?g|ra?m|m4v|smil|wm?v|swf|aaf|asf|flv|mkv|zip|rar|gz|7z|aac|ace|alz|apk|arc|arj|dmg|jar|lzip|lha)(\\?.*)?$",
            Pattern.CASE_INSENSITIVE);

    /** URLs collected so far, in discovery order. */
    private final List<String> crawled = new ArrayList<String>();
    /** URLs still waiting to be downloaded, in FIFO order. */
    private final List<String> toCrawl = new ArrayList<String>();
    /** Seed URL without a trailing slash; also the prefix that delimits the crawl scope. */
    private String baseUrl;
    /** URL of the page currently being processed. */
    private String actualCrawlUrl;
    /** Maximum number of collected URLs; values {@code <= 0} disable the limit. */
    private int linkLimit = DEFAULT_LINK_LIMIT;

    /**
     * Sets the crawl starting point and scope and resets the pending queue.
     * URLs collected by earlier runs are kept.
     *
     * @param newSeed absolute URL to start from; a trailing slash is dropped
     */
    public void setSeed(String newSeed) {
        this.baseUrl = this.getBaseUrl(newSeed);
        this.toCrawl.clear();
        this.toCrawl.add(this.baseUrl);
    }

    /**
     * Crawls until the queue is empty or the link limit is reached. A page that
     * cannot be downloaded is reported on {@code System.err} and skipped.
     */
    public void start() {
        while (!this.toCrawl.isEmpty()) {
            try {
                this.runNextUrlCrawling();
            }
            catch (IOException e) {
                System.err.println("Cannot crawl " + this.actualCrawlUrl + ": " + e);
            }
            catch (LinkLimit e) {
                break;
            }
        }
    }

    /** @return number of URLs collected so far */
    public int size() {
        return this.crawled.size();
    }

    /**
     * @param str URL to look up
     * @return whether {@code str} has already been collected
     */
    public Boolean contains(String str) {
        return this.crawled.contains(str);
    }

    /**
     * @param index position in discovery order
     * @return the collected URL at {@code index}
     * @throws IndexOutOfBoundsException if {@code index} is out of range
     */
    public String get(int index) {
        return this.crawled.get(index);
    }

    /**
     * @param newLimit maximum number of URLs to collect; {@code <= 0} means unlimited
     */
    public void setLinksLimit(int newLimit) {
        this.linkLimit = newLimit;
    }

    /**
     * @return the live internal list of collected URLs (not a copy); changes
     *         made by the caller affect the crawler
     */
    public List<String> getUrls() {
        return this.crawled;
    }

    /**
     * Downloads the next queued page, records it and queues its in-scope links.
     *
     * @throws IOException if the page URL is malformed or cannot be read
     * @throws LinkLimit   if the link limit is reached while processing links
     */
    private void runNextUrlCrawling() throws IOException, LinkLimit {
        if (this.toCrawl.isEmpty()) {
            return;
        }
        this.actualCrawlUrl = this.toCrawl.remove(0);
        String actualPage = this.loadURL2String(new URL(this.actualCrawlUrl));
        Document doc = Jsoup.parse(actualPage);
        Elements links = doc.select("a");
        this.add(this.actualCrawlUrl);
        for (Element link : links) {
            this.appendUrlToCrawlIfDoesNotExist(link.attr("href"));
        }
    }

    /**
     * Queues a link found on the current page unless it is empty, points to a
     * binary file, lies outside the crawl scope or is already known.
     *
     * @param url raw {@code href} value
     * @throws LinkLimit if the configured number of URLs has been collected
     */
    private void appendUrlToCrawlIfDoesNotExist(String url) throws LinkLimit {
        if (url == null) {
            return;
        }
        String href = this.stripFragment(url.trim());
        // An empty href (or a pure "#fragment") refers to the current page.
        if (href.length() == 0 || BINARY_FILES_EXTENSIONS.matcher(href).matches()) {
            return;
        }
        // ">=" so that exactly linkLimit URLs are collected, not linkLimit + 1.
        if (this.linkLimit > 0 && this.size() >= this.linkLimit) {
            throw new LinkLimit("Limit exceeded " + this.linkLimit);
        }
        String absolute;
        try {
            absolute = this.normalizeUrl(href);
        }
        catch (IOException ex) {
            // Unresolvable link (e.g. unknown scheme such as "javascript:").
            System.err.println("Nepridano URL " + url);
            return;
        }
        if (!this.isInScope(absolute) || absolute.startsWith("mailto:") || absolute.startsWith("file:")
                || this.toCrawl.contains(absolute) || this.contains(absolute).booleanValue()) {
            return;
        }
        this.toCrawl.add(absolute);
        if (!ADD_ONLY_VALID_URLS) {
            this.add(absolute);
        }
    }

    /**
     * Tells whether {@code url} is the seed itself or lies below it. The
     * separator is checked explicitly so that a seed {@code http://site.com}
     * does not match {@code http://site.com.evil.org} or {@code http://site.community}.
     */
    private boolean isInScope(String url) {
        return url.equals(this.baseUrl)
                || url.startsWith(this.baseUrl + "/")
                || url.startsWith(this.baseUrl + "?");
    }

    /**
     * Records {@code str} as collected.
     *
     * @return {@code true} if it was added, {@code false} if it was already present
     */
    private boolean add(String str) {
        if (!this.contains(str).booleanValue()) {
            this.crawled.add(str);
            return true;
        }
        return false;
    }

    /**
     * Resolves a link against the page currently being crawled and drops its
     * fragment. The case of the link is preserved because URL paths are
     * case-sensitive.
     *
     * <p>NOTE(review): HTTP redirects and {@code <base href>} are not taken into
     * account; links are resolved against the requested URL.
     *
     * @param oldUrl link as written in the page
     * @return absolute URL without fragment
     * @throws IOException if the link cannot be parsed as a URL
     */
    private String normalizeUrl(String oldUrl) throws IOException {
        // setSeed() strips the seed's trailing slash; treat the seed as a
        // directory so that its relative links stay below it.
        String context = this.actualCrawlUrl.equals(this.baseUrl)
                ? this.baseUrl + "/"
                : this.actualCrawlUrl;
        String resolved = new URL(new URL(context), this.stripFragment(oldUrl)).toExternalForm();
        // "<seed>/" and "<seed>" are the same page; use the form stored by setSeed().
        return resolved.equals(this.baseUrl + "/") ? this.baseUrl : resolved;
    }

    /** Returns {@code url} without its {@code #fragment} part, if any. */
    private String stripFragment(String url) {
        int hash = url.indexOf('#');
        return hash >= 0 ? url.substring(0, hash) : url;
    }

    /** Returns {@code url} without one trailing slash, if present. */
    private String getBaseUrl(String url) {
        if (url.endsWith("/")) {
            url = url.substring(0, url.length() - 1);
        }
        return url;
    }

    /**
     * Downloads a resource and decodes it as text, using the charset announced
     * in the Content-Type header or ISO-8859-1 when none is given.
     *
     * @param url resource to download
     * @return the decoded body
     * @throws IOException on connection/read failure or unsupported charset
     */
    private String loadURL2String(URL url) throws IOException {
        URLConnection con = url.openConnection();
        String charset = DEFAULT_CHARSET;
        String contentType = con.getContentType(); // null when the header is missing
        if (contentType != null) {
            Matcher m = CHARSET_PATTERN.matcher(contentType);
            if (m.find()) {
                charset = m.group(1);
            }
        }
        StringBuilder buf = new StringBuilder();
        InputStream in = con.getInputStream();
        try {
            Reader r = new InputStreamReader(in, charset);
            char[] chunk = new char[8192];
            int n;
            while ((n = r.read(chunk)) >= 0) {
                buf.append(chunk, 0, n);
            }
        }
        finally {
            // Closing the stream also covers the case where the reader could not be created.
            in.close();
        }
        return buf.toString();
    }

    /** Signals that the configured number of URLs has been collected. */
    static class LinkLimit
    extends Exception {
        private static final long serialVersionUID = 1L;

        LinkLimit(String string) {
            super(string);
        }
    }
}

