package cn.tyoui.core;

import cn.tyoui.proxy.CrawlerProxyIP;
import cn.tyoui.httpclient.HttpCrawler;

import java.io.File;
import java.io.InputStream;
import java.util.*;

/**
 * Crawler controller.
 *
 * <p>Loads settings from the classpath resource {@code /crawler.properties}, optionally
 * sets up proxy IPs, and then drives an {@link HttpCrawler} over either a single URL or a
 * numbered range of URLs. Property names are matched case-insensitively.
 *
 * @author Tyoui
 */
public class ControllerCrawler {

    /** Classpath location of the configuration file. */
    private static final String CONFIG_RESOURCE = "/crawler.properties";

    /** Settings read from the configuration file, keyed by lower-cased property name. */
    private final Map<String, String> map = new HashMap<String, String>();
    /** Output directory handed to the crawler; overridden by the {@code CrawlerHtmlDir} property. */
    private String dir = "c:\\crawler";
    /** Base URL to crawl; overridden by the {@code URL} property. */
    private String url = "http://www.baidu.com";
    /** Index range used when {@code endIndex} is configured ({@code start} inclusive, {@code end} exclusive). */
    private int start = 0, end = 0;

    /**
     * Starts the crawler: loads the configuration file and runs the crawl.
     *
     * @throws IllegalStateException if {@code /crawler.properties} is not on the classpath
     * @throws Exception             if the configuration cannot be read or the crawl fails
     */
    public void start() throws Exception {
        InputStream in = this.getClass().getResourceAsStream(CONFIG_RESOURCE);
        if (in == null) {
            // getResourceAsStream signals "not found" with null; fail with a clear message
            // instead of a NullPointerException from Properties.load.
            throw new IllegalStateException(
                    "Configuration resource not found on classpath: " + CONFIG_RESOURCE);
        }
        Properties properties = new Properties();
        try {
            properties.load(in);
        } finally {
            // Always release the stream, even when loading fails.
            in.close();
        }
        for (String key : properties.stringPropertyNames()) {
            // Locale.ROOT keeps key normalisation independent of the JVM's default locale.
            map.put(key.toLowerCase(Locale.ROOT), properties.getProperty(key));
        }
        initCrawler();
    }

    /**
     * Configures the crawler from the loaded settings and runs it.
     *
     * <p>When {@code endIndex} is set, one page is crawled for every index in
     * {@code [startIndex, endIndex)}, using {@code URL + URLPrefix + index + URLSuffix};
     * otherwise only {@code URL} is crawled. The crawler is closed when this method exits,
     * whether normally or with an exception.
     *
     * @throws Exception if crawler initialisation or crawling fails
     */
    private void initCrawler() throws Exception {
        HttpCrawler httpCrawler = new HttpCrawler();
        try {
            if (isProxyEnabled()) {
                initProxy(httpCrawler);
            }
            int min = intSetting("minTime", "10");
            // The crawler receives the width of the interval (maxTime - minTime), not maxTime itself.
            int range = intSetting("maxTime", "50") - min;
            url = changer("URL", url);
            dir = changer("CrawlerHtmlDir", dir);
            httpCrawler.setDir(dir);
            if (changer("endIndex", null) != null) {
                // The default below is never used because endIndex is known to be present.
                end = intSetting("endIndex", "0");
                start = intSetting("startIndex", "0");
                String urlPrefix = changer("URLPrefix", "");
                String urlSuffix = changer("URLSuffix", "");
                for (int i = start; i < end; i++) {
                    httpCrawler.startCrawler(url + urlPrefix + i + urlSuffix, min, range);
                }
            } else {
                httpCrawler.startCrawler(url, min, range);
            }
        } finally {
            // Release the crawler even if configuration parsing or crawling threw.
            httpCrawler.close();
        }
    }

    /**
     * Tells whether proxy support is switched on.
     *
     * <p>Any configured value of {@code isProxyIP} enables proxies, except an explicit
     * {@code false} (case-insensitive), which disables them.
     *
     * @return {@code true} if proxies should be set up
     */
    private boolean isProxyEnabled() {
        String proxy = changer("isProxyIP", null);
        return proxy != null && !"false".equalsIgnoreCase(proxy.trim());
    }

    /**
     * Sets up proxy IPs on the crawler, either from scraped free proxies ({@code isFreeIP},
     * default {@code true}) or from the user's own proxy list file.
     *
     * @param httpCrawler crawler to configure
     * @throws Exception if the scraped proxy list cannot be written or loaded
     */
    private void initProxy(HttpCrawler httpCrawler) throws Exception {
        boolean useFreeProxies = Boolean.parseBoolean(changer("isFreeIP", "true"));
        if (!useFreeProxies) {
            String text = changer("oneselfProxyIPText", new File(".").getAbsolutePath());
            oneselfProxyIP(httpCrawler, text);
            return;
        }
        CrawlerProxyIP crawlerProxyIP = new CrawlerProxyIP();
        // NOTE(review): the division by 100 presumably reflects ~100 entries per listing page — confirm.
        int pages = intSetting("ProxyAllNum", "500") / 100;
        int maxIp = intSetting("MaxProxyIP", "100");
        for (int page = 1; page <= pages; page++) {
            try {
                String html = crawlerProxyIP.crawler("http://www.xicidaili.com/nn/" + page);
                crawlerProxyIP.proxyRead(html);
            } catch (Exception e) {
                // Best effort: a page that fails to download or parse is reported and skipped.
                e.printStackTrace();
            }
        }
        crawlerProxyIP.writeIP(maxIp);
        String path = new File("").getCanonicalPath() + File.separator + "text";
        httpCrawler.proxyInit(path + File.separator + "代理ip.txt");
    }

    /**
     * Reads an integer setting, tolerating surrounding whitespace in the configured value.
     *
     * @param key          property name (case-insensitive)
     * @param defaultValue value to parse when the property is absent; must not be {@code null}
     * @return the parsed integer
     * @throws NumberFormatException if the value is not a valid integer; the message names the property
     */
    private int intSetting(String key, String defaultValue) {
        String raw = changer(key, defaultValue);
        try {
            return Integer.parseInt(raw.trim());
        } catch (NumberFormatException e) {
            NumberFormatException detailed = new NumberFormatException(
                    "Property '" + key + "' must be an integer but was '" + raw + "'");
            detailed.initCause(e);
            throw detailed;
        }
    }

    /**
     * Looks up a setting, falling back to a default when it is absent.
     *
     * @param key    property name (case-insensitive)
     * @param values default value
     * @return the configured value, or {@code values} if the property is not set
     */
    private String changer(String key, String values) {
        String value = map.get(key.toLowerCase(Locale.ROOT));
        if (value != null) {
            return value;
        }
        return values;
    }


    /**
     * Initialises the crawler with the user's own proxy IP list.
     *
     * <p>Failures are printed and otherwise ignored, so the crawl continues without these proxies.
     *
     * @param httpCrawler crawler to configure
     * @param textIP      path of the proxy IP text file
     */
    private void oneselfProxyIP(HttpCrawler httpCrawler, String textIP) {
        try {
            httpCrawler.proxyInit(textIP);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
