/**
 *  Copyright (c) 1997-2013, tinygroup.org (luo_guo@live.cn).
 *
 *  Licensed under the GPL, Version 3.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *       http://www.gnu.org/licenses/gpl.html
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 * --------------------------------------------------------------------------
 *  版权 (c) 1997-2013, tinygroup.org (luo_guo@live.cn).
 *
 *  本开源软件遵循 GPL 3.0 协议;
 *  如果您不遵循此协议，则不被允许使用此文件。
 *  你可以从下面的地址获取完整的协议文本
 *
 *       http://www.gnu.org/licenses/gpl.html
 */
package org.tinygroup.tinyspider.impl;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.tinygroup.htmlparser.HtmlDocument;
import org.tinygroup.htmlparser.node.HtmlNode;
import org.tinygroup.htmlparser.parser.HtmlStringParser;
import org.tinygroup.parser.NodeFilter;
import org.tinygroup.tinyspider.Processor;
import org.tinygroup.tinyspider.SiteVisitor;
import org.tinygroup.tinyspider.Spinder;
import org.tinygroup.tinyspider.UrlRepository;
import org.tinygroup.tinyspider.Watcher;

/**
 * {@link Spinder} implementation that, for a given URL, obtains the page
 * content from the first matching {@link SiteVisitor}, parses it as HTML and
 * hands every node selected by each {@link Watcher}'s filter to that watcher's
 * processors.
 */
public class SpinderImpl implements Spinder {

	List<Watcher> watcherList = new ArrayList<Watcher>();
	List<SiteVisitor> siteVisitorList = new ArrayList<SiteVisitor>();
	private UrlRepository urlRepository;
	/**
	 * Fallback visitor used only while no visitor has been registered. It is
	 * created on first need and reused, and is deliberately kept out of
	 * {@link #siteVisitorList} so that it can never take precedence over
	 * visitors registered later.
	 */
	private SiteVisitor defaultSiteVisitor;

	/**
	 * Registers a watcher whose node filter and processors are applied to
	 * every document processed afterwards.
	 *
	 * @param watcher the watcher to append to the watcher list
	 */
	public void addWatcher(Watcher watcher) {
		watcherList.add(watcher);
	}

	/**
	 * Processes the given URL without any extra request parameters; equivalent
	 * to {@code processUrl(url, null)}.
	 *
	 * @param url the URL to process
	 */
	public void processUrl(String url) {
		processUrl(url, null);
	}

	/**
	 * Registers a site visitor. Visitors are consulted in registration order
	 * and the first one whose {@code isMatch(url)} returns true supplies the
	 * content.
	 *
	 * @param siteVisitor the visitor to append to the visitor list
	 */
	public void addSiteVisitor(SiteVisitor siteVisitor) {
		siteVisitorList.add(siteVisitor);
	}

	/**
	 * Sets the repository consulted to decide whether a URL was already
	 * handled.
	 *
	 * @param urlRepository the repository to use
	 */
	public void setUrlRepository(UrlRepository urlRepository) {
		this.urlRepository = urlRepository;
	}

	/**
	 * Fetches the content of {@code url}, parses it as HTML and runs all
	 * registered watchers over the resulting document.
	 * <p>
	 * If no repository has been set, a {@code UrlRepositoryMemory} is created
	 * on first use. If no site visitor has been registered, a
	 * {@code SiteVisitorInclude(".*")} fallback is used for this call.
	 *
	 * @param url       the URL to process
	 * @param parameter parameters passed through unchanged to
	 *                  {@code SiteVisitor.getContent}; may be {@code null}
	 * @throws RuntimeException if the repository reports the URL as already
	 *                          existing, or if no visitor produced content
	 */
	public void processUrl(String url, Map<String, Object> parameter) {
		if (urlRepository == null) {
			urlRepository = new UrlRepositoryMemory();
		}
		// NOTE(review): this method only queries the repository and never
		// records the URL itself; presumably that happens elsewhere (e.g.
		// inside the visitor or a processor) - confirm.
		if (urlRepository.isExist(url)) {
			throw new RuntimeException(url + "已经处理过");
		}
		String content = fetchContent(url, parameter);
		// No visitor matched, or the matching visitor returned nothing.
		if (content == null) {
			throw new RuntimeException("读取" + url + "内容失败。");
		}
		HtmlDocument document = new HtmlStringParser().parse(content);
		dispatchToWatchers(document);
	}

	/**
	 * Returns the content supplied by the first visitor that matches the URL,
	 * or {@code null} when no visitor matches.
	 */
	private String fetchContent(String url, Map<String, Object> parameter) {
		if (siteVisitorList.isEmpty()) {
			// Use the fallback without adding it to siteVisitorList: had it
			// been appended there, this ".*" visitor would sit in front of any
			// visitor registered later and (assuming ".*" matches every URL)
			// prevent those visitors from ever being selected.
			if (defaultSiteVisitor == null) {
				defaultSiteVisitor = new SiteVisitorInclude(".*");
			}
			if (defaultSiteVisitor.isMatch(url)) {
				return defaultSiteVisitor.getContent(url, parameter);
			}
			return null;
		}
		for (SiteVisitor siteVisitor : siteVisitorList) {
			if (siteVisitor.isMatch(url)) {
				// Only the first matching visitor is asked for content.
				return siteVisitor.getContent(url, parameter);
			}
		}
		return null;
	}

	/**
	 * Applies each watcher's node filter to the document root and passes every
	 * node it finds to all of that watcher's processors.
	 */
	private void dispatchToWatchers(HtmlDocument document) {
		for (Watcher watcher : watcherList) {
			NodeFilter<HtmlNode> nodeFilter = watcher.getNodeFilter();
			nodeFilter.init(document.getRoot());
			List<HtmlNode> nodeList = nodeFilter.findNodeList();
			for (HtmlNode htmlNode : nodeList) {
				for (Processor processor : watcher.getProcessorList()) {
					processor.process(htmlNode);
				}
			}
		}
	}

}
