package org.ow2.weblab.service.splitter.rss;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicLong;

import javax.annotation.PostConstruct;
import javax.jws.WebService;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.ow2.weblab.core.extended.factory.AnnotationFactory;
import org.ow2.weblab.core.extended.factory.ResourceFactory;
import org.ow2.weblab.core.extended.ontologies.DublinCore;
import org.ow2.weblab.core.helper.PoKHelper;
import org.ow2.weblab.core.helper.RDFHelperFactory;
import org.ow2.weblab.core.helper.ResourceHelper;
import org.ow2.weblab.core.model.ComposedResource;
import org.ow2.weblab.core.model.Document;
import org.ow2.weblab.core.model.Resource;
import org.ow2.weblab.core.model.processing.WProcessingAnnotator;
import org.ow2.weblab.core.services.Analyser;
import org.ow2.weblab.core.services.ContentNotAvailableException;
import org.ow2.weblab.core.services.analyser.ProcessArgs;
import org.ow2.weblab.core.services.analyser.ProcessReturn;
import org.springframework.core.io.ClassPathResource;

import com.sun.syndication.feed.synd.SyndEntry;
import com.sun.syndication.feed.synd.SyndFeed;
import com.sun.syndication.io.SyndFeedInput;

@WebService(endpointInterface = "org.weblab_project.services.analyser.Analyser")
public class WLRSSSplitter implements Analyser {

	protected static Properties PROPS;

	protected static Log LOGGER = LogFactory.getLog(WLRSSSplitter.class);

	protected static SimpleDateFormat SDF = new SimpleDateFormat(
			"yyyy-MM-dd'T'HH:mm:ssZ", Locale.ENGLISH);

	public final static String SERVICE_REF = "rss-splitter";

	@PostConstruct
	public void init() throws Exception {
		if (PROPS == null) {
			PROPS = new Properties();
			ClassPathResource classPathResource = new ClassPathResource(
					"rss-splitter.properties");
			PROPS.load(classPathResource.getInputStream());
		}
	}

	public ProcessReturn process(ProcessArgs args) throws ContentNotAvailableException {
		String source = checkResource(args.getResource());
		ComposedResource resourceCollection = ResourceFactory.createResource(
				SERVICE_REF, "rc_" + System.currentTimeMillis(),
				ComposedResource.class);
		ByteArrayInputStream byteArrayInputStream = null;
		try {
			
			URI uri = getNativeContent(args.getResource());
			
			SyndFeedInput feedInput = new SyndFeedInput();

			org.w3c.dom.Document doc = DocumentBuilderFactory.newInstance()
					.newDocumentBuilder().parse(uri.toURL().openStream());
			SyndFeed syndFeed = feedInput.build(doc);

			// String language = syndFeed.getLanguage();

			for (Object feed : syndFeed.getEntries()) {
				if (feed instanceof SyndEntry) {
					Resource res = buildResourceFromSyndEntry((SyndEntry) feed,
							source);
					resourceCollection.getResource().add(res);
				}
			}

		} catch (Exception e) {
			LOGGER.info("Unable to parse content, maybe not an RSS Feed : "+ e,e);

		} finally {
			if (byteArrayInputStream != null) {
				try {
					byteArrayInputStream.close();
				} catch (IOException e) {
					LOGGER.info("Unable to close stream : " + e.getMessage(),e);
				}
			}
		}
		ProcessReturn processReturn = new ProcessReturn();
		processReturn.setResource(resourceCollection);

		/*
		 * if resource collection is empty -> not rss add first resource
		 */
		if (resourceCollection.getResource().size() == 0) {
			resourceCollection.getResource().add(args.getResource());
		}
		return processReturn;
	}

	public String checkResource(Resource resource) throws ContentNotAvailableException  {
		String source = null;
		
		URI uri = getNativeContent(resource);
		
		if (uri == null) {
			throw new ContentNotAvailableException("Source can not be null.","Source can not be null.");

		} else {
			ResourceHelper resourceHelper = RDFHelperFactory
					.getResourceHelper(resource);
			List<String> sources = resourceHelper.getLitsOnPredSubj(resource
					.getUri(), DublinCore.SOURCE_PROPERTY_NAME);
			if (sources.size() > 0) {
				if (sources.size() > 1) {
					LOGGER
							.warn("Multiple sources annotation, taking the first one: "
									+ sources.get(0));
				}
				source = sources.get(0);
			} else {
				throw new ContentNotAvailableException("Source can not be null.","Source can not be null.");
			}
		}
		return source;
	}

	protected Resource buildResourceFromSyndEntry(SyndEntry syndEntry,
			String source) {
		LOGGER.info("Creating RSS Feed Entry resource.");
		Document document = ResourceFactory.createResource(SERVICE_REF, "doc_"
				+ System.currentTimeMillis(), Document.class);

		PoKHelper poKHelper = RDFHelperFactory.getPoKHelper(AnnotationFactory
				.createAndLinkAnnotation(document));
		poKHelper.setAutoCommitMode(false);

		/*
		 * title
		 */
		if (syndEntry.getTitle() != null && syndEntry.getTitle().length() > 0) {
			poKHelper.createLitStat(document.getUri(), PROPS
					.getProperty("title"), syndEntry.getTitle());
			LOGGER.trace("Feed title: " + syndEntry.getTitle());
		}
		/*
		 * authors
		 */
		if (syndEntry.getAuthors() != null) {
			for (Object author : syndEntry.getAuthors()) {
				if (author instanceof String) {
					String strAuthor = (String) author;
					poKHelper.createLitStat(document.getUri(), PROPS
							.getProperty("author"), strAuthor);
					LOGGER.trace("Feed author: " + strAuthor);

				}

			}
		} else {
			if (syndEntry.getAuthor() != null
					&& syndEntry.getAuthor().length() > 0) {
				poKHelper.createLitStat(document.getUri(), PROPS
						.getProperty("author"), syndEntry.getAuthor());
				LOGGER.trace("Feed author: " + syndEntry.getAuthor());
			}
		}

		/*
		 * source
		 */
		if (syndEntry.getUri() != null && syndEntry.getUri().length() > 0) {
			poKHelper.createLitStat(document.getUri(), PROPS
					.getProperty("source"), source);
			LOGGER.trace("Feed source: " + source);
		}

		/*
		 * format
		 */
		if (syndEntry.getDescription() != null
				&& syndEntry.getDescription().getType() != null
				&& syndEntry.getDescription().getType().length() > 0) {
			poKHelper.createLitStat(document.getUri(), PROPS
					.getProperty("format"), "application/rss+xml");
			// syndEntry.getDescription()
			// .getType());
			LOGGER
					.trace("Feed format: "
							+ syndEntry.getDescription().getType());
		}

		/*
		 * date
		 */
		if (syndEntry.getPublishedDate() != null) {
			poKHelper.createLitStat(document.getUri(), PROPS
					.getProperty("date"), SDF.format(syndEntry
					.getPublishedDate()));
			LOGGER.trace("Feed date: " + syndEntry.getPublishedDate());
		} else if (syndEntry.getUpdatedDate() != null) {
			poKHelper.createLitStat(document.getUri(), PROPS
					.getProperty("date"), SDF
					.format(syndEntry.getUpdatedDate()));
			LOGGER.trace("Feed date: " + syndEntry.getUpdatedDate());
		} else {
			Date curDate = new Date();
			LOGGER.warn("No date in the feed, using current: " + curDate);
			poKHelper.createLitStat(document.getUri(), PROPS
					.getProperty("date"), SDF.format(curDate));
			LOGGER.trace("Feed date: " + curDate);
		}

		poKHelper.commit();

		return document;
	}

	
	private URI getNativeContent(Resource resource){
		WProcessingAnnotator wpa = new WProcessingAnnotator(resource);
		return wpa.readNativeContent().firstTypedValue();
	}

}
