/**
 * WEBLAB: Service oriented integration platform for media mining and intelligence applications
 * 
 * Copyright (C) 2004 - 2009 EADS DEFENCE AND SECURITY SYSTEMS
 * 
 * This library is free software; you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License along with this
 * library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301 USA
 */

package org.ow2.weblab.services.normaliser.xml;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;

import javax.jws.WebService;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.ow2.weblab.content.binary.BinaryFolderContentManager;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.weblab_project.core.exception.WebLabCheckedException;
import org.weblab_project.core.factory.AnnotationFactory;
import org.weblab_project.core.factory.MediaUnitFactory;
import org.weblab_project.core.helper.BeanHelper;
import org.weblab_project.core.helper.PoKHelper;
import org.weblab_project.core.helper.RDFHelperFactory;
import org.weblab_project.core.helper.ResourceHelper;
import org.weblab_project.core.model.Annotation;
import org.weblab_project.core.model.ComposedUnit;
import org.weblab_project.core.model.Resource;
import org.weblab_project.core.model.text.Text;
import org.weblab_project.core.ontologies.DublinCore;
import org.weblab_project.services.analyser.Analyser;
import org.weblab_project.services.analyser.ProcessException;
import org.weblab_project.services.analyser.types.ProcessArgs;
import org.weblab_project.services.analyser.types.ProcessReturn;
import org.weblab_project.services.exception.WebLabException;
import org.xml.sax.SAXException;

/**
 * XML normaliser: evaluates the XPath expressions declared in the <code>NormaliserConfig</code> bean against the
 * native XML content of a <code>ComposedUnit</code>. Each matched value is written as a statement in a new
 * annotation linked to the unit and/or appended to the content of a <code>Text</code> unit, depending on the
 * configuration of each <code>XmlProperty</code>.
 */
@WebService(endpointInterface = "org.weblab_project.services.analyser.Analyser")
public class XmlNormaliser implements Analyser {

	private static final Log logger = LogFactory.getLog(XmlNormaliser.class);

	/**
	 * Parser features that are switched off before parsing. The XML content comes from arbitrary resources, so
	 * external entities and external DTDs must not be resolved (XXE). DOCTYPE declarations themselves stay
	 * allowed so that documents carrying one can still be parsed.
	 */
	private static final String[] EXTERNAL_ENTITY_FEATURES = {
			"http://xml.org/sax/features/external-general-entities",
			"http://xml.org/sax/features/external-parameter-entities",
			"http://apache.org/xml/features/nonvalidating/load-external-dtd" };

	/**
	 * Lower case mime type fragments; a resource is handled when its format literal contains one of them.
	 */
	private static final List<String> XML_TYPES;

	static {
		final List<String> tempList = new ArrayList<String>();
		tempList.add("application/xml");
		tempList.add("application/rdf+xml");
		tempList.add("application/rss+xml");
		XML_TYPES = Collections.unmodifiableList(tempList);
	}

	/**
	 * The <code>BinaryFolderContentManager</code> to use
	 */
	private final BinaryFolderContentManager contentManager;

	/**
	 * The <code>NormaliserConfig</code> used to normalise XML Resource
	 */
	private NormaliserConfig conf;

	/**
	 * Creates the service with the shared <code>BinaryFolderContentManager</code> instance and the
	 * <code>NormaliserConfig</code> bean named <code>NormaliserConfig.DEFAULT_BEAN_NAME</code> loaded from
	 * <code>NormaliserConfig.DEFAULT_CONF_FILE</code>.
	 */
	public XmlNormaliser() {
		super();

		this.contentManager = BinaryFolderContentManager.getInstance();

		this.conf = BeanHelper.getInstance().getSpecificInstance(NormaliserConfig.DEFAULT_CONF_FILE, true).getBean(
				NormaliserConfig.DEFAULT_BEAN_NAME, NormaliserConfig.class);
	}

	/**
	 * Normalises the XML content of the <code>ComposedUnit</code> held by <code>args</code>.
	 * <p>
	 * The unit is returned untouched (with a warning logged) when it has no <code>dc:format</code> literal or when
	 * the first one does not denote an XML mime type. Otherwise the native file is parsed and every configured
	 * XPath expression is evaluated on it.
	 * 
	 * @param args
	 *            The <code>ProcessArgs</code>; its resource must be a <code>ComposedUnit</code>.
	 * @return A <code>ProcessReturn</code> holding the (possibly enriched) <code>ComposedUnit</code>.
	 * @throws ProcessException
	 *             With error id E1 if <code>args</code> is invalid, E3 if the content cannot be retrieved or read or
	 *             an XPath expression cannot be evaluated, E5 if no XML parser can be configured and E0 if the
	 *             content is not parsable XML.
	 */
	@Override
	public ProcessReturn process(ProcessArgs args) throws ProcessException {
		final ComposedUnit cu = XmlNormaliser.checkArgs(args);
		final ProcessReturn pr = new ProcessReturn();
		pr.setResource(cu);

		// Check XML mime type
		final ResourceHelper formatHelper = RDFHelperFactory.getResourceHelper(cu);
		final List<String> formatList = formatHelper.getLitsOnPredSubj(cu.getUri(), DublinCore.FORMAT_PROPERTY_NAME);
		if (formatList == null || formatList.isEmpty()) {
			logger.warn(DublinCore.FORMAT_PROPERTY_NAME + " property not found, " + cu.getUri()
					+ " will not be processed by XML normaliser.");
			return pr;
		}
		if (!XmlNormaliser.isXmlFormat(formatList.get(0))) {
			logger.warn("Non XML resources could not be processed by XML normaliser");
			return pr;
		}

		logger.info("Process the document " + cu.getUri() + ".");

		// Get file from the native content URI and check existence and access.
		final File file;
		try {
			file = this.contentManager.getNativeFileFromResource(cu);
		}
		catch (final WebLabCheckedException wlce) {
			final WebLabException wle = new WebLabException();
			wle.setErrorId("E3");
			wle.setErrorMessage("Content not available");
			throw new ProcessException("Unable to retrieve content.", wle, wlce);
		}

		final Document doc = XmlNormaliser.parseXml(file);
		final XPath xpath = XPathFactory.newInstance().newXPath();

		// Read the configuration once so that a concurrent setConf cannot change it in the middle of a run.
		final NormaliserConfig config = this.conf;

		Text text = null;
		final Annotation annot = AnnotationFactory.createAndLinkAnnotation(cu);
		final ResourceHelper rh = RDFHelperFactory.getResourceHelper(cu);
		final PoKHelper h = RDFHelperFactory.getPoKHelper(annot);
		// Statements are buffered and written in one go by the commit below.
		h.setAutoCommitMode(false);
		for (final XmlProperty xmlProperty : config.getPropertyList()) {
			final NodeList nodes = XmlNormaliser.selectNodes(xpath, xmlProperty.getXpathExpression(), doc);
			for (int i = 0; i < nodes.getLength(); i++) {
				final String value = XmlNormaliser.extractValue(nodes.item(i));
				logger.debug("node value: " + value);
				if (value == null) {
					// Nothing usable on this node; writing it would produce a null statement and a "null" line of text.
					continue;
				}

				// Process annotations part
				for (final String propertyURI : xmlProperty.getNormalisedPropertyURI()) {
					// A property can be written if override mode is enabled or if it is not already present on the
					// resource, neither as a literal nor as a resource.
					final boolean writable = config.isInOverrrideMode()
							|| (rh.getLitsOnPredSubj(cu.getUri(), propertyURI).isEmpty()
									&& rh.getRessOnPredSubj(cu.getUri(), propertyURI).isEmpty());
					if (!writable) {
						logger.info("Property \"" + propertyURI + "\" is already present on Resource and 'override mode' is disabled.");
						continue;
					}
					if (xmlProperty.getNormalisedPropertyType().equals(XmlProperty.NormalisedPropertyType.LITERAL)) {
						h.createLitStat(cu.getUri(), propertyURI, value);
					}
					else if (xmlProperty.getNormalisedPropertyType().equals(XmlProperty.NormalisedPropertyType.RESOURCE)) {
						h.createResStat(cu.getUri(), propertyURI, value);
					}
				}

				// Process text part
				if (xmlProperty.isAppendValuesToTextUnit()) {
					if (text == null) {
						// The Text unit is only created once a first value has to be stored in it.
						text = MediaUnitFactory.createAndLinkMediaUnit(cu, Text.class);
					}
					if (text.getContent() != null) {
						text.setContent(text.getContent() + value + "\n");
					}
					else {
						text.setContent(value + "\n");
					}
				}
			}
		}
		h.commit();

		return pr;
	}

	/**
	 * Tells whether a format literal denotes one of the supported XML mime types.
	 * 
	 * @param format
	 *            The format literal; may be <code>null</code>.
	 * @return <code>true</code> if <code>format</code>, lower cased independently of the default locale, contains
	 *         one of the <code>XML_TYPES</code>.
	 */
	private static boolean isXmlFormat(final String format) {
		if (format == null) {
			return false;
		}
		// Locale.ROOT: the default locale must not influence mime type matching (e.g. Turkish dotless i).
		final String lowerCased = format.toLowerCase(Locale.ROOT);
		for (final String type : XML_TYPES) {
			if (lowerCased.contains(type)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Parses a file into a namespace aware DOM document, without resolving external entities or external DTDs.
	 * 
	 * @param file
	 *            The XML file to parse.
	 * @return The parsed document.
	 * @throws ProcessException
	 *             With error id E5 if no parser can be created, E0 if the content is not parsable XML and E3 if the
	 *             file cannot be read.
	 */
	private static Document parseXml(final File file) throws ProcessException {
		final DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
		domFactory.setNamespaceAware(true);
		for (final String feature : EXTERNAL_ENTITY_FEATURES) {
			try {
				domFactory.setFeature(feature, false);
			}
			catch (final ParserConfigurationException pce) {
				// Best effort: a parser that does not know the feature is used as is rather than failing the call.
				logger.warn("XML parser does not support feature " + feature + "; it could not be disabled.", pce);
			}
		}

		final DocumentBuilder builder;
		try {
			builder = domFactory.newDocumentBuilder();
		}
		catch (final ParserConfigurationException pce) {
			final WebLabException wle = new WebLabException();
			wle.setErrorId("E5");
			wle.setErrorMessage("Unsupported configuration for XML parser.");
			throw new ProcessException("Unsupported configuration for XML parser.", wle, pce);
		}

		try {
			return builder.parse(file);
		}
		catch (final SAXException saxe) {
			final WebLabException wle = new WebLabException();
			wle.setErrorId("E0");
			wle.setErrorMessage("Unexpected error when trying to parse xml resource.");
			throw new ProcessException("Unexpected error when trying to parse xml resource.", wle, saxe);
		}
		catch (final IOException ioe) {
			final WebLabException wle = new WebLabException();
			wle.setErrorId("E3");
			wle.setErrorMessage("Content not available");
			throw new ProcessException("Unable to parse retrieved content.", wle, ioe);
		}
	}

	/**
	 * Compiles an XPath expression and evaluates it as a node set on a document.
	 * 
	 * @param xpath
	 *            The <code>XPath</code> used to compile <code>expression</code>.
	 * @param expression
	 *            The XPath expression to evaluate.
	 * @param doc
	 *            The document to evaluate <code>expression</code> on.
	 * @return The nodes selected by <code>expression</code>.
	 * @throws ProcessException
	 *             With error id E3 if <code>expression</code> cannot be compiled or evaluated.
	 */
	private static NodeList selectNodes(final XPath xpath, final String expression, final Document doc)
			throws ProcessException {
		try {
			final XPathExpression expr = xpath.compile(expression);
			return (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
		}
		catch (final XPathExpressionException xpee) {
			final WebLabException wle = new WebLabException();
			wle.setErrorId("E3");
			wle.setErrorMessage("Content not available for xpath expression: " + expression);
			throw new ProcessException("Unable to parse retrieved content.", wle, xpee);
		}
	}

	/**
	 * Extracts the textual value of a node selected by an XPath expression.
	 * 
	 * @param node
	 *            The selected node.
	 * @return The node value when it is defined (attribute, text, CDATA, comment...), otherwise the text content
	 *         of the node (elements have no node value of their own); <code>null</code> if neither is defined.
	 */
	private static String extractValue(final Node node) {
		final String nodeValue = node.getNodeValue();
		if (nodeValue != null) {
			return nodeValue;
		}
		return node.getTextContent();
	}

	/**
	 * @param args
	 *            The <code>ProcessArgs</code> of the process method.
	 * @return The <code>ComposedUnit</code> that must be contained by <code>args</code>.
	 * @throws ProcessException
	 *             With error id E1 if <code>args</code> is <code>null</code>, if its resource is <code>null</code>
	 *             or if its resource is not a <code>ComposedUnit</code>.
	 */
	protected static ComposedUnit checkArgs(final ProcessArgs args) throws ProcessException {
		final WebLabException wle = new WebLabException();
		wle.setErrorId("E1");
		wle.setErrorMessage("Invalid parameter.");

		if (args == null) {
			throw new ProcessException("ProcessArgs was null.", wle);
		}
		final Resource res = args.getResource();
		if (res == null) {
			throw new ProcessException("Resource of ProcessArgs was null.", wle);
		}
		if (!(res instanceof ComposedUnit)) {
			throw new ProcessException("Resource of ProcessArgs was not a ComposedUnit, but a "
					+ res.getClass().getName() + ".", wle);
		}
		return (ComposedUnit) res;
	}

	/**
	 * @return the conf
	 */
	public NormaliserConfig getConf() {
		return this.conf;
	}

	/**
	 * @param conf
	 *            the conf to set
	 */
	public void setConf(NormaliserConfig conf) {
		this.conf = conf;
	}

}
