/**
 * WEBLAB: Service oriented integration platform for media mining and intelligence applications
 * 
 * Copyright (C) 2004 - 2009 EADS DEFENCE AND SECURITY SYSTEMS
 * 
 * This library is free software; you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License along with this
 * library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301 USA
 */

package org.ow2.weblab.services.duplicates;

import java.io.File;

import javax.jws.WebService;
import javax.servlet.ServletContext;
import javax.xml.ws.WebServiceContext;
import javax.xml.ws.handler.MessageContext;

import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.update.processor.TextProfileSignature;
import org.ow2.weblab.services.indexer.impl.SolrIndexer;
import org.ow2.weblab.services.searcher.impl.SolrSearcher;
import org.ow2.weblab.services.solr.SolrComponent;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.weblab_project.core.exception.WebLabCheckedException;
import org.weblab_project.core.factory.AnnotationFactory;
import org.weblab_project.core.helper.PoKHelper;
import org.weblab_project.core.helper.RDFHelperFactory;
import org.weblab_project.core.model.Annotation;
import org.weblab_project.core.model.MediaUnit;
import org.weblab_project.core.ontologies.WebLab;
import org.weblab_project.services.analyser.Analyser;
import org.weblab_project.services.analyser.ProcessException;
import org.weblab_project.services.analyser.types.ProcessArgs;
import org.weblab_project.services.analyser.types.ProcessReturn;
import org.weblab_project.services.exception.WebLabException;
import org.weblab_project.services.indexer.IndexException;

/**
 * {@link Analyser} implementation that detects near-duplicate documents.
 * <p>
 * For each processed <code>MediaUnit</code> a fuzzy signature is computed with the SolR
 * <code>TextProfileSignature</code> and searched in the <code>signature</code> field of the index.
 * When a hit is found the unit is annotated with <code>WebLab.CAN_BE_IGNORED = "true"</code>;
 * otherwise the unit is added to the index.
 */
@WebService(endpointInterface = "org.weblab_project.services.analyser.Analyser")
public class DuplicatesDetectorService implements Analyser {

	private static final Logger logger = LoggerFactory.getLogger(DuplicatesDetectorService.class);

	/** Web service context, used to resolve the web application path. */
	@javax.annotation.Resource
	protected WebServiceContext wsContext;

	/** SolR component used for searching and indexing; (re)assigned on every call to {@link #process(ProcessArgs)}. */
	private SolrComponent comp;

	/**
	 * Checks whether the received <code>MediaUnit</code> is a near duplicate of an already indexed
	 * document. Duplicates are annotated as ignorable, new documents are added to the index.
	 * 
	 * @param args
	 *            the arguments containing the <code>MediaUnit</code> to check
	 * @return a <code>ProcessReturn</code> containing the (possibly annotated) <code>MediaUnit</code>
	 * @throws ProcessException
	 *             if the arguments are invalid or if the SolR index cannot be used
	 */
	@Override
	public ProcessReturn process(ProcessArgs args) throws ProcessException {
		final MediaUnit unit = DuplicatesDetectorService.checkArgs(args);
		logger.info("Process method of DuplicatesDetectorService called for Document: {}", unit.getUri());

		this.comp = SolrComponent.getInstance();
		try {
			this.comp.open(this.getWebAppPath());

			// Use fuzzy hashing to search near duplicates into index
			final String sigString = DuplicatesDetectorService.getFuzzyHashing(unit);
			logger.info("Signature of doc to process: {}", sigString);

			// A single hit is enough to know that the document is already indexed
			final QueryResponse qr = this.comp.search("signature:" + sigString, 0, 1);
			final boolean docIsAlreadyPresent = qr.getResults() != null && !qr.getResults().isEmpty();

			if (docIsAlreadyPresent) {
				for (final SolrDocument hit : qr.getResults()) {
					logger.info("Duplicate document found: {}", String.valueOf(hit.getFieldValue("id")));
				}

				// Add annotation since the document is already indexed
				logger.info("Document ({}) is a duplicate.", unit.getUri());
				final Annotation annot = AnnotationFactory.createAndLinkAnnotation(unit);
				final PoKHelper pokHlp = RDFHelperFactory.getPoKHelper(annot);
				pokHlp.createLitStat(unit.getUri(), WebLab.CAN_BE_IGNORED, "true");
			}
			else {
				logger.info("No near duplicates found, add document to index.");
				this.comp.addDocument(unit);
				this.comp.flushIndexBuffer();
				logger.info("Document ({}) is a new document.", unit.getUri());
			}
		}
		catch (WebLabCheckedException e) {
			final WebLabException exp = new WebLabException();
			exp.setErrorId("E0");
			exp.setErrorMessage("Unexpected error");
			throw new ProcessException("Error when calling solr index.", exp, e);
		}

		final ProcessReturn pr = new ProcessReturn();
		pr.setResource(unit);
		return pr;
	}

	/**
	 * Hash a <code>MediaUnit</code> using the SolR fuzzy hashing.
	 * 
	 * @param unit
	 *            the media unit to hash
	 * @return the signature of the text of the unit, encoded as an hexadecimal string (two
	 *         characters per signature byte)
	 */
	private static String getFuzzyHashing(MediaUnit unit) {
		final TextProfileSignature tps = new TextProfileSignature();
		// initialise with empty parameters to force default values of TextProfileSignature attributes
		tps.init(SolrParams.toSolrParams(new NamedList<String>()));

		// The following lines are copied from SignatureUpdateProcessorFactory SolR class
		tps.add("text");
		tps.add(SolrComponent.extractTextFromResource(unit));
		final byte[] signature = tps.getSignature();

		// Hexadecimal encoding: high nibble first, then low nibble
		final char[] arr = new char[signature.length << 1];
		for (int i = 0; i < signature.length; i++) {
			final int b = signature[i];
			final int idx = i << 1;
			arr[idx] = StrUtils.HEX_DIGITS[(b >> 4) & 0xf];
			arr[idx + 1] = StrUtils.HEX_DIGITS[b & 0xf];
		}
		return new String(arr);
	}

	/**
	 * @param args
	 *            The <code>ProcessArgs</code> to check in the begin of <code>process</code>.
	 * @return The contained <code>MediaUnit</code>
	 * @throws ProcessException
	 *             if we are unable to extract the contained <code>MediaUnit</code>
	 */
	private static MediaUnit checkArgs(final ProcessArgs args) throws ProcessException {
		// Every validation failure is reported with the same error bean
		final WebLabException wle = new WebLabException();
		wle.setErrorId("E1");
		wle.setErrorMessage("Invalid parameter");

		if (args == null) {
			logger.error("ProcessArgs was null.");
			throw new ProcessException("ProcessArgs was null.", wle);
		}
		if (args.getResource() == null) {
			logger.error("Args must contain a non-null Resource to index");
			throw new ProcessException("Args must contain a non-null Resource to index", wle);
		}
		if (!(args.getResource() instanceof MediaUnit)) {
			logger.error("Resource to index was not a MediaUnit.");
			throw new ProcessException("Resource to index was not a MediaUnit.", wle);
		}
		return (MediaUnit) args.getResource();
	}

	/**
	 * Return the Web application path. When no web service context is available the local
	 * <code>./src/main/resources</code> folder is used instead.
	 * 
	 * @return webapp path
	 * @throws WebLabCheckedException
	 *             if the path cannot be resolved from the servlet context or does not exist
	 */
	private String getWebAppPath() throws WebLabCheckedException {
		String appPath = "./src/main/resources";
		if (this.wsContext != null) {
			final ServletContext ctx = (ServletContext) this.wsContext.getMessageContext()
					.get(MessageContext.SERVLET_CONTEXT);
			if (ctx == null) {
				throw new WebLabCheckedException("Servlet context not available, unable to resolve webapp path.");
			}
			appPath = ctx.getRealPath("WEB-INF/classes");
			// getRealPath returns null when the container cannot translate the path
			// (e.g. content served from an unexploded archive); fail explicitly instead of with a NPE.
			if (appPath == null) {
				throw new WebLabCheckedException("Unable to resolve real path of [WEB-INF/classes].");
			}
		}
		else {
			logger.warn("Webservice context not available returning current local path as default web app path");
		}

		if (!new File(appPath).exists()) {
			throw new WebLabCheckedException("Webapp path [" + appPath + "] does not exists...");
		}
		return appPath;
	}

	/**
	 * @return the wsContext
	 */
	public WebServiceContext getWsContext() {
		return this.wsContext;
	}

	/**
	 * @param wsContext
	 *            the wsContext to set
	 */
	public void setWsContext(WebServiceContext wsContext) {
		this.wsContext = wsContext;
	}

}
