/**
 * WEBLAB: Service oriented integration platform for media mining and intelligence applications
 * 
 * Copyright (C) 2004 - 2011 Cassidian SAS
 * 
 * This library is free software; you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License along with this
 * library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301 USA
 */

package org.ow2.weblab.service.gate;

import gate.Corpus;
import gate.CorpusController;
import gate.Factory;
import gate.Gate;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.persist.PersistenceException;
import gate.util.GateException;
import gate.util.persistence.PersistenceManager;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;

import javax.jws.WebService;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.ow2.weblab.service.gate.converter.GateConverter;
import org.ow2.weblab.service.gate.converter.GateToWebLab;
import org.springframework.core.io.ClassPathResource;
import org.weblab_project.core.exception.WebLabUncheckedException;
import org.weblab_project.core.helper.RDFHelperFactory;
import org.weblab_project.core.helper.ResourceHelper;
import org.weblab_project.core.model.MediaUnit;
import org.weblab_project.core.model.Resource;
import org.weblab_project.core.model.text.Text;
import org.weblab_project.core.model.user.UsageContext;
import org.weblab_project.core.ontologies.DublinCore;
import org.weblab_project.core.util.ResourceUtil;
import org.weblab_project.services.analyser.Analyser;
import org.weblab_project.services.analyser.ProcessException;
import org.weblab_project.services.analyser.types.ProcessArgs;
import org.weblab_project.services.analyser.types.ProcessReturn;


/**
 * This class is a WebService calling Gate with different configurations.
 * 
 * Gate is initialised only once (guarded by a static flag shared by every instance of this class).
 * For each call to {@link #process(ProcessArgs)}, every text section of the input resource is wrapped into a
 * Gate document, the documents are gathered into a Gate corpus and the corpus is executed by the corpus
 * controller loaded from the gapp file associated with the usage context (or from the default gapp file).
 * The result is then handed to the {@link GateConverter}; most of the parameters are set in the converter.
 * 
 * @see GateConverter
 * @author khelif, ymombrun
 */
@WebService(endpointInterface = "org.weblab_project.services.analyser.Analyser")
public class GateService implements Analyser {

	/**
	 * The unknown language
	 */
	private static final String UNKNOWN = "unknown";


	/**
	 * The name of the gate home directory
	 */
	private static final String GATE_HOME_DIR = "gate";


	/**
	 * The name of the gate user file
	 */
	private static final String GATE_USER_FILE = "gate.xml";


	/**
	 * The name of the plugin directory
	 */
	private static final String GATE_PLUGINS_DIR = "plugins";


	/**
	 * The name of the default gapp
	 */
	private static final String DEFAULT_GAPP_FILE = "default.gapp";


	/*
	 * Name of the feature set on each Gate document to carry the language of the text.
	 * 
	 * TODO Check how we can handle the trouble with language and snowball porter stemmer.
	 * The trouble is that in snowball language codes are defined in the plugin, and they cannot be bypassed.
	 */
	private static final String GATE_LANGUAGE_FEATURE = "language";



	/**
	 * The initialisation flag. It is static (shared by every instance), hence it must only be read and written
	 * while holding the monitor of <code>GateService.class</code>.
	 */
	private static boolean INITIALISED = false;



	/**
	 * The gapp file to be used when no configure has been done or when the usageContext is not specified.
	 */
	private final File defaultGappFile;


	/**
	 * The converter in charge of transferring information extracted by Gate into the input resource.
	 */
	private final GateConverter converter;


	/**
	 * The logger for this class.
	 */
	private final Log log;


	/**
	 * Builds the service with the default gapp file, the default plugins directory and the default converter.
	 * 
	 * @throws IOException
	 *             If the default gapp file cannot be found in classpath resources or If Gate home cannot be found in
	 *             classpath resource
	 */
	public GateService() throws IOException {
		this(new ClassPathResource(DEFAULT_GAPP_FILE).getFile());
	}


	/**
	 * Builds the service with the default plugins directory (located inside Gate home) and the default converter.
	 * 
	 * @param defaultGappFile
	 *            The default Gapp file to be used
	 * @throws IOException
	 *             If Gate home cannot be found in classpath resource
	 */
	public GateService(final File defaultGappFile) throws IOException {
		this(defaultGappFile, new File(new ClassPathResource(GATE_HOME_DIR).getFile(), GATE_PLUGINS_DIR));
	}



	/**
	 * Builds the service with a {@link GateToWebLab} converter.
	 * 
	 * @param defaultGappFile
	 *            The default Gapp file to be used
	 * @param pluginsPath
	 *            The path to the plugin repository
	 * @throws IOException
	 *             If Gate home cannot be found in classpath resource
	 */
	public GateService(final File defaultGappFile, final File pluginsPath) throws IOException {
		this(defaultGappFile, pluginsPath, new GateToWebLab());
	}


	/**
	 * Stores the configuration and initialises Gate if it has not been initialised yet.
	 * 
	 * @param defaultGappFile
	 *            The default Gapp file to be used
	 * @param pluginsPath
	 *            The path to the plugin repository
	 * @param converter
	 *            The GateConverter in charge of reading Gate documents and annotate WebLab texts
	 * @throws IOException
	 *             If Gate home cannot be found in classpath resource
	 */
	public GateService(final File defaultGappFile, final File pluginsPath, final GateConverter converter) throws IOException {
		super();
		this.log = LogFactory.getLog(this.getClass());
		this.defaultGappFile = defaultGappFile;
		this.converter = converter;
		this.initGate(pluginsPath);
		this.log.info("Gate Service successully initialised.");
	}



	/**
	 * Processes every text section of the resource in args with Gate and lets the converter write the result back
	 * into that resource.
	 * 
	 * @param args
	 *            The ProcessArgs containing the resource to process and, optionally, the usageContext used to select
	 *            the gapp file.
	 * @return A ProcessReturn wrapping the resource received in args.
	 * @throws ProcessException
	 *             If args are invalid, if no Gate document could be created, if the controller cannot be loaded or if
	 *             the execution of the pipeline fails.
	 */
	@Override
	public ProcessReturn process(final ProcessArgs args) throws ProcessException {
		this.log.debug("Early starting of Gate Analyser");

		// Tests the input parameters and get every Text section contained by the resource in args.
		final List<Text> texts = this.checkParameters(args);

		if (texts.isEmpty()) {
			this.log.warn("No text content found in resource '" + args.getResource().getUri() + "'. Nothing done.");
			final ProcessReturn emptyRet = new ProcessReturn();
			emptyRet.setResource(args.getResource());
			return emptyRet;
		}

		this.log.info("Starting the processing of resource '" + args.getResource().getUri() + "' with Gate.");

		// Instantiates an empty Gate Corpus
		final Corpus corpusGate;
		try {
			corpusGate = Factory.newCorpus(args.getResource().getUri() + " " + System.nanoTime());
		} catch (final ResourceInstantiationException rie) {
			this.log.error("Unable to instanciate new Corpus.", rie);
			throw new ProcessException("Unable to instanciate new Corpus.", WebLabExceptions.E0.wle, rie);
		}

		/*
		 * Creates a RDF helper that can process the whole resource.
		 * It will enable to get the dc:language property of each text section.
		 * If none, the document property will be used.
		 * If none, nothing will be added as feature of the document.
		 */
		final ResourceHelper helper = RDFHelperFactory.getResourceHelper(args.getResource());
		final String docDefaultLanguage = this.getLanguage(helper, args.getResource().getUri());

		// For each Text section
		final Map<gate.Document, Text> gateDocsAndText = new HashMap<gate.Document, Text>();
		for (final Text text : texts) {
			this.log.debug("Text section to process by GateAnalyserComponent: " + text.getUri());
			this.log.debug("Number of segments before GateAnalyserComponent: " + text.getSegment().size());

			// Creates a Gate Document from the content of the text
			final gate.Document docGate;
			try {
				docGate = Factory.newDocument(text.getContent());
			} catch (final ResourceInstantiationException rie) {
				this.log.warn("Unable to create a new Gate Document for text '" + text.getUri() + ". Skipping it.", rie);
				continue;
			}

			// Extract the language of the text and add this as feature in the Gate Document, only when one is known.
			final String language = this.getLanguage(helper, text.getUri(), docDefaultLanguage);
			if (language != null) {
				docGate.getFeatures().put(GATE_LANGUAGE_FEATURE, language);
			}

			// Add the Gate doc in the corpus and maps it to its WebLab Text.
			this.addGateDocumentToCorpus(corpusGate, docGate);
			gateDocsAndText.put(docGate, text);
		}

		texts.clear();

		if (corpusGate.isEmpty()) {
			// Remove the corpus from Gate memory
			Factory.deleteResource(corpusGate);
			final ProcessException pe = new ProcessException("Unable to instanciate Gate documents for each text section of resource " + args.getResource().getUri() + ".", WebLabExceptions.E0.wle);
			this.log.error(pe.getMessage(), pe);
			throw pe;
		}

		// Get the usageContext that is used to map with the required controller.
		final CorpusController controller;
		try {
			controller = this.getController(args.getUsageContext());
		} catch (final ProcessException pe) {
			// The converter will never be reached: release what has been created in Gate for this request.
			this.releaseGateResources(corpusGate, gateDocsAndText.keySet());
			throw pe;
		}

		/*
		 * CorpusController are not thread safe (they can process only one corpus at a time).
		 * Here we synchronized on the controller instance. It means that on a given usageContext process are
		 * sequential.
		 * If inputs are from two usageContexts, parallelism is enabled.
		 */
		synchronized (controller) {
			controller.setCorpus(corpusGate);
			try {
				controller.execute();
			} catch (final ExecutionException ee) {
				// Delete the documents as well as the corpus; deleting only the corpus leaves the documents in Gate.
				this.releaseGateResources(corpusGate, gateDocsAndText.keySet());
				this.log.error("Unable to process pipeline of corpus.", ee);
				throw new ProcessException("Unable to process pipeline of corpus.", WebLabExceptions.E0.wle, ee);
			} finally {
				// Do not let the shared controller keep a reference to the corpus of this request.
				controller.setCorpus(null);
			}
		}

		this.log.info("Answer received from Gate. Starting annotation convertion for Resource '" + args.getResource().getUri() + "'.");

		// Extract annotations of each Gate Document and add them to the WebLab Text.
		// NOTE(review): the corpus and documents are not deleted here; presumably the converter releases them - confirm.
		this.converter.convertInformation(corpusGate, args.getResource(), gateDocsAndText);

		// Creates the return wrapper and add the resource in it.
		final ProcessReturn theRet = new ProcessReturn();
		theRet.setResource(args.getResource());

		this.log.info("Resource '" + args.getResource().getUri() + "' successfully processed with Gate Analyser");

		return theRet;
	}


	/**
	 * Removes from Gate the documents and the corpus created for a request that cannot be completed.
	 * 
	 * @param corpusGate
	 *            The corpus to delete
	 * @param docs
	 *            The Gate documents to delete
	 */
	private void releaseGateResources(final Corpus corpusGate, final Iterable<gate.Document> docs) {
		for (final gate.Document doc : docs) {
			Factory.deleteResource(doc);
		}
		Factory.deleteResource(corpusGate);
	}


	/**
	 * This method first check if a controller exists in configuration singleton for the given usageContext.
	 * If not it creates a controller from the gapp file path in config (if exists) or from the default path.
	 * 
	 * The lookup and the registration are done while holding the monitor of <code>GateService.class</code>, since
	 * the configuration is shared by every instance of this class.
	 * 
	 * @param uc
	 *            The usageContext or null if not define
	 * @return An existing corpus controller or a newly created one.
	 * @throws ProcessException
	 *             If the instantiation of the controller fails.
	 */
	private CorpusController getController(final UsageContext uc) throws ProcessException {
		// A missing usageContext (or one without URI) is mapped to the empty key.
		final String usageContext;
		if (uc == null || uc.getUri() == null) {
			usageContext = "";
		} else {
			usageContext = uc.getUri();
		}

		synchronized (GateService.class) {
			final Configuration config = Configuration.getInstance();

			if (!config.isConfigured(usageContext)) {
				config.setGappFileForUsageContext(usageContext, this.defaultGappFile.getPath());
			}
			final String gappFile = config.getGappFilePathForUsageContext(usageContext);

			// Checks if controller has already been initialised and use the loaded one if any.
			if (config.isControllerInitialised(gappFile)) {
				return config.getController(gappFile);
			}

			// Else: loads the required controller from a gapp file in config
			final CorpusController controller;
			try {
				controller = (CorpusController) PersistenceManager.loadObjectFromFile(new File(gappFile));
			} catch (final IOException ioe) {
				this.log.error("Unable to load gapp file.", ioe);
				throw new ProcessException("Unable to load gapp file.", WebLabExceptions.E2.wle, ioe);
			} catch (final PersistenceException pe) {
				this.log.error("Unable to load gapp file.", pe);
				throw new ProcessException("Unable to load gapp file.", WebLabExceptions.E0.wle, pe);
			} catch (final ResourceInstantiationException rie) {
				this.log.error("Unable to load gapp file.", rie);
				throw new ProcessException("Unable to load gapp file.", WebLabExceptions.E0.wle, rie);
			}

			config.setControllerForUsageContextAndGapp(usageContext, controller);
			return controller;
		}
	}


	/**
	 * @param helper
	 *            A resource Helper at the document level
	 * @param uri
	 *            URI of the resource to extract language
	 * @return The language or null if not found
	 */
	private String getLanguage(final ResourceHelper helper, final String uri) {
		return this.getLanguage(helper, uri, null);
	}


	/**
	 * Reads the first dc:language value annotated on uri. The value "unknown" (whatever its case) is handled as if
	 * no language was annotated.
	 * 
	 * @param helper
	 *            A resource Helper at the document level
	 * @param uri
	 *            URI of the resource to extract language
	 * @param defaultValue
	 *            The default language value to be used
	 * @return The language in lower case or defaultValue if not found (may be null)
	 */
	private String getLanguage(final ResourceHelper helper, final String uri, final String defaultValue) {
		final List<String> languages = helper.getLitsOnPredSubj(uri, DublinCore.LANGUAGE_PROPERTY_NAME);
		if (languages.isEmpty() || languages.get(0).equalsIgnoreCase(UNKNOWN)) {
			return defaultValue;
		}
		// Language codes must be lower-cased independently of the default locale of the JVM.
		return languages.get(0).toLowerCase(java.util.Locale.ENGLISH);
	}


	/**
	 * Isolates the unchecked call needed to add a document into the (raw typed) Gate corpus.
	 * 
	 * @param corpusGate
	 *            The Corpus
	 * @param docGate
	 *            The document to be added into corpusGate
	 */
	@SuppressWarnings("unchecked")
	private void addGateDocumentToCorpus(final Corpus corpusGate, final gate.Document docGate) {
		corpusGate.add(docGate);
	}


	/**
	 * Initialises Gate only one time, whatever the number of instances of this class.
	 * The initialisation flag is static, so the check and the initialisation are done while holding the monitor of
	 * <code>GateService.class</code> rather than the monitor of this instance.
	 * 
	 * @param pluginsdir
	 *            The default plugin directory to be used to initialise the Gate instance.
	 * @throws IOException
	 *             If the Gate Home cannot be found.
	 */
	private void initGate(final File pluginsdir) throws IOException {
		synchronized (GateService.class) {
			// Gate must be initialised only one time !
			if (INITIALISED) {
				return;
			}

			final File gateHome = new ClassPathResource(GATE_HOME_DIR).getFile();
			this.log.debug("Here is the path of your application => " + gateHome.getAbsolutePath());
			Gate.setGateHome(gateHome);
			Gate.setUserConfigFile(new File(gateHome, GATE_USER_FILE));
			Gate.setPluginsHome(pluginsdir);
			// The same file is used as user and as site configuration.
			Gate.setSiteConfigFile(new File(gateHome, GATE_USER_FILE));

			try {
				Gate.init();
			} catch (final GateException ge) {
				this.log.warn("Error when initialising Gate.", ge);
				throw new WebLabUncheckedException("Unable to find initialise Gate.", ge);
			}

			INITIALISED = true;
		}
	}

	/**
	 * Validates args and collects the text sections to be processed. Texts without content are removed from the
	 * returned list.
	 * 
	 * @param args
	 *            The ProcessArgs
	 * @return The list of Text contained by the Resource in args (the resource itself if it is a Text).
	 * @throws ProcessException
	 *             For any reason preventing the retrieval of text unit to be done.
	 */
	protected List<Text> checkParameters(final ProcessArgs args) throws ProcessException {
		if (args == null) {
			throw new ProcessException("ProcessArgs was null.", WebLabExceptions.E1.wle);
		}

		final Resource res = args.getResource();
		if (res == null) {
			throw new ProcessException("Resource in ProcessArg was null.", WebLabExceptions.E1.wle);
		}
		if (!(res instanceof MediaUnit)) {
			throw new ProcessException("This service only process MediaUnit; Resource was a: " + res.getClass().getSimpleName() + ".", WebLabExceptions.E1.wle);
		}

		final List<Text> texts;
		if (res instanceof Text) {
			texts = new LinkedList<Text>();
			texts.add((Text) res);
		} else {
			texts = ResourceUtil.getSelectedSubResources(res, Text.class);
		}

		// Removes through the iterator the texts that have no content.
		for (final ListIterator<Text> textIt = texts.listIterator(); textIt.hasNext();) {
			final Text text = textIt.next();
			if (text.getContent() == null) {
				textIt.remove();
			}
		}

		return texts;
	}

}
