/**
 * WEBLAB: Service oriented integration platform for media mining and intelligence applications
 * 
 * Copyright (C) 2004 - 2012 Cassidian, an EADS company
 * 
 * This library is free software; you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License along with this
 * library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301 USA
 */

package org.ow2.weblab.service.gate;

import gate.Corpus;
import gate.CorpusController;
import gate.Factory;
import gate.Gate;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.persist.PersistenceException;
import gate.util.GateException;
import gate.util.persistence.PersistenceManager;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Locale;
import java.util.Map;

import javax.jws.WebService;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.ow2.weblab.core.extended.exception.WebLabCheckedException;
import org.ow2.weblab.core.extended.ontologies.DublinCore;
import org.ow2.weblab.core.extended.properties.PropertiesLoader;
import org.ow2.weblab.core.extended.util.ResourceUtil;
import org.ow2.weblab.core.helper.ResourceHelper;
import org.ow2.weblab.core.helper.impl.JenaResourceHelper;
import org.ow2.weblab.core.model.MediaUnit;
import org.ow2.weblab.core.model.Resource;
import org.ow2.weblab.core.model.Text;
import org.ow2.weblab.core.services.Analyser;
import org.ow2.weblab.core.services.InsufficientResourcesException;
import org.ow2.weblab.core.services.InvalidParameterException;
import org.ow2.weblab.core.services.UnexpectedException;
import org.ow2.weblab.core.services.analyser.ProcessArgs;
import org.ow2.weblab.core.services.analyser.ProcessReturn;
import org.ow2.weblab.service.gate.converter.GateConverter;
import org.ow2.weblab.service.gate.converter.GateToWebLab;
import org.springframework.core.io.ClassPathResource;

/**
 * This class is a WebService calling Gate with different configurations. It initialises gate once with a gapp configuration and implements a corpus pipeline.
 * Most of the parameters are set in the converter.
 * 
 * @see GateConverter
 * @author khelif, ymombrun
 */
@WebService(endpointInterface = "org.ow2.weblab.core.services.Analyser")
public class GateService implements Analyser {


	/**
	 * The language value that is treated as "no language": a dc:language literal equal to it (ignoring case) is replaced by the default value in
	 * {@link #getLanguage(ResourceHelper, String, String)}.
	 */
	private static final String UNKNOWN = "unknown";


	/**
	 * The name of the default gate home directory. It is resolved on the classpath when the {@link #GATE_HOME_PROPERTY} is not set.
	 */
	private static final String DEFAULT_GATE_HOME_DIR = "gate";


	/**
	 * The name of the gate configuration file. It is resolved against the gate home directory and registered both as user and as site configuration file.
	 */
	private static final String GATE_USER_FILE = "gate.xml";


	/**
	 * The name of the default plugin directory. It is resolved against the gate home directory when the {@link #GATE_PLUGINS_DIR_PROPERTY} is not set.
	 */
	private static final String DEFAULT_GATE_PLUGINS_DIR = "plugins";


	/**
	 * The name of the default gapp. It is resolved on the classpath when the {@link #DEFAULT_GAPP_FILE_PROPERTY} is not set.
	 */
	private static final String DEFAULT_GAPP_FILE = "default.gapp";


	/**
	 * The key of the Gate document feature in which the language of the text is stored.
	 * 
	 * TODO Check how we can handle the trouble with language and snowball porter stemmer. The trouble is that in snowball language codes are defined in the
	 * plugin, and they cannot be bypassed.
	 */
	private static final String GATE_LANGUAGE_FEATURE = "language";


	/**
	 * Name of the property file to be loaded.
	 */
	public final static String PROPERTIES_FILE_NAME = "gateservice.properties";


	/**
	 * Name of the property that contains the class of the converter to be instantiated
	 */
	public final static String CONVERTER_PROPERTY = "converter";


	/**
	 * Name of the property that contains the path to the default gapp file
	 */
	private static final String DEFAULT_GAPP_FILE_PROPERTY = "defaultgappfile";


	/**
	 * Name of the property that contains the path to the gate plugins directory
	 */
	private static final String GATE_PLUGINS_DIR_PROPERTY = "gatepluginsdir";


	/**
	 * Name of the property that contains the path to the gate home directory
	 */
	private static final String GATE_HOME_PROPERTY = "gatehome";


	/**
	 * Name of the property that contains the URI of the gateService. When set, its value is given to the converter through setServiceURI.
	 */
	private static final String SERVICE_URI_PROPERTY = "serviceuri";


	/**
	 * The initialisation flag. It is static, hence shared by every instance of this class, and is switched to true by initGate once Gate.init() has succeeded.
	 */
	private static boolean initialised = false;


	/**
	 * The gapp file to be used when no configure has been done or when the usageContext is not specified. May be null since the constructor accepts a null
	 * value.
	 */
	private final File defaultGappFile;


	/**
	 * The converter in charge of transferring information extracted by Gate into the input resource. Never null (checked in the constructor).
	 */
	private final GateConverter converter;


	/**
	 * The logger, created from the runtime class of the instance.
	 */
	private final Log logger;


	/**
	 * Default constructor. The default gapp file is resolved by {@link #getDefaultGappFile()}; every other setting is resolved by the constructors this one
	 * delegates to.
	 * 
	 * @throws IOException
	 *             If something fails with defaultGappFile or pluginsPath
	 * @throws WebLabCheckedException
	 *             If converter is null or if something fails in Gate internal initialisation
	 */
	public GateService() throws IOException, WebLabCheckedException {
		this(GateService.getDefaultGappFile());
	}


	/**
	 * Uses the given gapp file; the plugins directory is resolved by {@link #getGatePluginsDir()}.
	 * 
	 * @param defaultGappFile
	 *            The default Gapp file to be used
	 * @throws IOException
	 *             If something fails with defaultGappFile or pluginsPath
	 * @throws WebLabCheckedException
	 *             If converter is null or if something fails in Gate internal initialisation
	 */
	public GateService(final File defaultGappFile) throws IOException, WebLabCheckedException {
		this(defaultGappFile, GateService.getGatePluginsDir());
	}


	/**
	 * Uses the given gapp file and plugins directory; the converter is resolved by {@link #getGateConverter()}.
	 * 
	 * @param defaultGappFile
	 *            The default Gapp file to be used
	 * @param pluginsPath
	 *            The path to the plugin repository
	 * @throws IOException
	 *             If something fails with defaultGappFile or pluginsPath
	 * @throws WebLabCheckedException
	 *             If converter is null or if something fails in Gate internal initialisation
	 */
	public GateService(final File defaultGappFile, final File pluginsPath) throws IOException, WebLabCheckedException {
		this(defaultGappFile, pluginsPath, GateService.getGateConverter());
	}


	/**
	 * Uses the given gapp file, plugins directory and converter; the Gate home directory is resolved by {@link #getGateHomeDir()}.
	 * 
	 * @param defaultGappFile
	 *            The default Gapp file to be used
	 * @param pluginsPath
	 *            The path to the plugin repository
	 * @param converter
	 *            The GateConverter in charge of reading Gate documents and annotate WebLab texts
	 * @throws IOException
	 *             If something fails with defaultGappFile or pluginsPath
	 * @throws WebLabCheckedException
	 *             If converter is null or if something fails in Gate internal initialisation
	 */
	public GateService(final File defaultGappFile, final File pluginsPath, final GateConverter converter) throws IOException, WebLabCheckedException {
		this(GateService.getGateHomeDir(), defaultGappFile, pluginsPath, converter);
	}


	/**
	 * @param gateHomePath
	 *            The path to Gate home folder
	 * @param defaultGappFile
	 *            The default Gapp file to be used
	 * @param pluginsPath
	 *            The path to the plugin repository
	 * @param converter
	 *            The GateConverter in charge of reading Gate documents and annotate WebLab texts
	 * @throws IOException
	 *             If something fails with defaultGappFile or pluginsPath
	 * @throws WebLabCheckedException
	 *             If converter is null of if something fails in Gate internal initialisation
	 */
	public GateService(final File gateHomePath, final File defaultGappFile, final File pluginsPath, final GateConverter converter) throws IOException, WebLabCheckedException {
		super();
		this.logger = LogFactory.getLog(this.getClass());
		if (defaultGappFile != null) {
			if (!defaultGappFile.exists()) {
				this.logger.warn("defaultGappFile " + defaultGappFile.getPath() + " does not exist.");
				throw new IOException("defaultGappFile " + defaultGappFile.getPath() + " does not exist.");
			} else if (!defaultGappFile.isFile()) {
				this.logger.warn("defaultGappFile " + defaultGappFile.getPath() + " is not a file.");
				throw new IOException("defaultGappFile " + defaultGappFile.getPath() + " is not a file.");
			} else if (!defaultGappFile.canRead()) {
				this.logger.warn("defaultGappFile " + defaultGappFile.getPath() + " is not readable.");
				throw new IOException("defaultGappFile " + defaultGappFile.getPath() + " is not readable.");
			}
		}
		this.defaultGappFile = defaultGappFile;
		if (converter == null) {
			throw new WebLabCheckedException("GateConverter class was null.");
		}
		this.converter = converter;
		this.initGate(gateHomePath, pluginsPath);
		this.logger.info("Gate Service successully initialised.");
	}


	/**
	 * Builds the converter described in the property file.
	 * 
	 * The class named by the CONVERTER_PROPERTY is instantiated through its no-argument constructor. If the property is not defined, if the class cannot be
	 * found or instantiated, or if it is not a GateConverter, a warning is logged and a GateToWebLab instance is used instead. When the SERVICE_URI_PROPERTY
	 * is defined, its value is set on the returned converter.
	 * 
	 * @return the GateConverter to be used. Never null.
	 */
	private static GateConverter getGateConverter() {
		final Map<String, String> props = PropertiesLoader.loadProperties(GateService.PROPERTIES_FILE_NAME, GateService.class);
		final Log log = LogFactory.getLog(GateService.class);

		GateConverter gateConverter = new GateToWebLab();
		final String defaultClassName = gateConverter.getClass().getCanonicalName();

		final String converterClassName = props.get(GateService.CONVERTER_PROPERTY);
		if ((converterClassName != null) && !converterClassName.isEmpty()) {
			try {
				final Class<?> converterClass = Class.forName(converterClassName);
				// Check the type before casting: a misconfigured class name must fall back to the default converter instead of raising a ClassCastException.
				if (GateConverter.class.isAssignableFrom(converterClass)) {
					gateConverter = (GateConverter) converterClass.newInstance();
				} else {
					log.warn("Class '" + converterClassName + "' is not a GateConverter. Using default '" + defaultClassName + "'.");
				}
			} catch (final InstantiationException ie) {
				log.warn("Unable to instantiate GateConverter '" + converterClassName + "'. Using default '" + defaultClassName + "'.", ie);
			} catch (final IllegalAccessException iae) {
				log.warn("Access denied to GateConverter '" + converterClassName + "'. Using default '" + defaultClassName + "'.", iae);
			} catch (final ClassNotFoundException cnfe) {
				log.warn("GateConverter '" + converterClassName + "' not found. " + "Using default '" + defaultClassName + "'.", cnfe);
			}
		}

		final String serviceUri = props.get(GateService.SERVICE_URI_PROPERTY);
		if ((serviceUri != null) && !serviceUri.isEmpty()) {
			gateConverter.setServiceURI(serviceUri);
		}
		return gateConverter;
	}


	/**
	 * Resolves the default gapp file. The value of the DEFAULT_GAPP_FILE_PROPERTY is first tried as a file system path, then as a classpath resource. When the
	 * property is not defined, the DEFAULT_GAPP_FILE classpath resource is used.
	 * 
	 * @return File The default GappFile to be used
	 * @throws IOException
	 *             If the classpath resource cannot be resolved to a file
	 */
	private static File getDefaultGappFile() throws IOException {
		final String configuredPath = PropertiesLoader.loadProperties(GateService.PROPERTIES_FILE_NAME, GateService.class).get(GateService.DEFAULT_GAPP_FILE_PROPERTY);

		// Nothing configured: use the gapp bundled on the classpath.
		if ((configuredPath == null) || configuredPath.isEmpty()) {
			return new ClassPathResource(GateService.DEFAULT_GAPP_FILE).getFile();
		}

		final File onFileSystem = new File(configuredPath);
		if (onFileSystem.exists() && onFileSystem.isFile()) {
			return onFileSystem;
		}

		// Not an existing regular file: interpret the configured value as a classpath location.
		return new ClassPathResource(configuredPath).getFile();
	}


	/**
	 * Resolves the Gate plugins directory: the value of the GATE_PLUGINS_DIR_PROPERTY when defined, else the DEFAULT_GATE_PLUGINS_DIR folder of the Gate home
	 * directory. The existence of the directory is not checked here.
	 * 
	 * @return File The GATE Plugins directory path to be used
	 * @throws IOException
	 *             If the Gate home directory cannot be resolved
	 */
	private static File getGatePluginsDir() throws IOException {
		final String configuredDir = PropertiesLoader.loadProperties(GateService.PROPERTIES_FILE_NAME, GateService.class).get(GateService.GATE_PLUGINS_DIR_PROPERTY);
		final boolean configured = (configuredDir != null) && !configuredDir.isEmpty();
		if (!configured) {
			return new File(GateService.getGateHomeDir(), GateService.DEFAULT_GATE_PLUGINS_DIR);
		}
		return new File(configuredDir);
	}


	/**
	 * Resolves the Gate home directory: the value of the GATE_HOME_PROPERTY when defined, else the DEFAULT_GATE_HOME_DIR classpath resource. The existence of
	 * a configured directory is not checked here.
	 * 
	 * @return File The GATE home directory path to be used
	 * @throws IOException
	 *             If the default classpath resource cannot be resolved to a file
	 */
	private static File getGateHomeDir() throws IOException {
		final String configuredHome = PropertiesLoader.loadProperties(GateService.PROPERTIES_FILE_NAME, GateService.class).get(GateService.GATE_HOME_PROPERTY);
		final boolean configured = (configuredHome != null) && !configuredHome.isEmpty();
		if (!configured) {
			return new ClassPathResource(GateService.DEFAULT_GATE_HOME_DIR).getFile();
		}
		return new File(configuredHome);
	}


	/**
	 * Annotates the resource contained in args with Gate.
	 * 
	 * Every non-empty Text section of the resource is turned into a Gate document, gathered in a single corpus, processed by the controller mapped to the
	 * usageContext of args and finally handed over to the converter, which writes the extracted information into the resource.
	 * 
	 * @param args
	 *            The ProcessArgs holding the resource to process and, optionally, a usageContext
	 * @return A ProcessReturn wrapping the same resource instance as the one received. If no text content is found, the resource is returned untouched.
	 * @throws UnexpectedException
	 *             If the corpus cannot be created, if no Gate document can be created, if the controller cannot be loaded or if its execution fails
	 * @throws InvalidParameterException
	 *             If args or its resource is null, or if the resource is not a MediaUnit
	 * @throws InsufficientResourcesException
	 *             If the gapp file of the controller cannot be read
	 */
	@Override
	public ProcessReturn process(final ProcessArgs args) throws UnexpectedException, InvalidParameterException, InsufficientResourcesException {
		this.logger.debug("Early starting of Gate Analyser");

		// Tests the input parameters and get every Text section contained by the resource in args.
		final List<Text> texts = this.checkParameters(args);

		if (texts.isEmpty()) {
			this.logger.warn("No text content found in resource '" + args.getResource().getUri() + "'. Nothing done.");
			final ProcessReturn theRet = new ProcessReturn();
			theRet.setResource(args.getResource());
			return theRet;
		}

		this.logger.info("Starting the processing of resource '" + args.getResource().getUri() + "' with Gate.");

		// Instantiates an empty Gate Corpus. The nano time suffix keeps corpus names distinct between calls on the same resource.
		final Corpus corpusGate;
		try {
			corpusGate = Factory.newCorpus(args.getResource().getUri() + " " + System.nanoTime());
		} catch (final ResourceInstantiationException rie) {
			// Keep the cause so that the reason of the failure is not lost.
			throw new UnexpectedException("Unable to instanciate new Corpus.", "Unable to instanciate new Corpus.", rie);
		}

		/*
		 * Creates a RDF helper that can process the whole resource. It will enable to get the dc:language property of each text section. If none, the document
		 * property will be used. If none, no language feature is set on the Gate document.
		 */
		final ResourceHelper helper = new JenaResourceHelper(args.getResource());
		final String docDefaultLanguage = this.getLanguage(helper, args.getResource().getUri());

		// For each Text section
		final Map<gate.Document, Text> gateDocsAndText = new HashMap<gate.Document, Text>();
		for (final Text text : texts) {
			this.logger.debug("Text section to process by GateAnalyserComponent: " + text.getUri());
			this.logger.debug("Number of segments before GateAnalyserComponent: " + text.getSegment().size());

			// Creates a Gate Document from the content of the text
			final gate.Document docGate;
			try {
				docGate = Factory.newDocument(text.getContent());
			} catch (final ResourceInstantiationException rie) {
				this.logger.warn("Unable to create a new Gate Document for text '" + text.getUri() + "'. Skipping it.", rie);
				continue;
			}

			// Extract the language of the text and add this as feature in the Gate Document.
			final String language = this.getLanguage(helper, text.getUri(), docDefaultLanguage);
			if (language != null) {
				docGate.getFeatures().put(GateService.GATE_LANGUAGE_FEATURE, language);
			}

			// Add the Gate doc in the corpus and maps it to its WebLab Text.
			this.addGateDocumentToCorpus(corpusGate, docGate);
			gateDocsAndText.put(docGate, text);
		}

		texts.clear();

		if (corpusGate.isEmpty()) {
			// Remove the corpus from Gate memory
			Factory.deleteResource(corpusGate);
			final UnexpectedException ue = new UnexpectedException("Unable to instanciate Gate documents for each text section of resource " + args.getResource().getUri() + ".",
					"Unable to instanciate Gate documents for each text section of resource " + args.getResource().getUri() + ".");
			this.logger.error(ue);
			throw ue;
		}

		// Get the usageContext that is used to map with the required controller.
		final CorpusController controller;
		try {
			controller = this.getController(args.getUsageContext());
		} catch (final InsufficientResourcesException ire) {
			Factory.deleteResource(corpusGate);
			throw ire;
		} catch (final UnexpectedException ue) {
			Factory.deleteResource(corpusGate);
			throw ue;
		}

		/*
		 * CorpusController are not thread safe (they can process only one corpus at a time). Here we synchronized on the controller instance. It means that on
		 * a given usageContext process are sequential. If inputs are from two usageContexts, parallelism is enabled.
		 */
		synchronized (controller) {
			controller.setCorpus(corpusGate);
			try {
				controller.execute();
			} catch (final ExecutionException ee) {
				Factory.deleteResource(corpusGate);
				// Log the URI of the resource, as every other message of this method does, rather than the object itself.
				this.logger.error("ExecutionException on the controller. UsageContext was '" + args.getUsageContext() + "'. Resource was '" + args.getResource().getUri() + "'.", ee);
				throw new UnexpectedException("Unable to process pipeline of corpus.", "Unable to process pipeline of corpus.", ee);
			}
		}

		this.logger.info("Answer received from Gate. Starting annotation convertion for Resource '" + args.getResource().getUri() + "'.");

		// Extract annotations of each Gate Document and add them to the WebLab Text.
		// NOTE(review): nothing here deletes the corpus and its documents on the success path; presumably the converter releases them - confirm.
		this.converter.convertInformation(corpusGate, args.getResource(), gateDocsAndText);

		// Creates the return wrapper and add the resource in it.
		final ProcessReturn theRet = new ProcessReturn();
		theRet.setResource(args.getResource());

		this.logger.info("Resource '" + args.getResource().getUri() + "' successfully processed with Gate Analyser");

		return theRet;
	}


	/**
	 * Provides the controller mapped to a usageContext. The configuration singleton is asked first; when it holds no controller for this usageContext, one is
	 * loaded from the gapp file returned by getGappFile and registered in the configuration singleton before being returned.
	 * 
	 * @param uc
	 *            The usageContext or null if not defined. A null value is handled as the empty String.
	 * @return An existing corpus controller or a newly created one.
	 * @throws InsufficientResourcesException
	 *             If the gapp file cannot be read.
	 * @throws UnexpectedException
	 *             If the content of the gapp file cannot be turned into a controller.
	 */
	private synchronized CorpusController getController(final String uc) throws InsufficientResourcesException, UnexpectedException {
		final String usageContext = (uc == null) ? "" : uc;

		// Reuse the controller registered for this usageContext, if any.
		final CorpusController registered = Configuration.getInstance().getController(usageContext);
		if (registered != null) {
			return registered;
		}

		// Otherwise load it from the gapp file and register it for the next calls.
		final File gappFile = this.getGappFile(usageContext);
		final String failure = "Unable to load gapp file " + gappFile + ".";
		final CorpusController loaded;
		try {
			loaded = (CorpusController) PersistenceManager.loadObjectFromFile(gappFile);
		} catch (final IOException ioe) {
			throw new InsufficientResourcesException(failure, failure, ioe);
		} catch (final PersistenceException pe) {
			throw new UnexpectedException(failure, failure, pe);
		} catch (final ResourceInstantiationException rie) {
			throw new UnexpectedException(failure, failure, rie);
		}

		Configuration.getInstance().setController(usageContext, loaded);
		return loaded;
	}


	/**
	 * Convenience overload of {@link #getLanguage(ResourceHelper, String, String)} that uses null as default value.
	 * 
	 * @param helper
	 *            A resource Helper at the document level
	 * @param uri
	 *            URI of the resource to extract language
	 * @return The language or null if not found
	 */
	private String getLanguage(final ResourceHelper helper, final String uri) {
		return this.getLanguage(helper, uri, null);
	}


	/**
	 * Reads the first dc:language literal annotated on the given URI and returns it in lower case. The default value is returned when there is no such
	 * literal or when it equals the unknown language (ignoring case).
	 * 
	 * @param helper
	 *            A resource Helper at the document level
	 * @param uri
	 *            URI of the resource to extract language
	 * @param defaultValue
	 *            The default language value to be used
	 * @return The language or the default one if not found (may be null)
	 */
	private String getLanguage(final ResourceHelper helper, final String uri, final String defaultValue) {
		final List<String> languages = helper.getLitsOnPredSubj(uri, DublinCore.LANGUAGE_PROPERTY_NAME);
		if (languages.isEmpty() || languages.get(0).equalsIgnoreCase(GateService.UNKNOWN)) {
			return defaultValue;
		}
		// Language codes are not natural language text: lower-case them with the root locale, otherwise a Turkish or Azeri default locale turns "IT" into
		// a code containing a dotless i.
		return languages.get(0).toLowerCase(Locale.ROOT);
	}


	/**
	 * Selects the gapp file of a usageContext: the one whose path is registered in the configuration singleton when there is one, the default gapp file
	 * otherwise.
	 * 
	 * @param usageContext
	 *            URI of the usageContext or null if not defined
	 * @return The gapp file to be used to instantiate a controller for this usageContext.
	 */
	private File getGappFile(final String usageContext) {
		// A path is only registered if the configure method has previously been called for this usageContext.
		final String registeredPath = Configuration.getInstance().getGappFilePath(usageContext);
		if (registeredPath == null) {
			return this.defaultGappFile;
		}
		return new File(registeredPath);
	}


	/**
	 * Adds a document to a corpus. This one-line wrapper exists so that the "unchecked" warning suppression is limited to the single add call instead of
	 * covering the whole process method (presumably Corpus is a raw List in the Gate version used; to be confirmed).
	 * 
	 * @param corpusGate
	 *            The Corpus
	 * @param docGate
	 *            The document to be added into corpusGate
	 */
	@SuppressWarnings("unchecked")
	private void addGateDocumentToCorpus(final Corpus corpusGate, final gate.Document docGate) {
		corpusGate.add(docGate);
	}


	/**
	 * Initialises the Gate framework, at most once for all the instances of this class. When Gate has already been initialised the method returns
	 * immediately, without checking its parameters.
	 * 
	 * @param gateHome
	 *            The Gate home directory; it must also contain the Gate configuration file
	 * @param pluginsdir
	 *            The Gate plugins directory
	 * @throws IOException
	 *             If one of the directories is null, does not exist, is not a directory or is not readable
	 * @throws WebLabCheckedException
	 *             If Gate internal initialisation fails
	 */
	private void initGate(final File gateHome, final File pluginsdir) throws IOException, WebLabCheckedException {
		// The flag is static: lock on the class, not on the instance, so that two instances created concurrently cannot both initialise Gate.
		synchronized (GateService.class) {
			// Gate must be initialized only one time !
			if (GateService.initialised) {
				return;
			}

			this.checkReadableDirectory("pluginsdir", pluginsdir);
			this.checkReadableDirectory("gateHome", gateHome);

			this.logger.debug("Here is the path of your application => " + gateHome.getAbsolutePath());
			Gate.setGateHome(gateHome);
			Gate.setUserConfigFile(new File(gateHome, GateService.GATE_USER_FILE));
			Gate.setPluginsHome(pluginsdir);
			Gate.setSiteConfigFile(new File(gateHome, GateService.GATE_USER_FILE));

			try {
				Gate.init();
			} catch (final GateException ge) {
				this.logger.warn("Error when initialising Gate.", ge);
				throw new WebLabCheckedException("Unable to initialise Gate.", ge);
			}

			GateService.initialised = true;
		}
	}


	/**
	 * Checks that a directory can be used by Gate; the problem found, if any, is logged and then thrown.
	 * 
	 * @param label
	 *            The name of the directory in the messages
	 * @param directory
	 *            The directory to check
	 * @throws IOException
	 *             If directory is null, does not exist, is not a directory or is not readable
	 */
	private void checkReadableDirectory(final String label, final File directory) throws IOException {
		final String message;
		if (directory == null) {
			message = label + " is null.";
		} else if (!directory.exists()) {
			message = label + " " + directory.getPath() + " does not exist.";
		} else if (!directory.isDirectory()) {
			message = label + " " + directory.getPath() + " is not a directory.";
		} else if (!directory.canRead()) {
			message = label + " " + directory.getPath() + " is not readable.";
		} else {
			return;
		}
		this.logger.warn(message);
		throw new IOException(message);
	}


	/**
	 * Validates the ProcessArgs and collects the Text units to be processed. If the resource is itself a Text, it is the only candidate; otherwise the
	 * candidates are the Text sub-resources selected by ResourceUtil. Candidates whose content is null or only made of whitespace are removed.
	 * 
	 * @param args
	 *            The ProcessArgs
	 * @return The list of Text contained by the Resource in args that have a non-blank content. May be empty.
	 * @throws InvalidParameterException
	 *             If args is null, if its resource is null or if its resource is not a MediaUnit.
	 */
	protected List<Text> checkParameters(final ProcessArgs args) throws InvalidParameterException {
		if (args == null) {
			final String message = "ProcessArgs was null.";
			throw new InvalidParameterException(message, message);
		}

		final Resource resource = args.getResource();
		if (resource == null) {
			final String message = "Resource in ProcessArg was null.";
			throw new InvalidParameterException(message, message);
		}
		if (!(resource instanceof MediaUnit)) {
			final String message = "This service only process MediaUnit; Resource was a: " + resource.getClass().getSimpleName() + ".";
			throw new InvalidParameterException(message, message);
		}

		final List<Text> candidates;
		if (resource instanceof Text) {
			candidates = new LinkedList<Text>();
			candidates.add((Text) resource);
		} else {
			candidates = ResourceUtil.getSelectedSubResources(args.getResource(), Text.class);
		}

		// Drop, through the iterator, every text that has nothing to analyse.
		final ListIterator<Text> cursor = candidates.listIterator();
		while (cursor.hasNext()) {
			final String content = cursor.next().getContent();
			final boolean blank = (content == null) || content.replaceAll("\\s+", "").isEmpty();
			if (blank) {
				cursor.remove();
			}
		}

		return candidates;
	}

}
