package org.ow2.weblab.service.gate;


import gate.Corpus;
import gate.CorpusController;
import gate.Factory;
import gate.Gate;
import gate.creole.ConditionalSerialAnalyserController;
import gate.creole.ConditionalSerialController;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.SerialAnalyserController;
import gate.creole.SerialController;
import gate.persist.PersistenceException;
import gate.util.GateException;
import gate.util.persistence.PersistenceManager;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;

import javax.jws.WebService;

import org.apache.commons.logging.LogFactory;
import org.springframework.core.io.ClassPathResource;
import org.weblab_project.core.exception.WebLabCheckedException;
import org.weblab_project.core.exception.WebLabUncheckedException;
import org.weblab_project.core.helper.RDFHelperFactory;
import org.weblab_project.core.helper.ResourceHelper;
import org.weblab_project.core.model.MediaUnit;
import org.weblab_project.core.model.Resource;
import org.weblab_project.core.model.text.Text;
import org.weblab_project.core.ontologies.DublinCore;
import org.weblab_project.core.util.ResourceUtil;
import org.weblab_project.services.analyser.Analyser;
import org.weblab_project.services.analyser.ProcessException;
import org.weblab_project.services.analyser.types.ProcessArgs;
import org.weblab_project.services.analyser.types.ProcessReturn;
import org.weblab_project.services.exception.WebLabException;



/**
 * Web service that runs a Gate pipeline over the Text sections of a WebLab resource.
 * 
 * Gate itself is initialised once per JVM (see {@link #initGate(File)}). For each call to
 * {@link #process(ProcessArgs)}, a Gate corpus is built from the Text sections of the resource,
 * processed by the corpus controller associated with the usage context (loaded from a gapp file),
 * and the resulting annotations are linked back to the WebLab Text sections.
 * 
 * @author khelif, ymombrun
 */

@WebService(endpointInterface = "org.weblab_project.services.analyser.Analyser")
public class GateService implements Analyser {

	private static final String GATE_HOME_DIR = "gate";

	private static final String GATE_USER_FILE = "gate.xml";

	private static final String GATE_PLUGINS_DIR = "plugins";

	/*
	 * TODO Check how we can handle the trouble with language and snowball porter stemmer.
	 * 
	 * The trouble is that in snowball language codes are defined in the plugin, and they cannot be by passed.
	 */
	private static final String GATE_LANGUAGE_FEATURE = "language";

	private static final String DEFAULT_GAPP_FILE = "default.gapp";

	public static final String DEFAULT_LANGUAGE = "en";

	/** Value of dc:language that is treated as "no language defined". */
	private static final String UNKNOWN_LANGUAGE = "unknown";

	/** Number of Gate Document creation failures after which the whole process call is aborted. */
	private static final int MAX_DOCUMENT_ERRORS = 3;

	/**
	 * Guards the lookup/creation of controllers. It is static because the controllers are stored in the
	 * Configuration singleton, which is shared by every instance of this class.
	 */
	private static final Object CONTROLLER_LOCK = new Object();

	/** Whether Gate has already been initialised. Only read and written while holding the class lock. */
	private static boolean initialized = false;

	private final File defaultGappFile;

	private final String defaultLanguage;

	/**
	 * Default constructor using DEFAULT_GAPP_FILE (from the classpath) as defaultGappFile, the "plugins"
	 * folder of the Gate home as plugins path and DEFAULT_LANGUAGE as default language.
	 * 
	 * @throws IOException
	 *             If the default gapp file cannot be found in classpath resources or If Gate home cannot be found in classpath resource
	 */
	public GateService() throws IOException {
		this(new ClassPathResource(DEFAULT_GAPP_FILE).getFile());
	}

	/**
	 * @param defaultGappFile
	 *            The gapp file used when no specific one is configured for a usage context
	 * @throws IOException
	 *             If Gate home cannot be found in classpath resource
	 */
	public GateService(final File defaultGappFile) throws IOException {
		this(defaultGappFile, new File(new ClassPathResource(GATE_HOME_DIR).getFile(), GATE_PLUGINS_DIR));
	}

	/**
	 * @param defaultGappFile
	 *            The gapp file used when no specific one is configured for a usage context
	 * @param pluginsPath
	 *            The folder given to Gate as plugins home
	 * @throws IOException
	 *             If Gate home cannot be found in classpath resource
	 */
	public GateService(final File defaultGappFile, final File pluginsPath) throws IOException {
		this(defaultGappFile, pluginsPath, DEFAULT_LANGUAGE);
	}

	/**
	 * @param defaultGappFile
	 *            The gapp file used when no specific one is configured for a usage context
	 * @param pluginsPath
	 *            The folder given to Gate as plugins home
	 * @param defaultLanguage
	 *            The language set on Gate documents when neither the text section nor the resource defines one
	 * @throws IOException
	 *             If Gate home cannot be found in classpath resource
	 */
	public GateService(final File defaultGappFile, final File pluginsPath, final String defaultLanguage) throws IOException {
		super();
		this.defaultGappFile = defaultGappFile;
		this.defaultLanguage = defaultLanguage;
		initGate(pluginsPath);
	}

	/**
	 * Processes every Text section (having a content) of the resource in args with the Gate pipeline of the
	 * usage context, and links the Gate annotations back to the Text sections.
	 * 
	 * The Gate documents and the Gate corpus created for the call are always released, whether the call
	 * succeeds or fails.
	 * 
	 * @param args
	 *            The ProcessArgs holding the resource to process and, optionally, a usage context
	 * @return A ProcessReturn wrapping the (annotated) input resource
	 * @throws ProcessException
	 *             If args are invalid, if the Gate corpus/documents/controller cannot be created or if the pipeline execution fails
	 */
	@Override
	public ProcessReturn process(ProcessArgs args) throws ProcessException {
		LogFactory.getLog(this.getClass()).debug("Early starting of Gate Analyser");

		// Tests the input parameters and get every Text section contained by the resource in args.
		final List<Text> texts = this.checkParameters(args);
		final Resource resource = args.getResource();

		LogFactory.getLog(this.getClass()).info("Starting of resource '" + resource.getUri() + "' with Gate Analyser");

		// Instantiates an empty Gate Corpus; nanoTime is appended to the name to distinguish successive calls on the same resource.
		final Corpus corpusGate;
		try {
			corpusGate = Factory.newCorpus(resource.getUri() + " " + System.nanoTime());
		} catch (final ResourceInstantiationException rie) {
			throw new ProcessException("Unable to instanciate new Corpus.", GateService.createWLE("E0", "Unexpected error"), rie);
		}

		final Map<gate.Document, Text> gateDocsAndText = new HashMap<gate.Document, Text>();
		try {
			this.fillCorpus(resource, texts, corpusGate, gateDocsAndText);

			final String usageContext;
			if (args.getUsageContext() != null) {
				usageContext = args.getUsageContext().getUri();
			} else {
				usageContext = null;
			}
			this.runPipeline(this.getController(usageContext), corpusGate);

			LogFactory.getLog(this.getClass()).info("Starting annotation extraction");
			this.extractAnnotations(gateDocsAndText);
		} finally {
			// Empties the memory in every case; otherwise a failing call would leave its documents and corpus in Gate.
			GateService.releaseGateResources(corpusGate, gateDocsAndText);
		}

		// Creates the return wrapper and add the resource in it.
		final ProcessReturn theRet = new ProcessReturn();
		theRet.setResource(resource);

		LogFactory.getLog(this.getClass()).info("Resource '" + resource.getUri() + "' successfully processed with Gate Analyser");

		return theRet;
	}

	/**
	 * Creates one Gate document per Text section, sets its language feature and adds it to the corpus.
	 * 
	 * The language is the dc:language of the text section; if none, the one of the resource; if none, defaultLanguage.
	 * 
	 * @param resource
	 *            The resource being processed
	 * @param texts
	 *            The Text sections to convert
	 * @param corpusGate
	 *            The corpus receiving the Gate documents
	 * @param gateDocsAndText
	 *            Output map filled with each created Gate document and its WebLab Text
	 * @throws ProcessException
	 *             If the creation of a Gate document failed MAX_DOCUMENT_ERRORS times
	 */
	private void fillCorpus(final Resource resource, final List<Text> texts, final Corpus corpusGate, final Map<gate.Document, Text> gateDocsAndText) throws ProcessException {
		// A RDF helper on the whole resource gives access to the dc:language of the resource and of each text section.
		final ResourceHelper helper = RDFHelperFactory.getResourceHelper(resource);
		String docDefaultLanguage = GateService.getLanguage(helper, resource.getUri());
		if (docDefaultLanguage == null) {
			docDefaultLanguage = this.defaultLanguage;
		}

		int errorCount = 0;
		for (final Text text : texts) {
			LogFactory.getLog(this.getClass()).debug("Text section to process by GateAnalyserComponent: " + text.getUri());
			LogFactory.getLog(this.getClass()).debug("Number of segments before GateAnalyserComponent: " + text.getSegment().size());

			final gate.Document docGate;
			try {
				docGate = Factory.newDocument(text.getContent());
			} catch (final ResourceInstantiationException rie) {
				LogFactory.getLog(this.getClass()).error("Unable to create a new Gate Document.", rie);
				errorCount++;
				// A few failing sections are tolerated (they are skipped); too many abort the call.
				if (errorCount < MAX_DOCUMENT_ERRORS) {
					continue;
				}
				throw new ProcessException("Fails three times to instanciate new Gate Document.", GateService.createWLE("E0", "Unexpected error"), rie);
			}

			// Extract the language of the text and add this as feature in the Gate Document.
			final String textLanguage = GateService.getLanguage(helper, text.getUri());
			if (textLanguage != null) {
				docGate.getFeatures().put(GATE_LANGUAGE_FEATURE, textLanguage);
			} else {
				docGate.getFeatures().put(GATE_LANGUAGE_FEATURE, docDefaultLanguage);
			}

			// Add the Gate doc in the corpus and maps it to its WebLab Text.
			addGateDocumentToCorpus(corpusGate, docGate);
			gateDocsAndText.put(docGate, text);
		}
	}

	/**
	 * Executes the controller on the corpus.
	 * 
	 * CorpusController are not thread safe (they can process only one corpus at a time), hence the
	 * synchronisation on the controller instance: on a given usageContext, executions are sequential;
	 * inputs from two usageContexts (i.e. two controllers) can be processed in parallel.
	 * 
	 * @param controller
	 *            The controller to execute
	 * @param corpusGate
	 *            The corpus to process
	 * @throws ProcessException
	 *             If the execution of the pipeline fails
	 */
	private void runPipeline(final CorpusController controller, final Corpus corpusGate) throws ProcessException {
		synchronized (controller) {
			controller.setCorpus(corpusGate);
			try {
				controller.execute();
			} catch (final ExecutionException ee) {
				throw new ProcessException("Unable to process pipeline of corpus.", GateService.createWLE("E0", "Unexpected exception."), ee);
			}
		}
	}

	/**
	 * Links the annotations of each Gate document to its WebLab Text.
	 * 
	 * @param gateDocsAndText
	 *            The processed Gate documents mapped to their WebLab Text
	 */
	private void extractAnnotations(final Map<gate.Document, Text> gateDocsAndText) {
		for (final Entry<gate.Document, Text> entry : gateDocsAndText.entrySet()) {
			final Text text = entry.getValue();
			GateHelper.linkGateAnnotsToText(text, entry.getKey().getAnnotations());

			if (LogFactory.getLog(this.getClass()).isDebugEnabled()) {
				LogFactory.getLog(this.getClass()).debug("Number of segment after GateExtractionComponent: " + text.getSegment().size());
				try {
					LogFactory.getLog(this.getClass()).debug(ResourceUtil.saveToXMLString(text));
				} catch (final WebLabCheckedException wlce) {
					LogFactory.getLog(this.getClass()).warn("Unable to serialise to XML the resource: '" + text.getUri() + "'.", wlce);
				}
			}
		}
	}

	/**
	 * Unloads and deletes every Gate document of the map, then deletes the corpus.
	 * 
	 * @param corpusGate
	 *            The corpus to delete
	 * @param gateDocsAndText
	 *            The map whose keys are the Gate documents to delete
	 */
	private static void releaseGateResources(final Corpus corpusGate, final Map<gate.Document, Text> gateDocsAndText) {
		for (final gate.Document docGate : gateDocsAndText.keySet()) {
			corpusGate.unloadDocument(docGate);
			Factory.deleteResource(docGate);
		}
		Factory.deleteResource(corpusGate);
	}

	/**
	 * This method first check if a controller exists in configuration singleton for the given usageContext.
	 * If not it creates a controller from the gapp file path in config (if exists) or from the default path,
	 * and stores it in the configuration singleton.
	 * 
	 * @param usageContext
	 *            The URI of usageContext or null if not define
	 * @return An existing corpus controller or a newly created one.
	 * @throws ProcessException
	 *             If the instantiation of the controller fails.
	 */
	private CorpusController getController(final String usageContext) throws ProcessException {
		// Static lock: the controllers live in the Configuration singleton, shared by every GateService instance.
		synchronized (CONTROLLER_LOCK) {
			final CorpusController existing = Configuration.getInstance().getController(usageContext);
			if (existing != null) {
				return existing;
			}

			final CorpusController created = this.loadController(usageContext);
			Configuration.getInstance().setController(usageContext, created);
			return created;
		}
	}

	/**
	 * Loads the gapp file of the usageContext and turns its content into a CorpusController.
	 * 
	 * A ConditionalSerialController (or subclass) is converted into a new ConditionalSerialAnalyserController
	 * sharing the same PRs and running strategies. Any other CorpusController (e.g. SerialAnalyserController)
	 * is used as is. Anything else is rejected.
	 * 
	 * @param usageContext
	 *            The URI of usageContext or null if not define
	 * @return The controller loaded from the gapp file
	 * @throws ProcessException
	 *             If the gapp file cannot be loaded or does not contain a supported controller
	 */
	private CorpusController loadController(final String usageContext) throws ProcessException {
		final Object loaded;
		try {
			loaded = PersistenceManager.loadObjectFromFile(this.getGappFile(usageContext));
		} catch (final IOException ioe) {
			throw new ProcessException("Unable to load gapp file.", GateService.createWLE("E2", "Insufficient resources."), ioe);
		} catch (final PersistenceException pe) {
			throw new ProcessException("Unable to load gapp file.", GateService.createWLE("E0", "Unexpected exception."), pe);
		} catch (final ResourceInstantiationException rie) {
			throw new ProcessException("Unable to load gapp file.", GateService.createWLE("E0", "Unexpected exception."), rie);
		}

		// Conditional pipelines are tested first: they are always converted, even when they already are corpus controllers.
		if (loaded instanceof ConditionalSerialController) {
			return GateService.toCorpusController((ConditionalSerialController) loaded);
		}
		if (loaded instanceof CorpusController) {
			return (CorpusController) loaded;
		}

		final String loadedType;
		if (loaded == null) {
			loadedType = "null";
		} else {
			loadedType = loaded.getClass().getName();
		}
		throw new ProcessException("Gapp file does not contain a supported corpus controller; found: " + loadedType + ".", GateService.createWLE("E0", "Unexpected exception."));
	}

	/**
	 * Converts a conditional pipeline into a conditional corpus pipeline having the same PRs and running strategies.
	 * The source pipeline is deleted from Gate once converted.
	 * 
	 * @param csController
	 *            The conditional pipeline loaded from the gapp file
	 * @return A newly created ConditionalSerialAnalyserController
	 * @throws ProcessException
	 *             If the ConditionalSerialAnalyserController cannot be created
	 */
	private static CorpusController toCorpusController(final ConditionalSerialController csController) throws ProcessException {
		final String targetClassName = ConditionalSerialAnalyserController.class.getCanonicalName();
		final ConditionalSerialAnalyserController conditionalPipeline;
		try {
			conditionalPipeline = (ConditionalSerialAnalyserController) Factory.createResource(targetClassName);
		} catch (final ResourceInstantiationException rie) {
			throw new ProcessException("Unable to create a '" + targetClassName + "' when converting conditional pipeline.", GateService.createWLE("E0", "Unexpected exception."), rie);
		}
		conditionalPipeline.setPRs(csController.getPRs());
		conditionalPipeline.setRunningStrategies(csController.getRunningStrategies());
		// Empties Gate from the useless Pipeline created.
		Factory.deleteResource(csController);
		return conditionalPipeline;
	}

	/**
	 * Reads the first dc:language literal of the given URI, lower-cased independently of the JVM locale.
	 * 
	 * @param helper
	 *            A resource Helper at the document level
	 * @param uri
	 *            URI of the resource to extract language
	 * @return The language or null if not found (no value, or value is "unknown" whatever its case)
	 */
	private static String getLanguage(final ResourceHelper helper, final String uri) {
		final List<String> languages = helper.getLitsOnPredSubj(uri, DublinCore.LANGUAGE_PROPERTY_NAME);
		if (languages.isEmpty()) {
			return null;
		}
		// Locale.ROOT: language codes must not be altered by locale specific rules (e.g. Turkish dotless i).
		final String language = languages.get(0).toLowerCase(Locale.ROOT);
		if (UNKNOWN_LANGUAGE.equals(language)) {
			return null;
		}
		return language;
	}

	/**
	 * It looks in configuration singleton if a gapp file path exists and returns it if any.
	 * Else, returns the default gapp file.
	 * 
	 * @param usageContext
	 *            URI of the usageContext or null if not defined
	 * @return The gapp file to be used to instantiate a controller for this usageContext.
	 * 
	 * @throws IOException
	 *             Not thrown by the current implementation; kept in the signature.
	 */
	private File getGappFile(final String usageContext) throws IOException {
		// Gets the right gapp file in the configuration singleton if configure method as previously been called.
		final String gappFilePath = Configuration.getInstance().getGateApplicationStateFilePath(usageContext);

		// Creates the file from the path if not null. Else, uses the default one.
		if (gappFilePath != null) {
			return new File(gappFilePath);
		}
		return this.defaultGappFile;
	}

	/**
	 * @param corpusGate
	 *            The Corpus
	 * @param docGate
	 *            The document to be added into corpusGate
	 */
	@SuppressWarnings("unchecked")
	private void addGateDocumentToCorpus(Corpus corpusGate, final gate.Document docGate) {
		corpusGate.add(docGate);
	}

	/**
	 * Initialises Gate (home, configuration files, plugins home) and GateHelper, the first time it is called.
	 * Subsequent calls, from any instance, do nothing.
	 * 
	 * @param pluginsdir
	 *            The folder given to Gate as plugins home
	 * @throws IOException
	 *             If Gate home cannot be found in classpath resource
	 */
	private void initGate(final File pluginsdir) throws IOException {
		// Gate must be initialized only one time ! The flag is static, so the lock must be the class, not the instance.
		synchronized (GateService.class) {
			if (initialized) {
				return;
			}

			final File gateHome = new ClassPathResource(GATE_HOME_DIR).getFile();
			LogFactory.getLog(this.getClass()).debug("Here is the path of your application => " + gateHome.getAbsolutePath());
			Gate.setGateHome(gateHome);
			Gate.setUserConfigFile(new File(gateHome, GATE_USER_FILE));
			Gate.setPluginsHome(pluginsdir);
			Gate.setSiteConfigFile(new File(gateHome, GATE_USER_FILE));

			try {
				Gate.init();
			} catch (final GateException ge) {
				throw new WebLabUncheckedException("Unable to find initialise Gate.", ge);
			}
			GateHelper.init();

			initialized = true;
		}
	}

	/**
	 * Validates args and collects the Text sections to process: the resource itself if it is a Text, else
	 * its Text sub resources. Text sections without content are ignored.
	 * 
	 * @param args
	 *            The ProcessArgs
	 * @return The list of Text (having a non null content) contained by the Resource in args.
	 * @throws ProcessException
	 *             If args is null, or if its resource is null or is not a MediaUnit.
	 */
	protected List<Text> checkParameters(final ProcessArgs args) throws ProcessException {
		if (args == null) {
			throw new ProcessException("ProcessArgs was null.", this.createInvalidParameterWLE());
		}

		final Resource res = args.getResource();
		if (res == null) {
			throw new ProcessException("Resource in ProcessArg was null.", this.createInvalidParameterWLE());
		}
		if (!(res instanceof MediaUnit)) {
			throw new ProcessException("This service only process MediaUnit; Resource was a: " + res.getClass().getSimpleName() + ".", this.createInvalidParameterWLE());
		}

		final List<Text> texts = new ArrayList<Text>();
		if (res instanceof Text) {
			texts.add((Text) res);
		} else {
			texts.addAll(ResourceUtil.getSelectedSubResources(res, Text.class));
		}

		// Text sections without content cannot be converted into Gate documents.
		for (final ListIterator<Text> textIt = texts.listIterator(); textIt.hasNext();) {
			if (textIt.next().getContent() == null) {
				textIt.remove();
			}
		}

		return texts;
	}

	/**
	 * @return A "E1" WebLabException
	 */
	private WebLabException createInvalidParameterWLE() {
		return GateService.createWLE("E1", "Invalid parameter");
	}

	/**
	 * @param errorId
	 *            The error id to set
	 * @param errorMessage
	 *            The error message to set
	 * @return A WebLabException having the given id and message
	 */
	private static WebLabException createWLE(final String errorId, final String errorMessage) {
		final WebLabException wle = new WebLabException();
		wle.setErrorId(errorId);
		wle.setErrorMessage(errorMessage);
		return wle;
	}

}
