package org.ow2.weblab.service.translate.moses;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.annotation.PostConstruct;
import javax.jws.WebService;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.ow2.weblab.core.extended.comparator.SegmentComparator;
import org.ow2.weblab.core.extended.factory.MediaUnitFactory;
import org.ow2.weblab.core.extended.factory.SegmentFactory;
import org.ow2.weblab.core.extended.properties.PropertiesLoader;
import org.ow2.weblab.core.extended.util.ResourceUtil;
import org.ow2.weblab.core.model.Document;
import org.ow2.weblab.core.model.LinearSegment;
import org.ow2.weblab.core.model.Resource;
import org.ow2.weblab.core.model.Text;
import org.ow2.weblab.core.model.processing.WProcessingAnnotator;
import org.ow2.weblab.core.services.AccessDeniedException;
import org.ow2.weblab.core.services.Analyser;
import org.ow2.weblab.core.services.ContentNotAvailableException;
import org.ow2.weblab.core.services.InsufficientResourcesException;
import org.ow2.weblab.core.services.InvalidParameterException;
import org.ow2.weblab.core.services.ServiceNotConfiguredException;
import org.ow2.weblab.core.services.UnexpectedException;
import org.ow2.weblab.core.services.UnsupportedRequestException;
import org.ow2.weblab.core.services.analyser.ProcessArgs;
import org.ow2.weblab.core.services.analyser.ProcessReturn;
import org.ow2.weblab.rdf.Value;
import org.ow2.weblab.service.translate.Translator;
import org.purl.dc.elements.DublinCoreAnnotator;

/**
 * Translation web service using the Moses open source framework.
 * 
 * The service is configured from a property file (see {@link #init(String)})
 * in which the following keys are recognised:
 * <ul>
 * <li><code>align-segment</code>: boolean, whether a pair of aligned linear
 * segments is created for each translated sentence;</li>
 * <li><code>sentence-regexp</code>: regular expression used to split a text
 * into sentences;</li>
 * <li><code>xx-yy-model</code>: declares a translator. <code>xx</code> is the
 * language code the translator is registered under (it is matched against
 * the language annotated on each text unit) and <code>yy</code> is handed to
 * the {@link MosesTranslator} constructor (presumably the target language
 * code - to be confirmed against MosesTranslator).</li>
 * </ul>
 * 
 * @author Jérémie Doucy, CASSIDIAN, an EADS company
 * 
 */
@WebService(endpointInterface = "org.ow2.weblab.core.services.Analyser")
public class MosesTranslatorService implements Analyser {

	/*
	 * whether an aligned pair of segments is created for each sentence
	 */
	protected boolean alignSegment = false;

	/*
	 * regexp used to split between sentences. Inside a character class '|'
	 * is a literal and not an alternation, so it must not be listed here:
	 * otherwise every pipe character of the text is a sentence boundary.
	 */
	protected String splitRegexp = "[.?!\\n]";

	/*
	 * map (languageCode -> translator); null until init has succeeded
	 */
	protected static Map<String, Translator> TRANSLATORS;

	/*
	 * pattern of the property keys which declare a translation model
	 */
	static Pattern CONFIG_LANG_MODEL_PATTERN = Pattern
			.compile("(\\w{2,3})-(\\w{2,3})-model");

	protected static Log LOG = LogFactory.getLog(MosesTranslatorService.class);

	/**
	 * Loads the configuration and (re)builds the translators map.
	 * 
	 * The map is built aside and only published once every entry has been
	 * read successfully, so that a failing (re)initialisation never exposes a
	 * partially filled map to concurrent requests.
	 * 
	 * @param configFile
	 *            name of the property file, handed to
	 *            {@link PropertiesLoader#loadProperties(String)}
	 * @throws IOException
	 *             if a model entry does not hold one or two comma separated
	 *             values, or if the creation of a translator fails
	 */
	public void init(String configFile) throws IOException {
		/*
		 * loading property file
		 */
		Map<String, String> props = PropertiesLoader.loadProperties(configFile);

		/*
		 * translators found in the configuration, published at the very end
		 */
		Map<String, Translator> loaded = Collections
				.synchronizedMap(new HashMap<String, Translator>());

		for (Entry<String, String> entry : props.entrySet()) {
			String key = entry.getKey();
			String value = entry.getValue();

			if ("align-segment".equals(key)) {
				this.alignSegment = Boolean.parseBoolean(value);
				LOG.debug("Setting align segment to " + this.alignSegment);
				continue;
			}
			if ("sentence-regexp".equals(key)) {
				this.splitRegexp = value;
				LOG.debug("Setting regexp used to slipt between sentences to "
						+ this.splitRegexp);
				continue;
			}

			Matcher m = CONFIG_LANG_MODEL_PATTERN.matcher(key);
			if (!m.matches()) {
				LOG.warn("Skipping unknowed property: " + key);
				continue;
			}

			try {
				/*
				 * the value is "first" or "first,second": the second part,
				 * when present, is a specific regular expression for this
				 * translator
				 */
				String[] values = value.split(",");
				if (values.length == 1) {
					/*
					 * default regular expression
					 */
					loaded.put(m.group(1), new MosesTranslator(values[0], m
							.group(2)));
				} else if (values.length == 2) {
					/*
					 * specialised regular expression
					 */
					loaded.put(m.group(1), new MosesTranslator(values[0], m
							.group(2), values[1].trim()));
				} else {
					String errorMsg = "Bad value " + value + " for entry "
							+ key + " inside " + configFile
							+ " moses configuration file.";
					LOG.trace(errorMsg);
					throw new IOException(errorMsg);
				}
			} catch (IOException e) {
				LOG.error(
						"Error during initialisation of moses translator from file: "
								+ value + " for languages couple: " + key, e);
				throw e;
			}
		}

		/*
		 * everything went fine: publish the new configuration
		 */
		TRANSLATORS = loaded;
	}

	/**
	 * Initialises the service from <code>moses_config.properties</code>.
	 * 
	 * @throws IOException
	 *             see {@link #init(String)}
	 */
	@PostConstruct
	public void init() throws IOException {
		init("moses_config.properties");
	}

	/**
	 * Translates every selected text unit of the received document. A new
	 * text unit is added to the document for each text that could be
	 * translated; texts without language, without content or without
	 * matching translator are skipped with a warning.
	 * 
	 * @param processArgs
	 *            must hold a {@link Document}
	 * @return a ProcessReturn holding the very same resource as the one
	 *         received
	 * @throws InvalidParameterException
	 *             if processArgs is null or its resource is not a Document
	 * @throws InsufficientResourcesException
	 *             if the resource of processArgs is null
	 * @throws ServiceNotConfiguredException
	 *             if the service has not been successfully initialised
	 */
	@Override
	public ProcessReturn process(ProcessArgs processArgs)
			throws AccessDeniedException, ContentNotAvailableException,
			InsufficientResourcesException, InvalidParameterException,
			ServiceNotConfiguredException, UnexpectedException,
			UnsupportedRequestException {
		/*
		 * checks args
		 */
		this.checkArgs(processArgs);

		/*
		 * fail with the declared exception rather than with a
		 * NullPointerException when init has not been (successfully) called
		 */
		if (TRANSLATORS == null) {
			throw new ServiceNotConfiguredException(
					"Moses translators have not been initialised.",
					"Moses translators have not been initialised.");
		}

		Document document = (Document) processArgs.getResource();
		/*
		 * return sub text units or text if directly a text
		 */
		List<Text> texts = ResourceUtil.getSelectedSubResources(document,
				Text.class);

		if (texts != null && texts.size() > 0) {
			long start = 0;
			if (LOG.isDebugEnabled()) {
				start = System.currentTimeMillis();
			}
			for (Text text : texts) {
				translate(text, document);
			}

			if (LOG.isDebugEnabled()) {
				LOG.debug("Translation duration :"
						+ (System.currentTimeMillis() - start) + "ms");
			}
			LOG.info("Document [" + processArgs.getResource().getUri()
					+ "]: translation done.");
		}
		ProcessReturn ret = new ProcessReturn();
		ret.setResource(processArgs.getResource());
		return ret;
	}

	/**
	 * Translates one text unit, sentence by sentence, and stores the result
	 * in a new text unit linked to <code>parent</code>.
	 * 
	 * Every sentence is translated before anything is added to the document,
	 * so that a translation failure does not leave an empty translated unit
	 * (or half of the aligned segments) behind.
	 * 
	 * @param text
	 *            the text unit to translate
	 * @param parent
	 *            the document receiving the translated text unit
	 */
	private void translate(Text text, Document parent) {
		/*
		 * get text language
		 */
		DublinCoreAnnotator dublinCoreAnnotator = new DublinCoreAnnotator(text);
		Value<String> languageVal = dublinCoreAnnotator.readLanguage();
		if (languageVal == null || !languageVal.hasValue()) {
			/*
			 * no identified language on the text unit
			 */
			LOG.warn("No language identified on <" + text.getUri()
					+ "> skipping text unit.");
			return;
		}
		String language = languageVal.firstTypedValue();

		Translator translator = TRANSLATORS.get(language);
		if (translator == null) {
			/*
			 * no translator defined for this language
			 */
			LOG.warn("No translator defined for <" + language
					+ "> skipping text unit.");
			return;
		}

		String content = text.getContent();
		if (content == null) {
			/*
			 * nothing to translate
			 */
			LOG.warn("No content available on <" + text.getUri()
					+ "> skipping text unit.");
			return;
		}

		try {
			/*
			 * fails here, before any modification, if the uri is not valid
			 */
			URI sourceUri = new URI(text.getUri());

			/*
			 * first pass: split on sentences and translate each of them;
			 * the document is left untouched
			 */
			String[] sentences = content.split(this.splitRegexp);
			String[] translations = new String[sentences.length];
			for (int i = 0; i < sentences.length; i++) {
				sentences[i] = sentences[i].trim();
				if (sentences[i].length() > 0) {
					translations[i] = translator.translate(sentences[i]);
				}
			}

			/*
			 * second pass: creation of the translated media unit
			 */
			Text translatedTextUnit = MediaUnitFactory.createAndLinkMediaUnit(
					parent, Text.class);
			/*
			 * media unit alignment
			 */
			WProcessingAnnotator wProcessingAnnotator = new WProcessingAnnotator(
					translatedTextUnit);
			wProcessingAnnotator.writeTranslationOf(sourceUri);

			/*
			 * buffer which will contain the translated text
			 */
			StringBuilder translatedBuffer = new StringBuilder();
			/*
			 * position in the source content from which the current
			 * sentence is searched
			 */
			int mainIndex = 0;

			for (int i = 0; i < sentences.length; i++) {
				String sentence = sentences[i];
				if (sentence.length() == 0) {
					continue;
				}
				String curTranslation = translations[i];

				/*
				 * segment alignment or not
				 */
				if (this.alignSegment) {
					/*
					 * getting real current indexes
					 */
					mainIndex = content.indexOf(sentence, mainIndex);
					int translatedIndex = translatedBuffer.length();

					/*
					 * creating segments
					 */
					LinearSegment mainLinearSegment = SegmentFactory
							.createAndLinkLinearSegment(text, mainIndex,
									mainIndex + sentence.length());
					LinearSegment translatedLinearSegment = SegmentFactory
							.createAndLinkLinearSegment(translatedTextUnit,
									translatedIndex, translatedIndex
											+ curTranslation.length());

					/*
					 * segment alignment annotation
					 */
					wProcessingAnnotator.startInnerAnnotatorOn(new URI(
							translatedLinearSegment.getUri()));
					wProcessingAnnotator.writeTranslationOf(new URI(
							mainLinearSegment.getUri()));
				}

				/*
				 * append translated text to the buffer, one sentence per
				 * line
				 */
				translatedBuffer.append(curTranslation).append('\n');
				mainIndex += sentence.length();
			}

			translatedTextUnit.setContent(translatedBuffer.toString());
			DublinCoreAnnotator coreAnnotator = new DublinCoreAnnotator(
					translatedTextUnit);
			coreAnnotator.writeLanguage(translator.getLangCode());

			/*
			 * segment ordering
			 */
			Collections.sort(text.getSegment(), new SegmentComparator());
			Collections.sort(translatedTextUnit.getSegment(),
					new SegmentComparator());

		} catch (IOException e) {
			/*
			 * the translator failed on one of the sentences
			 */
			LOG.warn("Error during translation of " + text.getUri()
					+ " with identified language: <" + language
					+ "> skipping text unit.", e);
			if (LOG.isDebugEnabled()) {
				LOG.debug("Text content which failed to translate: "
						+ content);
			}
		} catch (URISyntaxException e) {
			LOG.error("UNABLE TO TRANSFORM WLURI:" + text.getUri()
					+ " to URI", e);
		}
	}

	/**
	 * Validates the arguments received by
	 * {@link #process(ProcessArgs)}.
	 * 
	 * @param processArg
	 *            The processArgs; must hold a resource which is a
	 *            {@link Document}
	 * @throws InvalidParameterException
	 *             If processArg is null or if its resource is not a Document
	 * @throws InsufficientResourcesException
	 *             If the resource of processArg is null
	 */
	private void checkArgs(final ProcessArgs processArg)
			throws AccessDeniedException, ContentNotAvailableException,
			InsufficientResourcesException, InvalidParameterException,
			ServiceNotConfiguredException, UnexpectedException,
			UnsupportedRequestException {
		if (processArg == null) {
			throw new InvalidParameterException("ProcessArgs was null.",
					"ProcessArgs was null.");
		}
		Resource res = processArg.getResource();
		if (res == null) {
			throw new InsufficientResourcesException(
					"Resource in ProcessArgs was null.",
					"Resource in ProcessArgs was null.");
		}
		if (!(res instanceof Document)) {
			throw new InvalidParameterException(
					"Resource in ProcessArgs was not an instance of Document but of ["
							+ res.getClass().getCanonicalName() + "].", "");
		}
	}

}
