/**
 * WEBLAB: Service oriented integration platform for media mining and intelligence applications
 * 
 * Copyright (C) 2004 - 2012 Cassidian, an EADS company
 * 
 * This library is free software; you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License along with this
 * library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301 USA
 */

package org.ow2.weblab.service.gate.converter;

import gate.AnnotationSet;
import gate.Corpus;
import gate.Document;
import gate.Factory;
import gate.creole.ontology.OConstants.RDF;
import gate.creole.ontology.OConstants.RDFS;

import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Collections;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.logging.LogFactory;
import org.ow2.weblab.core.extended.comparator.SegmentComparator;
import org.ow2.weblab.core.extended.exception.WebLabCheckedException;
import org.ow2.weblab.core.extended.factory.AnnotationFactory;
import org.ow2.weblab.core.extended.factory.SegmentFactory;
import org.ow2.weblab.core.extended.ontologies.WebLabProcessing;
import org.ow2.weblab.core.extended.util.ResourceUtil;
import org.ow2.weblab.core.extended.util.TextUtil;
import org.ow2.weblab.core.helper.PoKHelper;
import org.ow2.weblab.core.helper.impl.JenaPoKHelper;
import org.ow2.weblab.core.model.Annotation;
import org.ow2.weblab.core.model.LinearSegment;
import org.ow2.weblab.core.model.Resource;
import org.ow2.weblab.core.model.Text;

/**
 * This class is the basic implementation of GateConverter.
 * It enables the creation of WOOKIE instances of NamedEntities in each Text section of the input WebLab Resource.
 * 
 * @author lserrano
 * @date 2011-03-16
 */
public class GateToWookie implements GateConverter {


	/**
	 * The URI of the service to be added in each annotation created (or null if nothing shall be added).
	 */
	private String serviceURI;


	/**
	 * The namespace of the WOOKIE ontology. It is used both as the "wookie" prefix in the produced RDF
	 * and as the base of the type URI of the created instances.
	 */
	private static final String WOOKIE_URI = "http://weblab.ow2.org/wookie#";


	/**
	 * The first part of the URI of the created instances.
	 */
	private static final String INSTANCE_URI_PREFIX = "http://weblab.ow2.org/wookie/instances/";


	/**
	 * Annotates every WebLab Text of the map with the entities found in its associated Gate document, then
	 * releases the Gate resources: each document is unloaded from the corpus and deleted once processed, the
	 * map is emptied and the corpus itself is finally deleted.
	 * 
	 * @param corpusGate
	 *            The Gate corpus holding the documents; it is deleted at the end of the method
	 * @param resource
	 *            The WebLab resource being processed (not used by this implementation)
	 * @param gateDocsAndText
	 *            The Gate documents and their WebLab Text counterpart; the map must be modifiable since it is emptied by this method
	 */
	@Override
	public void convertInformation(final Corpus corpusGate, final Resource resource, final Map<Document, Text> gateDocsAndText) {

		final Iterator<Entry<Document, Text>> it = gateDocsAndText.entrySet().iterator();
		while (it.hasNext()) {
			final Entry<Document, Text> entry = it.next();
			final Document gateDoc = entry.getKey();
			final Text text = entry.getValue();

			this.linkGateAnnotsToText(text, gateDoc.getAnnotations());

			if (LogFactory.getLog(this.getClass()).isDebugEnabled()) {
				LogFactory.getLog(this.getClass()).debug("Number of segment after GateExtractionComponent: " + text.getSegment().size());
				try {
					LogFactory.getLog(this.getClass()).debug(ResourceUtil.saveToXMLString(text));
				} catch (final WebLabCheckedException wlce) {
					LogFactory.getLog(this.getClass()).warn("Unable to serialise to XML the resource: '" + text.getUri() + "'.", wlce);
				}
			}

			// Empties the memory for each doc
			corpusGate.unloadDocument(gateDoc);
			Factory.deleteResource(gateDoc);
			it.remove();
		}

		// Clears the map to get memory back
		gateDocsAndText.clear();

		// Empties the memory from the corpus
		Factory.deleteResource(corpusGate);
	}


	/**
	 * Annotates the text with each supported annotation (Person, Organization, Location) of the annotation set.
	 * At the end, sorts the segments list to ease further process.
	 * If no entity could be linked to the text, the WebLab annotation created for the occasion is removed.
	 * 
	 * @param text
	 *            The WebLab Text to be annotated
	 * @param annots
	 *            The Gate annotation set to be used to annotate text
	 */
	private void linkGateAnnotsToText(final Text text, final AnnotationSet annots) {
		// Number of entities that have really been linked to the text
		int nbAnnots = 0;

		// Creates the annotation that will contain the information extracted from Gate
		final Annotation wlAnnot = AnnotationFactory.createAndLinkAnnotation(text);

		/*
		 * Creates the helper that will be used.
		 * Set the autoCommit mode to false to prevent from a lot of useless serializations.
		 * Add some prefixes to have a "beautiful" RDF.
		 */
		final PoKHelper pokhe = new JenaPoKHelper(wlAnnot);
		pokhe.setAutoCommitMode(false);
		pokhe.setNSPrefix("wlp", WebLabProcessing.NAMESPACE);
		pokhe.setNSPrefix("wookie", GateToWookie.WOOKIE_URI);

		// Add is producedBy statement if needed
		if (this.serviceURI != null) {
			pokhe.createResStat(wlAnnot.getUri(), WebLabProcessing.IS_PRODUCED_BY, this.serviceURI);
		}

		if (LogFactory.getLog(this.getClass()).isDebugEnabled()) {
			LogFactory.getLog(this.getClass()).debug("Gate Annotation set: " + annots);
		}

		// For each annotation in the annotation set, only count those that really produced a segment and an instance
		for (final gate.Annotation gateAnnot : annots) {
			final String wookieType = GateToWookie.toWookieType(gateAnnot.getType());
			if ((wookieType != null) && this.linkGateAnnotToText(text, gateAnnot, pokhe, wookieType)) {
				nbAnnots++;
			}
		}

		if (nbAnnots > 0) {
			// Final commit of the extracted RDF. A RuntimeException might occur in case of bad syntax. Catch it to prevent the whole document from not being annotated
			try {
				pokhe.commit();
			} catch (final Exception e) {
				LogFactory.getLog(this.getClass()).error("Unable to serialise RDF model for text '" + text.getUri() + "'.", e);
				LogFactory.getLog(this.getClass()).debug(annots);
			}

			// Sort segments in the right order to have a better usability
			Collections.sort(text.getSegment(), new SegmentComparator());
		} else {
			// Remove useless annotation (nothing has been committed into it)
			text.getAnnotation().remove(wlAnnot);
		}

	}


	/**
	 * Maps a Gate annotation type to the local name of the WOOKIE class used for the created instance.
	 * 
	 * @param gateType
	 *            The type of the Gate annotation (may be null)
	 * @return The WOOKIE class local name, or null if the Gate type is not handled by this converter
	 */
	private static String toWookieType(final String gateType) {
		if ("Person".equals(gateType)) {
			return "Person";
		}
		if ("Organization".equals(gateType)) {
			return "Unit";
		}
		if ("Location".equals(gateType)) {
			return "Place";
		}
		return null;
	}


	/**
	 * Creates a <code>LinearSegment</code> at the position of the <code>gate.Annotation</code>.
	 * Creates an instance of this entity using the PoKHelper.
	 * If the text covered by the segment cannot be retrieved, the segment is removed from the text and nothing is added to the PoKHelper.
	 * 
	 * @param text
	 *            The text section to process
	 * @param annotGate
	 *            An annotation in gate format
	 * @param pokh
	 *            The pokHelper to be used to create instances.
	 * @param annotWookieType
	 *            The local name of the WOOKIE class of the instance to create (appended to the WOOKIE namespace and to the instance URI prefix)
	 * @return <code>true</code> if the segment and the instance statements have been created, <code>false</code> if the segment had to be removed
	 */
	private boolean linkGateAnnotToText(final Text text, final gate.Annotation annotGate, final PoKHelper pokh, final String annotWookieType) {
		// Creates the segment from start and end of the Gate Annotation
		final LinearSegment segment = SegmentFactory.createAndLinkLinearSegment(text, annotGate.getStartNode().getOffset().intValue(), annotGate.getEndNode().getOffset().intValue());

		/*
		 * Try to retrieve the label from the text content.
		 * If it throws an exception, it means that the segment is not properly set and needs to be removed.
		 */
		final String label;
		try {
			label = TextUtil.getSegmentText(text, segment);
		} catch (final WebLabCheckedException wlce) {
			LogFactory.getLog(this.getClass()).warn("Unable to retrieve text at segment: " + segment.getUri() + " - " + segment.getStart() + " - " + segment.getEnd() + ". Removing it.", wlce);
			text.getSegment().remove(segment);
			return false;
		}

		// The URI of the RDF instance, derived from the label (or time-based if the label cannot be turned into a URI)
		final String instanceURI = this.getUriFromLabel(GateToWookie.INSTANCE_URI_PREFIX + annotWookieType + '#', label);

		// The type of the RDF instance
		final String typeURI = GateToWookie.WOOKIE_URI + annotWookieType;

		// Add simple statements: type, refersTo and label.
		pokh.createResStat(instanceURI, RDF.TYPE, typeURI);
		pokh.createResStat(segment.getUri(), WebLabProcessing.REFERS_TO, instanceURI);
		final String trimmedLabel = label.trim();
		if (!trimmedLabel.isEmpty()) {
			pokh.createLitStat(instanceURI, RDFS.LABEL, trimmedLabel);
		}
		return true;
	}


	/**
	 * Sets the URI of the service to be referenced (through an isProducedBy statement) in each created annotation.
	 * A null or empty value is ignored and leaves the current value unchanged.
	 * 
	 * @param serviceURI
	 *            The URI of the service, or null/empty to keep the current one
	 */
	@Override
	public void setServiceURI(final String serviceURI) {
		if ((serviceURI != null) && !serviceURI.isEmpty()) {
			this.serviceURI = serviceURI;
		}
	}


	/**
	 * Builds the URI of an instance from its label: a set of punctuation characters and spaces are replaced by
	 * underscores, the result is lower-cased and appended to the base URI.
	 * If the result is not a valid URL/URI, a time-based URI (base URI followed by <code>System.nanoTime()</code>) is returned instead.
	 * 
	 * @param baseUri
	 *            The beginning of the URI to create
	 * @param label
	 *            The label of the entity
	 * @return The URI built from the label, or a time-based one if the label cannot be transformed into a URI
	 */
	private String getUriFromLabel(final String baseUri, final String label) {
		String uri = baseUri + System.nanoTime();
		try {
			// Locale.ROOT makes the generated URI independent from the default locale of the JVM (e.g. Turkish dotless i)
			final String cleanedName = label.replace('(', '_').replace(' ', '_').replace(')', '_').replace("N°", "N_").replace("n°", "n.").replace('$', '_').replace('/', '_').replace('\\', '_')
					.replace('#', '_').replace('\'', '_').replace('.', '_').replace(',', '_').replace('?', '_').replace('!', '_').replace('@', '_').toLowerCase(Locale.ROOT);
			uri = new URL(baseUri + cleanedName).toURI().toString();
		} catch (final URISyntaxException urise) {
			LogFactory.getLog(this.getClass()).warn("Unable to transform the label '" + label + "' into a uri", urise);
		} catch (final MalformedURLException murle) {
			LogFactory.getLog(this.getClass()).warn("Unable to transform the label '" + label + "' into a uri", murle);
		}
		return uri;
	}

}
