/**
 * WEBLAB: Service oriented integration platform for media mining and intelligence applications
 * 
 * Copyright (C) 2004 - 2009 EADS DEFENCE AND SECURITY SYSTEMS
 * 
 * This library is free software; you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License along with this
 * library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301 USA
 */

package org.ow2.weblab.services.solr.indexer;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import javax.jws.WebService;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.util.DateUtil;
import org.ow2.weblab.core.extended.exception.WebLabCheckedException;
import org.ow2.weblab.core.extended.exception.WebLabUncheckedException;
import org.ow2.weblab.core.extended.ontologies.RDF;
import org.ow2.weblab.core.helper.impl.JenaResourceHelper;
import org.ow2.weblab.core.model.Audio;
import org.ow2.weblab.core.model.Document;
import org.ow2.weblab.core.model.Image;
import org.ow2.weblab.core.model.MediaUnit;
import org.ow2.weblab.core.model.Text;
import org.ow2.weblab.core.model.Video;
import org.ow2.weblab.core.services.Indexer;
import org.ow2.weblab.core.services.InvalidParameterException;
import org.ow2.weblab.core.services.ServiceNotConfiguredException;
import org.ow2.weblab.core.services.UnexpectedException;
import org.ow2.weblab.core.services.indexer.IndexArgs;
import org.ow2.weblab.core.services.indexer.IndexReturn;
import org.ow2.weblab.services.solr.SolrComponent;
import org.ow2.weblab.util.index.Field;

/**
 * WebLab {@link Indexer} implementation that converts a {@link MediaUnit} into
 * a {@link SolrInputDocument} and hands it to the {@link SolrComponent}.
 * <p>
 * This class only validates the indexing arguments, builds the Solr document
 * according to the injected {@link SolrIndexerConfig} and delegates the actual
 * indexing to the Solr component. Audio, video and image units are not indexed.
 */
@WebService(endpointInterface = "org.ow2.weblab.core.services.Indexer")
public class SolrIndexer implements Indexer {
	public static final String BEAN_NAME = "indexerServiceBean";

	private static final Log LOGGER = LogFactory.getLog(SolrIndexer.class);

	/** Message used whenever the URI of the resource to index cannot be used. */
	private static final String INVALID_URI_MESSAGE = "Resource to index does not have valid URI.";

	private SolrIndexerConfig indexerConfig;
	private String solrURL;

	private boolean noCore = false;

	/**
	 * Validates the configured Solr URL and initialises the default
	 * {@link SolrComponent} instance.
	 * 
	 * @throws WebLabUncheckedException
	 *             if the Solr URL is malformed (or not set) or if the Solr
	 *             component cannot be started
	 */
	@PostConstruct
	public void init() {
		// Validate the URL first so that a configuration mistake is reported
		// as such, before trying to start anything with it.
		try {
			new URL(solrURL);
		} catch (MalformedURLException e) {
			throw new WebLabUncheckedException("Cannot start the service. The solrURL is invalid [" + solrURL + "].", e);
		}

		if (indexerConfig == null) {
			// Not fatal here since the configuration can still be injected
			// through the setter, but indexing will fail until it is.
			LOGGER.warn("No indexer configuration has been set; indexing will fail until one is provided.");
		}

		try {
			// init default instance
			SolrComponent.getInstance(solrURL, null);
		} catch (WebLabCheckedException e) {
			throw new WebLabUncheckedException("Cannot start the SolrComponent.", e);
		}
	}

	/**
	 * Life-cycle hook called before the bean is discarded. Nothing is released
	 * here.
	 */
	@PreDestroy
	public void destroy() {
		// nothing to release in this class
	}

	/**
	 * Indexes the resource contained in <code>args</code>.
	 * 
	 * @param args
	 *            the indexing arguments; must contain a non-null
	 *            {@link MediaUnit}. Its usage context is passed to
	 *            {@link SolrComponent#getInstance} unless <code>noCore</code>
	 *            is set.
	 * @return an empty {@link IndexReturn}
	 * @throws InvalidParameterException
	 *             if <code>args</code> or its resource is null, or if the
	 *             resource is not a {@link MediaUnit}
	 * @throws UnexpectedException
	 *             if the media unit cannot be converted or added to Solr
	 */
	@Override
	public IndexReturn index(IndexArgs args) throws UnexpectedException, InvalidParameterException, ServiceNotConfiguredException {

		final MediaUnit unit = checkArgs(args);

		if (unit instanceof Audio || unit instanceof Video || unit instanceof Image) {
			LOGGER.warn("MediaUnit of type: " + unit.getClass().getSimpleName() + " cannot be indexed with Solr.");
			return new IndexReturn();
		}
		try {
			// convert WebLab resource into SolrDocument
			final SolrInputDocument doc = convertMediaUnit(unit);

			// when cores are disabled, the default instance is always used
			final String context = noCore ? null : args.getUsageContext();
			SolrComponent.getInstance(solrURL, context).addDocument(doc);

		} catch (WebLabCheckedException e) {
			throw new UnexpectedException("Cannot index document.", e);
		}
		return new IndexReturn();
	}

	/**
	 * Checks the arguments received by <code>index</code>.
	 * 
	 * @param args
	 *            The <code>IndexArgs</code> to check in the begin of
	 *            <code>index</code>.
	 * @return The contained <code>MediaUnit</code>
	 * @throws InvalidParameterException
	 *             if we are unable to extract the contained
	 *             <code>MediaUnit</code>
	 */
	private MediaUnit checkArgs(final IndexArgs args) throws InvalidParameterException {
		if (args == null) {
			throw invalidParameter("IndexArgs was null.");
		}
		if (args.getResource() == null) {
			throw invalidParameter("Args must contain a non-null Resource to index");
		}
		if (!(args.getResource() instanceof MediaUnit)) {
			throw invalidParameter("Resource to index is not a MediaUnit.");
		}
		return (MediaUnit) args.getResource();
	}

	/**
	 * Logs <code>message</code> as an error and builds the matching exception
	 * (the same text is used for the message and for the fault information).
	 */
	private static InvalidParameterException invalidParameter(final String message) {
		LOGGER.error(message);
		return new InvalidParameterException(message, message);
	}

	/**
	 * Builds the Solr document for a media unit: the unit URI is stored in the
	 * identifier field, then every configured field is filled with the text
	 * content and/or the RDF property values it is configured for.
	 * 
	 * @param unit
	 *            the media unit to convert
	 * @return the Solr document to index (one document per media unit)
	 * @throws WebLabCheckedException
	 *             if no indexer configuration is available, if the unit has
	 *             no valid URI or if a value cannot be converted to the type
	 *             of its field
	 */
	private SolrInputDocument convertMediaUnit(MediaUnit unit) throws WebLabCheckedException {
		if (indexerConfig == null) {
			// fail with a checked exception rather than a NullPointerException
			throw new WebLabCheckedException("Cannot convert resource: no indexer configuration has been set.", new IllegalStateException(
					"indexerConfig is null"));
		}

		final String uri = unit.getUri();
		checkUri(uri);

		// simple case : one SolrDocument for each WebLab Document
		final SolrInputDocument doc = new SolrInputDocument();
		doc.addField(SolrIndexerConfig.ID_FIELD, uri);

		// The RDF helper is the same for every field: create it once, and only
		// if at least one field actually indexes RDF properties.
		JenaResourceHelper helper = null;
		for (Field field : indexerConfig.getFields()) {
			// index text content if needed
			if (field.isIndexTextContent()) {
				addTextToField(doc, field.getName(), unit);
			}

			// index RDF properties if some are associated to the field
			if (field.getProperties() != null && field.getProperties().size() > 0) {
				if (helper == null) {
					helper = new JenaResourceHelper(unit);
				}
				addPropertyValues(doc, field, helper);
			}
		}
		return doc;
	}

	/**
	 * Ensures that <code>uri</code> is present and syntactically valid.
	 * 
	 * @throws WebLabCheckedException
	 *             if the URI is null or cannot be parsed; the cause is always
	 *             a {@link URISyntaxException}
	 */
	private static void checkUri(final String uri) throws WebLabCheckedException {
		if (uri == null) {
			// new URI(null) would throw a NullPointerException
			throw new WebLabCheckedException(INVALID_URI_MESSAGE, new URISyntaxException("", "URI is null"));
		}
		try {
			new URI(uri);
		} catch (URISyntaxException e) {
			throw new WebLabCheckedException(INVALID_URI_MESSAGE, e);
		}
	}

	/**
	 * Adds to <code>doc</code> the literal values of the RDF properties
	 * configured on <code>field</code>.
	 * <p>
	 * Subjects are the resources having at least one of the properties; when
	 * the field declares entity types, only subjects typed with one of them
	 * are kept.
	 * 
	 * @param doc
	 *            the Solr document being built
	 * @param field
	 *            the field configuration; its properties must not be null
	 * @param helper
	 *            RDF helper loaded on the media unit being converted
	 * @throws WebLabCheckedException
	 *             if a value cannot be converted to the type of the field
	 */
	private void addPropertyValues(SolrInputDocument doc, Field field, JenaResourceHelper helper) throws WebLabCheckedException {
		// select RDF properties to index
		final Set<String> subjects = new HashSet<String>();
		for (String prop : field.getProperties()) {
			subjects.addAll(helper.getSubjsOnPred(prop));
		}

		// check if we need to restrict to a sub list of typed RDF resources
		if (field.getEntityTypes() != null && field.getEntityTypes().size() > 0) {
			final Set<String> validEntityURIs = new HashSet<String>();
			for (String entityTypeURI : field.getEntityTypes()) {
				validEntityURIs.addAll(helper.getSubjsOnPredRes(RDF.TYPE, entityTypeURI));
			}
			subjects.retainAll(validEntityURIs);
		}

		// index the property values from the selected subjects
		for (String subject : subjects) {
			for (String property : field.getProperties()) {
				final List<String> values = helper.getLitsOnPredSubj(subject, property);
				// adding the values to the doc field : note that should work
				// fine with multi-valued fields; for non-multi-valued field
				// this may raise issue since only the last value may be taken
				// into account (but then it should have been multi-valued).
				for (String value : values) {
					addFieldValue(doc, field, value);
				}
			}
		}
	}

	/**
	 * Adds <code>value</code> to the document, converted according to the type
	 * of <code>field</code>: dates and longs are parsed, any other type is
	 * added as the raw string.
	 * 
	 * @throws WebLabCheckedException
	 *             if the value cannot be parsed as a date or as a long
	 */
	private void addFieldValue(SolrInputDocument doc, Field field, String value) throws WebLabCheckedException {
		switch (field.getType()) {
		case DATE:
			doc.addField(field.getName(), toDate(value));
			break;
		case LONG:
			doc.addField(field.getName(), toLong(value));
			break;
		default:
			// case RES_URI:
			// case URI:
			// case TEXT:
			doc.addField(field.getName(), value);
		}

	}

	/**
	 * Parses a date with Solr's {@link DateUtil}.
	 * <p>
	 * No lock is taken: the parsing does not use any state of this class.
	 * NOTE(review): this relies on <code>DateUtil.parseDate</code> being safe
	 * for concurrent use, as described in the Solr documentation.
	 */
	private static Date toDate(final String value) throws WebLabCheckedException {
		try {
			return DateUtil.parseDate(value);
		} catch (ParseException e) {
			throw new WebLabCheckedException("Cannot convert field value [" + value + "] to date.", e);
		}
	}

	/** Parses a long, reporting failures as a checked exception. */
	private static Long toLong(final String value) throws WebLabCheckedException {
		try {
			return Long.valueOf(Long.parseLong(value));
		} catch (NumberFormatException e) {
			throw new WebLabCheckedException("Cannot convert field value [" + value + "] to long.", e);
		}
	}

	/**
	 * Adds the text content found in <code>unit</code> to a field. Documents
	 * are explored recursively, texts contribute their content, and any other
	 * kind of unit is ignored.
	 * 
	 * @param doc
	 *            is the SolRDocument to be indexed
	 * @param name
	 *            is the field name
	 * @param unit
	 *            is where the text content should be found
	 */
	private void addTextToField(SolrInputDocument doc, String name, MediaUnit unit) {
		if (unit instanceof Document) {
			for (MediaUnit child : ((Document) unit).getMediaUnit()) {
				addTextToField(doc, name, child);
			}
		} else if (unit instanceof Text) {
			final String content = ((Text) unit).getContent();
			// a text without content has nothing to index: do not put a null
			// value in the document
			if (content != null) {
				doc.addField(name, content);
			}
		} else {
			LOGGER.debug("Canot extarct text content from [" + unit.getClass().getSimpleName() + "].");
		}
	}

	/**
	 * @return the configuration describing the fields to index
	 */
	public SolrIndexerConfig getIndexerConfig() {
		return indexerConfig;
	}

	/**
	 * @param indexerConfig
	 *            the configuration describing the fields to index
	 */
	public void setIndexerConfig(SolrIndexerConfig indexerConfig) {
		this.indexerConfig = indexerConfig;
	}

	/**
	 * @return the URL passed to the {@link SolrComponent}
	 */
	public String getSolrURL() {
		return solrURL;
	}

	/**
	 * @param solrURL
	 *            the URL passed to the {@link SolrComponent}; must be a valid
	 *            URL when {@link #init()} is called
	 */
	public void setSolrURL(String solrURL) {
		this.solrURL = solrURL;
	}

	/**
	 * @return <code>true</code> if the usage context of the indexing arguments
	 *         is ignored and the default Solr instance is always used
	 */
	public boolean isNoCore() {
		return noCore;
	}

	/**
	 * @param noCore
	 *            <code>true</code> to ignore the usage context of the indexing
	 *            arguments and always use the default Solr instance
	 */
	public void setNoCore(boolean noCore) {
		this.noCore = noCore;
	}
}
