/**
 * WEBLAB: Service oriented integration platform for media mining and intelligence applications
 * 
 * Copyright (C) 2004 - 2011 Cassidian, an EADS company
 * 
 * This library is free software; you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License along with this
 * library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301 USA
 */

package org.ow2.weblab.services.solr;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import javax.xml.parsers.ParserConfigurationException;

import org.apache.commons.lang3.time.DateUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.MoreLikeThisParams;
import org.apache.solr.core.CoreContainer;
import org.ow2.weblab.core.extended.exception.WebLabCheckedException;
import org.ow2.weblab.core.extended.exception.WebLabUncheckedException;
import org.ow2.weblab.core.extended.ontologies.RDF;
import org.ow2.weblab.core.extended.ontologies.WebLabProcessing;
import org.ow2.weblab.core.extended.util.ResourceUtil;
import org.ow2.weblab.core.helper.BeanHelper;
import org.ow2.weblab.core.helper.ResourceHelper;
import org.ow2.weblab.core.helper.impl.JenaResourceHelper;
import org.ow2.weblab.core.helper.impl.RDFSelectorFactory;
import org.ow2.weblab.core.helper.impl.Results;
import org.ow2.weblab.core.helper.impl.SimpleSelector;
import org.ow2.weblab.core.model.Document;
import org.ow2.weblab.core.model.LinearSegment;
import org.ow2.weblab.core.model.MediaUnit;
import org.ow2.weblab.core.model.Resource;
import org.ow2.weblab.core.model.Segment;
import org.ow2.weblab.core.model.Text;
import org.ow2.weblab.services.indexer.impl.SolrIndexerConfig;
import org.ow2.weblab.services.searcher.impl.SolrSearcherConfig;
import org.ow2.weblab.util.index.EntityField;
import org.ow2.weblab.util.index.Field;
import org.ow2.weblab.util.index.IndexerConfig;
import org.ow2.weblab.util.search.SearcherConfig;
import org.xml.sax.SAXException;


/**
 * Component used to :
 * <ul>
 * <li>open a connection to embedded solr server</li>
 * <li>add a document to the index</li>
 * <li>search in the index</li>
 * </ul>
 * 
 * This class implements singleton pattern in order to share index connection
 * and improve performance (Solr server is thread-safe).<br/>
 * The index buffer is managed by Solr, so this class only keeps a counter and
 * commits the pending documents once the configured buffer size is reached.
 */

public class SolrComponent {


	/**
	 * Name of the Spring configuration file used by {@link #getInstance()}.
	 */
	public static final String SPRING_INDEX_CONF = "IndexerBean.xml";


	/**
	 * Default value of {@link #home}; it is appended to the application path
	 * given to {@link #open(String, String)}.
	 */
	public static final String DEFAULT_HOME = "/solr";


	/**
	 * A text content is only indexed when its length is strictly greater than
	 * this value.
	 */
	public static final int MINI_TEXT_SIZE = 3;


	/**
	 * Buffer size used when the indexer configuration reports a minimum buffer
	 * size of 0.
	 */
	public static final int BUFFER_DEFAULT_SIZE = 50;


	/**
	 * Path suffix appended to the application path to locate the Solr home
	 * directory. Kept public and mutable for backward compatibility.
	 */
	public static String home = SolrComponent.DEFAULT_HOME;


	/**
	 * Template of the date format written into the index. It is never used
	 * directly for formatting or parsing because <code>SimpleDateFormat</code>
	 * is not thread-safe; {@link #getSolrFormat()} hands out copies of it.
	 */
	private static final SimpleDateFormat solrFormat = new SimpleDateFormat("yyyy-MM-dd");


	private final static Log logger = LogFactory.getLog(SolrComponent.class);


	private static SolrComponent singleton;


	private final SolrIndexerConfig indexerConfig;


	private final SolrSearcherConfig searcherConfig;


	/**
	 * Entity fields stored into the index, keyed by entity type.
	 */
	private final Map<String, EntityField> storedEntities;


	private final int bufferSize;


	/**
	 * Number of documents added since the last commit. It is only modified
	 * while holding the lock on this instance.
	 */
	private volatile int docInBufferCounter = 0;


	private EmbeddedSolrServer server;


	private CoreContainer container;


	/**
	 * Retrieve the singleton, creating it from the given Spring configuration
	 * file and bean name if it does not exist yet.
	 * 
	 * Note that {@link #home} is overwritten with <code>solrHome</code> on
	 * every call, even when the singleton already exists.
	 * 
	 * @param springConfFile
	 *            the Spring configuration file holding the indexer bean
	 * @param solrHome
	 *            the new value of {@link #home}
	 * @param beanName
	 *            the name of the indexer configuration bean
	 * @return the <code>SolrComponent</code> singleton
	 */
	public static synchronized SolrComponent getInstance(final String springConfFile, final String solrHome, final String beanName) {
		SolrComponent.home = solrHome;
		if (SolrComponent.singleton == null) {
			SolrComponent.singleton = new SolrComponent(springConfFile, beanName);
		}
		return SolrComponent.singleton;
	}


	/**
	 * Retrieve the singleton, creating it from the given Spring configuration
	 * file and bean name if it does not exist yet. The arguments are ignored
	 * when the singleton has already been created.
	 * 
	 * @param springConfFile
	 *            the Spring configuration file holding the indexer bean
	 * @param beanName
	 *            the name of the indexer configuration bean
	 * @return the <code>SolrComponent</code> singleton
	 */
	public static synchronized SolrComponent getInstance(final String springConfFile, final String beanName) {
		if (SolrComponent.singleton == null) {
			SolrComponent.singleton = new SolrComponent(springConfFile, beanName);
		}
		return SolrComponent.singleton;
	}


	/**
	 * Retrieve the default singleton, using {@link #SPRING_INDEX_CONF} and the
	 * default indexer bean name.
	 * 
	 * @return the <code>SolrComponent</code> singleton
	 */
	public static SolrComponent getInstance() {
		return SolrComponent.getInstance(SolrComponent.SPRING_INDEX_CONF, IndexerConfig.DEFAULT_BEAN_NAME);
	}


	/**
	 * Allows to retrieve the format of date used when indexing.
	 * 
	 * <code>SimpleDateFormat</code> is not thread-safe, so each call returns a
	 * fresh copy that the caller can use freely without synchronization.
	 * 
	 * @return a new instance of the date format
	 */
	public static SimpleDateFormat getSolrFormat() {
		return (SimpleDateFormat) SolrComponent.solrFormat.clone();
	}


	public static void setHome(final String home) {
		SolrComponent.home = home;
	}


	public static String getHome() {
		return SolrComponent.home;
	}


	/**
	 * Load the indexer and searcher configurations and compute the buffer size
	 * and the entity type lookup table.
	 * 
	 * @param springConfFile
	 *            the Spring configuration file holding the indexer bean
	 * @param beanName
	 *            the name of the indexer configuration bean
	 */
	private SolrComponent(final String springConfFile, final String beanName) {

		this.indexerConfig = BeanHelper.getInstance().getSpecificInstance(springConfFile, true).getBean(beanName, SolrIndexerConfig.class);

		this.searcherConfig = BeanHelper.getInstance().getSpecificInstance(SolrSearcherConfig.DEFAULT_CONF_FILE, true)
				.getBean(SearcherConfig.DEFAULT_BEAN_NAME, SolrSearcherConfig.class);

		// A configured size of 0 means "not configured": fall back on the default.
		this.bufferSize = this.indexerConfig.getMinBufferSize() != 0 ? this.indexerConfig.getMinBufferSize() : SolrComponent.BUFFER_DEFAULT_SIZE;

		this.storedEntities = new HashMap<String, EntityField>();
		for (final EntityField entityField : this.indexerConfig.getEntityFields().values()) {
			for (final String entityType : entityField.getEntityTypes()) {
				this.storedEntities.put(entityType, entityField);
			}
		}
	}


	/**
	 * Open the embedded Solr server on the core named in the indexer
	 * configuration.
	 * 
	 * @param appPath
	 *            the application path; {@link #home} is appended to it
	 */
	public void open(final String appPath) {
		this.open(appPath, this.indexerConfig.getSolrCoreName());
	}


	/**
	 * Open method is synchronized in order to open only one embedded Solr
	 * server instance. Nothing is done when the server is already opened.
	 * 
	 * @param appPath
	 *            the application path; {@link #home} is appended to it
	 * @param core
	 *            the name of the Solr core to use
	 * @throws WebLabUncheckedException
	 *             if the Solr home or its <code>solr.xml</code> file is missing, or if the
	 *             container cannot be loaded
	 */
	public synchronized void open(final String appPath, final String core) {
		if (this.server != null) {
			return;
		}

		SolrComponent.logger.info("Openning SOLR server...");

		final File theHome = new File(appPath + SolrComponent.home);
		if (!theHome.exists() || !theHome.isDirectory()) {
			throw new WebLabUncheckedException("SOLR home [" + theHome + "]does not exists or is not a directory.");
		}

		final File f = new File(theHome, "solr.xml");
		if (!f.exists() || f.isDirectory()) {
			throw new WebLabUncheckedException("SOLR core configuration file [" + f + "]does not exists or is a directory.");
		}

		// The fields are only assigned once loading succeeded, so a failed
		// attempt does not leave a half-initialised container behind.
		final CoreContainer loadedContainer = new CoreContainer();
		try {
			loadedContainer.load(appPath + SolrComponent.home, f);
		} catch (final ParserConfigurationException e) {
			throw new WebLabUncheckedException("Cannot initialize the SOLR server due to erros in configuration file", e);
		} catch (final IOException e) {
			throw new WebLabUncheckedException("Cannot initialize the SOLR server due to IO error", e);
		} catch (final SAXException e) {
			throw new WebLabUncheckedException("Cannot initialize the SOLR server due to XML parser error", e);
		}

		this.container = loadedContainer;
		this.server = new EmbeddedSolrServer(loadedContainer, core);

		SolrComponent.logger.info("SOLR server ready for battle.");
	}


	/**
	 * Add a Weblab Resource to Solr index.
	 * 
	 * The Solr document is built from:
	 * <ul>
	 * <li>the resource URI, stored in the <code>id</code> field</li>
	 * <li>the fields of the indexer configuration (text content and/or RDF
	 * property values)</li>
	 * <li>the labels of the entities referred to by the linear segments of the
	 * resource</li>
	 * <li>the values of the statements whose predicate is one of the
	 * configured entity types</li>
	 * </ul>
	 * The index is committed once the number of buffered documents reaches the
	 * buffer size.
	 * 
	 * @param toBeIndexed
	 *            the resource to index
	 * @throws WebLabCheckedException
	 *             if the resource is null, has no valid URI, if the server is
	 *             not opened or if Solr fails to add or commit the document
	 */
	public void addDocument(final Resource toBeIndexed) throws WebLabCheckedException {
		if (toBeIndexed == null) {
			throw new WebLabCheckedException("Input Document is null.");
		}

		final String resourceUri = toBeIndexed.getUri();
		SolrComponent.logger.info("Indexing Resource [" + resourceUri + "]...");

		if (resourceUri == null) {
			throw new WebLabCheckedException("Resource to index does not have valid URI.");
		}
		try {
			new URI(resourceUri);
		} catch (final URISyntaxException urise) {
			throw new WebLabCheckedException("Resource to index does not have valid URI.", urise);
		}

		final SolrInputDocument doc = new SolrInputDocument();
		doc.addField("id", resourceUri);

		// The RDF helper is only created when at least one value has to be read.
		ResourceHelper helper = null;

		for (final Field field : this.indexerConfig.getFields().values()) {
			if (field.isIndexTextMediaUnit()) {
				doc.addField(field.getName(), SolrComponent.extractTextFromResource(toBeIndexed));
			}

			if ((field.getProperties() != null) && (field.getProperties().size() > 0)) {
				if (helper == null) {
					helper = new JenaResourceHelper(toBeIndexed);
				}

				final List<String> values = new ArrayList<String>();
				for (final String property : field.getProperties()) {
					values.addAll(SolrComponent.readValues(helper, resourceUri, property));
				}
				SolrComponent.addFieldValues(doc, field, values);
			}
		}

		/*
		 * Entities referred to by the linear segments of the media units.
		 */
		final SimpleSelector selector = RDFSelectorFactory.getSelector();
		selector.limitToFirstLevelAnnotation(false);
		final Results stats = selector.select(toBeIndexed);

		final List<MediaUnit> mediaUnitList = ResourceUtil.getSelectedSubResources(toBeIndexed, MediaUnit.class);
		for (final MediaUnit mediaUnit : mediaUnitList) {
			for (final Segment seg : mediaUnit.getSegment()) {
				if (seg instanceof LinearSegment) {
					for (final String entityUri : stats.getTypedValues(seg.getUri(), WebLabProcessing.REFERS_TO, String.class)) {
						this.addEntityLabels(doc, stats, entityUri);
					}
				}
			}
		}

		/*
		 * Statements on the resource whose predicate is a configured entity type.
		 */
		for (final EntityField entityField : this.indexerConfig.getEntityFields().values()) {
			for (final String entityType : entityField.getEntityTypes()) {
				if (helper == null) {
					helper = new JenaResourceHelper(toBeIndexed);
				}

				for (final String value : SolrComponent.readValues(helper, resourceUri, entityType)) {
					doc.addField(entityField.getName(), value);
					SolrComponent.logger.debug("Add entity field : " + entityField.getName() + " = " + value);
				}
			}
		}

		/*
		 * This block is synchronized to prevent adding documents during index
		 * flush.
		 */
		synchronized (this) {
			try {
				this.requireServer().add(doc);
				this.docInBufferCounter++;
				SolrComponent.logger.info("Resource [" + resourceUri + "] added to the indexing buffer.");
			} catch (final IOException e) {
				throw new WebLabCheckedException("I/O access error when adding documents", e);
			} catch (final SolrServerException e) {
				throw new WebLabCheckedException("Server error while adding documents", e);
			}

			if (this.docInBufferCounter >= this.bufferSize) {
				this.flushIndexBuffer();
			}
		}
	}


	/**
	 * Read the literal values then the resource values of the statements
	 * having the given subject and predicate.
	 * 
	 * @param helper
	 *            the RDF helper to query
	 * @param subjectUri
	 *            the subject of the statements
	 * @param predicateUri
	 *            the predicate of the statements
	 * @return the values, literals first
	 */
	private static List<String> readValues(final ResourceHelper helper, final String subjectUri, final String predicateUri) {
		final List<String> values = new ArrayList<String>();
		values.addAll(helper.getLitsOnPredSubj(subjectUri, predicateUri));
		values.addAll(helper.getRessOnPredSubj(subjectUri, predicateUri));
		return values;
	}


	/**
	 * Add the values of a configured field to the Solr document.
	 * 
	 * For a field of type <code>date</code>, only the first value that can be
	 * parsed is stored, converted with {@link #getSolrFormat()}; unparseable
	 * values and any further dates are skipped so that no raw (unconverted)
	 * date ever reaches the index. For any other field every value is stored
	 * as is.
	 * 
	 * @param doc
	 *            the Solr document to enrich
	 * @param field
	 *            the configured field
	 * @param values
	 *            the values read from the resource for this field
	 */
	private static void addFieldValues(final SolrInputDocument doc, final Field field, final List<String> values) {
		final String fieldName = field.getName();
		final boolean dateField = "date".equals(field.getType());
		boolean dateStored = false;

		for (final String value : values) {
			if (!dateField) {
				doc.addField(fieldName, value);
				SolrComponent.logger.debug("add field: " + fieldName + " = " + value);
				continue;
			}

			if (dateStored) {
				SolrComponent.logger.debug("Date already stored for field " + fieldName + ", ignoring : " + value);
				continue;
			}

			try {
				final Date date = DateUtils.parseDate(value, IndexerConfig.DATE_FORMATS);
				doc.addField(fieldName, SolrComponent.getSolrFormat().format(date));
				dateStored = true;
			} catch (final ParseException pe) {
				SolrComponent.logger.debug("Unable to parse date : " + value);
			}
		}
	}


	/**
	 * Add to the Solr document one label per stored type of the given entity.
	 * 
	 * For each type of the entity that is declared in the configuration, the
	 * label properties of every matching entity field are tried in order and
	 * the first one providing a label is used.
	 * 
	 * @param doc
	 *            the Solr document to enrich
	 * @param stats
	 *            the statements selected on the resource being indexed
	 * @param entityUri
	 *            the URI of the entity
	 */
	private void addEntityLabels(final SolrInputDocument doc, final Results stats, final String entityUri) {
		final HashSet<String> types = new HashSet<String>(stats.getTypedValues(entityUri, RDF.TYPE, String.class));

		// Retains all stored entities
		types.retainAll(this.storedEntities.keySet());

		for (final String type : types) {
			for (final EntityField entityField : this.indexerConfig.getEntityFields().values()) {
				if (!entityField.getEntityTypes().contains(type)) {
					continue;
				}

				for (final String labelUri : entityField.getLabelProperties()) {
					final HashSet<String> labels = new HashSet<String>(stats.getTypedValues(entityUri, labelUri, String.class));
					if (labels.size() > 1) {
						SolrComponent.logger.warn("More than one label found for entity " + entityUri + ", use first.");
					}
					if (labels.isEmpty()) {
						SolrComponent.logger.warn("No labels found for entity " + entityUri + ", will not be stored.");
						continue;
					}

					final String fieldName = this.storedEntities.get(type).getName();
					final String label = labels.iterator().next();
					SolrComponent.logger.debug("Add entity field : " + fieldName + " = " + label);
					doc.addField(fieldName, label);
					// One label per entity field is enough.
					break;
				}
			}
		}
	}


	/**
	 * Concatenate the text contents of a resource, one content per line.
	 * 
	 * For a <code>Document</code> every <code>Text</code> sub resource is
	 * used; for a <code>Text</code> its own content is used; any other
	 * resource gives an empty string. Contents that are null or not longer
	 * than {@link #MINI_TEXT_SIZE} are ignored.
	 * 
	 * @param res
	 *            the resource to read
	 * @return the trimmed contents, each followed by a line feed
	 */
	public static String extractTextFromResource(final Resource res) {
		final StringBuilder concatenationOfText = new StringBuilder();
		if (res instanceof Document) {
			final List<Text> textList = ResourceUtil.getSelectedSubResources(res, Text.class);
			for (final Text t : textList) {
				SolrComponent.appendContent(concatenationOfText, t);
			}
		} else if (res instanceof Text) {
			SolrComponent.appendContent(concatenationOfText, (Text) res);
		}
		return concatenationOfText.toString();
	}


	/**
	 * Append the trimmed content of a text followed by a line feed, unless the
	 * content is null or not longer than {@link #MINI_TEXT_SIZE}.
	 * 
	 * @param target
	 *            the buffer to append to
	 * @param text
	 *            the text to read
	 */
	private static void appendContent(final StringBuilder target, final Text text) {
		final String content = text.getContent();
		if ((content != null) && (content.length() > SolrComponent.MINI_TEXT_SIZE)) {
			target.append(content.trim()).append("\n");
		}
	}


	/**
	 * Flush the pending documents, optimize the index and shut the Solr
	 * container down. Nothing is done when the server is not opened. The
	 * container is shut down even when flushing or optimizing fails.
	 * 
	 * @throws WebLabUncheckedException
	 *             if the buffer cannot be flushed or if an I/O error occurs
	 *             while optimizing
	 */
	public synchronized void close() {
		if ((this.server == null) || (this.container == null)) {
			return;
		}

		try {
			SolrComponent.logger.info("Closing SOLR server...");
			if (this.docInBufferCounter > 0) {
				this.flushIndexBuffer();
			}
			this.server.optimize();
		} catch (final WebLabCheckedException e) {
			SolrComponent.logger.error("Cannot flush the index indexing buffer properly.", e);
			throw new WebLabUncheckedException("Cannot flush the index indexing buffer properly.", e);
		} catch (final IOException e) {
			throw new WebLabUncheckedException("I/O access error while optimizing the index.", e);
		} catch (final SolrServerException e) {
			// A failed optimization is not fatal: the documents are already committed.
			SolrComponent.logger.warn("Cannot optimize the index properly.", e);
		} finally {
			this.container.shutdown();
			this.container = null;
			this.server = null;
			SolrComponent.logger.info("SOLR server closed.");
		}
	}


	/**
	 * Commit documents in Solr Index and reset the buffered documents counter.
	 * 
	 * The method is synchronized on this instance, like the part of
	 * {@link #addDocument(Resource)} adding the document, so that an external
	 * flush cannot interleave with an addition.
	 * 
	 * @throws WebLabCheckedException
	 *             if the server is not opened or if the commit fails
	 */
	public synchronized void flushIndexBuffer() throws WebLabCheckedException {
		try {
			this.requireServer().commit();
			this.docInBufferCounter = 0;
			SolrComponent.logger.info("Indexing buffer flushed.");
		} catch (final SolrServerException e) {
			throw new WebLabCheckedException("Server error while adding documents", e);
		} catch (final IOException e) {
			throw new WebLabCheckedException("I/O access error when adding documents", e);
		}
	}


	/**
	 * Return buffer size
	 * 
	 * @return the number of buffered documents that triggers a commit
	 */
	public int getBufferSize() {
		return this.bufferSize;
	}


	/**
	 * Search method query solr server with
	 * <ul>
	 * <li>string of the query</li>
	 * <li>offset</li>
	 * <li>limit</li>
	 * <li>request handler</li>
	 * </ul>
	 * 2 request handlers are used. The first ("weblab_with_meta") retrieve all
	 * fields indexed and the second ("weblab") retrieve only doc id and score.
	 * Facets are requested when facet fields are configured.
	 * 
	 * @param queryString
	 *            the query; a null or empty query is not sent to the server
	 * @param offset
	 *            the index of the first hit to return
	 * @param limit
	 *            the maximum number of hits to return
	 * @return the Solr <code>QueryResponse</code>; an empty one when the query
	 *         is null or empty
	 * @throws WebLabCheckedException
	 *             if the server is not opened or if the query fails
	 */
	public QueryResponse search(final String queryString, final int offset, final int limit) throws WebLabCheckedException {
		if ((queryString == null) || queryString.isEmpty()) {
			return new QueryResponse();
		}

		final SolrQuery query = new SolrQuery();
		query.setQuery(queryString);
		query.setParam("start", String.valueOf(offset));
		query.setParam("rows", String.valueOf(limit));

		// Add facet fields
		if (this.searcherConfig.getFacetFields().size() > 0) {
			query.setFacet(true);
			query.setFacetMinCount(this.searcherConfig.getFacetMinCount());
			query.setFacetLimit(this.searcherConfig.getFacetLimitNumber());
			for (final String facetFieldName : this.searcherConfig.getFacetFields()) {
				query.addFacetField(facetFieldName);
			}
		}

		// Set request handler (with or without all meta)
		if (this.searcherConfig.isHitsEnrichedWithMetas()) {
			query.setParam("qt", "weblab_with_meta");
		} else {
			query.setParam("qt", "weblab");
		}

		return this.execute(query);
	}


	/**
	 * MoreLikeThis is a specific Solr query used to retrieve similar documents
	 * from a document id or text. Source, title and text fields are used to
	 * find similar documents.
	 * 
	 * @param queryString
	 *            the reference text or document to find similar docs (id:"doc1"
	 *            for example); a null or empty query is not sent to the server
	 * @return the Solr <code>QueryResponse</code> with document list; an empty
	 *         one when the query is null or empty
	 * @throws WebLabCheckedException
	 *             if the server is not opened or if the query fails
	 */
	public QueryResponse moreLikeThis(final String queryString) throws WebLabCheckedException {
		if ((queryString == null) || queryString.isEmpty()) {
			return new QueryResponse();
		}

		final SolrQuery query = new SolrQuery();
		query.setQueryType(MoreLikeThisParams.MLT);
		query.set(MoreLikeThisParams.MLT);
		query.set(MoreLikeThisParams.MATCH_INCLUDE, false);
		query.set(MoreLikeThisParams.MIN_DOC_FREQ, 1);
		query.set(MoreLikeThisParams.MIN_TERM_FREQ, 1);
		query.set(MoreLikeThisParams.SIMILARITY_FIELDS, "source,title,text");
		query.setQuery(queryString);

		return this.execute(query);
	}


	/**
	 * Send a query to the index.
	 * 
	 * @param query
	 *            the query to send
	 * @return the Solr <code>QueryResponse</code>
	 * @throws WebLabCheckedException
	 *             if the server is not opened or if the query fails
	 */
	private QueryResponse execute(final SolrQuery query) throws WebLabCheckedException {
		try {
			return this.requireServer().query(query);
		} catch (final SolrServerException sse) {
			SolrComponent.logger.error("Cannot post search request", sse);
			throw new WebLabCheckedException("Cannot post search request", sse);
		}
	}


	/**
	 * Give access to the embedded server, failing with a checked exception
	 * rather than a <code>NullPointerException</code> when it is not opened.
	 * 
	 * @return the opened server
	 * @throws WebLabCheckedException
	 *             if {@link #open(String, String)} has not been called or if
	 *             {@link #close()} has already been called
	 */
	private EmbeddedSolrServer requireServer() throws WebLabCheckedException {
		final EmbeddedSolrServer current = this.server;
		if (current == null) {
			throw new WebLabCheckedException("SOLR server is not opened.");
		}
		return current;
	}

}
