/**
 * WEBLAB: Service oriented integration platform for media mining and intelligence applications
 * 
 * Copyright (C) 2004 - 2009 EADS DEFENCE AND SECURITY SYSTEMS
 * 
 * This library is free software; you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License along with this
 * library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301 USA
 */

package org.ow2.weblab.services.solr;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map.Entry;

import javax.xml.parsers.ParserConfigurationException;

import org.apache.commons.lang.time.DateUtils;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.MoreLikeThisParams;
import org.apache.solr.core.CoreContainer;
import org.ow2.weblab.services.indexer.impl.SolrIndexerConfig;
import org.ow2.weblab.services.searcher.impl.SolrSearcherConfig;
import org.ow2.weblab.util.index.Field;
import org.ow2.weblab.util.index.IndexerConfig;
import org.ow2.weblab.util.search.SearcherConfig;
import org.weblab_project.core.exception.WebLabCheckedException;
import org.weblab_project.core.exception.WebLabUncheckedException;
import org.weblab_project.core.helper.BeanHelper;
import org.weblab_project.core.helper.RDFHelperFactory;
import org.weblab_project.core.helper.ResourceHelper;
import org.weblab_project.core.model.ComposedUnit;
import org.weblab_project.core.model.Resource;
import org.weblab_project.core.model.text.Text;
import org.weblab_project.core.util.ComposedUnitUtil;
import org.xml.sax.SAXException;

/**
 * Component used to :
 * <ul>
 * <li>open a connection to embedded solr server</li>
 * <li>add a document to the index</li>
 * <li>search in the index</li>
 * </ul>
 * 
 * This class implements the singleton pattern in order to share the index connection and improve performance (the
 * Solr server is thread-safe).<br/>
 * The index buffer is managed by Solr, so we just use a counter to commit documents once the configured buffer size
 * is reached.
 */

public class SolrComponent {

	private static SolrComponent singleton;

	public static final String SPRING_INDEX_CONF = "IndexerBean.xml";

	public static final String DEFAULT_HOME = "/solr";

	public static final int MINI_TEXT_SIZE = 3;
	public static final int BUFFER_DEFAULT_SIZE = 50;

	private SolrIndexerConfig indexerConfig;
	private SolrSearcherConfig searcherConfig;
	private EmbeddedSolrServer server;

	private int bufferSize;
	private volatile int docInBufferCounter = 0;

	private static final SimpleDateFormat solrFormat = new SimpleDateFormat("yyyy-MM-dd");

	/**
	 * Retrive singleton depending on spring configuration file and bean name
	 * 
	 * @param springConfFile
	 * @param beanName
	 * @return the <code>SolrComponent</solr> singleton
	 */
	public static synchronized SolrComponent getInstance(String springConfFile, String beanName) {
		if (singleton == null)
			singleton = new SolrComponent(springConfFile, beanName);
		return singleton;
	}

	/**
	 * Retrieve default singleton (using default bean configuration)
	 * 
	 * @return the <code>SolrComponent</solr> singleton
	 */
	public static SolrComponent getInstance() {
		return getInstance(SPRING_INDEX_CONF, IndexerConfig.DEFAULT_BEAN_NAME);
	}

	/**
	 * 
	 * @param springConfFile
	 * @param beanName
	 */
	private SolrComponent(String springConfFile, String beanName) {
		this.indexerConfig = BeanHelper.getInstance().getSpecificInstance(springConfFile, true).getBean(beanName,
				SolrIndexerConfig.class);
		this.searcherConfig = BeanHelper.getInstance().getSpecificInstance(SolrSearcherConfig.DEFAULT_CONF_FILE, true)
				.getBean(SearcherConfig.DEFAULT_BEAN_NAME, SolrSearcherConfig.class);
		this.bufferSize = this.indexerConfig.getMinBufferSize() != 0 ? this.indexerConfig.getMinBufferSize()
				: BUFFER_DEFAULT_SIZE;
	}

	public void open(String appPath) {
		this.open(appPath, this.indexerConfig.getSolrCoreName());
	}

	/**
	 * Open method is synchronized in order to open only one embedded Solr server instance
	 */
	public synchronized void open(String appPath, String core) {
		if (this.server == null) {
			LogFactory.getLog(SolrComponent.class).info("Openning SOLR server...");

			File home = new File(appPath + DEFAULT_HOME);
			if (!home.exists() || !home.isDirectory())
				throw new WebLabUncheckedException("SOLR home [" + home + "]does not exists or is not a directory.");

			File f = new File(home, "solr.xml");
			if (!f.exists() || f.isDirectory())
				throw new WebLabUncheckedException("SOLR core configuration file [" + f
						+ "]does not exists or is a directory.");

			CoreContainer container = new CoreContainer();
			try {
				container.load(appPath + DEFAULT_HOME, f);
			}
			catch (ParserConfigurationException e) {
				throw new WebLabUncheckedException(
						"Cannot initialize the SOLR server due to erros in configuration file", e);
			}
			catch (IOException e) {
				throw new WebLabUncheckedException("Cannot initialize the SOLR server due to IO error", e);
			}
			catch (SAXException e) {
				throw new WebLabUncheckedException("Cannot initialize the SOLR server due to XML parser error", e);
			}

			this.server = new EmbeddedSolrServer(container, core);

			LogFactory.getLog(SolrComponent.class).info("SOLR server ready for battle.");
		}
	}

	/**
	 * Add a Weblab Resource to Solr index
	 * 
	 * @param toBeIndexed
	 * @throws WebLabCheckedException
	 */
	public void addDocument(Resource toBeIndexed) throws WebLabCheckedException {
		if (toBeIndexed == null)
			throw new WebLabCheckedException("Input Document is null.");

		LogFactory.getLog(this.getClass()).info("Indexing Resource [" + toBeIndexed.getUri() + "]...");

		ResourceHelper helper = null;
		SolrInputDocument doc = new SolrInputDocument();

		try {
			new URI(toBeIndexed.getUri());
		}
		catch (URISyntaxException e) {
			throw new WebLabCheckedException("Resource to index does not have valid URI.", e);
		}

		doc.addField("id", toBeIndexed.getUri());

		for (Entry<String, Field> entry : this.indexerConfig.getFields().entrySet()) {

			Field field = entry.getValue();
			String fieldName = field.getName();

			if (field.isIndexTextMediaUnit()) {
				doc.addField(fieldName, SolrComponent.extractTextFromResource(toBeIndexed));
			}

			if (field.getProperties() != null && field.getProperties().size() > 0) {
				if (helper == null)
					helper = RDFHelperFactory.getResourceHelper(toBeIndexed);

				List<String> predList = new ArrayList<String>();
				for (String property : field.getProperties()) {
					predList.addAll(helper.getLitsOnPredSubj(toBeIndexed.getUri(), property));
					predList.addAll(helper.getRessOnPredSubj(toBeIndexed.getUri(), property));
				}
				boolean maxNbOfDatesAchieved = false;
				for (String p : predList) {
					if (field.getType().equals("date") && !maxNbOfDatesAchieved) {
						// convert date to Solr format
						try {
							final Date date = DateUtils.parseDate(p, IndexerConfig.DATE_FORMATS);
							String solrDate = solrFormat.format(date);
							// DateFormatUtils.ISO_DATETIME_FORMAT.format(date) + 'Z';
							doc.addField(fieldName, solrDate);
							maxNbOfDatesAchieved = true;
						}
						catch (final ParseException pe) {
							LogFactory.getLog(SolrComponent.class).debug("Unable to parse date : " + p);
						}
					}
					else {
						doc.addField(fieldName, p);
					}
				}
			}
		}
		/*
		 * This block is synchronized to prevent adding documents during index flush.
		 */
		synchronized (this) {
			try {
				this.server.add(doc);
				this.docInBufferCounter++;
				LogFactory.getLog(this.getClass()).info(
						"Resource [" + toBeIndexed.getUri() + "] added to the indexing buffer.");
			}
			catch (IOException e) {
				throw new WebLabCheckedException("I/O access error when adding documents", e);
			}
			catch (SolrServerException e) {
				throw new WebLabCheckedException("Server error while adding documents", e);
			}

			if (this.docInBufferCounter >= this.bufferSize)
				flushIndexBuffer();
		}
	}

	public static String extractTextFromResource(Resource res) {
		StringBuffer concatenationOfText = new StringBuffer();

		if (res instanceof ComposedUnit) {
			List<Text> textList = ComposedUnitUtil.getSelectedSubMediaUnits((ComposedUnit) res, Text.class);

			for (Text t : textList) {
				if (t.getContent() != null && t.getContent().length() > MINI_TEXT_SIZE)
					concatenationOfText.append(t.getContent().trim() + "\n");
			}
		}
		else if (res instanceof Text) {
			Text t = (Text) res;
			if (t.getContent() != null && t.getContent().length() > MINI_TEXT_SIZE)
				concatenationOfText.append(t.getContent().trim() + "\n");
		}

		return concatenationOfText.toString();
	}

	/**
	 * Close method only flush and optimize index.
	 */
	public synchronized void close() {
		try {
			LogFactory.getLog(this.getClass()).info("Closing SOLR server...");
			if (this.docInBufferCounter > 0)
				flushIndexBuffer();
		}
		catch (WebLabCheckedException e) {
			LogFactory.getLog(this.getClass()).error("Cannot flush the index indexing buffer properly.", e);
			throw new WebLabUncheckedException("Cannot flush the index indexing buffer properly.", e);
		}
		try {
			try {
				this.server.optimize();
			}
			catch (SolrServerException e) {
				throw new WebLabCheckedException("Server error while optimizing the index.", e);
			}
			catch (IOException e) {
				throw new WebLabCheckedException("I/O access error while optimizing the index.", e);
			}
			LogFactory.getLog(this.getClass()).info("SOLR server closed.");
		}
		catch (WebLabCheckedException e) {
			LogFactory.getLog(this.getClass()).warn("Cannot optimize the index properly.", e);
		}

	}

	/**
	 * Commit documents in Solr Index
	 * 
	 * @throws WebLabCheckedException
	 */
	public void flushIndexBuffer() throws WebLabCheckedException {
		try {
			this.server.commit();
			this.docInBufferCounter = 0;
			LogFactory.getLog(this.getClass()).info("Indexing buffer flushed.");
		}
		catch (SolrServerException e) {
			throw new WebLabCheckedException("Server error while adding documents", e);
		}
		catch (IOException e) {
			throw new WebLabCheckedException("I/O access error when adding documents", e);
		}
	}

	/**
	 * Return buffer size
	 * 
	 * @return the buffer size
	 */
	public int getBufferSize() {
		return this.bufferSize;
	}

	/**
	 * Search method query solr server with
	 * <ul>
	 * <li>string of the query</li>
	 * <li>offset</li>
	 * <li>limit</li>
	 * <li>request handler</li>
	 * </ul>
	 * 2 request handlers are used. The first ("weblab_with_meta") retrieve all fields indexed and the second ("weblab")
	 * retrieve only doc id and score.
	 * 
	 * @param queryString
	 * @param offset
	 * @param limit
	 * @return the Solr <code>QueryResponse</code>
	 * @throws WebLabCheckedException
	 */
	public QueryResponse search(String queryString, int offset, int limit) throws WebLabCheckedException {
		QueryResponse response;
		if (!queryString.isEmpty()) {
			SolrQuery query = new SolrQuery();
			query.setQuery(queryString);
//			query.setSortField("", ORDER.desc);
			query.setParam("start", String.valueOf(offset));
			query.setParam("rows", String.valueOf(limit));

			// Set request handler (with or without all meta)
			if (this.searcherConfig.isHitsEnrichedWithMetas())
				query.setParam("qt", "weblab_with_meta");
			else
				query.setParam("qt", "weblab");

			// Send query to index
			try {
				response = this.server.query(query);
			}
			catch (SolrServerException sse) {
				LogFactory.getLog(this.getClass()).error("Cannot post search request", sse);
				throw new WebLabCheckedException("Cannot post search request", sse);
			}
		}
		else {
			response = new QueryResponse();
		}

		return response;
	}

	/**
	 * MoreLikeThis is a specific Solr query used to retrieve similar documents from a document id or text.
	 * Source, title and text fields are used to find similar documents.
	 * 
	 * @param queryString the reference text or document to find similar docs (id:"doc1" for example)
	 * @return the Solr <code>QueryResponse</code> with document list
	 * @throws WebLabCheckedException
	 */
	public QueryResponse moreLikeThis(String queryString) throws WebLabCheckedException {
		QueryResponse response;
		if (!queryString.isEmpty()) {
			SolrQuery query = new SolrQuery();

			query.setQueryType(MoreLikeThisParams.MLT);
			query.set(MoreLikeThisParams.MLT);
			query.set(MoreLikeThisParams.MATCH_INCLUDE, false);
			query.set(MoreLikeThisParams.MIN_DOC_FREQ, 1);
			query.set(MoreLikeThisParams.MIN_TERM_FREQ, 1);
			query.set(MoreLikeThisParams.SIMILARITY_FIELDS, "source,title,text");
			query.setQuery(queryString);

			// Send query to index
			try {
				response = this.server.query(query);
			}
			catch (SolrServerException sse) {
				LogFactory.getLog(this.getClass()).error("Cannot post search request", sse);
				throw new WebLabCheckedException("Cannot post search request", sse);
			}
		}
		else {
			response = new QueryResponse();
		}

		return response;
	}
}
