package org.ow2.weblab.service.transcript.sphinx;

import java.io.File;
import java.util.LinkedList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.ow2.weblab.core.extended.exception.WebLabCheckedException;

import edu.cmu.sphinx.decoder.search.Token;
import edu.cmu.sphinx.frontend.util.AudioFileDataSource;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.ConfigurationManager;

/**
 * Thin wrapper around a Sphinx {@link Recognizer} and its
 * {@link AudioFileDataSource}, both obtained from an XML configuration file.
 * 
 * The recognizer is allocated once in the constructor and reused for every
 * call to {@link #transcript(File)}; that method is synchronized so that only
 * one file at a time is fed to the shared data source.
 */
public class SphinxTranscriptor {

	/*
	 * Kept protected and non-final for backward compatibility with existing
	 * subclasses that may access or reassign it.
	 */
	protected static Log LOG = LogFactory.getLog(SphinxTranscriptor.class);

	protected Recognizer recognizer;
	protected AudioFileDataSource audioFileDataSource;
	protected String xmlConfigPath;

	/**
	 * Initialise a sphinx transcriptor using an XML config file. The
	 * Recognizer must be named 'recognizer' and the AudioFileDataSource must
	 * be named 'audioFileDataSource'.
	 * 
	 * @param xmlConfigPath
	 *            location of the Sphinx XML configuration, must not be
	 *            <code>null</code>
	 * @throws IllegalArgumentException
	 *             if <code>xmlConfigPath</code> is <code>null</code>
	 * @throws IllegalStateException
	 *             if one of the two required components cannot be found in
	 *             the configuration
	 */
	public SphinxTranscriptor(String xmlConfigPath) {
		super();
		if (xmlConfigPath == null) {
			throw new IllegalArgumentException(
					"xmlConfigPath must not be null");
		}
		LOG.info("Instanciating sphinx from " + xmlConfigPath);
		this.xmlConfigPath = xmlConfigPath;
		ConfigurationManager cm = new ConfigurationManager(xmlConfigPath);

		/*
		 * Resolve and validate both components before allocating the
		 * recognizer, so that a broken configuration fails early with an
		 * explicit message instead of a late NullPointerException.
		 */
		Recognizer configuredRecognizer = (Recognizer) cm.lookup("recognizer");
		if (configuredRecognizer == null) {
			throw new IllegalStateException(
					"No component named 'recognizer' found in "
							+ xmlConfigPath);
		}
		AudioFileDataSource configuredDataSource = (AudioFileDataSource) cm
				.lookup("audioFileDataSource");
		if (configuredDataSource == null) {
			throw new IllegalStateException(
					"No component named 'audioFileDataSource' found in "
							+ xmlConfigPath);
		}

		this.recognizer = configuredRecognizer;
		this.audioFileDataSource = configuredDataSource;
		this.recognizer.allocate();

		LOG.info("Recognizer instanciated.");
	}

	/**
	 * Transcript an audio file into a list of Sphinx Token (to be able to
	 * retain any available information on the tokens).
	 * 
	 * The recognizer is called repeatedly until it returns no more result;
	 * the best token of each result is appended to the returned list. Results
	 * that have no best token are skipped, so the list never contains
	 * <code>null</code>.
	 * 
	 * No checked exception (in particular no {@link WebLabCheckedException})
	 * is declared by this method.
	 * 
	 * @param audioFile
	 *            the file to be transcript, must not be <code>null</code>
	 * @return the list of Sphinx Token, empty when nothing could be
	 *         recognized; never <code>null</code>
	 * @throws IllegalArgumentException
	 *             if <code>audioFile</code> is <code>null</code>
	 */
	public synchronized List<Token> transcript(File audioFile) {
		if (audioFile == null) {
			throw new IllegalArgumentException("audioFile must not be null");
		}
		long begin = System.currentTimeMillis();
		List<Token> returnedTokenList = new LinkedList<Token>();

		/*
		 * setting the audio file to be transcripted
		 */
		LOG.info("Begin of transcript on: " + audioFile);
		audioFileDataSource.setAudioFile(audioFile, audioFile.getName());

		Result result = recognizer.recognize();
		if (result == null) {
			/*
			 * unable to get any transcript
			 */
			LOG.warn("No transcript extract from: " + audioFile);
			return returnedTokenList;
		}

		/*
		 * fill the return token list, one best token per recognition result
		 */
		do {
			if (LOG.isDebugEnabled()) {
				LOG.debug("transcript: "
						+ result.getTimedBestResult(false, true));
			}
			Token bestToken = result.getBestToken();
			if (bestToken != null) {
				returnedTokenList.add(bestToken);
			} else if (LOG.isDebugEnabled()) {
				// a result without any token carries nothing usable
				LOG.debug("Skipping a result without best token for: "
						+ audioFile);
			}

			/*
			 * switch to next recognize result
			 */
			result = recognizer.recognize();
		} while (result != null);

		if (LOG.isDebugEnabled()) {
			LOG.debug("Transcript time for file " + audioFile + ": "
					+ (System.currentTimeMillis() - begin) + " ms");
		}

		return returnedTokenList;
	}

	/**
	 * @return a short description containing the configuration path this
	 *         transcriptor was built from
	 */
	@Override
	public String toString() {
		return "SphinxTranscriptor[" + this.xmlConfigPath + "]";
	}
}
