/**
 * WEBLAB: Service oriented integration platform for media mining and intelligence applications
 * 
 * Copyright (C) 2004 - 2011 Cassidian, an EADS company
 * 
 * This library is free software; you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License along with this
 * library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301 USA
 */

package org.ow2.weblab.service.normaliser.tika;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Calendar;

import javax.jws.WebService;
import javax.xml.bind.DatatypeConverter;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.language.ProfilingHandler;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.TeeContentHandler;
import org.ow2.weblab.content.api.ContentManager;
import org.ow2.weblab.content.impl.FileContentManager;
import org.ow2.weblab.core.extended.exception.WebLabCheckedException;
import org.ow2.weblab.core.extended.factory.AnnotationFactory;
import org.ow2.weblab.core.extended.ontologies.DCTerms;
import org.ow2.weblab.core.extended.ontologies.WebLabProcessing;
import org.ow2.weblab.core.extended.util.ResourceUtil;
import org.ow2.weblab.core.helper.impl.JenaPoKHelper;
import org.ow2.weblab.core.model.Annotation;
import org.ow2.weblab.core.model.Document;
import org.ow2.weblab.core.model.Resource;
import org.ow2.weblab.core.model.Text;
import org.ow2.weblab.core.services.Analyser;
import org.ow2.weblab.core.services.ContentNotAvailableException;
import org.ow2.weblab.core.services.InvalidParameterException;
import org.ow2.weblab.core.services.UnexpectedException;
import org.ow2.weblab.core.services.analyser.ProcessArgs;
import org.ow2.weblab.core.services.analyser.ProcessReturn;
import org.ow2.weblab.rdf.Value;
import org.ow2.weblab.service.normaliser.tika.handlers.WebLabHandlerDecorator;
import org.ow2.weblab.service.normaliser.tika.metadatawriter.MetadataWriter;
import org.purl.dc.elements.DublinCoreAnnotator;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
 * WebLab {@link Analyser} that normalises the native content of a
 * {@link Document} using Apache Tika.
 * 
 * For each processed document the service:
 * <ul>
 * <li>retrieves the native content file through the {@link ContentManager};</li>
 * <li>parses it with the Tika parser matching the <code>dc:format</code>
 * annotation of the document (or with an auto-detect parser when no usable
 * format is available);</li>
 * <li>forwards the parsing events to the configured
 * {@link WebLabHandlerDecorator}, which receives the document to enrich;</li>
 * <li>optionally writes an XHTML version as normalised content, guesses the
 * language and annotates the document with the extracted metadata.</li>
 * </ul>
 * 
 * Tika extraction is quite simple since it does not handle the structure of
 * documents (sheets in Excel, paragraphs in Word, etc.), even though such a
 * structure might have been represented as various MediaUnits.
 */
@WebService(endpointInterface = "org.ow2.weblab.core.services.Analyser")
public class TikaExtractorService implements Analyser {

	/**
	 * The logger to be used inside this class.
	 */
	protected final Log logger;

	/**
	 * The <code>ContentManager</code> to use. Various implementation exists.
	 * They are defined through a configuration file.
	 */
	final protected ContentManager contentManager;

	/**
	 * The configuration to be used for the service.
	 */
	final protected TikaConfiguration serviceConfig;

	/**
	 * The configuration of Tika itself.
	 */
	final protected TikaConfig tikaConfig;

	/**
	 * Whether or not to remove the content file once processed. Computed once
	 * in the constructor to prevent calculation on each process method call.
	 * True if and only if the reader of the content manager is not a
	 * {@link FileContentManager} AND the service configuration asks for the
	 * removal of temporary content.
	 */
	final protected boolean removeContent;

	/**
	 * The formatter used to annotate dates (like 2011-12-31). It is not used
	 * by this class itself. Note that {@link SimpleDateFormat} is not
	 * thread-safe: code sharing this instance between concurrent calls has to
	 * synchronise on it.
	 */
	protected final DateFormat simpleDateFormat;

	/**
	 * The writer in charge of converting the Tika metadata into statements on
	 * the annotation. Instantiated in the constructor from the class given by
	 * the service configuration.
	 */
	protected MetadataWriter metadataWriter;

	/**
	 * The only constructor of this class that needs a configuration.
	 * 
	 * @param conf
	 *            The service configuration.
	 * 
	 * @throws IOException
	 *             If the content manager cannot be loaded, if the Tika
	 *             configuration file cannot be found or read, or if the
	 *             metadata writer cannot be instantiated.
	 * @throws TikaException
	 *             If an error occurs reading the tika configuration.
	 */
	public TikaExtractorService(final TikaConfiguration conf)
			throws TikaException, IOException {
		this.logger = LogFactory.getLog(this.getClass());
		this.serviceConfig = conf;
		this.contentManager = ContentManager.getInstance();

		// Must be verified before the content manager is dereferenced below.
		if (this.contentManager == null) {
			final String err = Messages
					.getString(Constants.KEY_ERROR_UNABLE_TO_LOAD_CONTENT_MANAGER);
			this.logger.fatal(err);
			throw new IOException(err);
		}

		this.removeContent = (!(this.contentManager.getReader() instanceof FileContentManager))
				&& this.serviceConfig.isRemoveTempContent();
		this.simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");

		final String configPath = this.serviceConfig
				.getPathToXmlConfigurationFile();
		if (configPath == null) {
			this.logger.debug(Messages
					.getString(Constants.KEY_DEBUG_DEFAULT_TIKA_CONFIG));
			this.tikaConfig = new TikaConfig();
		} else {
			// getResource returns null when the file is not in the classpath;
			// fail with an explicit message instead of an obscure error later.
			final java.net.URL configUrl = this.getClass().getClassLoader()
					.getResource(configPath);
			if (configUrl == null) {
				final String err = "Tika configuration file not found in the classpath: "
						+ configPath;
				this.logger.fatal(err);
				throw new IOException(err);
			}
			try {
				this.tikaConfig = new TikaConfig(configUrl);
			} catch (final SAXException e) {
				throw new IOException(e);
			}
		}

		if (!(this.tikaConfig.getParser() instanceof CompositeParser)) {
			this.logger.warn(Messages.getString(
					Constants.KEY_WARN_NOT_A_COMPOSITE_PARSER_1,
					this.tikaConfig.getParser().getClass().getCanonicalName()));
		}

		try {
			this.metadataWriter = conf.getMetadataWriterClass().newInstance();
		} catch (final InstantiationException e) {
			this.logger.fatal("Failed to initialize the metadataWriter field",
					e);
			throw new IOException(
					"Failed to initialize the metadataWriter field", e);
		} catch (final IllegalAccessException e) {
			this.logger.fatal("Failed to initialize the metadataWriter field",
					e);
			throw new IOException(
					"Failed to initialize the metadataWriter field", e);
		}

		this.logger
				.info(Messages.getString(Constants.KEY_INFO_SERVICE_STARTED));
	}

	/**
	 * Extracts the text (and optionally the metadata) of the document
	 * contained in <code>args</code> and returns the enriched document.
	 * 
	 * A first extraction is done using the format annotated on the document.
	 * If the document contains no <code>Text</code> unit afterwards, a second
	 * extraction is attempted with the auto-detect parser. When a temporary
	 * copy of the content has to be removed, it is removed whether the
	 * extraction succeeds or not.
	 * 
	 * @param args
	 *            The wrapper that must contain a <code>Document</code>.
	 * @return A wrapper containing the processed document.
	 * @throws InvalidParameterException
	 *             If <code>args</code> does not contain a document.
	 * @throws ContentNotAvailableException
	 *             If the native content of the document cannot be read.
	 * @throws UnexpectedException
	 *             If the parsing of the content fails.
	 */
	@Override
	public ProcessReturn process(final ProcessArgs args)
			throws InvalidParameterException, ContentNotAvailableException,
			UnexpectedException {
		this.logger.trace("Process method called.");

		// Check that the processArgs contains a document and return it.
		final Document document = this.checkArgs(args);

		this.logger.info(Messages.getString(
				Constants.KEY_INFO_PROCESS_DOCUMENT_1, document.getUri()));

		// Check that the document contains a file in content manager and return
		// it.
		final File file = this.getContent(document);

		try {
			// Feed the document with Text unit from file content and get the
			// extracted metadata.
			Metadata extractedMeta = this.extractTextAndMetadata(document,
					file, false);

			// If no text unit are extracted from document, try to extract once
			// again but with the auto-detect parser.
			if (ResourceUtil.getSelectedSubResources(document, Text.class)
					.isEmpty()) {
				this.logger.warn(Messages.getString(
						Constants.KEY_WARN_NO_TEXT_FOUND_2,
						file.getAbsolutePath(), document.getUri()));
				extractedMeta = this.extractTextAndMetadata(document, file,
						true);
			}

			// Annotate the document with the extracted metadata.
			if (this.serviceConfig.isAddMetadata()) {
				final Annotation annot = AnnotationFactory
						.createAndLinkAnnotation(document);
				final JenaPoKHelper ahe = new JenaPoKHelper(annot);
				ahe.setAutoCommitMode(false);

				try {
					this.metadataWriter.write(extractedMeta, ahe, new URI(
							document.getUri()));

					// Trace the producer of the annotation when it is known.
					if (this.serviceConfig.getServiceUri() != null) {
						ahe.createResStat(annot.getUri(),
								WebLabProcessing.IS_PRODUCED_BY,
								this.serviceConfig.getServiceUri());
						ahe.createLitStat(annot.getUri(), DCTerms.CREATED,
								DatatypeConverter.printDateTime(Calendar
										.getInstance()));

						ahe.setNSPrefix(DCTerms.PREFERRED_PREFIX,
								DCTerms.NAMESPACE);
						ahe.setNSPrefix(WebLabProcessing.PREFERRED_PREFIX,
								WebLabProcessing.NAMESPACE);
					}

					ahe.commit();
				} catch (final URISyntaxException e) {
					// Best effort: the document is returned without metadata.
					this.logger.error("Document URI is not a valid : "
							+ e.getLocalizedMessage(), e);
				}
			}
		} finally {
			// Remove temporary content file if needed, even when the
			// extraction failed, to prevent leaking temporary files.
			if (this.removeContent && !file.delete()) {
				this.logger.warn(Messages.getString(
						Constants.KEY_WARN_UNABLE_TO_DELETE_TEMP_2,
						file.getAbsolutePath(), document.getUri()));
			}
		}

		// Create the return wrapper.
		final ProcessReturn pr = new ProcessReturn();
		pr.setResource(document);

		this.logger.info(Messages.getString(
				Constants.KEY_INFO_END_OF_PROCESS_1, document.getUri()));

		return pr;
	}

	/**
	 * Get the document inside the process args or throw an
	 * <code>InvalidParameterException</code> if not possible.
	 * 
	 * @param args
	 *            The <code>ProcessArgs</code> of the process method.
	 * @return The <code>Document</code> that must be contained by
	 *         <code>args</code>.
	 * @throws InvalidParameterException
	 *             If <code>args</code> is <code>null</code> or if
	 *             <code>resource</code> in <code>args</code> is
	 *             <code>null</code> or not a <code>Document</code>.
	 */
	protected Document checkArgs(final ProcessArgs args)
			throws InvalidParameterException {
		if (args == null) {
			final String err = Messages
					.getString(Constants.KEY_ERROR_PROCESSARGS_NULL);
			this.logger.error(err);
			throw new InvalidParameterException(err,
					Messages.getString(Constants.KEY_ERROR_INVALID_PARAM));
		}
		final Resource res = args.getResource();
		if (res == null) {
			final String err = Messages
					.getString(Constants.KEY_ERROR_RESOURCE_NULL);
			this.logger.error(err);
			throw new InvalidParameterException(err,
					Messages.getString(Constants.KEY_ERROR_INVALID_PARAM));
		}
		if (!(res instanceof Document)) {
			final String err = Messages.getString(
					Constants.KEY_ERROR_NOT_A_DOCUMENT_2, res.getUri(), res
							.getClass().getCanonicalName());
			this.logger.error(err);
			throw new InvalidParameterException(err,
					Messages.getString(Constants.KEY_ERROR_INVALID_PARAM));
		}
		return (Document) res;
	}

	/**
	 * Uses the content manager to retrieve the native content of the document
	 * in input.
	 * 
	 * @param document
	 *            The document that must contains an hasNativeContentProperty
	 * @return The file, either a temp one (if the Content is managed remotely)
	 *         or the real if is in local.
	 * @throws ContentNotAvailableException
	 *             If the ContentManager fails or if the file does not exist or
	 *             is not accessible.
	 */
	private File getContent(final Document document)
			throws ContentNotAvailableException {
		final File file;
		try {
			file = this.contentManager.readNativeContent(document);
		} catch (final WebLabCheckedException wlce) {
			final String err = Messages.getString(
					Constants.KEY_ERROR_CONTENT_NOT_AVAILABLE_1,
					document.getUri());
			this.logger.error(err, wlce);
			throw new ContentNotAvailableException(
					err,
					Messages.getString(Constants.KEY_ERROR_CONTENT_NOT_AVAILABLE_SIMPLE),
					wlce);
		}
		if (!file.exists()) {
			final String err = Messages.getString(
					Constants.KEY_ERROR_CONTENT_FILE_NOT_FOUND_2,
					file.getAbsolutePath(), document.getUri());
			this.logger.error(err);
			throw new ContentNotAvailableException(
					err,
					Messages.getString(Constants.KEY_ERROR_CONTENT_NOT_AVAILABLE_SIMPLE));
		}
		if (!file.canRead()) {
			final String err = Messages.getString(
					Constants.KEY_ERROR_CONTENT_FILE_NOT_READABLE_2,
					file.getAbsolutePath(), document.getUri());
			this.logger.error(err);
			throw new ContentNotAvailableException(
					err,
					Messages.getString(Constants.KEY_ERROR_CONTENT_NOT_AVAILABLE_SIMPLE));
		}
		return file;
	}

	/**
	 * Parses the content file with Tika, sending the parsing events to the
	 * MediaUnit creation handler and, depending on the configuration, to the
	 * language guesser and to the XHTML (normalised content) writer.
	 * 
	 * @param document
	 *            The document to be fill with MediaUnit units
	 * @param contentFile
	 *            The file to be parsed
	 * @param forceAutoDetectParser
	 *            Whether to let Tika guess the parser to use from file content
	 *            or use existing mimeType on the document (dc:format) to select
	 *            the appropriated parser.
	 * @return The metadata filled by the Tika parser (plus the language when
	 *         it has been guessed or defaulted) if the configuration asks for
	 *         metadata; an empty <code>Metadata</code> otherwise.
	 * 
	 * @throws UnexpectedException
	 *             If the Tika parser fails.
	 * @throws ContentNotAvailableException
	 *             If the file is not reachable. (This should not appear since
	 *             its access has been checked before)
	 */
	protected Metadata extractTextAndMetadata(final Document document,
			final File contentFile, final boolean forceAutoDetectParser)
			throws UnexpectedException, ContentNotAvailableException {

		// Mime type is only looked up in the document when not forced to guess.
		final String mimeType = forceAutoDetectParser ? null : this
				.readDeclaredMimeType(document);
		final Parser parser = this.selectParser(mimeType);

		final boolean guessLanguage = this.serviceConfig.isAddMetadata()
				&& this.serviceConfig.isAnnotateDocumentWithLang();

		// The handler that will guess language in the document
		final ProfilingHandler langGuesser = new ProfilingHandler();

		// The XHTML temp file is only created when the XHTML output is asked;
		// null means that no normalised content will be generated.
		final File xhtmlOutputFile = this.serviceConfig.isGenerateHtml() ? this
				.createXhtmlTempFile(document) : null;

		try {
			/*
			 * If an error occurs creating the transformer for normalised
			 * content generator, it is just skipped (htmlHandler stays null)
			 * to prevent use of an empty content.
			 */
			ContentHandler htmlHandler = null;
			if (xhtmlOutputFile != null) {
				try {
					htmlHandler = this.getHtmlCreatorCHandler(xhtmlOutputFile);
				} catch (final TransformerConfigurationException tce) {
					this.logger.warn(Messages.getString(
							Constants.KEY_WARN_UNABLE_TO_CREATE_TRANSFORMER_1,
							document.getUri()), tce);
				}
			}

			/*
			 * Create the appropriated handler (or tee handler) depending on
			 * the things needed: MediaUnit creation is always done, language
			 * guesser and normalised content generator are optional.
			 */
			final ContentHandler muHandler = this
					.getMUCreatorCHandler(document);
			final ContentHandler handler;
			if (guessLanguage && (htmlHandler != null)) {
				this.logger
						.trace("Create a TeeContentHandler for language guesser, MediaUnit creation and XHTML output creation.");
				handler = new TeeContentHandler(muHandler, langGuesser,
						htmlHandler);
			} else if (htmlHandler != null) {
				this.logger
						.trace("Create a TeeContentHandler for MediaUnit creation and XHTML output creation.");
				handler = new TeeContentHandler(muHandler, htmlHandler);
			} else if (guessLanguage) {
				this.logger
						.trace("Create a TeeContentHandler for language guesser and MediaUnit creation.");
				handler = new TeeContentHandler(muHandler, langGuesser);
			} else {
				this.logger
						.trace("Create a ContentHandler for MediaUnit creation.");
				handler = muHandler;
			}

			// The metadata object to be filled by Tika parser.
			final Metadata metadata = new Metadata();

			// The parsecontext
			final ParseContext context = new ParseContext();

			// The inputstream on the content to parse
			final InputStream stream;
			try {
				stream = new FileInputStream(contentFile);
			} catch (final FileNotFoundException fnfe) {
				final String err = Messages.getString(
						Constants.KEY_ERROR_CONTENT_FILE_NOT_FOUND_2,
						contentFile.getAbsolutePath(), document.getUri());
				this.logger.error(err, fnfe);
				throw new ContentNotAvailableException(
						err,
						Messages.getString(Constants.KEY_ERROR_CONTENT_NOT_AVAILABLE_SIMPLE),
						fnfe);
			}

			this.logger.debug("Start parsing " + contentFile.getPath()
					+ " for document " + document.getUri() + ".");
			try {
				parser.parse(stream, handler, metadata, context);
			} catch (final IOException ioe) {
				final String err = Messages.getString(
						Constants.KEY_ERROR_IOE_ON_CONTENT_2,
						contentFile.getPath(), document.getUri());
				this.logger.error(err, ioe);
				throw new UnexpectedException(
						err,
						Messages.getString(Constants.KEY_ERROR_IOE_ON_CONTENT_SIMPLE),
						ioe);
			} catch (final SAXException saxe) {
				final String err = Messages.getString(
						Constants.KEY_ERROR_SAXE_ON_CONTENT_2,
						contentFile.getPath(), document.getUri());
				this.logger.error(err, saxe);
				throw new UnexpectedException(
						err,
						Messages.getString(Constants.KEY_ERROR_ERROR_ON_CONTENT_SIMPLE),
						saxe);
			} catch (final TikaException te) {
				final String err = Messages.getString(
						Constants.KEY_ERROR_TIKA_EX_ON_CONTENT_2,
						contentFile.getPath(), document.getUri());
				this.logger.error(err, te);
				throw new UnexpectedException(
						err,
						Messages.getString(Constants.KEY_ERROR_ERROR_ON_CONTENT_SIMPLE),
						te);
			} finally {
				IOUtils.closeQuietly(stream);
			}
			this.logger.debug("Finished parsing " + contentFile.getPath()
					+ " for document " + document.getUri() + ".");

			/*
			 * If the language identification is enabled and certain enough,
			 * add the language to metadata. Otherwise fall back on the default
			 * language of the configuration (if any).
			 */
			if (guessLanguage
					&& langGuesser.getLanguage().isReasonablyCertain()) {
				metadata.set(org.apache.tika.metadata.DublinCore.LANGUAGE,
						langGuesser.getLanguage().getLanguage());
			} else if (this.serviceConfig.isAnnotateDocumentWithLang()
					&& (this.serviceConfig.getDefaultLang() != null)) {
				metadata.set(org.apache.tika.metadata.DublinCore.LANGUAGE,
						this.serviceConfig.getDefaultLang());
			}

			if (htmlHandler != null) {
				this.saveNormalisedContent(xhtmlOutputFile, document);
			}

			// Tika metadata are only returned if annotations are needed.
			if (this.serviceConfig.isAddMetadata()) {
				return metadata;
			}
			return new Metadata();
		} finally {
			// Always remove the temp XHTML file, even when parsing failed.
			// deleteQuietly accepts null (nothing to delete in that case).
			FileUtils.deleteQuietly(xhtmlOutputFile);
		}
	}

	/**
	 * Reads the mime type annotated (dc:format) on the document.
	 * 
	 * @param document
	 *            The document that may hold a format annotation.
	 * @return The first format found on the document or <code>null</code> if
	 *         there is none.
	 */
	private String readDeclaredMimeType(final Document document) {
		final Value<String> format = new DublinCoreAnnotator(document)
				.readFormat();
		String mimeType = null;
		if ((format != null) && format.hasValue()) {
			mimeType = format.firstTypedValue();
			if (format.getValues().size() > 1) {
				this.logger.warn(Messages.getString(
						Constants.KEY_WARN_MORE_THAN_ONE_TYPE_2,
						document.getUri(), mimeType));
			}
		}
		this.logger.debug("Mime type detected in Resource: " + mimeType);
		return mimeType;
	}

	/**
	 * Selects the Tika parser to be used for the given mime type.
	 * 
	 * @param mimeType
	 *            The mime type of the content or <code>null</code> if unknown.
	 * @return An auto-detect parser if the mime type is not defined or has no
	 *         dedicated parser in the composite parser of the Tika
	 *         configuration; the dedicated parser otherwise. If the configured
	 *         parser is not composite, it is returned as is.
	 */
	private Parser selectParser(final String mimeType) {
		if (mimeType == null) {
			return new AutoDetectParser(this.tikaConfig);
		}

		final Parser configured = this.tikaConfig.getParser();
		if (!(configured instanceof CompositeParser)) {
			// The Parser in the configuration is not composite. That's weird?
			// We only parser one type of file?
			this.logger
					.debug("Tika Config does not use an AutodetectParser but a "
							+ configured.getClass().getCanonicalName() + ".");
			return configured;
		}

		final MediaType mediaType = MediaType.parse(mimeType);
		final Parser dedicated = ((CompositeParser) configured).getParsers()
				.get(mediaType);
		if (dedicated != null) {
			return dedicated;
		}
		this.logger.debug("No parser for type " + mediaType
				+ " let Tika guess type.");
		return new AutoDetectParser(this.tikaConfig);
	}

	/**
	 * Creates the temporary file receiving the XHTML output of Tika.
	 * 
	 * @param document
	 *            The document being processed (only used in the log message).
	 * @return The temporary file or <code>null</code> if it cannot be created;
	 *         in that case a warning is logged and no normalised content
	 *         should be generated.
	 */
	private File createXhtmlTempFile(final Document document) {
		try {
			return File.createTempFile("tika", ".xhtml");
		} catch (final IOException ioe) {
			this.logger.warn(Messages.getString(
					Constants.KEY_WARN_UNABLE_TO_CREATE_TEMP_FILE_1,
					document.getUri()), ioe);
			return null;
		}
	}

	/**
	 * Saves the generated XHTML file as normalised content of the document.
	 * Every error is logged as a warning and otherwise ignored, since the
	 * normalised content is only a by-product of the extraction.
	 * 
	 * @param xhtmlOutputFile
	 *            The file in which the XHTML has been written.
	 * @param document
	 *            The document the normalised content belongs to.
	 */
	private void saveNormalisedContent(final File xhtmlOutputFile,
			final Document document) {
		if (!xhtmlOutputFile.exists()) {
			this.logger.warn(Messages.getString(
					Constants.KEY_WARN_NO_OUTPUT_FILE_2,
					xhtmlOutputFile.getPath(), document.getUri()));
			return;
		}
		if (FileUtils.sizeOf(xhtmlOutputFile) <= 0) {
			this.logger.warn(Messages.getString(
					Constants.KEY_WARN_EMPTY_OUTPUT_FILE_2,
					xhtmlOutputFile.getPath(), document.getUri()));
			return;
		}

		final FileInputStream fis;
		try {
			fis = new FileInputStream(xhtmlOutputFile);
		} catch (final FileNotFoundException fnfe) {
			this.logger.warn(Messages.getString(
					Constants.KEY_WARN_NO_OUTPUT_FILE_2,
					xhtmlOutputFile.getPath(), document.getUri()), fnfe);
			return;
		}

		try {
			this.logger.debug("Save normalised content file: "
					+ xhtmlOutputFile);
			this.contentManager.writeNormalisedContent(fis, document);
		} catch (final WebLabCheckedException wlce) {
			this.logger.warn(Messages.getString(
					Constants.KEY_WARN_ERROR_SAVING_NORMALISED_2,
					xhtmlOutputFile.getPath(), document.getUri()), wlce);
		} finally {
			IOUtils.closeQuietly(fis);
		}
	}

	/**
	 * Creates a new MediaUnit content Handler, with a HTML Body handler inside.
	 * The handler class is the one defined in the service configuration.
	 * 
	 * @param document
	 *            The document to be enriched with mediaUnits
	 * @return The MediaUnitContent Handler
	 * @throws UnexpectedException
	 *             If the configured handler class cannot be instantiated.
	 */
	private WebLabHandlerDecorator getMUCreatorCHandler(final Document document)
			throws UnexpectedException {
		WebLabHandlerDecorator wlhd;
		try {
			wlhd = this.serviceConfig.getWebLabHandlerDecoratorClass()
					.newInstance();
		} catch (final InstantiationException ie) {
			final String err = Messages.getString(
					Constants.KEY_ERROR_BAD_HANDLER_1, this.serviceConfig
							.getWebLabHandlerDecoratorClass()
							.getCanonicalName());
			this.logger.error(err, ie);
			throw new UnexpectedException(err, err, ie);
		} catch (final IllegalAccessException iae) {
			final String err = Messages.getString(
					Constants.KEY_ERROR_BAD_HANDLER_1, this.serviceConfig
							.getWebLabHandlerDecoratorClass()
							.getCanonicalName());
			this.logger.error(err, iae);
			throw new UnexpectedException(err, err, iae);
		}
		wlhd.setDocument(document);
		wlhd.setTikaConfiguration(this.serviceConfig);
		// -1 is passed as write limit of the body handler.
		wlhd.setContentHandler(new BodyContentHandler(-1));
		return wlhd;
	}

	/**
	 * Creates an handler in charge of writing the XHTML events into the
	 * xhtmlFile.
	 * 
	 * @param xhtmlFile
	 *            The file in which the XHTML should be written.
	 * @return A ContentHandler that writes into the file
	 * @throws TransformerConfigurationException
	 *             If the content handler cannot be created, including when the
	 *             available transformer factory is not a SAX one.
	 */
	private ContentHandler getHtmlCreatorCHandler(final File xhtmlFile)
			throws TransformerConfigurationException {
		final TransformerFactory factory = TransformerFactory.newInstance();
		// Report an unsupported factory through the declared exception (which
		// callers already handle) rather than through a ClassCastException.
		if (!(factory instanceof SAXTransformerFactory)) {
			throw new TransformerConfigurationException(
					"The transformer factory "
							+ factory.getClass().getCanonicalName()
							+ " is not a SAXTransformerFactory.");
		}
		final TransformerHandler handler = ((SAXTransformerFactory) factory)
				.newTransformerHandler();
		handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
		handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
		handler.setResult(new StreamResult(xhtmlFile));
		return handler;
	}
}
