/**
 * WEBLAB: Service oriented integration platform for media mining and intelligence applications
 * 
 * Copyright (C) 2004 - 2011 Cassidian, an EADS company
 * 
 * This library is free software; you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License along with this
 * library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301 USA
 */

package org.ow2.weblab.service.normaliser.tika.handlers;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.List;

import javax.imageio.ImageIO;
import javax.xml.bind.DatatypeConverter;

import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.language.ProfilingWriter;
import org.ow2.weblab.content.api.ContentManager;
import org.ow2.weblab.core.extended.exception.WebLabCheckedException;
import org.ow2.weblab.core.extended.factory.AnnotationFactory;
import org.ow2.weblab.core.extended.factory.MediaUnitFactory;
import org.ow2.weblab.core.extended.ontologies.DCTerms;
import org.ow2.weblab.core.extended.ontologies.DublinCore;
import org.ow2.weblab.core.extended.ontologies.WebLabProcessing;
import org.ow2.weblab.core.helper.PoKHelper;
import org.ow2.weblab.core.helper.impl.JenaPoKHelper;
import org.ow2.weblab.core.model.Annotation;
import org.ow2.weblab.core.model.Document;
import org.ow2.weblab.core.model.Image;
import org.ow2.weblab.core.model.Resource;
import org.ow2.weblab.core.model.Text;
import org.ow2.weblab.service.normaliser.tika.TikaConfiguration;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;

/**
 * SAX handler that segments the (X)HTML element stream it receives into WebLab media units linked to the
 * current {@link Document}.
 * <ul>
 * <li>Title (h1 to h6) and bloc (div, p) elements open, split or extend a {@link Text} unit.</li>
 * <li>A table element met outside of any table becomes its own {@link Text} unit: a tab is appended at the
 * end of each cell and a new line at the end of each row.</li>
 * <li>An img element whose source is an "http://" URL ending with .jpg, .png or .bmp, and whose decoded
 * picture is strictly larger than 100x100 pixels, becomes an {@link Image} unit.</li>
 * </ul>
 * Text units that are kept receive a language annotation computed incrementally by a {@link ProfilingWriter}.
 * <p>
 * Instances hold parsing state (current text/table unit, current container element).
 * {@link #setDocument(Document)} is expected to be called before the first SAX event, since every media unit
 * is created on that document.
 */
@Deprecated
public class MediaUnitContentHandler extends WebLabHandlerDecorator {


	/** An image is only kept when its width is strictly greater than this value (pixels). */
	private static final int MIN_IMAGE_WIDTH = 100;


	/** An image is only kept when its height is strictly greater than this value (pixels). */
	private static final int MIN_IMAGE_HEIGHT = 100;


	/** A text unit whose content (tabs and line breaks stripped, trimmed) is shorter than this is removed. */
	private static final int MIN_TEXT_LENGTH = 4;


	/** Above this content length the detected language is used even when the detector is not reasonably certain. */
	private static final int LONG_CONTENT_LENGTH = 200;


	/** Language code annotated when the detection result is not trusted. */
	private static final String UNKNOWN_LANGUAGE = "unk";


	/** Prefix of the predicates (alt, width, height) created on image units. */
	private static final String BASE_URI = "http://weblab.ow2.org/services/tika/";


	/** Name of the element that enables text collection and acts as the default container. */
	private static final String BODY_ELEMENT = "body";


	/**
	 * List of elements that require new line in extracted text
	 */
	private static final List<String> NEW_LINE_ELEMENTS = MediaUnitContentHandler.listOf("ol", "dt", "dl", "li");


	/** List of elements that are prefixed with a tab in extracted text. */
	private static final List<String> TAB_LIST_ELEMENTS = MediaUnitContentHandler.listOf("li", "dd");


	/** List of the title elements. */
	private static final List<String> TITLE_LIST_ELEMENTS = MediaUnitContentHandler.listOf("h1", "h2", "h3", "h4", "h5", "h6");


	/** List of the bloc elements ("span" is deliberately not part of it). */
	private static final List<String> BLOC_LIST_ELEMENTS = MediaUnitContentHandler.listOf("div", "p");


	private final Log logger;


	/** Document receiving the created media units. */
	private Document document;


	/**
	 * to supports an incremental language detection
	 ***/
	private ProfilingWriter pWriter;


	/** Incremental language detection for the table in process. */
	private ProfilingWriter pTableWriter;


	/** Text unit currently being filled, or null when none is open. */
	private Text textInProcess;


	/** Table unit currently being filled, or null when the parser is not inside a table. */
	private Text tableInProcess;


	/** Name of the element considered as the container of the current text unit; empty until "body" is met. */
	private String muHtmlElement;


	private TikaConfiguration configuration;


	/**
	 * Creates a handler with no open media unit; characters are ignored until a "body" element starts.
	 */
	public MediaUnitContentHandler() {
		super();
		this.logger = LogFactory.getLog(this.getClass());
		this.muHtmlElement = "";
		this.textInProcess = null;
		this.tableInProcess = null;
	}


	/**
	 * @param values
	 *            The element names to store
	 * @return An unmodifiable list containing values, in the same order
	 */
	private static List<String> listOf(final String... values) {
		final List<String> list = new ArrayList<String>(values.length);
		for (final String value : values) {
			list.add(value);
		}
		return Collections.unmodifiableList(list);
	}


	/**
	 * @return The platform line separator, read from the system properties at each call
	 */
	private static String lineSeparator() {
		return System.getProperty("line.separator");
	}


	/**
	 * @param text
	 *            The raw content of a media unit
	 * @return text without any tab, line feed or carriage return, and trimmed
	 */
	private static String stripLayoutCharacters(final String text) {
		return text.replace("\t", "").replace("\n", "").replace("\r", "").trim();
	}


	/**
	 * @param writer
	 *            The profiling writer that received the content of the unit
	 * @param contentLength
	 *            The length of the stripped content of the unit
	 * @return The detected language when the detector is reasonably certain or the content is long enough, the
	 *         unknown language code otherwise
	 */
	private static String detectLanguage(final ProfilingWriter writer, final int contentLength) {
		if (writer.getLanguage().isReasonablyCertain() || (contentLength > MediaUnitContentHandler.LONG_CONTENT_LENGTH)) {
			return writer.getLanguage().getLanguage();
		}
		return MediaUnitContentHandler.UNKNOWN_LANGUAGE;
	}


	/**
	 * Appends suffix to the content of the text unit in process, which must not be null.
	 */
	private void appendToText(final String suffix) {
		this.textInProcess.setContent(this.textInProcess.getContent() + suffix);
	}


	/**
	 * Appends suffix to the content of the table unit in process, which must not be null.
	 */
	private void appendToTable(final String suffix) {
		this.tableInProcess.setContent(this.tableInProcess.getContent() + suffix);
	}


	/**
	 * Creates a new empty text unit on the document and a fresh language profiler for it.
	 */
	private void openTextMediaUnit() {
		this.textInProcess = MediaUnitFactory.createAndLinkMediaUnit(this.document, Text.class);
		this.textInProcess.setContent("");
		this.pWriter = new ProfilingWriter();
	}


	/**
	 * Creates a new empty table unit on the document and a fresh language profiler for it.
	 */
	private void openTableMediaUnit() {
		this.tableInProcess = MediaUnitFactory.createAndLinkMediaUnit(this.document, Text.class);
		this.tableInProcess.setContent("");
		this.pTableWriter = new ProfilingWriter();
	}


	/**
	 * Ends the text unit in process: it is removed from the document when its stripped content is shorter than
	 * 4 characters, otherwise it is annotated with its language.
	 */
	private void closeTextMediaUnit() {
		final String content = MediaUnitContentHandler.stripLayoutCharacters(this.textInProcess.getContent());
		if (content.length() < MediaUnitContentHandler.MIN_TEXT_LENGTH) {
			this.document.getMediaUnit().remove(this.textInProcess);
		} else {
			this.annotate(this.textInProcess, MediaUnitContentHandler.detectLanguage(this.pWriter, content.length()));
		}
		this.textInProcess = null;
		this.pWriter = null;
	}


	/**
	 * Ends the table unit in process: it is removed from the document when its stripped content is empty,
	 * otherwise it is annotated with its language.
	 */
	private void closeTableMediaUnit() {
		final String content = MediaUnitContentHandler.stripLayoutCharacters(this.tableInProcess.getContent());
		if (content.isEmpty()) {
			this.document.getMediaUnit().remove(this.tableInProcess);
		} else {
			this.annotate(this.tableInProcess, MediaUnitContentHandler.detectLanguage(this.pTableWriter, content.length()));
		}
		this.tableInProcess = null;
		this.pTableWriter = null;
	}


	/**
	 * Creates an image unit for an img element when its picture can be downloaded and is large enough.
	 * 
	 * @param atts
	 *            The attributes of the img element; nothing is done when "src" is missing
	 */
	private void addImageMediaUnit(final Attributes atts) {
		final String src = atts.getValue("src");
		// An img element without src attribute (getValue returns null) cannot be resolved.
		if (src == null) {
			return;
		}
		if (!(src.endsWith(".jpg") || src.endsWith(".png") || src.endsWith(".bmp"))) {
			return;
		}
		// TODO Sources that are not "http://" URLs (e.g. local files) are not supported: no picture was ever
		// loaded for them, so the code that copied a local file into the content manager could not be reached.
		if (!src.startsWith("http://")) {
			return;
		}

		final BufferedImage image = this.readRemoteImage(src);
		if (image == null) {
			return;
		}
		if ((image.getWidth() <= MediaUnitContentHandler.MIN_IMAGE_WIDTH) || (image.getHeight() <= MediaUnitContentHandler.MIN_IMAGE_HEIGHT)) {
			return;
		}

		final Image imageMU = MediaUnitFactory.createAndLinkMediaUnit(this.document, Image.class);
		this.annotateImageDescription(imageMU, atts);
		this.annotateImageSize(imageMU, image);

		// TODO ??
		// imageMU.setContent(normalisedContent);
	}


	/**
	 * Downloads and decodes the picture located at src.
	 * 
	 * @param src
	 *            The URL of the picture
	 * @return The decoded picture, or null when the URL is malformed, the download fails or no decoder accepts
	 *         the content. Failures are only reported with a short info message.
	 */
	private BufferedImage readRemoteImage(final String src) {
		try {
			return ImageIO.read(new URL(src));
		} catch (final MalformedURLException e) {
			this.logger.info("Unable to read image. " + src + " can not be transformed to URI.");
		} catch (final IOException e) {
			this.logger.info("Unable to convert to image : " + src);
		}
		return null;
	}


	/**
	 * Annotates the image with the first "alt" or "title" attribute found; only that first one is used.
	 * 
	 * @param imageMU
	 *            The image unit to annotate
	 * @param atts
	 *            The attributes of the img element
	 */
	private void annotateImageDescription(final Image imageMU, final Attributes atts) {
		for (int i = 0; i < atts.getLength(); i++) {
			final String attributeName = atts.getLocalName(i);
			final boolean isAlt = "alt".equals(attributeName);
			if (isAlt || "title".equals(attributeName)) {
				final Annotation muImageAnnot = AnnotationFactory.createAndLinkAnnotation(imageMU);
				final PoKHelper pokH = new JenaPoKHelper(muImageAnnot);
				final String predicate = isAlt ? MediaUnitContentHandler.BASE_URI + "alt" : DublinCore.TITLE_PROPERTY_NAME;
				pokH.createLitStat(imageMU.getUri(), predicate, atts.getValue(i).replace("<br/>", ""));
				break;
			}
		}
	}


	/**
	 * Annotates the image with the width and height of its decoded picture.
	 * 
	 * @param imageMU
	 *            The image unit to annotate
	 * @param image
	 *            The decoded picture
	 */
	private void annotateImageSize(final Image imageMU, final BufferedImage image) {
		try {
			final Annotation muImageAnnot = AnnotationFactory.createAndLinkAnnotation(imageMU);
			final PoKHelper pokH = new JenaPoKHelper(muImageAnnot);
			// Both statements are written in a single commit.
			pokH.setAutoCommitMode(false);
			pokH.createLitStat(imageMU.getUri(), new URL(MediaUnitContentHandler.BASE_URI + "width").toURI().toString(),
					String.valueOf(image.getWidth()));
			pokH.createLitStat(imageMU.getUri(), new URL(MediaUnitContentHandler.BASE_URI + "height").toURI().toString(),
					String.valueOf(image.getHeight()));
			pokH.commit();
		} catch (final MalformedURLException murle) {
			this.logger.warn("Malformed URL : " + MediaUnitContentHandler.BASE_URI + "width" + ")", murle);
		} catch (final URISyntaxException urise) {
			this.logger.warn("Unable to transform the property 'width" + "' into a predicate (" + MediaUnitContentHandler.BASE_URI + "width"
					+ ")", urise);
		}
	}


	/**
	 * Handles the start of a title element met outside of a table.
	 * 
	 * @param name
	 *            The name of the title element
	 */
	private void startTitleElement(final String name) {
		if (this.textInProcess == null) {
			this.openTextMediaUnit();
			this.muHtmlElement = name;
		} else if (this.muHtmlElement.equals(name) || this.muHtmlElement.equals(MediaUnitContentHandler.BODY_ELEMENT)) {
			// Same title level, or text directly under body: the title starts a new text unit.
			this.closeTextMediaUnit();
			this.muHtmlElement = name;
			this.openTextMediaUnit();
		} else {
			this.appendToText(MediaUnitContentHandler.lineSeparator());
		}
	}


	/**
	 * Handles the start of a bloc element met outside of a table.
	 * 
	 * @param name
	 *            The name of the bloc element
	 */
	private void startBlocElement(final String name) {
		if (this.textInProcess == null) {
			this.openTextMediaUnit();
			this.muHtmlElement = name;
		} else if (MediaUnitContentHandler.TITLE_LIST_ELEMENTS.contains(this.muHtmlElement)
				|| MediaUnitContentHandler.BLOC_LIST_ELEMENTS.contains(this.muHtmlElement)) {
			// The current element is under a title element or another bloc element: same text unit.
			this.appendToText(MediaUnitContentHandler.lineSeparator());
		} else {
			this.closeTextMediaUnit();
			this.muHtmlElement = name;
			this.openTextMediaUnit();
		}
	}


	/**
	 * Forwards the event to the parent handler, then opens, splits or extends media units according to the
	 * started element.
	 */
	@Override
	public void startElement(final String uri, final String localName, final String name, final Attributes atts) throws SAXException {
		super.startElement(uri, localName, name, atts);

		if (MediaUnitContentHandler.BODY_ELEMENT.equals(name)) {
			this.muHtmlElement = name;
		}

		// Titles, blocs and tables are only interpreted when the current element is not under a table element.
		if (this.tableInProcess == null) {
			if (MediaUnitContentHandler.TITLE_LIST_ELEMENTS.contains(name)) {
				this.startTitleElement(name);
			}

			if (MediaUnitContentHandler.BLOC_LIST_ELEMENTS.contains(name)) {
				this.startBlocElement(name);
			}

			if ("table".equals(name)) {
				this.openTableMediaUnit();
				if (this.textInProcess != null) {
					this.closeTextMediaUnit();
				}
			}
		}

		if ("img".equals(name)) {
			this.addImageMediaUnit(atts);
			if ((this.tableInProcess == null) && (this.textInProcess != null)) {
				// An image inside a bloc only breaks the line; elsewhere it ends the current text unit.
				if (MediaUnitContentHandler.BLOC_LIST_ELEMENTS.contains(this.muHtmlElement)) {
					this.appendToText(MediaUnitContentHandler.lineSeparator());
				} else {
					this.closeTextMediaUnit();
				}
			}
		}

		if (this.textInProcess != null) {
			// Insert line separator if the new element is in the new line element list
			if (MediaUnitContentHandler.NEW_LINE_ELEMENTS.contains(name)) {
				this.appendToText(MediaUnitContentHandler.lineSeparator());
			}
			// Insert tab if the new element is a list item (and '-' character?)
			if (MediaUnitContentHandler.TAB_LIST_ELEMENTS.contains(name)) {
				this.appendToText("\t");
			}
			if ("a".equals(name) || "span".equals(name)) {
				this.appendToText(" ");
			}
		}

	}


	/**
	 * Forwards the event to the parent handler, then adds separators to, or closes, the media unit in process
	 * according to the ended element.
	 */
	@Override
	public void endElement(final String uri, final String localName, final String name) throws SAXException {

		super.endElement(uri, localName, name);

		if (this.tableInProcess != null) {
			if ("tr".equals(name)) {
				// End of a table row
				this.appendToTable("\n");
			} else if ("td".equals(name)) {
				// End of a table cell
				this.appendToTable("\t");
			} else if ("table".equals(name)) {
				this.closeTableMediaUnit();
			}
		} else if (this.textInProcess != null) {
			if ("br".equals(name)) {
				// No table is in process in this branch: the line break belongs to the text unit.
				this.appendToText("\n");
			}

			if ((this.muHtmlElement.equals(name) && !MediaUnitContentHandler.TITLE_LIST_ELEMENTS.contains(this.muHtmlElement))
					|| MediaUnitContentHandler.BODY_ELEMENT.equals(name)) {
				// The container of the text unit (other than a title) or the body ends: so does the unit.
				this.closeTextMediaUnit();
				this.muHtmlElement = MediaUnitContentHandler.BODY_ELEMENT;
			} else if (MediaUnitContentHandler.BLOC_LIST_ELEMENTS.contains(name) || MediaUnitContentHandler.NEW_LINE_ELEMENTS.contains(name)
					|| MediaUnitContentHandler.TITLE_LIST_ELEMENTS.contains(name)) {
				this.appendToText(MediaUnitContentHandler.lineSeparator());
			} else {
				this.appendToText(" ");
			}
		}

	}


	/**
	 * Forwards the event to the parent handler, then appends the received characters to the table or text unit
	 * in process (a text unit is opened when none is). Characters received before the first "body" element, or
	 * containing only whitespace, are ignored.
	 */
	@Override
	public void characters(final char[] ch, final int start, final int length) throws SAXException {
		super.characters(ch, start, length);

		// Nothing is collected before the body element has been met.
		if (this.muHtmlElement.isEmpty()) {
			return;
		}

		final String raw = new String(ch, start, length);
		// trim() drops every leading/trailing character up to the space, tabs and line breaks included.
		if (raw.trim().isEmpty()) {
			return;
		}

		// Tabs and line breaks become spaces, then each pair of spaces is reduced to one (single pass).
		final String content = raw.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ').replace("  ", " ");

		if ((this.textInProcess == null) && (this.tableInProcess == null)) {
			this.openTextMediaUnit();
		}

		if (this.tableInProcess != null) {
			this.appendToTable(content);
			try {
				// The language profiler receives the raw characters.
				this.pTableWriter.append(raw);
			} catch (final IOException e) {
				this.logger.error(e.getMessage(), e);
			}
		} else {
			this.appendToText(content);
			try {
				this.pWriter.append(raw);
			} catch (final IOException e) {
				this.logger.error(e.getMessage(), e);
			}
		}
	}


	/**
	 * Adds to mu a new annotation stating its language. When the configuration provides a service URI, the
	 * annotation also states that it is produced by that service and when it was created.
	 * 
	 * @param mu
	 *            The resource to be annotated
	 * @param language
	 *            The language to annotate using a dc:language property statement on mu.
	 */
	private void annotate(final Resource mu, final String language) {
		final Annotation muLangAnnot = AnnotationFactory.createAndLinkAnnotation(mu);
		final PoKHelper pokH = new JenaPoKHelper((muLangAnnot));
		// The configuration is null when setTikaConfiguration has not been called; only the language is written then.
		if ((this.configuration != null) && (this.configuration.getServiceUri() != null)) {
			pokH.setAutoCommitMode(false);
			pokH.setNSPrefix(DCTerms.PREFERRED_PREFIX, DCTerms.NAMESPACE);
			pokH.setNSPrefix(WebLabProcessing.PREFERRED_PREFIX, WebLabProcessing.NAMESPACE);
			pokH.createLitStat(mu.getUri(), DublinCore.LANGUAGE_PROPERTY_NAME, language);
			pokH.createResStat(muLangAnnot.getUri(), WebLabProcessing.IS_PRODUCED_BY, this.configuration.getServiceUri());
			pokH.createLitStat(muLangAnnot.getUri(), DCTerms.CREATED, DatatypeConverter.printDateTime(Calendar.getInstance()));
			pokH.commit();
		} else {
			pokH.createLitStat(mu.getUri(), DublinCore.LANGUAGE_PROPERTY_NAME, language);
		}
	}


	/**
	 * @param document
	 *            The document on which the media units will be created
	 */
	@Override
	public void setDocument(final Document document) {
		this.document = document;
	}


	/**
	 * @param tikaConfiguration
	 *            The configuration whose service URI, when not null, is recorded in the language annotations
	 */
	@Override
	public void setTikaConfiguration(final TikaConfiguration tikaConfiguration) {
		this.configuration = tikaConfiguration;
	}

}
