/**
 * WEBLAB: Service oriented integration platform for media mining and intelligence applications
 * 
 * Copyright (C) 2004 - 2012 Cassidian, an EADS company
 * 
 * This library is free software; you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License along with this
 * library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301 USA
 */

package org.ow2.weblab.services.blremover;

import java.util.List;
import java.util.ListIterator;
import java.util.regex.Pattern;

import javax.jws.WebService;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.ow2.weblab.core.extended.util.ResourceUtil;
import org.ow2.weblab.core.model.Document;
import org.ow2.weblab.core.model.Resource;
import org.ow2.weblab.core.model.Text;
import org.ow2.weblab.core.services.Analyser;
import org.ow2.weblab.core.services.InvalidParameterException;
import org.ow2.weblab.core.services.analyser.ProcessArgs;
import org.ow2.weblab.core.services.analyser.ProcessReturn;

/**
 * Analyser service that collapses runs of blank lines in the text sections of a document.
 * 
 * For every text section having a content, the content is trimmed, the characters of each whitespace-only line are replaced by a line break, every
 * <code>\r\n</code> pair is converted to <code>\n</code> and finally every run of two or more <code>\r</code> or <code>\n</code> characters is replaced
 * by exactly two <code>\n</code>. As a result at most one empty line remains between two lines of text.
 * 
 * @author BA
 * @date 2009-07-03
 */
@WebService(endpointInterface = "org.ow2.weblab.core.services.Analyser")
public class BlankLineRemoverService implements Analyser {


	/**
	 * Matches the content of a line that contains nothing but whitespace characters. Compiled once: <code>Pattern</code> instances are immutable and can be
	 * shared between calls.
	 */
	private static final Pattern WHITESPACE_ONLY_LINE = Pattern.compile("^\\s+$", Pattern.MULTILINE);


	/**
	 * Matches a run of at least two consecutive <code>\r</code> or <code>\n</code> characters.
	 */
	private static final Pattern LINE_BREAK_RUN = Pattern.compile("[\\r\\n]{2,}");


	/**
	 * The <code>Log</code> to be used.
	 */
	private final Log log;


	/**
	 * Creates the service and initialises its logger using the runtime class of the instance.
	 */
	public BlankLineRemoverService() {
		super();
		this.log = LogFactory.getLog(this.getClass());
	}


	/**
	 * Replaces in place the content of each text section of the received document by its version without successive blank lines.
	 * 
	 * @param args
	 *            The <code>ProcessArgs</code> holding the document to be cleaned.
	 * @return A <code>ProcessReturn</code> containing the very same resource instance as the one received, with its text contents modified.
	 * @throws InvalidParameterException
	 *             If <code>args</code> is <code>null</code> or contains a <code>Resource</code> that is <code>null</code> or not a <code>Document</code>.
	 */
	@Override
	public ProcessReturn process(final ProcessArgs args) throws InvalidParameterException {
		this.log.debug("Start of process method.");
		final List<Text> textList = this.checkArgs(args);
		final Resource res = args.getResource();
		this.log.info("Beginning process with MediaUnit: " + res.getUri() + ".");

		for (final Text text : textList) {
			// checkArgs has already discarded the texts without content, hence getContent() is not null here.
			text.setContent(BlankLineRemoverService.collapseBlankLines(text.getContent()));
		}

		final ProcessReturn pr = new ProcessReturn();
		pr.setResource(res);

		this.log.info("MediaUnit: " + res.getUri() + " successfully processed.");

		return pr;
	}


	/**
	 * Trims <code>content</code> and reduces every run of blank (empty or whitespace-only) lines to a single empty line.
	 * 
	 * @param content
	 *            The text to clean; must not be <code>null</code>.
	 * @return The cleaned text. A single isolated <code>\r</code> is left untouched since only <code>\r\n</code> pairs and runs of at least two line break
	 *         characters are rewritten.
	 */
	private static String collapseBlankLines(final String content) {
		String cleaned = content.trim();
		// Whitespace-only lines become line breaks so that they are merged into the surrounding run of line breaks below.
		cleaned = BlankLineRemoverService.WHITESPACE_ONLY_LINE.matcher(cleaned).replaceAll("\n");
		cleaned = cleaned.replace("\r\n", "\n");
		// No "\r\n" pair is left at this point, a single character class is enough to catch every run of line breaks.
		return BlankLineRemoverService.LINE_BREAK_RUN.matcher(cleaned).replaceAll("\n\n");
	}


	/**
	 * Checks that <code>args</code> contains a document and returns the text sections to be processed.
	 * 
	 * @param args
	 *            The <code>ProcessArgs</code>
	 * @return The <code>Text</code> sub-resources selected by <code>ResourceUtil.getSelectedSubResources</code> in the document, without those having a
	 *         <code>null</code> content.
	 * @throws InvalidParameterException
	 *             If <code>args</code> is <code>null</code> or contains a <code>Resource</code> that is <code>null</code> or not a <code>Document</code>.
	 */
	private List<Text> checkArgs(final ProcessArgs args) throws InvalidParameterException {
		if (args == null) {
			throw new InvalidParameterException("ProcessArgs was null.", "ProcessArgs was null.");
		}
		final Resource res = args.getResource();
		if (res == null) {
			throw new InvalidParameterException("Resource in ProcessArgs was null.", "Resource in ProcessArgs was null.");
		}
		if (!(res instanceof Document)) {
			throw new InvalidParameterException("Resource in ProcessArgs was not a Document, but a " + res.getClass().getName() + ".",
					"URI of the buggy resource: " + res.getUri() + ".");
		}

		final List<Text> texts = ResourceUtil.getSelectedSubResources(res, Text.class);
		// Texts without content have nothing to clean; they are removed from the list that will be iterated by the caller.
		for (final ListIterator<Text> textIt = texts.listIterator(); textIt.hasNext();) {
			if (textIt.next().getContent() == null) {
				textIt.remove();
			}
		}

		return texts;
	}

}
