/**
 * WEBLAB: Service oriented integration platform for media mining and intelligence applications
 * 
 * Copyright (C) 2004 - 2009 EADS DEFENCE AND SECURITY SYSTEMS
 * 
 * This library is free software; you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License along with this
 * library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301 USA
 */

package org.ow2.weblab.services.crawler.folder.impl;

import java.io.File;
import java.io.FileFilter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.jws.WebMethod;
import javax.jws.WebParam;
import javax.jws.WebResult;
import javax.jws.WebService;
import javax.jws.soap.SOAPBinding;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.ow2.weblab.content.ContentManager;
import org.ow2.weblab.core.extended.exception.WebLabCheckedException;
import org.ow2.weblab.core.extended.exception.WebLabUncheckedException;
import org.ow2.weblab.core.extended.factory.ResourceFactory;
import org.ow2.weblab.core.extended.ontologies.WebLab;
import org.ow2.weblab.core.extended.properties.PropertiesLoader;
import org.ow2.weblab.core.helper.PoKHelper;
import org.ow2.weblab.core.helper.RDFHelperFactory;
import org.ow2.weblab.core.model.ComposedResource;
import org.ow2.weblab.core.services.AccessDeniedException;
import org.ow2.weblab.core.services.Configurable;
import org.ow2.weblab.core.services.ContentNotAvailableException;
import org.ow2.weblab.core.services.EmptyQueueException;
import org.ow2.weblab.core.services.InsufficientResourcesException;
import org.ow2.weblab.core.services.InvalidParameterException;
import org.ow2.weblab.core.services.QueueManager;
import org.ow2.weblab.core.services.ServiceNotConfiguredException;
import org.ow2.weblab.core.services.SourceReader;
import org.ow2.weblab.core.services.UnexpectedException;
import org.ow2.weblab.core.services.UnsupportedRequestException;
import org.ow2.weblab.core.services.configurable.ConfigureArgs;
import org.ow2.weblab.core.services.configurable.ConfigureReturn;
import org.ow2.weblab.core.services.configurable.ResetConfigurationArgs;
import org.ow2.weblab.core.services.configurable.ResetConfigurationReturn;
import org.ow2.weblab.core.services.queuemanager.NextResourceArgs;
import org.ow2.weblab.core.services.queuemanager.NextResourceReturn;
import org.ow2.weblab.core.services.sourcereader.GetResourceArgs;
import org.ow2.weblab.core.services.sourcereader.GetResourceReturn;
import org.ow2.weblab.crawler.FolderCrawler;
import org.ow2.weblab.crawler.filter.ExtensionFilter;

/**
 * Web service that crawls folders of the local file system and exposes the
 * crawled documents as WebLab resources.
 * <p>
 * The folders to crawl are either provided per usage context through
 * {@link #configure(ConfigureArgs)}, or read from the properties file
 * {@link #FOLDERS_FILE} when a usage context has not been configured. The
 * crawl itself is started lazily on the first call to
 * {@link #getResource(GetResourceArgs)} for a given usage context.
 * <p>
 * NOTE(review): the per usage context state is held in plain {@link HashMap}
 * instances without any synchronisation; confirm how the container shares
 * this instance between requests.
 * 
 * @author jdoucy, ymombrun
 * @date 5 June 2008
 */
@WebService()
@SOAPBinding(parameterStyle = SOAPBinding.ParameterStyle.BARE)
public class FolderCrawlerService implements Configurable, QueueManager, SourceReader {

	/**
	 * Logger of this service.
	 */
	Log logger = LogFactory.getLog(FolderCrawlerService.class);

	/**
	 * Property denoting the folder to crawl
	 */
	private static final String FOLDER_PROPERTY = WebLab.PROCESSING_PROPERTY_NAMESPACE
			+ "crawler/folder";

	/**
	 * ContentManager handed to every created {@link FolderCrawler}; lazily
	 * obtained in {@link #getResource(GetResourceArgs)}.
	 */
	protected ContentManager contentManager = null;

	/**
	 * The default folder to crawl
	 */
	public static final String DEFAULT_FOLDER = "toBeCrawled";

	/**
	 * The properties file defining the folders to crawl (and the filter
	 * options)
	 */
	public static final String FOLDERS_FILE = "FolderCrawlerService.config";

	/**
	 * Key of the ';' separated list of folders to crawl
	 */
	private static final String FOLDERS = "folders";

	/**
	 * Key of the ';' separated list of extensions given to the filter
	 */
	private static final String EXTENSIONS = "extensions";

	/**
	 * Key of the boolean given to the extension filter
	 */
	private static final String REJECT = "reject";

	/**
	 * Key of the boolean telling whether folders are crawled recursively
	 */
	private static final String RECURSIVE = "recursive";

	/**
	 * Separator used inside list values of the properties file
	 */
	private static final String LIST_SEPARATOR = ";";

	/**
	 * Content of {@link #FOLDERS_FILE}; <code>null</code> until first needed
	 */
	protected Map<String, String> props = null;

	/**
	 * Folders to crawl, by usage context, as set by
	 * {@link #configure(ConfigureArgs)}
	 */
	protected Map<String, List<String>> usContextAndFolderToCrawl = new HashMap<String, List<String>>();

	/**
	 * Started crawlers, by usage context
	 */
	protected Map<String, List<FolderCrawler>> usContextAndFolderCrawler = new HashMap<String, List<FolderCrawler>>();

	/**
	 * Creates and starts one crawler per folder for the given usage context
	 * and registers them in {@link #usContextAndFolderCrawler}. Folders for
	 * which the crawler cannot be created are logged and skipped.
	 * 
	 * @param uc
	 *            The usage context used as key
	 * @param alreadyConfigured
	 *            Whether the folders shall be taken from the configuration
	 *            received through {@link #configure(ConfigureArgs)}. When
	 *            <code>false</code>, or when no such configuration exists,
	 *            the folders are read from the properties file.
	 */
	protected void startCrawl(String uc, boolean alreadyConfigured) {
		List<String> filesToCrawl = null;
		if (alreadyConfigured) {
			// Loads from configured configuration
			filesToCrawl = this.usContextAndFolderToCrawl.get(uc);
			if (filesToCrawl == null) {
				// Guard against a caller claiming a configuration that does
				// not exist; iterating over null would fail below.
				logger.warn("No configured folders found for usage context '"
						+ uc + "'.");
			}
		}
		if (filesToCrawl == null) {
			// Loads default configuration
			logger.info("Folder will be loaded from config file.");
			filesToCrawl = this.getFoldersFromConfig();
		}

		final List<FolderCrawler> fcList = new ArrayList<FolderCrawler>();
		final boolean recursive = this.getRecursiveFromConfig();
		final FileFilter ff = this.getFileFilterFromConfig();
		for (final String toCrawl : filesToCrawl) {
			final FolderCrawler fc;
			try {
				fc = new FolderCrawler(this.contentManager, new File(toCrawl),
						ff, recursive);
			} catch (final WebLabCheckedException wlce) {
				logger.warn("Unable to crawl folder: " + toCrawl, wlce);
				continue;
			}
			fc.startCrawl();
			fcList.add(fc);
		}
		this.usContextAndFolderCrawler.put(uc, fcList);
	}

	/**
	 * Loads the properties file if it has not been loaded yet.
	 */
	private void ensurePropsLoaded() {
		if (this.props == null) {
			this.loadProps();
		}
	}

	/**
	 * Reads a boolean from the properties file.
	 * 
	 * @param key
	 *            The key of the property
	 * @return The parsed value, or <code>false</code> (with a warning) when
	 *         the key is not present
	 */
	private boolean getBooleanFromConfig(final String key) {
		this.ensurePropsLoaded();
		if (this.props.containsKey(key)) {
			return Boolean.parseBoolean(this.props.get(key));
		}
		logger.warn("Unable to load " + key + " from config file. "
				+ "false will be used.");
		return false;
	}

	/**
	 * Splits a ';' separated value into its trimmed, non empty items.
	 * 
	 * @param raw
	 *            The raw property value; may be <code>null</code>
	 * @return The items in their original order; never <code>null</code>
	 */
	private static List<String> splitList(final String raw) {
		final List<String> items = new ArrayList<String>();
		if (raw == null) {
			return items;
		}
		for (final String part : raw.split(FolderCrawlerService.LIST_SEPARATOR)) {
			final String trimmed = part.trim();
			if (trimmed.length() > 0) {
				items.add(trimmed);
			}
		}
		return items;
	}

	/**
	 * @return The value of the <code>recursive</code> property, or
	 *         <code>false</code> when it is not defined
	 */
	private boolean getRecursiveFromConfig() {
		return this.getBooleanFromConfig(FolderCrawlerService.RECURSIVE);
	}

	/**
	 * Builds the file filter from the <code>reject</code> and
	 * <code>extensions</code> properties.
	 * 
	 * @return An {@link ExtensionFilter} built with the listed extensions
	 *         (possibly none) and the reject flag
	 */
	private FileFilter getFileFilterFromConfig() {
		final boolean rej = this
				.getBooleanFromConfig(FolderCrawlerService.REJECT);

		final List<String> ext;
		if (this.props.containsKey(FolderCrawlerService.EXTENSIONS)) {
			ext = FolderCrawlerService.splitList(this.props
					.get(FolderCrawlerService.EXTENSIONS));
			if (ext.isEmpty()) {
				logger.info("Extensions is empty; from "
						+ FolderCrawlerService.FOLDERS_FILE);
			}
		} else {
			logger.warn("Unable to get extensions from "
					+ FolderCrawlerService.FOLDERS_FILE);
			ext = new ArrayList<String>();
		}
		return new ExtensionFilter(ext, rej);
	}

	/**
	 * Not implemented by this service.
	 * 
	 * @param args
	 *            Ignored
	 * @return Always <code>null</code>
	 */
	@Override
	@WebMethod(action = "next")
	@WebResult(name = "nextResourceReturn", targetNamespace = "http://weblab.ow2.org/core/1.2/services/queuemanager", partName = "return")
	public NextResourceReturn nextResource(
			@WebParam(name = "nextResourceArgs", targetNamespace = "http://weblab.ow2.org/core/1.2/services/queuemanager", partName = "args") NextResourceArgs args)
			throws AccessDeniedException, ContentNotAvailableException,
			EmptyQueueException, InsufficientResourcesException,
			InvalidParameterException, ServiceNotConfiguredException,
			UnexpectedException, UnsupportedRequestException {
		// TODO(review): queue access is not implemented; callers receive
		// null.
		return null;
	}

	/**
	 * Returns a window of the documents crawled for a usage context,
	 * starting the crawl first if it has not been started yet (using the
	 * received configuration when there is one, the properties file
	 * otherwise).
	 * 
	 * @param args
	 *            Holds the usage context, the offset and the limit. A limit
	 *            lower than or equal to 0 means no limit.
	 * @return A wrapper around the collection of crawled resources
	 * @throws AccessDeniedException
	 *             If the content manager cannot be obtained
	 * @throws InvalidParameterException
	 *             If no usage context is provided
	 */
	@WebMethod(action = "getResource")
	@WebResult(name = "getResourceReturn", targetNamespace = "http://weblab.ow2.org/core/1.2/services/sourcereader", partName = "return")
	@Override
	public GetResourceReturn getResource(
			@WebParam(name = "getResourceArgs", targetNamespace = "http://weblab.ow2.org/core/1.2/services/sourcereader", partName = "args") GetResourceArgs args)
			throws AccessDeniedException, ContentNotAvailableException,
			InsufficientResourcesException, InvalidParameterException,
			ServiceNotConfiguredException, UnexpectedException,
			UnsupportedRequestException {

		int limit = -1;
		int offset = 0;
		// The usageContext URI is used as key
		String uc = null;
		if (args != null) {
			limit = args.getLimit();
			offset = args.getOffset();
			uc = args.getUsageContext();
		}
		logger.info("GetCrawledDocuments method called. " + "offset: " + offset
				+ "; limit: " + limit);

		if (this.contentManager == null) {
			try {
				this.contentManager = ContentManager.getInstance();
			} catch (final WebLabUncheckedException wlue) {
				// The fault only carries the message: keep the stack trace
				// in the log.
				logger.error("Unable to get the content manager. ", wlue);
				throw new AccessDeniedException(
						"Unable to get the content manager. ",
						wlue.getMessage());
			}
		}

		if (uc == null || uc.length() == 0) {
			throw new InvalidParameterException(
					"Unable to get resource on folder crawler.",
					"Usage context must not be null.");
		}
		if (!this.usContextAndFolderCrawler.containsKey(uc)) {
			// Not yet started: use the received configuration if any, the
			// default one otherwise.
			this.startCrawl(uc, this.usContextAndFolderToCrawl.containsKey(uc));
		}

		final List<FolderCrawler> fcList = this.usContextAndFolderCrawler
				.get(uc);
		final ComposedResource resCol = FolderCrawlerService.getRCFromList(
				fcList, offset, limit);

		if (resCol.getResource().size() == 0) {
			logger.info("No resources in the resource collection "
					+ "returned by the crawler...");
			logger.info("FoldersCrawler were: " + fcList);
		}
		final GetResourceReturn ret = new GetResourceReturn();
		ret.setResources(resCol);
		return ret;
	}

	/**
	 * Gathers in a single collection the window <code>[offset, offset +
	 * limit[</code> of the documents of several crawlers, the crawlers being
	 * considered one after the other as a single sequence.
	 * 
	 * @param fcList
	 *            The crawlers to read from; may be <code>null</code> or empty
	 * @param offset
	 *            Global index of the first document; negative values are
	 *            treated as 0
	 * @param limit
	 *            Maximum number of documents; values lower than or equal to
	 *            0 mean no limit
	 * @return A new collection, possibly empty; never <code>null</code>
	 */
	private static ComposedResource getRCFromList(List<FolderCrawler> fcList,
			int offset, int limit) {
		if (fcList == null || fcList.isEmpty()) {
			LogFactory.getLog(FolderCrawlerService.class).warn(
					"No folder to crawl");
			return ResourceFactory.createResource("folderCrawlerService",
					"emptyCollection" + System.currentTimeMillis(),
					ComposedResource.class);
		}

		final int usedOffset = offset < 0 ? 0 : offset;
		final int usedLimit = limit <= 0 ? Integer.MAX_VALUE : limit;

		final ComposedResource resCol = ResourceFactory.createResource(
				"folderCrawlerService",
				"tempCollection-" + System.currentTimeMillis(),
				ComposedResource.class);

		// Number of files held by the crawlers already visited.
		int cpt = 0;
		// Number of resources that may still be added. Tracking the
		// remainder (instead of comparing against offset + limit) avoids an
		// int overflow when no limit is requested.
		int remaining = usedLimit;
		for (final FolderCrawler fc : fcList) {
			if (remaining <= 0) {
				break;
			}
			final int nbFiles = fc.getNbFiles();
			if (cpt + nbFiles <= usedOffset) {
				// fc is entirely before the offset
				cpt += nbFiles;
				continue;
			}
			// Offset inside this crawler: only the first contributing
			// crawler may have to skip files, the next ones start at 0.
			final int localOffset = Math.max(0, usedOffset - cpt);
			final int sizeBefore = resCol.getResource().size();
			// NOTE(review): getCrawledDocuments is assumed to take
			// (offset, limit) local to the crawler - confirm.
			resCol.getResource().addAll(
					fc.getCrawledDocuments(localOffset, remaining)
							.getResource());
			remaining -= resCol.getResource().size() - sizeBefore;
			cpt += nbFiles;
		}

		return resCol;
	}

	/**
	 * Loads the content of {@link #FOLDERS_FILE} into {@link #props}.
	 */
	protected void loadProps() {
		this.props = PropertiesLoader
				.loadProperties(FolderCrawlerService.FOLDERS_FILE);
	}

	/**
	 * @return The folder names listed in the file
	 *         FolderCrawlerService.FOLDERS_FILE, or a list containing only
	 *         {@link #DEFAULT_FOLDER} when none is defined
	 */
	private List<String> getFoldersFromConfig() {
		this.ensurePropsLoaded();
		final List<String> folders;
		if (this.props.containsKey(FolderCrawlerService.FOLDERS)) {
			folders = FolderCrawlerService.splitList(this.props
					.get(FolderCrawlerService.FOLDERS));
			if (folders.isEmpty()) {
				logger.warn("Unable to get folders from "
						+ FolderCrawlerService.FOLDERS_FILE + ". ( "
						+ this.props.get(FolderCrawlerService.FOLDERS)
						+ ") Default folder will be used.");
				folders.add(FolderCrawlerService.DEFAULT_FOLDER);
			}
		} else {
			logger.warn("Unable to load folders from "
					+ FolderCrawlerService.FOLDERS_FILE
					+ ". Default folder will be used.");
			folders = new ArrayList<String>();
			folders.add(FolderCrawlerService.DEFAULT_FOLDER);
		}
		return folders;
	}

	/**
	 * Registers the folders to crawl for a usage context. Any state
	 * previously held for this usage context (configured folders and started
	 * crawlers) is dropped first, so that the next call to
	 * {@link #getResource(GetResourceArgs)} starts a crawl on the new
	 * folders.
	 * 
	 * @param args
	 *            Holds the usage context and the configuration, in which the
	 *            folders are the values of the property
	 *            {@link #FOLDER_PROPERTY} on the usage context
	 * @return An empty return wrapper
	 * @throws InvalidParameterException
	 *             If args, its usage context or its configuration is
	 *             <code>null</code>, or if the configuration contains no
	 *             folder
	 */
	@WebMethod(action = "configure")
	@WebResult(name = "configureReturn", targetNamespace = "http://weblab.ow2.org/core/1.2/services/configurable", partName = "return")
	@Override
	public ConfigureReturn configure(
			@WebParam(name = "configureArgs", targetNamespace = "http://weblab.ow2.org/core/1.2/services/configurable", partName = "args") ConfigureArgs args)
			throws AccessDeniedException, ContentNotAvailableException,
			InsufficientResourcesException, InvalidParameterException,
			UnexpectedException, UnsupportedRequestException {
		logger.info("Configure method called.");

		// Validate before touching args or any state.
		if (args == null || args.getUsageContext() == null
				|| args.getConfiguration() == null) {
			throw new InvalidParameterException(
					"Unable to configure folder crawler.",
					"ConfigureArgs was invalid (either it self, "
							+ "usageContext, it's uri "
							+ "or configuration was null");
		}
		final String uc = args.getUsageContext();

		// Reset the previous state. Both maps are cleared unconditionally:
		// a crawl started with the default configuration has no entry in
		// usContextAndFolderToCrawl but must be forgotten as well, otherwise
		// the new configuration would never be used.
		this.usContextAndFolderCrawler.remove(uc);
		this.usContextAndFolderToCrawl.remove(uc);

		final PoKHelper helper = RDFHelperFactory.getPoKHelper(args
				.getConfiguration());
		final List<String> list = new ArrayList<String>();
		list.addAll(helper.getLitsOnPredSubj(uc,
				FolderCrawlerService.FOLDER_PROPERTY));
		list.addAll(helper.getRessOnPredSubj(uc,
				FolderCrawlerService.FOLDER_PROPERTY));

		if (list.isEmpty()) {
			throw new InvalidParameterException(
					"Unable to configure folder crawler.", "No property '"
							+ FolderCrawlerService.FOLDER_PROPERTY
							+ "' in the configuration annotation.");
		}

		this.usContextAndFolderToCrawl.put(uc, list);

		return new ConfigureReturn();
	}

	/**
	 * Forgets the configured folders and the started crawlers of a usage
	 * context.
	 * 
	 * @param args
	 *            Holds the usage context to reset
	 * @return An empty return wrapper
	 * @throws InvalidParameterException
	 *             If args or its usage context is <code>null</code>, or if
	 *             nothing is known for this usage context
	 */
	@WebMethod(action = "resetConfiguration")
	@WebResult(name = "resetConfigurationReturn", targetNamespace = "http://weblab.ow2.org/core/1.2/services/configurable", partName = "return")
	@Override
	public ResetConfigurationReturn resetConfiguration(
			@WebParam(name = "resetConfigurationArgs", targetNamespace = "http://weblab.ow2.org/core/1.2/services/configurable", partName = "args") ResetConfigurationArgs args)
			throws AccessDeniedException, ContentNotAvailableException,
			InsufficientResourcesException, InvalidParameterException,
			UnexpectedException, UnsupportedRequestException {
		logger.info("ResetConfiguration method called.");
		if (args == null || args.getUsageContext() == null) {
			throw new InvalidParameterException(
					"Unable to reset configuration on folder crawler.",
					"Usage context null.");
		}
		final String uc = args.getUsageContext();

		// A usage context is known either because it has been configured or
		// because a crawl has been started for it with the default
		// configuration.
		final boolean known = this.usContextAndFolderToCrawl.containsKey(uc)
				|| this.usContextAndFolderCrawler.containsKey(uc);
		if (!known) {
			throw new InvalidParameterException(
					"Unable to reset configuration on folder crawler.",
					"Usage context unknowed.");
		}
		this.usContextAndFolderCrawler.remove(uc);
		this.usContextAndFolderToCrawl.remove(uc);
		return new ResetConfigurationReturn();
	}

}
