/**
 * WEBLAB: Service oriented integration platform for media mining and intelligence applications
 * 
 * Copyright (C) 2004 - 2009 EADS DEFENCE AND SECURITY SYSTEMS
 * 
 * This library is free software; you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License along with this
 * library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301 USA
 */

package org.ow2.weblab.services.sourcereader;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.jws.WebMethod;
import javax.jws.WebParam;
import javax.jws.WebResult;
import javax.jws.WebService;
import javax.jws.soap.SOAPBinding;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.ow2.weblab.services.sourcereader.filter.FolderAndExtensionFilter;
import org.weblab_project.core.exception.WebLabCheckedException;
import org.weblab_project.core.factory.AnnotationFactory;
import org.weblab_project.core.factory.ResourceFactory;
import org.weblab_project.core.helper.PoKHelper;
import org.weblab_project.core.helper.RDFHelperFactory;
import org.weblab_project.core.jaxb.WebLabMarshaler;
import org.weblab_project.core.model.Annotation;
import org.weblab_project.core.model.Document;
import org.weblab_project.core.model.ResourceCollection;
import org.weblab_project.core.ontologies.DublinCore;
import org.weblab_project.core.properties.PropertiesLoader;
import org.weblab_project.services.configurable.Configurable;
import org.weblab_project.services.configurable.ConfigureException;
import org.weblab_project.services.configurable.ResetConfigurationException;
import org.weblab_project.services.configurable.types.ConfigureArgs;
import org.weblab_project.services.configurable.types.ConfigureReturn;
import org.weblab_project.services.configurable.types.ResetConfigurationArgs;
import org.weblab_project.services.configurable.types.ResetConfigurationReturn;
import org.weblab_project.services.exception.WebLabException;
import org.weblab_project.services.sourcereader.GetResourceException;
import org.weblab_project.services.sourcereader.SourceReader;
import org.weblab_project.services.sourcereader.types.GetResourceArgs;
import org.weblab_project.services.sourcereader.types.GetResourceReturn;

/**
 * Source reader web service that serves WebLab {@link Document}s read from the files of a folder.
 * 
 * <p>
 * The folder to crawl is either the one registered for a usage context through
 * {@link #configure(ConfigureArgs)}, or the one given by the {@value #DEFAULT_FOLDER_URL_PROPERTY_NAME}
 * property of the service configuration file when the request carries no usage context.
 * </p>
 * 
 * <p>
 * The first call to {@link #getResource(GetResourceArgs)} for a usage context lists the files of the folder
 * (honouring the filters of the configuration file) and caches that list; following calls page through the
 * cached list using the offset and limit of the request. The cached list is dropped when the folder turned out
 * to be empty, when the offset runs past its end, or when the usage context is reset or reconfigured with
 * another folder.
 * </p>
 */
@WebService()
@SOAPBinding(parameterStyle = SOAPBinding.ParameterStyle.BARE)
public class FolderSourceReaderService implements SourceReader, Configurable {

	private static final Log logger = LogFactory.getLog(FolderSourceReaderService.class);

	public static final String SERVICE_URI = "http://weblab-project.org/services/FolderSourceReaderConfiguration";
	public static final String WEBLAB_GATHERING_NAMESPACE = "http://weblab-project.org/core/model/property/gathering/";
	public static final String CRAWL_URL_PROPERTY = WEBLAB_GATHERING_NAMESPACE + "crawlUrl";

	private static final String SERVICE_CONFIG_FILE = "FolderSourceReaderService.config";
	public static final String DEFAULT_FOLDER_URL_PROPERTY_NAME = "defaultFolderUrl";
	public static final String RECURSIVE_PROPERTY_NAME = "recursive";
	public static final String DELETE_FILE_AFTER_PROPERTY_NAME = "deleteFileAfter";
	public static final String EXTENSIONS_FILTER_PROPERTY_NAME = "extensionsFilter";
	public static final String EXTENSIONS_REJECT_PROPERTY_NAME = "extensionsReject";
	public static final String FOLDERS_FILTER_PROPERTY_NAME = "foldersFilter";
	public static final String FOLDERS_REJECT_PROPERTY_NAME = "foldersReject";
	public static final String FOLDERS_EXTENSIONS_FILTER_PROPERTY_NAME = "foldersExtensionsFilter";
	public static final String FOLDERS_EXTENSIONS_REJECT_PROPERTY_NAME = "foldersExtensionsReject";
	public static final String ANNOT_SOURCE_PROPERTY_NAME = "annotSource";
	public static final String SOURCE_PROPERTY_NAME = "sourceProperty";

	private static final String DEFAULT_USAGE_CONTEXT = "default_context";
	protected static final String CRAWLER_ID = "crawlerFolder";

	/** Folder to crawl, keyed by usage context URI; filled by {@link #configure(ConfigureArgs)}. */
	private static final Map<String, String> usContextAndFolderUriMapping = Collections.synchronizedMap(new HashMap<String, String>());

	/** Cached file listing, keyed by usage context URI (or {@link #DEFAULT_USAGE_CONTEXT}). */
	private static final Map<String, List<File>> usContextAndFileListMapping = Collections.synchronizedMap(new HashMap<String, List<File>>());

	/** Values read from the service configuration file; loaded by the first instance created. */
	private static Map<String, String> props;

	/**
	 * Creates the service and loads the configuration file if no previous instance did it.
	 */
	public FolderSourceReaderService() {
		if (props == null) {
			loadProps();
		}
	}

	/**
	 * Returns a page of the documents found in the folder bound to the usage context of the request.
	 * 
	 * <p>
	 * Files that no longer exist, are not readable, or cannot be unmarshaled as a {@link Document} are removed
	 * from the cached list and do not count towards the limit. When {@value #ANNOT_SOURCE_PROPERTY_NAME} is
	 * enabled, the canonical path of the file is annotated on the document. When
	 * {@value #DELETE_FILE_AFTER_PROPERTY_NAME} is enabled, each file is deleted once loaded.
	 * </p>
	 * 
	 * @param args
	 *            The request; may be <code>null</code>, in which case the default folder is used with offset 0
	 *            and no limit. A negative offset is treated as 0 and a limit lower than 1 as unlimited.
	 * @return A wrapper around a collection holding the loaded documents; the collection is empty when the folder
	 *         is empty or when the offset is past the end of the file list.
	 * @throws GetResourceException
	 *             If the file list has to be built but no folder is known for the usage context.
	 */
	@Override
	@WebMethod(action = "getResource")
	@WebResult(name = "getResourceReturn", targetNamespace = "http://weblab-project.org/services/sourcereader/types", partName = "return")
	public GetResourceReturn getResource(@WebParam(name = "getResourceArgs", targetNamespace = "http://weblab-project.org/services/sourcereader/types", partName = "args") GetResourceArgs args)
			throws GetResourceException {

		int limit = -1;
		int offset = 0;
		if (args != null) {
			limit = args.getLimit();
			offset = args.getOffset();
		}
		logger.info("GetResource method called. " + "offset: " + offset + "; limit: " + limit);

		final ResourceCollection col = ResourceFactory.createResource(CRAWLER_ID, "tempCollection-" + System.currentTimeMillis(), ResourceCollection.class);
		final GetResourceReturn ret = new GetResourceReturn();
		ret.setResources(col);

		// The usage context URI is the key of both the folder mapping and the file list cache.
		final String usContext;
		final String urlToCrawl;
		if (args != null && args.getUsageContext() != null && args.getUsageContext().getUri() != null) {
			usContext = args.getUsageContext().getUri();
			urlToCrawl = usContextAndFolderUriMapping.get(usContext);
		} else {
			logger.info("Default folder URL is used for crawling.");
			usContext = DEFAULT_USAGE_CONTEXT;
			urlToCrawl = props.get(DEFAULT_FOLDER_URL_PROPERTY_NAME);
		}
		logger.info("URL to crawl: " + urlToCrawl);

		// If it's first call, initialize file list
		if (!usContextAndFileListMapping.containsKey(usContext)) {
			if (urlToCrawl == null) {
				// new File(null) would fail with an undeclared NullPointerException; report a proper fault instead.
				throw missingFolder(usContext);
			}
			logger.info("First call, list files.");
			usContextAndFileListMapping.put(usContext, new ArrayList<File>());
			this.listAndAddFiles(new File(urlToCrawl), this.getFileFilterFromConfig(), usContext);
		}

		// Keep a single reference to the list so that the checks and the loop below work on the same object.
		final List<File> files = usContextAndFileListMapping.get(usContext);
		if (files == null || files.isEmpty()) {
			logger.warn("Folder (" + urlToCrawl + ") was empty.");
			usContextAndFileListMapping.remove(usContext);
			return ret;
		}

		if (offset < 0) {
			logger.warn("Offset was negative, 0 used instead.");
			offset = 0;
		}

		if (offset >= files.size()) {
			logger.warn("Every files have already been crawled.");
			usContextAndFileListMapping.remove(usContext);
			return ret;
		}

		if (limit <= 0) {
			logger.info("Limit was null or negative. " + "Integer.MAX_VALUE will be used.");
			limit = Integer.MAX_VALUE;
		}

		// These settings do not change during the call: read them (and log about them) once, not once per file.
		final boolean annotSource = booleanProperty(ANNOT_SOURCE_PROPERTY_NAME);
		final String sourceProperty;
		if (props.containsKey(SOURCE_PROPERTY_NAME)) {
			sourceProperty = props.get(SOURCE_PROPERTY_NAME);
		} else {
			logger.warn("Unable to load source annotation property from config file. '" + DublinCore.SOURCE_PROPERTY_NAME + "' will be used.");
			sourceProperty = DublinCore.SOURCE_PROPERTY_NAME;
		}
		final boolean deleteFileAfter = Boolean.parseBoolean(props.get(DELETE_FILE_AFTER_PROPERTY_NAME));

		// cpt only advances when a document has been loaded: removing an entry shifts the next one to index cpt.
		int cpt = offset;
		while (cpt < files.size() && cpt - offset < limit) {
			final File file = files.get(cpt);
			if (!file.exists() || !file.isFile() || !file.canRead()) {
				// The file changed of status between listing and loading.
				files.remove(cpt);
				logger.warn("File (" + file + ") is not crawlable, remove from list.");
				continue;
			}

			logger.debug("Loading file: " + file.getAbsolutePath());
			try {
				final Document document = new WebLabMarshaler().unmarshal(file, Document.class);
				if (annotSource) {
					final Annotation annot = AnnotationFactory.createAndLinkAnnotation(document);
					final PoKHelper pokHelper = RDFHelperFactory.getPoKHelper(annot);
					pokHelper.createLitStat(document.getUri(), sourceProperty, file.getCanonicalPath());
				}
				col.getResource().add(document);
			} catch (final WebLabCheckedException wlce) {
				files.remove(cpt);
				logger.error("Resource (" + file + ") could not be unmarshaled, remove from list.", wlce);
				continue;
			} catch (final IOException ioe) {
				files.remove(cpt);
				logger.error("Could not get canonical path for file: " + file.getAbsolutePath() + ", remove from list.", ioe);
				continue;
			}

			// Delete resource file if configured; fall back on a deletion at JVM exit.
			if (deleteFileAfter && !file.delete()) {
				file.deleteOnExit();
			}

			cpt++;
		}

		return ret;
	}

	/**
	 * Adds the files of a folder accepted by the filter to the file list of a usage context. Sub-folders accepted
	 * by the filter are visited too when {@value #RECURSIVE_PROPERTY_NAME} is enabled. Nothing is done when
	 * <code>folderToCrawl</code> is not a directory or when its content cannot be listed.
	 * 
	 * @param folderToCrawl
	 *            The folder to list.
	 * @param filter
	 *            The filter applied to each entry of the folder.
	 * @param usContextUri
	 *            The key of the file list to complete; the list is created if it does not exist yet.
	 */
	protected void listAndAddFiles(final File folderToCrawl, final FileFilter filter, final String usContextUri) {
		if (!folderToCrawl.isDirectory()) {
			return;
		}
		logger.debug("Add content of folder: " + folderToCrawl.getAbsolutePath());

		// listFiles returns null (not an empty array) when the folder cannot be read.
		final File[] children = folderToCrawl.listFiles(filter);
		if (children == null) {
			logger.warn("Unable to list content of folder: " + folderToCrawl.getAbsolutePath());
			return;
		}

		List<File> files = usContextAndFileListMapping.get(usContextUri);
		if (files == null) {
			files = new ArrayList<File>();
			usContextAndFileListMapping.put(usContextUri, files);
		}

		final boolean recursive = Boolean.parseBoolean(props.get(RECURSIVE_PROPERTY_NAME));
		for (final File child : children) {
			if (child.isDirectory()) {
				if (recursive) {
					this.listAndAddFiles(child, filter, usContextUri);
				}
			} else if (!files.contains(child)) {
				logger.trace("Add file: " + child.getAbsolutePath());
				files.add(child);
			}
		}
	}

	/**
	 * Binds a folder to crawl to a usage context. The folder is the first {@link #CRAWL_URL_PROPERTY} literal
	 * stated on {@link #SERVICE_URI} in the configuration. When the folder differs from the one previously bound
	 * to the usage context, the cached file list of that context is dropped so that the new folder gets listed.
	 * 
	 * @param args
	 *            The usage context and its configuration.
	 * @return An empty <code>ConfigureReturn</code>.
	 * @throws ConfigureException
	 *             If <code>args</code>, its usage context, the URI of the usage context or its configuration is
	 *             <code>null</code>, or if the configuration holds no {@link #CRAWL_URL_PROPERTY}.
	 */
	@Override
	@WebMethod(action = "configure")
	@WebResult(name = "configureReturn", targetNamespace = "http://weblab-project.org/services/configurable/types", partName = "return")
	public ConfigureReturn configure(@WebParam(name = "configureArgs", targetNamespace = "http://weblab-project.org/services/configurable/types", partName = "args") final ConfigureArgs args)
			throws ConfigureException {
		if (args == null || args.getUsageContext() == null || args.getConfiguration() == null || args.getUsageContext().getUri() == null) {
			final WebLabException wle = new WebLabException();
			wle.setErrorId("E1");
			wle.setErrorMessage("Invalid parameter");

			throw new ConfigureException("ConfigureArgs was invalid (either it self, usageContext, its uri or configuration was null)", wle);
		}

		final PoKHelper helper = RDFHelperFactory.getPoKHelper(args.getConfiguration());

		// Folder Url
		final List<String> urlLits = helper.getLitsOnPredSubj(SERVICE_URI, CRAWL_URL_PROPERTY);
		if (urlLits.isEmpty()) {
			logger.error("Missing '" + CRAWL_URL_PROPERTY + "' property in the configuration.");

			final WebLabException exp = new WebLabException();
			exp.setErrorId("E1");
			exp.setErrorMessage("Invalid parameter.");
			throw new ConfigureException("Missing '" + CRAWL_URL_PROPERTY + "' property in the configuration.", exp);
		}
		if (urlLits.size() > 1) {
			logger.warn("More than one property for '" + CRAWL_URL_PROPERTY + "' in the configuration, use first.");
		}

		final String usContextUri = args.getUsageContext().getUri();
		final String folderUrl = urlLits.get(0);
		final String previousFolderUrl = usContextAndFolderUriMapping.put(usContextUri, folderUrl);
		if (folderUrl == null || !folderUrl.equals(previousFolderUrl)) {
			// A list built from another folder must not be served for the new one.
			usContextAndFileListMapping.remove(usContextUri);
		}

		return new ConfigureReturn();
	}

	/**
	 * Forgets the folder bound to a usage context, together with the file list cached for it.
	 * 
	 * @param args
	 *            The usage context to reset.
	 * @return An empty <code>ResetConfigurationReturn</code>.
	 * @throws ResetConfigurationException
	 *             If <code>args</code>, its usage context or the URI of the usage context is <code>null</code>.
	 */
	@Override
	@WebMethod(action = "resetConfiguration")
	@WebResult(name = "resetConfigurationReturn", targetNamespace = "http://weblab-project.org/services/configurable/types", partName = "return")
	public ResetConfigurationReturn resetConfiguration(
			@WebParam(name = "resetConfigurationArgs", targetNamespace = "http://weblab-project.org/services/configurable/types", partName = "args") final ResetConfigurationArgs args)
			throws ResetConfigurationException {
		if (args == null || args.getUsageContext() == null || args.getUsageContext().getUri() == null) {
			final WebLabException wle = new WebLabException();
			wle.setErrorId("E1");
			wle.setErrorMessage("Invalid parameter");

			throw new ResetConfigurationException("ResetConfigurationArgs was invalid (either itself, usageContext or its uri was null)", wle);
		}

		final String usContextUri = args.getUsageContext().getUri();
		usContextAndFolderUriMapping.remove(usContextUri);
		// Without this, a later configure on the same usage context would keep paging through the old folder.
		usContextAndFileListMapping.remove(usContextUri);

		return new ResetConfigurationReturn();
	}

	/**
	 * Loads every property used by this service from the service configuration file.
	 */
	private void loadProps() {
		final Set<String> toLoad = new HashSet<String>();
		toLoad.add(DEFAULT_FOLDER_URL_PROPERTY_NAME);
		toLoad.add(RECURSIVE_PROPERTY_NAME);
		toLoad.add(DELETE_FILE_AFTER_PROPERTY_NAME);
		toLoad.add(EXTENSIONS_FILTER_PROPERTY_NAME);
		toLoad.add(EXTENSIONS_REJECT_PROPERTY_NAME);
		toLoad.add(FOLDERS_FILTER_PROPERTY_NAME);
		toLoad.add(FOLDERS_REJECT_PROPERTY_NAME);
		toLoad.add(FOLDERS_EXTENSIONS_FILTER_PROPERTY_NAME);
		toLoad.add(FOLDERS_EXTENSIONS_REJECT_PROPERTY_NAME);
		toLoad.add(ANNOT_SOURCE_PROPERTY_NAME);
		toLoad.add(SOURCE_PROPERTY_NAME);
		props = PropertiesLoader.loadProperties(SERVICE_CONFIG_FILE, toLoad);
	}

	/**
	 * Builds the file filter from the extension, folder and folder-extension properties of the configuration
	 * file. Each list property is a ';' separated list; each reject flag defaults to <code>false</code>.
	 * 
	 * @return The filter to apply when listing the folder to crawl.
	 */
	private FileFilter getFileFilterFromConfig() {
		final List<String> ext = listProperty(EXTENSIONS_FILTER_PROPERTY_NAME);
		final boolean extRej = booleanProperty(EXTENSIONS_REJECT_PROPERTY_NAME);

		final List<String> folders = listProperty(FOLDERS_FILTER_PROPERTY_NAME);
		final boolean folderRej = booleanProperty(FOLDERS_REJECT_PROPERTY_NAME);

		final List<String> foldersExt = listProperty(FOLDERS_EXTENSIONS_FILTER_PROPERTY_NAME);
		final boolean folderExtRej = booleanProperty(FOLDERS_EXTENSIONS_REJECT_PROPERTY_NAME);

		return new FolderAndExtensionFilter(folders, folderRej, ext, extRej, foldersExt, folderExtRej);
	}

	/**
	 * Reads a boolean property, logging a warning and answering <code>false</code> when it is not defined.
	 */
	private static boolean booleanProperty(final String name) {
		if (props.containsKey(name)) {
			return Boolean.parseBoolean(props.get(name));
		}
		logger.warn("Unable to load '" + name + "' from config file. false will be used.");
		return false;
	}

	/**
	 * Reads a ';' separated property as a list of trimmed, non-empty values; the list is empty when the property
	 * is not defined.
	 */
	private static List<String> listProperty(final String name) {
		final List<String> values = new ArrayList<String>();
		if (!props.containsKey(name)) {
			logger.warn("Unable to get values from " + name);
			return values;
		}
		for (final String raw : props.get(name).split(";")) {
			final String value = raw.trim();
			if (!value.equals("")) {
				values.add(value);
			}
		}
		if (values.isEmpty()) {
			logger.info("No value defined in " + name);
		}
		return values;
	}

	/**
	 * Builds the fault reported when the files of a usage context must be listed but no folder is known for it.
	 */
	private static GetResourceException missingFolder(final String usContext) {
		final String message = "No folder to crawl is defined for usage context '" + usContext + "'; call configure first or set '" + DEFAULT_FOLDER_URL_PROPERTY_NAME + "' in "
				+ SERVICE_CONFIG_FILE + ".";
		logger.error(message);

		final WebLabException wle = new WebLabException();
		wle.setErrorId("E1");
		wle.setErrorMessage("Invalid parameter");
		return new GetResourceException(message, wle);
	}
}
