/*
 * Decompiled with CFR 0.152.
 */
package org.ow2.weblab.crawler;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.ow2.weblab.content.binary.BinaryFolderContentManager;
import org.weblab_project.core.exception.WebLabCheckedException;
import org.weblab_project.core.exception.WebLabUncheckedException;
import org.weblab_project.core.factory.AnnotationFactory;
import org.weblab_project.core.factory.ResourceFactory;
import org.weblab_project.core.helper.PoKHelperExtended;
import org.weblab_project.core.helper.RDFHelperFactory;
import org.weblab_project.core.model.Document;
import org.weblab_project.core.model.PieceOfKnowledge;
import org.weblab_project.core.model.Resource;
import org.weblab_project.core.model.ResourceCollection;
import org.weblab_project.core.properties.PropertiesLoader;

public class FolderCrawler {
    protected final BinaryFolderContentManager contentManager;
    protected final File folder;
    protected final FileFilter fileFilter;
    protected final FileFilter folderFilter;
    protected final int bufferSize = 10000;
    protected final boolean recursiveMode;
    private final List<File> crawledFiles = new ArrayList<File>();
    private final byte[] lock = new byte[0];
    protected static final String CRAWLER_ID = "crawlerFolder";
    protected static final String CRAWLER_CONTENT_ID = "crawlerFolderContent";
    protected static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");
    public static final String CONFIG_FILE = "FolderCrawler.config";
    public static final String EXPOSED_ROOT_PROPERTY_NAME = "exposedRoot";
    public static final String EXPOSED_AS_URI_PROPERTY_NAME = "exposedAsUri";
    public static Set<String> propKeys = new HashSet<String>();
    protected String exposedRoot;
    protected String exposedAsUri;
    private static final Log LOG;
    private static final FileFilter FOLDER_FILTER;

    public FolderCrawler(BinaryFolderContentManager contentManager, File folder, FileFilter fileFilter, boolean recursiveMode, FileFilter folderFilter) throws WebLabCheckedException {
        if (contentManager.getFolder() == null) {
            throw new WebLabCheckedException("Content manager must be well defined.");
        }
        this.contentManager = contentManager;
        if (!folder.exists() || folder.isFile() || !folder.canRead()) {
            throw new WebLabCheckedException("Folder to crawl '" + folder.getAbsolutePath() + "' is unvalid.");
        }
        this.folder = folder;
        this.recursiveMode = recursiveMode;
        this.fileFilter = fileFilter;
        this.folderFilter = folderFilter;
        Map props = PropertiesLoader.loadProperties((String)CONFIG_FILE, propKeys);
        this.exposedRoot = (String)props.get(EXPOSED_ROOT_PROPERTY_NAME);
        this.exposedAsUri = (String)props.get(EXPOSED_AS_URI_PROPERTY_NAME);
    }

    public FolderCrawler(BinaryFolderContentManager contentManager, File folder, FileFilter fileFilter, boolean recursiveMode) throws WebLabCheckedException {
        this(contentManager, folder, fileFilter, recursiveMode, FOLDER_FILTER);
    }

    public FolderCrawler(String internFolder, String folderToCrawl, FileFilter fileFilter) throws WebLabCheckedException {
        this(internFolder, folderToCrawl, fileFilter, false, FOLDER_FILTER);
    }

    public FolderCrawler(String internFolder, String folderToCrawl, FileFilter fileFilter, boolean recursiveMode) throws WebLabCheckedException {
        this(BinaryFolderContentManager.getInstance((String)internFolder), new File(folderToCrawl), fileFilter, recursiveMode, FOLDER_FILTER);
    }

    public FolderCrawler(String internFolder, String folderToCrawl, FileFilter fileFilter, boolean recursiveMode, FileFilter folderFilter) throws WebLabCheckedException {
        this(BinaryFolderContentManager.getInstance((String)internFolder), new File(folderToCrawl), fileFilter, recursiveMode, folderFilter);
    }

    public int getNbFiles() {
        return this.crawledFiles.size();
    }

    public FolderCrawler(String internFolder, String folderToCrawl) throws WebLabCheckedException {
        this(internFolder, folderToCrawl, new FileFilter(){

            @Override
            public boolean accept(File file) {
                return file.isFile();
            }
        });
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    public void startCrawl() {
        if (this.folder == null || this.fileFilter == null) {
            throw new WebLabUncheckedException("Folder to crawl and file filter should have been defined previously.");
        }
        byte[] byArray = this.lock;
        synchronized (this.lock) {
            this.listAndAddFiles(this.folder);
            // ** MonitorExit[var1_1] (shouldn't be in output)
            LOG.info((Object)(this.crawledFiles.size() + " crawled files in FolderCrawler: " + this.toString()));
            LOG.debug((Object)("Crawled files: " + this.folder));
            return;
        }
    }

    protected void listAndAddFiles(File newFolder) {
        if (newFolder.isDirectory()) {
            LOG.debug((Object)("Add content of folder: " + newFolder.getAbsolutePath()));
            boolean debug = LOG.isDebugEnabled();
            for (File file : newFolder.listFiles(this.fileFilter)) {
                if (this.crawledFiles.contains(file)) continue;
                if (debug) {
                    LOG.trace((Object)("Add file: " + file.getAbsolutePath()));
                }
                this.crawledFiles.add(file);
            }
            if (this.recursiveMode) {
                for (File dir : newFolder.listFiles(this.folderFilter)) {
                    this.listAndAddFiles(dir);
                }
            }
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    public ResourceCollection getCrawledDocuments(int offset, int limit) {
        int theOffset = offset;
        int theLimit = limit;
        byte[] byArray = this.lock;
        synchronized (this.lock) {
            long time = System.currentTimeMillis();
            ResourceCollection col = (ResourceCollection)ResourceFactory.createResource((String)CRAWLER_ID, (String)("tempCollection-" + time), ResourceCollection.class);
            if (this.crawledFiles.isEmpty()) {
                LOG.warn((Object)("Either you haven't done a startCrawl before or folder (" + this.folder + ") was empty."));
                // ** MonitorExit[var5_5] (shouldn't be in output)
                return col;
            }
            if (theOffset >= this.crawledFiles.size()) {
                LOG.warn((Object)"Every files have already been crawled.");
                // ** MonitorExit[var5_5] (shouldn't be in output)
                return col;
            }
            if (theOffset < 0) {
                LOG.warn((Object)"Offset was negative, 0 used instead.");
                theOffset = 0;
            }
            if (theLimit <= 0) {
                LOG.info((Object)"Limit was null or negative. Integer.MAX_VALUE will be used.");
                theLimit = Integer.MAX_VALUE;
            }
            int cpt = theOffset;
            boolean toContinue = true;
            do {
                if (cpt < this.crawledFiles.size()) {
                    String path;
                    File file = this.crawledFiles.get(cpt);
                    if (!(file.exists() && file.isFile() && file.canRead())) {
                        this.crawledFiles.remove(cpt);
                        LOG.warn((Object)("File (" + file + ") is not crawlable"));
                        continue;
                    }
                    String contentUri = "weblab://crawlerFolderContent/" + time + "/" + cpt;
                    LOG.debug((Object)("Loading file: " + file.getAbsolutePath()));
                    try {
                        this.contentManager.saveFile(file, contentUri);
                    }
                    catch (WebLabCheckedException wlce) {
                        throw new WebLabUncheckedException("Unexpected error with content manager.", (Throwable)wlce);
                    }
                    try {
                        path = file.getCanonicalPath();
                    }
                    catch (IOException ioe) {
                        LOG.warn((Object)("Unable to get canonical path of file: " + file.getAbsolutePath() + "; absolute path will be used instead."));
                        path = file.getAbsolutePath();
                    }
                    Document document = (Document)ResourceFactory.createResource((String)CRAWLER_ID, (String)("file" + cpt), Document.class);
                    PoKHelperExtended helper = RDFHelperFactory.getPoKHelperExtended((PieceOfKnowledge)AnnotationFactory.createAndLinkAnnotation((Resource)document));
                    helper.createResStat(document.getUri(), "http://weblab-project.org/core/model/property/processing/hasNativeContent", contentUri);
                    helper.createLitStat(document.getUri(), "http://weblab-project.org/core/model/property/processing/hasGatheringDate", DATE_FORMAT.format(new Date()));
                    helper.createLitStat(document.getUri(), "http://purl.org/dc/terms/extent", file.length() + " bytes");
                    helper.createLitStat(document.getUri(), "http://weblab-project.org/core/model/property/processing/hasOriginalFileSize", "" + file.length());
                    helper.createLitStat(document.getUri(), "http://purl.org/dc/terms/modified", DATE_FORMAT.format(new Date(file.lastModified())));
                    helper.createLitStat(document.getUri(), "http://purl.org/dc/elements/1.1/source", path);
                    helper.createLitStat(document.getUri(), "http://weblab-project.org/core/model/property/processing/hasOriginalFileName", file.getName());
                    helper.createLitStat(document.getUri(), this.exposedAsUri, this.exposedRoot + contentUri.hashCode());
                    helper.setNSPrefix("wlp", "http://weblab-project.org/core/model/property/processing/");
                    helper.setNSPrefix("dc", "http://purl.org/dc/elements/1.1/");
                    helper.setNSPrefix("dct", "http://purl.org/dc/terms/");
                    helper.commit();
                    col.getResource().add(document);
                    if (++cpt - theOffset < theLimit) continue;
                    toContinue = false;
                    continue;
                }
                toContinue = false;
            } while (toContinue);
            LOG.info((Object)(this.crawledFiles.size() - cpt + " files remaining in foldercrawler " + this.toString()));
            // ** MonitorExit[var5_5] (shouldn't be in output)
            return col;
        }
    }

    public String toString() {
        return "Folder to crawl: '" + this.folder.getAbsolutePath() + "'.";
    }

    static {
        propKeys.add(EXPOSED_ROOT_PROPERTY_NAME);
        propKeys.add(EXPOSED_AS_URI_PROPERTY_NAME);
        LOG = LogFactory.getLog(FolderCrawler.class);
        FOLDER_FILTER = new FileFilter(){

            @Override
            public boolean accept(File file) {
                return file.isDirectory();
            }
        };
    }
}

