/*
 * Decompiled with CFR 0.152.
 */
package org.ow2.weblab.crawler;

import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.ow2.weblab.content.api.ContentManager;
import org.ow2.weblab.core.extended.exception.WebLabCheckedException;
import org.ow2.weblab.core.extended.exception.WebLabUncheckedException;
import org.ow2.weblab.core.extended.factory.ResourceFactory;
import org.ow2.weblab.core.model.ComposedResource;
import org.ow2.weblab.core.model.Document;
import org.ow2.weblab.core.model.Resource;
import org.ow2.weblab.core.model.processing.WProcessingAnnotator;
import org.purl.dc.elements.DublinCoreAnnotator;
import org.purl.dc.terms.DCTermsAnnotator;

/**
 * Crawls a folder of the local file system and exposes the files found as
 * WebLab {@link Document}s grouped in a {@link ComposedResource}.
 *
 * <p>Typical usage is to call {@link #startCrawl()} once to list the files,
 * then {@link #getCrawledDocuments(int, int)} repeatedly to retrieve the
 * documents page by page.</p>
 *
 * <p>{@link #startCrawl()} and {@link #getCrawledDocuments(int, int)} are
 * mutually exclusive: both synchronise on the same private lock while they
 * read or modify the list of crawled files. {@link #getNbFiles()} reads the
 * list without taking that lock.</p>
 */
public class FolderCrawler {
    /** Content manager used to store the native content of each crawled file. */
    protected final ContentManager contentManager;
    /** Root folder to crawl. */
    protected final File folder;
    /** Filter selecting the files to crawl inside each visited folder. */
    protected final FileFilter fileFilter;
    /** Filter selecting the sub-folders to visit when the recursive mode is enabled. */
    protected final FileFilter folderFilter;
    /** Buffer size; not referenced within this class. */
    protected final int bufferSize = 10000;
    /** Whether the sub-folders accepted by {@link #folderFilter} are crawled too. */
    protected final boolean recursiveMode;
    /** Files found so far, in discovery order. Guarded by {@link #lock} in the crawling methods. */
    private final List<File> crawledFiles = new ArrayList<File>();
    /** Private monitor guarding {@link #crawledFiles}. */
    private final byte[] lock = new byte[0];
    /** First identifier part given to the resource factory for every resource created by this crawler. */
    protected static final String CRAWLER_ID = "crawlerFolder";
    /** Identifier for crawled content; not referenced within this class. */
    protected static final String CRAWLER_CONTENT_ID = "crawlerFolderContent";
    private static final Log LOG = LogFactory.getLog(FolderCrawler.class);
    /** Default folder filter: accepts every directory. */
    private static final FileFilter FOLDER_FILTER = new FileFilter() {

        @Override
        public boolean accept(File file) {
            return file.isDirectory();
        }
    };
    /** Default file filter: accepts every regular file. */
    private static final FileFilter FILE_FILTER = new FileFilter() {

        @Override
        public boolean accept(File file) {
            return file.isFile();
        }
    };

    /**
     * Creates a crawler on the given folder.
     *
     * @param contentManager the content manager that receives the native content of the files
     * @param folder the folder to crawl; must exist, be readable and not be a regular file
     * @param fileFilter the filter selecting the files to crawl
     * @param recursiveMode whether sub-folders are crawled too
     * @param folderFilter the filter selecting the sub-folders to visit in recursive mode
     * @throws WebLabCheckedException if the content manager or the folder is {@code null},
     *         or if the folder does not exist, is a regular file or cannot be read
     */
    public FolderCrawler(ContentManager contentManager, File folder, FileFilter fileFilter, boolean recursiveMode, FileFilter folderFilter) throws WebLabCheckedException {
        if (contentManager == null) {
            throw new WebLabCheckedException("Content manager must be well instanciated.");
        }
        this.contentManager = contentManager;
        // Report a missing folder with the declared checked exception rather than a NullPointerException.
        if (folder == null) {
            throw new WebLabCheckedException("Folder to crawl must not be null.");
        }
        if (!folder.exists() || folder.isFile() || !folder.canRead()) {
            throw new WebLabCheckedException("Folder to crawl '" + folder.getAbsolutePath() + "' is unvalid.");
        }
        this.folder = folder;
        this.recursiveMode = recursiveMode;
        this.fileFilter = fileFilter;
        this.folderFilter = folderFilter;
    }

    /**
     * Creates a crawler that visits every sub-directory when {@code recursiveMode} is set.
     *
     * @param contentManager the content manager that receives the native content of the files
     * @param folder the folder to crawl
     * @param fileFilter the filter selecting the files to crawl
     * @param recursiveMode whether sub-folders are crawled too
     * @throws WebLabCheckedException if the content manager or the folder is invalid
     */
    public FolderCrawler(ContentManager contentManager, File folder, FileFilter fileFilter, boolean recursiveMode) throws WebLabCheckedException {
        this(contentManager, folder, fileFilter, recursiveMode, FOLDER_FILTER);
    }

    /**
     * Creates a non-recursive crawler using the content manager returned by
     * {@code ContentManager.getInstance()}.
     *
     * @param folderToCrawl the path of the folder to crawl
     * @param fileFilter the filter selecting the files to crawl
     * @throws WebLabCheckedException if the content manager or the folder is invalid
     */
    public FolderCrawler(String folderToCrawl, FileFilter fileFilter) throws WebLabCheckedException {
        this(folderToCrawl, fileFilter, false, FOLDER_FILTER);
    }

    /**
     * Creates a crawler using the content manager returned by
     * {@code ContentManager.getInstance()} and visiting every sub-directory
     * when {@code recursiveMode} is set.
     *
     * @param folderToCrawl the path of the folder to crawl
     * @param fileFilter the filter selecting the files to crawl
     * @param recursiveMode whether sub-folders are crawled too
     * @throws WebLabCheckedException if the content manager or the folder is invalid
     */
    public FolderCrawler(String folderToCrawl, FileFilter fileFilter, boolean recursiveMode) throws WebLabCheckedException {
        this(ContentManager.getInstance(), new File(folderToCrawl), fileFilter, recursiveMode, FOLDER_FILTER);
    }

    /**
     * Creates a crawler using the content manager returned by
     * {@code ContentManager.getInstance()}.
     *
     * @param folderToCrawl the path of the folder to crawl
     * @param fileFilter the filter selecting the files to crawl
     * @param recursiveMode whether sub-folders are crawled too
     * @param folderFilter the filter selecting the sub-folders to visit in recursive mode
     * @throws WebLabCheckedException if the content manager or the folder is invalid
     */
    public FolderCrawler(String folderToCrawl, FileFilter fileFilter, boolean recursiveMode, FileFilter folderFilter) throws WebLabCheckedException {
        this(ContentManager.getInstance(), new File(folderToCrawl), fileFilter, recursiveMode, folderFilter);
    }

    /**
     * Creates a non-recursive crawler that accepts every regular file of the folder.
     *
     * @param folderToCrawl the path of the folder to crawl
     * @throws WebLabCheckedException if the content manager or the folder is invalid
     */
    public FolderCrawler(String folderToCrawl) throws WebLabCheckedException {
        this(folderToCrawl, FILE_FILTER);
    }

    /**
     * @return the number of files currently held in the list of crawled files
     */
    public int getNbFiles() {
        return this.crawledFiles.size();
    }

    /**
     * Lists the files of the folder (and of its sub-folders in recursive mode)
     * and adds those not already known to the list of crawled files.
     *
     * @throws WebLabUncheckedException if the folder or the file filter is {@code null}
     */
    public void startCrawl() {
        if (this.folder == null || this.fileFilter == null) {
            throw new WebLabUncheckedException("Folder to crawl and file filter should have been defined previously.");
        }
        synchronized (this.lock) {
            this.listAndAddFiles(this.folder);
            LOG.info(this.crawledFiles.size() + " crawled files in FolderCrawler: " + this.toString());
            LOG.debug("Crawled files: " + this.folder);
        }
    }

    /**
     * Adds to the list of crawled files every file of {@code newFolder}
     * accepted by the file filter and not already listed; in recursive mode,
     * does the same for every sub-folder accepted by the folder filter.
     * Does nothing when {@code newFolder} is not a directory.
     *
     * @param newFolder the folder whose content has to be listed
     */
    protected void listAndAddFiles(File newFolder) {
        if (!newFolder.isDirectory()) {
            return;
        }
        LOG.debug("Add content of folder: " + newFolder.getAbsolutePath());

        // File.listFiles returns null (not an empty array) when the listing fails.
        File[] files = newFolder.listFiles(this.fileFilter);
        if (files == null) {
            LOG.warn("Unable to list content of folder: " + newFolder.getAbsolutePath());
            return;
        }
        boolean trace = LOG.isTraceEnabled();
        for (File file : files) {
            if (this.crawledFiles.contains(file)) {
                continue;
            }
            if (trace) {
                LOG.trace("Add file: " + file.getAbsolutePath());
            }
            this.crawledFiles.add(file);
        }

        if (!this.recursiveMode) {
            return;
        }
        File[] subFolders = newFolder.listFiles(this.folderFilter);
        if (subFolders == null) {
            LOG.warn("Unable to list sub-folders of folder: " + newFolder.getAbsolutePath());
            return;
        }
        for (File dir : subFolders) {
            this.listAndAddFiles(dir);
        }
    }

    /**
     * Builds a collection of documents from the crawled files, starting at
     * index {@code offset} and containing at most {@code limit} documents.
     *
     * <p>Files that no longer exist, are not regular files or are not readable
     * are removed from the list of crawled files and skipped; they do not
     * count against {@code limit}.</p>
     *
     * @param offset index of the first file to load; a negative value is replaced by 0
     * @param limit maximum number of documents to return; a value lower than or
     *        equal to 0 is replaced by {@code Integer.MAX_VALUE}
     * @return the collection of loaded documents; empty if no file has been
     *         crawled or if {@code offset} is past the last crawled file
     * @throws WebLabUncheckedException if a file cannot be opened or if the
     *         content manager fails to store its content
     */
    public ComposedResource getCrawledDocuments(int offset, int limit) {
        int theOffset = offset;
        int theLimit = limit;
        synchronized (this.lock) {
            long time = System.currentTimeMillis();
            ComposedResource col = (ComposedResource) ResourceFactory.createResource((String) CRAWLER_ID, (String) ("tempCollection-" + time), ComposedResource.class);
            if (this.crawledFiles.isEmpty()) {
                LOG.warn("Either you haven't done a startCrawl before or folder (" + this.folder + ") was empty.");
                return col;
            }
            if (theOffset >= this.crawledFiles.size()) {
                LOG.warn("Every files have already been crawled.");
                return col;
            }
            if (theOffset < 0) {
                LOG.warn("Offset was negative, 0 used instead.");
                theOffset = 0;
            }
            if (theLimit <= 0) {
                LOG.info("Limit was null or negative. Integer.MAX_VALUE will be used.");
                theLimit = Integer.MAX_VALUE;
            }

            int cpt = theOffset;
            while (cpt < this.crawledFiles.size() && cpt - theOffset < theLimit) {
                File file = this.crawledFiles.get(cpt);
                if (!(file.exists() && file.isFile() && file.canRead())) {
                    // Removing shifts the next file into index cpt, so cpt is not advanced.
                    this.crawledFiles.remove(cpt);
                    LOG.warn("File (" + file + ") is not crawlable");
                    continue;
                }
                Document document = (Document) ResourceFactory.createResource((String) CRAWLER_ID, (String) ("file" + cpt), Document.class);
                LOG.debug("Loading file: " + file.getAbsolutePath());
                this.storeNativeContent(file, document);
                this.writeWeblabAnnotations(document, file);
                col.getResource().add(document);
                cpt++;
            }
            LOG.info(this.crawledFiles.size() - cpt + " files remaining in foldercrawler " + this.toString());
            return col;
        }
    }

    /**
     * Hands the content of {@code file} to the content manager as the native
     * content of {@code document}, and always releases the file handle.
     */
    private void storeNativeContent(File file, Document document) {
        InputStream stream = null;
        try {
            stream = new FileInputStream(file);
            this.contentManager.writeNativeContent(stream, (Resource) document);
        }
        catch (WebLabCheckedException wlce) {
            throw new WebLabUncheckedException("Unexpected error with content manager.", (Throwable) wlce);
        }
        catch (FileNotFoundException e) {
            throw new WebLabUncheckedException("Cannot create an InputStream on file [" + file + "].", (Throwable) e);
        }
        finally {
            if (stream != null) {
                try {
                    stream.close();
                }
                catch (IOException ioe) {
                    // The content has already been handed over; a failed close must not hide the outcome.
                    LOG.warn("Unable to close the stream opened on file: " + file.getAbsolutePath());
                }
            }
        }
    }

    /**
     * Annotates {@code document} with information about {@code file}:
     * gathering date (now), original file name and size, source path
     * (canonical path, or absolute path if the canonical one cannot be
     * resolved), extent in bytes and last modification date.
     *
     * @param document the document to annotate
     * @param file the file the document has been created from
     */
    protected void writeWeblabAnnotations(Document document, File file) {
        String path;
        try {
            path = file.getCanonicalPath();
        }
        catch (IOException ioe) {
            LOG.warn("Unable to get canonical path of file: " + file.getAbsolutePath() + "; absolute path will be used instead.");
            path = file.getAbsolutePath();
        }
        WProcessingAnnotator wpa = new WProcessingAnnotator((Resource) document);
        wpa.writeGatheringDate(new Date());
        wpa.writeOriginalFileName(file.getName());
        wpa.writeOriginalFileSize(Long.valueOf(file.length()));
        DublinCoreAnnotator dca = new DublinCoreAnnotator((Resource) document);
        dca.writeSource(path);
        DCTermsAnnotator dcta = new DCTermsAnnotator((Resource) document);
        dcta.writeExtent(file.length() + " bytes");
        dcta.writeModified(new Date(file.lastModified()));
    }

    /**
     * @return a description of this crawler containing the absolute path of the crawled folder
     */
    @Override
    public String toString() {
        return "Folder to crawl: '" + this.folder.getAbsolutePath() + "'.";
    }
}

