/*
 * Decompiled with CFR 0.152.
 */
package org.ow2.weblab.crawler;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.ow2.weblab.content.binary.BinaryFolderContentManager;
import org.weblab_project.core.exception.WebLabCheckedException;
import org.weblab_project.core.exception.WebLabUncheckedException;
import org.weblab_project.core.factory.AnnotationFactory;
import org.weblab_project.core.factory.ResourceFactory;
import org.weblab_project.core.helper.PoKHelperExtended;
import org.weblab_project.core.helper.RDFHelperFactory;
import org.weblab_project.core.model.Document;
import org.weblab_project.core.model.PieceOfKnowledge;
import org.weblab_project.core.model.Resource;
import org.weblab_project.core.model.ResourceCollection;
import org.weblab_project.core.properties.PropertiesLoader;

public class FolderCrawler {
    protected final BinaryFolderContentManager contentManager;
    protected final File folder;
    protected final FileFilter fileFilter;
    protected final FileFilter folderFilter;
    protected final int bufferSize = 10000;
    protected final boolean recursiveMode;
    private List<Long> crawledFilesOffset = new ArrayList<Long>();
    static final String PERSISTENT_LIST_PREFIX = "FolderCrawlerList";
    private File persistentListOfFiles;
    private final byte[] lock = new byte[0];
    protected static final String CRAWLER_ID = "crawlerFolder";
    protected static final String CRAWLER_CONTENT_ID = "crawlerFolderContent";
    protected static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");
    public static final String CONFIG_FILE = "FolderCrawler.config";
    public static final String EXPOSED_ROOT_PROPERTY_NAME = "exposedRoot";
    public static final String EXPOSED_AS_URI_PROPERTY_NAME = "exposedAsUri";
    protected final String exposedRoot;
    protected final String exposedAsUri;
    private static final Log LOG = LogFactory.getLog((String)"FolderCrawler");
    private static final FileFilter FOLDER_FILTER = new FileFilter(){

        @Override
        public boolean accept(File file) {
            return file.isDirectory();
        }
    };

    public FolderCrawler(BinaryFolderContentManager contentManager, File folder, FileFilter fileFilter, boolean recursiveMode, FileFilter folderFilter) throws WebLabCheckedException {
        if (contentManager.getFolder() == null) {
            throw new WebLabCheckedException("Content manager must be well defined.");
        }
        this.contentManager = contentManager;
        if (!folder.exists() || folder.isFile() || !folder.canRead()) {
            throw new WebLabCheckedException("Folder to crawl '" + folder.getAbsolutePath() + "' is unvalid.");
        }
        this.folder = folder;
        this.recursiveMode = recursiveMode;
        this.fileFilter = fileFilter;
        this.folderFilter = folderFilter;
        Map props = PropertiesLoader.loadProperties((String)CONFIG_FILE);
        this.exposedRoot = (String)props.get(EXPOSED_ROOT_PROPERTY_NAME);
        this.exposedAsUri = (String)props.get(EXPOSED_AS_URI_PROPERTY_NAME);
    }

    public FolderCrawler(BinaryFolderContentManager contentManager, File folder, FileFilter fileFilter, boolean recursiveMode) throws WebLabCheckedException {
        this(contentManager, folder, fileFilter, recursiveMode, FOLDER_FILTER);
    }

    public FolderCrawler(String internFolder, String folderToCrawl, FileFilter fileFilter) throws WebLabCheckedException {
        this(internFolder, folderToCrawl, fileFilter, false, FOLDER_FILTER);
    }

    public FolderCrawler(String internFolder, String folderToCrawl, FileFilter fileFilter, boolean recursiveMode) throws WebLabCheckedException {
        this(BinaryFolderContentManager.getInstance((String)internFolder), new File(folderToCrawl), fileFilter, recursiveMode, FOLDER_FILTER);
    }

    public FolderCrawler(String internFolder, String folderToCrawl, FileFilter fileFilter, boolean recursiveMode, FileFilter folderFilter) throws WebLabCheckedException {
        this(BinaryFolderContentManager.getInstance((String)internFolder), new File(folderToCrawl), fileFilter, recursiveMode, folderFilter);
    }

    public int getNbFiles() {
        return this.crawledFilesOffset.size();
    }

    public FolderCrawler(String internFolder, String folderToCrawl) throws WebLabCheckedException {
        this(internFolder, folderToCrawl, new FileFilter(){

            @Override
            public boolean accept(File file) {
                return file.exists() && file.isFile() && file.canRead();
            }
        });
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    public void startCrawl() {
        if (this.folder == null || this.fileFilter == null) {
            throw new WebLabUncheckedException("Folder to crawl and file filter should have been defined previously.");
        }
        byte[] byArray = this.lock;
        synchronized (this.lock) {
            if (this.persistentListOfFiles != null) {
                this.persistentListOfFiles.delete();
                this.crawledFilesOffset.clear();
            }
            try {
                this.persistentListOfFiles = File.createTempFile(PERSISTENT_LIST_PREFIX + Long.toString(System.currentTimeMillis()), null);
                this.persistentListOfFiles.deleteOnExit();
                BufferedWriter bw_l = new BufferedWriter(new FileWriter(this.persistentListOfFiles));
                int numberOfFiles_l = 0;
                this.listAndAddFiles(this.folder, 0L, bw_l, numberOfFiles_l);
                bw_l.close();
                LOG.trace((Object)"Offsets :");
                LOG.info((Object)(numberOfFiles_l + " were crawled"));
                for (Long long_l : this.crawledFilesOffset) {
                    LOG.trace((Object)long_l);
                }
            }
            catch (FileNotFoundException ex_l) {
                throw new WebLabUncheckedException("Impossible to write temporary file");
            }
            catch (IOException ex_l) {
                throw new WebLabUncheckedException("Impossible to write temporary file");
            }
            LOG.info((Object)(this.crawledFilesOffset.size() + " crawled files in FolderCrawler: " + this.toString()));
            LOG.debug((Object)("Crawled files: " + this.folder));
            return;
        }
    }

    protected long listAndAddFiles(File currentFolder, long offset_p, BufferedWriter persistantFile_p, int numberOfFiles_p) throws IOException {
        long offset_l = offset_p;
        int numberOfFiles_l = numberOfFiles_p;
        if (currentFolder.isDirectory()) {
            File[] filesFound_l = currentFolder.listFiles(this.fileFilter);
            LOG.debug((Object)("Add content of folder: " + currentFolder.getAbsolutePath()));
            if (filesFound_l != null) {
                for (File file_l : filesFound_l) {
                    if (!file_l.exists() || !file_l.isFile() || !file_l.canRead()) continue;
                    String filenameWithNewLine_l = file_l.getAbsolutePath();
                    persistantFile_p.write(filenameWithNewLine_l);
                    persistantFile_p.newLine();
                    this.crawledFilesOffset.add(offset_l);
                    offset_l += (long)(filenameWithNewLine_l.length() + System.getProperty("line.separator").length());
                    ++numberOfFiles_l;
                }
            }
            if (this.recursiveMode) {
                for (File dir : currentFolder.listFiles(this.folderFilter)) {
                    offset_l = this.listAndAddFiles(dir, offset_l, persistantFile_p, numberOfFiles_l);
                }
            }
        }
        return offset_l;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    public ResourceCollection getCrawledDocuments(int offset, int limit) {
        int theOffset = offset;
        int theLimit = limit;
        byte[] byArray = this.lock;
        synchronized (this.lock) {
            long time = System.currentTimeMillis();
            ResourceCollection col = (ResourceCollection)ResourceFactory.createResource((String)CRAWLER_ID, (String)("tempCollection-" + time), ResourceCollection.class);
            if (this.crawledFilesOffset.isEmpty()) {
                LOG.warn((Object)("Either you haven't done a startCrawl before or folder (" + this.folder + ") was empty."));
                // ** MonitorExit[var5_5] (shouldn't be in output)
                return col;
            }
            if (theOffset >= this.crawledFilesOffset.size()) {
                LOG.warn((Object)"Every files have already been crawled.");
                // ** MonitorExit[var5_5] (shouldn't be in output)
                return col;
            }
            if (theOffset < 0) {
                LOG.warn((Object)"Offset was negative, 0 used instead.");
                theOffset = 0;
            }
            if (theLimit <= 0) {
                LOG.info((Object)"Limit was null or negative. Integer.MAX_VALUE will be used.");
                theLimit = Integer.MAX_VALUE;
            }
            long offsetInFile_l = this.crawledFilesOffset.get(theOffset);
            BufferedReader reader_l = null;
            int cpt_l = theOffset;
            try {
                reader_l = new BufferedReader(new FileReader(this.persistentListOfFiles));
                LOG.info((Object)("Skeeping " + offsetInFile_l + " characters fom file"));
                reader_l.skip(offsetInFile_l);
                ListIterator<Long> iterator_l = this.crawledFilesOffset.subList(theOffset, this.crawledFilesOffset.size()).listIterator();
                int numberOfFilesRetrieved_l = 0;
                while (iterator_l.hasNext() && numberOfFilesRetrieved_l < theLimit) {
                    String path;
                    iterator_l.next();
                    ++numberOfFilesRetrieved_l;
                    String filename_l = reader_l.readLine();
                    LOG.trace((Object)("Found URI : " + filename_l));
                    File file = new File(filename_l);
                    if (!(file.exists() && file.isFile() && file.canRead())) {
                        iterator_l.remove();
                        LOG.warn((Object)("File (" + file + ") is not crawlable"));
                        continue;
                    }
                    String contentUri = "weblab://crawlerFolderContent/" + time + "/" + cpt_l;
                    LOG.debug((Object)("Loading file: " + file.getAbsolutePath()));
                    try {
                        this.contentManager.saveFile(file, contentUri);
                    }
                    catch (WebLabCheckedException wlce) {
                        throw new WebLabUncheckedException("Unexpected error with content manager.", (Throwable)wlce);
                    }
                    try {
                        path = file.getCanonicalPath();
                    }
                    catch (IOException ioe) {
                        LOG.warn((Object)("Unable to get canonical path of file: " + file.getAbsolutePath() + "; absolute path will be used instead."));
                        path = file.getAbsolutePath();
                    }
                    Document document = (Document)ResourceFactory.createResource((String)CRAWLER_ID, (String)("file" + cpt_l), Document.class);
                    PoKHelperExtended helper = RDFHelperFactory.getPoKHelperExtended((PieceOfKnowledge)AnnotationFactory.createAndLinkAnnotation((Resource)document));
                    helper.createResStat(document.getUri(), "http://weblab-project.org/core/model/property/processing/hasNativeContent", contentUri);
                    helper.createLitStat(document.getUri(), "http://weblab-project.org/core/model/property/processing/hasGatheringDate", DATE_FORMAT.format(new Date()));
                    helper.createLitStat(document.getUri(), "http://purl.org/dc/terms/extent", file.length() + " bytes");
                    helper.createLitStat(document.getUri(), "http://weblab-project.org/core/model/property/processing/hasOriginalFileSize", "" + file.length());
                    helper.createLitStat(document.getUri(), "http://purl.org/dc/terms/modified", DATE_FORMAT.format(new Date(file.lastModified())));
                    helper.createLitStat(document.getUri(), "http://purl.org/dc/elements/1.1/source", path);
                    helper.createLitStat(document.getUri(), "http://weblab-project.org/core/model/property/processing/hasOriginalFileName", file.getName());
                    if (this.exposedAsUri != null && this.exposedRoot != null && !this.exposedAsUri.isEmpty() && !this.exposedRoot.isEmpty()) {
                        helper.createLitStat(document.getUri(), this.exposedAsUri, this.exposedRoot + contentUri.hashCode());
                    }
                    helper.setNSPrefix("wlp", "http://weblab-project.org/core/model/property/processing/");
                    helper.setNSPrefix("dc", "http://purl.org/dc/elements/1.1/");
                    helper.setNSPrefix("dct", "http://purl.org/dc/terms/");
                    helper.commit();
                    col.getResource().add(document);
                    ++cpt_l;
                }
            }
            catch (IOException ex_l) {
                LOG.info((Object)"There was a problem with internal caching system, resources may be missing");
            }
            finally {
                if (reader_l != null) {
                    try {
                        reader_l.close();
                    }
                    catch (IOException ex_l) {
                        LOG.info((Object)"There was a problem with internal caching system, resources may be missing");
                    }
                }
            }
            LOG.info((Object)(this.crawledFilesOffset.size() - cpt_l + " files remaining in foldercrawler " + this.toString()));
            // ** MonitorExit[var5_5] (shouldn't be in output)
            return col;
        }
    }

    public String toString() {
        return "Folder to crawl: '" + this.folder.getAbsolutePath() + "'.";
    }
}

