/*
 * Decompiled with CFR 0.152.
 */
package org.ow2.weblab.services.duplicates;

import java.io.File;
import java.util.List;
import java.util.Map;
import javax.annotation.Resource;
import javax.jws.WebService;
import javax.servlet.ServletContext;
import javax.xml.ws.WebServiceContext;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.ow2.weblab.services.indexer.impl.SolrIndexer;
import org.ow2.weblab.services.searcher.impl.SolrSearcher;
import org.ow2.weblab.services.solr.SolrComponent;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.weblab_project.core.exception.WebLabCheckedException;
import org.weblab_project.core.factory.AnnotationFactory;
import org.weblab_project.core.helper.PoKHelper;
import org.weblab_project.core.helper.RDFHelperFactory;
import org.weblab_project.core.helper.ResourceHelper;
import org.weblab_project.core.model.Annotation;
import org.weblab_project.core.model.MediaUnit;
import org.weblab_project.core.model.PieceOfKnowledge;
import org.weblab_project.core.properties.PropertiesLoader;
import org.weblab_project.services.analyser.Analyser;
import org.weblab_project.services.analyser.ProcessException;
import org.weblab_project.services.analyser.types.ProcessArgs;
import org.weblab_project.services.analyser.types.ProcessReturn;
import org.weblab_project.services.exception.WebLabException;

/*
 * Exception performing whole class analysis ignored.
 */
/**
 * Analyser web service that flags a {@link MediaUnit} as a duplicate of an
 * already indexed document.
 *
 * <p>Two checks are performed in order: first against indexed documents that
 * share one of the unit's {@code dc:source} literals, then against the hits of
 * a Solr "more like this" query. In both cases a hit is a duplicate when the
 * Levenshtein similarity of the whitespace-normalised texts is strictly greater
 * than the {@value #SIMILARITY_LIMIT_PROPERTY} value read from
 * {@value #CONFIG_FILE}. A duplicate unit is annotated with the
 * {@code canBeIgnored} processing property set to {@code "true"}.
 */
@WebService(endpointInterface="org.weblab_project.services.analyser.Analyser")
public class DuplicatesDetectorService
implements Analyser {
    private static final Logger logger = LoggerFactory.getLogger(DuplicatesDetectorService.class);
    @Resource
    protected WebServiceContext wsContext;
    private SolrComponent comp;
    protected static final String CONFIG_FILE = "duplicates-detector.config";
    protected static final String SIMILARITY_LIMIT_PROPERTY = "similarityLimit";
    protected static Map<String, String> props;

    /**
     * Checks whether the media unit carried by {@code args} duplicates an indexed
     * document and, if so, annotates it as ignorable.
     *
     * @param args the call arguments; must hold a non-null {@link MediaUnit}
     * @return a {@link ProcessReturn} holding the (possibly annotated) unit
     * @throws ProcessException if {@code args} is invalid (error id {@code E1}) or a
     *         {@link WebLabCheckedException} is raised while checking (error id {@code E0})
     */
    public ProcessReturn process(ProcessArgs args) throws ProcessException {
        MediaUnit unit = DuplicatesDetectorService.checkArgs(args);
        logger.info("Process method of DuplicatesDetectorService called for Document: " + unit.getUri());
        this.comp = SolrComponent.getInstance();
        try {
            this.comp.open(this.getWebAppPath());
            // The source-property check runs first; the "more like this" check
            // (which adds the unit to the index) only runs when it finds nothing.
            boolean docIsAlreadyPresent = this.testDuplicateWithSourceProperty(unit)
                    || this.testDuplicateWithSolrMoreLikeThisQuery(unit);
            if (docIsAlreadyPresent) {
                logger.info("Document (" + unit.getUri() + ") is a duplicate.");
                Annotation annot = AnnotationFactory.createAndLinkAnnotation((org.weblab_project.core.model.Resource)unit);
                PoKHelper pokHlp = RDFHelperFactory.getPoKHelper((PieceOfKnowledge)annot);
                pokHlp.createLitStat(unit.getUri(), "http://weblab-project.org/core/model/property/processing/canBeIgnored", "true");
            } else {
                logger.info("Document (" + unit.getUri() + ") is a new document.");
            }
        }
        catch (WebLabCheckedException e) {
            WebLabException exp = new WebLabException();
            exp.setErrorId("E0");
            exp.setErrorMessage("Unexpected error");
            throw new ProcessException("Error when calling solr index.", exp, e);
        }
        ProcessReturn pr = new ProcessReturn();
        pr.setResource((org.weblab_project.core.model.Resource)unit);
        return pr;
    }

    /**
     * Searches the index for documents sharing one of the unit's
     * {@code dc:source} literals and compares their text with the unit's text.
     *
     * @param unit the media unit to test
     * @return {@code true} if at least one of the first 10 hits is similar enough
     * @throws WebLabCheckedException if the similarity limit cannot be read, or if
     *         raised by the Solr component
     */
    private boolean testDuplicateWithSourceProperty(MediaUnit unit) throws WebLabCheckedException {
        String textToCompare = DuplicatesDetectorService.removeAllBreakLinesAndMultipleSpaces(SolrComponent.extractTextFromResource((org.weblab_project.core.model.Resource)unit));
        float similarityLimit = this.getSimilarityLimit();
        ResourceHelper helper = RDFHelperFactory.getResourceHelper((org.weblab_project.core.model.Resource)unit);
        List<String> sourceProperties = helper.getLitsOnPredSubj(unit.getUri(), "http://purl.org/dc/elements/1.1/source");
        if (sourceProperties == null || sourceProperties.isEmpty()) {
            return false;
        }
        // Build "source:("a") OR source:("b")"; the separator is written before
        // every clause but the first so nothing has to be trimmed afterwards.
        StringBuilder sourceQuery = new StringBuilder();
        for (String sourceProperty : sourceProperties) {
            if (sourceQuery.length() > 0) {
                sourceQuery.append(" OR ");
            }
            sourceQuery.append("source:(\"").append(ClientUtils.escapeQueryChars(sourceProperty)).append("\")");
        }
        QueryResponse qr = this.comp.search(sourceQuery.toString(), 0, 10);
        if (qr == null || qr.getResults() == null || qr.getResults().isEmpty()) {
            logger.info("No documents with same source property found.");
            return false;
        }
        return DuplicatesDetectorService.containsSimilarHit(qr, textToCompare, similarityLimit,
                "Similar document found from source property (",
                "Document with same source property found but similiraty (");
    }

    /**
     * Adds the unit to the index, runs a "more like this" query on its URI and
     * compares the text of each hit with the unit's text.
     *
     * @param unit the media unit to test
     * @return {@code true} if at least one hit is similar enough
     * @throws WebLabCheckedException if the similarity limit cannot be read, or if
     *         raised by the Solr component
     */
    private boolean testDuplicateWithSolrMoreLikeThisQuery(MediaUnit unit) throws WebLabCheckedException {
        String textToCompare = DuplicatesDetectorService.removeAllBreakLinesAndMultipleSpaces(SolrComponent.extractTextFromResource((org.weblab_project.core.model.Resource)unit));
        float similarityLimit = this.getSimilarityLimit();
        // The unit must be in the index (and flushed) before it can be used as
        // the reference document of the query.
        this.comp.addDocument((org.weblab_project.core.model.Resource)unit);
        this.comp.flushIndexBuffer();
        // Escape the URI the same way the source query escapes its literals so
        // that special characters cannot break the query syntax.
        // NOTE(review): assumes moreLikeThis passes this string to Solr as a query - confirm.
        QueryResponse qr = this.comp.moreLikeThis("id:(\"" + ClientUtils.escapeQueryChars(unit.getUri()) + "\")");
        if (qr == null || qr.getResults() == null || qr.getResults().isEmpty()) {
            logger.info("No documents found with \"More like this\" query.");
            return false;
        }
        return DuplicatesDetectorService.containsSimilarHit(qr, textToCompare, similarityLimit,
                "Similar document found from \"moreLikeThis\" query (",
                "Document found in index but similiraty (");
    }

    /**
     * Compares the {@code text} field of every hit with {@code textToCompare} and
     * logs the outcome for each of them.
     *
     * <p>All hits are examined (no early exit) so that every comparison is logged.
     *
     * @param qr the query response; its result list must be non-null
     * @param textToCompare the normalised text of the unit being processed
     * @param similarityLimit the percentage a similarity must strictly exceed
     * @param similarPrefix start of the log message used for a similar hit
     * @param dissimilarPrefix start of the log message used for a hit under the limit
     * @return {@code true} if at least one hit exceeds the limit
     */
    private static boolean containsSimilarHit(QueryResponse qr, String textToCompare, float similarityLimit, String similarPrefix, String dissimilarPrefix) {
        boolean similarHitFound = false;
        for (SolrDocument hit : qr.getResults()) {
            String hitId = String.valueOf(hit.getFieldValue("id"));
            Object rawText = hit.getFieldValue("text");
            if (rawText == null) {
                // Without this guard the unit would be compared against the
                // literal string "null" produced by String.valueOf.
                logger.info("Document found without any text field, comparison skipped: " + hitId);
                continue;
            }
            String hitText = DuplicatesDetectorService.removeAllBreakLinesAndMultipleSpaces(String.valueOf(rawText));
            float similarity = DuplicatesDetectorService.getLevenshteinSimilarity(textToCompare, hitText);
            if (similarity > similarityLimit) {
                logger.info(similarPrefix + similarity + "% of text similarity): " + hitId);
                similarHitFound = true;
            } else {
                logger.info(dissimilarPrefix + similarity + "%) is lower than limit (" + similarityLimit + "%): " + hitId);
            }
        }
        return similarHitFound;
    }

    /**
     * Reads the similarity limit from the configuration, loading it on first use.
     *
     * @return the configured limit, as a percentage
     * @throws WebLabCheckedException if the property is missing or is not a number
     */
    private float getSimilarityLimit() throws WebLabCheckedException {
        if (props == null) {
            this.loadProps();
        }
        String rawLimit = props == null ? null : props.get(SIMILARITY_LIMIT_PROPERTY);
        if (rawLimit == null) {
            throw new WebLabCheckedException("Property [" + SIMILARITY_LIMIT_PROPERTY + "] is missing from [" + CONFIG_FILE + "].");
        }
        try {
            return Float.parseFloat(rawLimit);
        }
        catch (NumberFormatException e) {
            // Only the String constructor of WebLabCheckedException is known to
            // exist here, so the offending value is reported in the message.
            throw new WebLabCheckedException("Property [" + SIMILARITY_LIMIT_PROPERTY + "] of [" + CONFIG_FILE + "] is not a valid number: [" + rawLimit + "].");
        }
    }

    /**
     * Computes a similarity percentage from the Levenshtein distance of two strings.
     *
     * @param s1 first string, not null
     * @param s2 second string, not null
     * @return {@code 100 - distance / max(length) * 100}; {@code 100} when both
     *         strings are empty
     */
    private static float getLevenshteinSimilarity(String s1, String s2) {
        int nbOfCharacters = Math.max(s1.length(), s2.length());
        if (nbOfCharacters == 0) {
            // Both strings are empty: they are identical, and the formula below
            // would otherwise evaluate 0/0 and yield NaN.
            return 100.0f;
        }
        int nbOfChanges = StringUtils.getLevenshteinDistance(s1, s2);
        return 100.0f - (float)nbOfChanges / (float)nbOfCharacters * 100.0f;
    }

    /**
     * Validates the call arguments and extracts the media unit to process.
     *
     * @param args the arguments received by {@link #process(ProcessArgs)}
     * @return the resource held by {@code args}, cast to {@link MediaUnit}
     * @throws ProcessException (error id {@code E1}) if {@code args} is null, holds
     *         no resource, or holds a resource that is not a {@link MediaUnit}
     */
    private static MediaUnit checkArgs(ProcessArgs args) throws ProcessException {
        if (args == null) {
            throw DuplicatesDetectorService.invalidParameter("IndexArgs was null.");
        }
        if (args.getResource() == null) {
            throw DuplicatesDetectorService.invalidParameter("Args must contain a non-null Resource to index");
        }
        if (!(args.getResource() instanceof MediaUnit)) {
            throw DuplicatesDetectorService.invalidParameter("Resource to index as not a MediaUnit.");
        }
        return (MediaUnit)args.getResource();
    }

    /**
     * Logs {@code message} as an error on this class's logger and builds the
     * matching "Invalid parameter" exception.
     *
     * @param message the error description
     * @return the exception to throw; it is not thrown here
     */
    private static ProcessException invalidParameter(String message) {
        WebLabException wle = new WebLabException();
        wle.setErrorId("E1");
        wle.setErrorMessage("Invalid parameter");
        logger.error(message);
        return new ProcessException(message, wle);
    }

    /**
     * Collapses every run of whitespace (including non-breaking spaces, U+00A0)
     * into a single space.
     *
     * @param textToProcess the text to normalise, not null
     * @return the normalised text
     */
    private static String removeAllBreakLinesAndMultipleSpaces(String textToProcess) {
        return textToProcess.replaceAll("[\\s\\xA0]+", " ");
    }

    /**
     * Resolves the directory passed to the Solr component when it is opened.
     *
     * <p>Inside a servlet container this is the real path of
     * {@code WEB-INF/classes}; without a web service context it falls back to
     * {@code ./src/main/resources}.
     *
     * @return the path of an existing directory or file
     * @throws WebLabCheckedException if the path cannot be resolved or does not exist
     */
    private String getWebAppPath() throws WebLabCheckedException {
        String appPath = "./src/main/resources";
        if (this.wsContext != null) {
            ServletContext ctx = (ServletContext)this.wsContext.getMessageContext().get("javax.xml.ws.servlet.context");
            if (ctx == null) {
                throw new WebLabCheckedException("Servlet context is not available in the web service message context.");
            }
            appPath = ctx.getRealPath("WEB-INF/classes");
            if (appPath == null) {
                // getRealPath returns null when the container cannot translate
                // the virtual path (e.g. content served from an archive).
                throw new WebLabCheckedException("Unable to resolve the real path of [WEB-INF/classes].");
            }
        } else {
            logger.warn("Webservice context not available returning current local path as default web app path");
        }
        File f = new File(appPath);
        if (!f.exists()) {
            throw new WebLabCheckedException("Webapp path [" + appPath + "] does not exists...");
        }
        return appPath;
    }

    /**
     * Loads the service configuration from {@value #CONFIG_FILE} into {@link #props}.
     */
    private void loadProps() {
        props = PropertiesLoader.loadProperties(CONFIG_FILE);
    }

    /**
     * @return the web service context currently set on this service, possibly null
     */
    public WebServiceContext getWsContext() {
        return this.wsContext;
    }

    /**
     * @param wsContext the web service context to use for resolving the web app path
     */
    public void setWsContext(WebServiceContext wsContext) {
        this.wsContext = wsContext;
    }
}

