/*
 * Decompiled with CFR 0.152.
 */
package org.ow2.weblab.service.language;

import java.io.File;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import javax.annotation.PostConstruct;
import javax.jws.WebService;
import org.apache.commons.logging.LogFactory;
import org.ow2.weblab.service.language.LanguageExtractionException;
import org.ow2.weblab.service.language.NGramProfilesPatched;
import org.weblab_project.core.factory.AnnotationFactory;
import org.weblab_project.core.helper.PoKHelper;
import org.weblab_project.core.helper.RDFHelperFactory;
import org.weblab_project.core.model.Annotation;
import org.weblab_project.core.model.ComposedUnit;
import org.weblab_project.core.model.MediaUnit;
import org.weblab_project.core.model.PieceOfKnowledge;
import org.weblab_project.core.model.Resource;
import org.weblab_project.core.model.text.Text;
import org.weblab_project.core.properties.PropertiesLoader;
import org.weblab_project.core.util.ResourceUtil;
import org.weblab_project.services.analyser.Analyser;
import org.weblab_project.services.analyser.ProcessException;
import org.weblab_project.services.analyser.types.ProcessArgs;
import org.weblab_project.services.analyser.types.ProcessReturn;
import org.weblab_project.services.exception.WebLabException;

/**
 * Analyser service that guesses the language of {@link Text} units using n-gram profiles
 * and annotates each processed resource with one or more
 * {@code http://purl.org/dc/elements/1.1/language} statements.
 *
 * <p>Configuration is read from {@code ngram.properties} in {@link #init()}:</p>
 * <ul>
 * <li>{@code minSingleValue}: score strictly above which only the best profile is annotated;</li>
 * <li>{@code minMultipleValue}: score below which the language is reported as {@code UNKNOWN};</li>
 * <li>{@code maxNbValues}: maximum number of languages annotated when the best score lies
 * between the two thresholds;</li>
 * <li>{@code profilesFolderPath}: optional folder holding custom profiles;</li>
 * <li>{@code addTopLevelAnnot}: whether a {@link ComposedUnit} is itself annotated using the
 * concatenation of its texts.</li>
 * </ul>
 */
@WebService(endpointInterface="org.weblab_project.services.analyser.Analyser")
public class LanguageExtraction
implements Analyser {
    private static final String PROPERTY_FILE = "ngram.properties";
    private static final double DEFAULT_MIN_SINGLE_VALUE = 0.75;
    private static final double DEFAULT_MIN_MULTIPLE_VALUE = 0.15;
    private static final int DEFAULT_MAX_NB_VALUES = 1;
    private static final String MIN_SINGLE_VALUE = "minSingleValue";
    private static final String MIN_MULTIPLE_VALUE = "minMultipleValue";
    private static final String MAX_NB_VALUES = "maxNbValues";
    private static final String PROFILES_FOLDER_PATH = "profilesFolderPath";
    private static final String ADD_TOP_LEVEL_ANNOT = "addTopLevelAnnot";
    private static final String UNKNOWN = "UNKNOWN";
    private double minSingleValue;
    private double minMultipleValue;
    private int maxNbValues;
    private boolean addTopLevelAnnot;
    private NGramProfilesPatched ngps;

    /**
     * Loads the configuration and the n-gram profiles.
     *
     * <p>Missing or unparsable values fall back to the defaults; an unusable profiles folder
     * falls back to the default profiles.</p>
     *
     * @throws LanguageExtractionException if even the default profiles cannot be created
     */
    @PostConstruct
    public void init() throws LanguageExtractionException {
        // Wildcard typing works whatever generic signature loadProperties declares.
        Map<?, ?> props = PropertiesLoader.loadProperties(PROPERTY_FILE);

        this.minSingleValue = this.readDouble(props, MIN_SINGLE_VALUE, DEFAULT_MIN_SINGLE_VALUE);
        this.minMultipleValue = this.readDouble(props, MIN_MULTIPLE_VALUE, DEFAULT_MIN_MULTIPLE_VALUE);
        if (this.minSingleValue < this.minMultipleValue) {
            LogFactory.getLog(this.getClass()).warn("minSingleValue was smaller than minMultipleValue. Use the two default value instead.");
            this.minSingleValue = DEFAULT_MIN_SINGLE_VALUE;
            this.minMultipleValue = DEFAULT_MIN_MULTIPLE_VALUE;
        }
        LogFactory.getLog(this.getClass()).debug("LanguageExtraction initialised with minSingleValue=" + this.minSingleValue);
        LogFactory.getLog(this.getClass()).debug("LanguageExtraction initialised with minMultipleValue=" + this.minMultipleValue);

        this.maxNbValues = this.readInt(props, MAX_NB_VALUES, DEFAULT_MAX_NB_VALUES);
        if (this.maxNbValues < 1) {
            LogFactory.getLog(this.getClass()).warn("maxNbValues was smaller than 1. Use the default value instead.");
            this.maxNbValues = DEFAULT_MAX_NB_VALUES;
        }
        LogFactory.getLog(this.getClass()).debug("LanguageExtraction initialised with maxNbValues=" + this.maxNbValues);

        this.ngps = this.loadProfiles(LanguageExtraction.readProperty(props, PROFILES_FOLDER_PATH));
        if (LogFactory.getLog(this.getClass()).isDebugEnabled()) {
            int profileCount = this.ngps.getProfileCount();
            StringBuilder sb = new StringBuilder();
            sb.append("LanguageExtraction initialised with the following ").append(profileCount).append(" language profiles: [");
            for (int p = 0; p < profileCount; ++p) {
                if (p > 0) {
                    sb.append(", ");
                }
                sb.append(this.ngps.getProfileName(p));
            }
            // Closed outside the loop so the bracket is balanced even with zero profiles.
            sb.append("]");
            LogFactory.getLog(this.getClass()).debug(sb.toString());
        }

        String addTopLevelAnnotP = LanguageExtraction.readProperty(props, ADD_TOP_LEVEL_ANNOT);
        this.addTopLevelAnnot = addTopLevelAnnotP != null && Boolean.parseBoolean(addTopLevelAnnotP);
    }

    /**
     * Detects and annotates the language of every non-empty {@link Text} found in the resource.
     *
     * <p>When {@code addTopLevelAnnot} is enabled and the resource is a {@link ComposedUnit},
     * the unit itself is also annotated, using the concatenation of all processed texts.</p>
     *
     * @param processArgs the arguments holding the resource to process
     * @return a {@link ProcessReturn} holding the same (now annotated) resource
     * @throws ProcessException if the arguments are null or the resource is neither a
     *         {@link ComposedUnit} nor a {@link Text}
     */
    public ProcessReturn process(ProcessArgs processArgs) throws ProcessException {
        List<Text> texts = this.checkArgs(processArgs);
        Resource resource = processArgs.getResource();
        boolean topLevelAnnot = this.addTopLevelAnnot && resource instanceof ComposedUnit;
        StringBuilder allContent = new StringBuilder();
        for (Text text : texts) {
            String content = text.getContent();
            if (content == null || content.isEmpty()) {
                LogFactory.getLog(this.getClass()).debug("Text '" + text.getUri() + "' has no content; ignored.");
                continue;
            }
            this.annotate(text, this.checkLanguage(content, text.getUri()));
            if (topLevelAnnot) {
                allContent.append(content);
                allContent.append("\n\n\n");
            }
        }
        if (topLevelAnnot && allContent.length() > 0) {
            this.annotate(resource, this.checkLanguage(allContent.toString(), resource.getUri()));
        }
        ProcessReturn pr = new ProcessReturn();
        pr.setResource(resource);
        return pr;
    }

    /**
     * Creates a new annotation on the resource holding one dc:language statement per language.
     *
     * @param res the resource to annotate
     * @param profileToAnnotate the language names to write
     */
    private void annotate(Resource res, List<String> profileToAnnotate) {
        Annotation annot = AnnotationFactory.createAndLinkAnnotation(res);
        PoKHelper pokH = RDFHelperFactory.getPoKHelper(annot);
        // Write all statements first and commit once at the end.
        pokH.setAutoCommitMode(false);
        for (String language : profileToAnnotate) {
            pokH.createLitStat(res.getUri(), "http://purl.org/dc/elements/1.1/language", language);
        }
        pokH.commit();
    }

    /**
     * Ranks the content against the loaded profiles and selects the language(s) to annotate.
     *
     * <p>The result at index 0 is treated as the best match. If its score is strictly above
     * {@code minSingleValue} only that profile is kept; if it is at least
     * {@code minMultipleValue}, up to {@code maxNbValues} profiles scoring at least
     * {@code minMultipleValue} are kept; otherwise {@code UNKNOWN} is returned and a warning
     * is logged.</p>
     *
     * @param content the text to analyse
     * @param uri the URI of the analysed unit, used in log messages only
     * @return a non-empty list of language names (or the single value {@code UNKNOWN})
     */
    private List<String> checkLanguage(String content, String uri) {
        List<String> profileToAnnotate = new LinkedList<String>();
        NGramProfilesPatched.Ranker ranker = this.ngps.getRanker();
        ranker.account(content);
        NGramProfilesPatched.RankResult result = ranker.getRankResult();
        int nbResults = result.getLength();
        // Guard against an empty ranking before reading the best score.
        if (nbResults > 0) {
            double bestScore = result.getScore(0);
            if (bestScore > this.minSingleValue) {
                profileToAnnotate.add(result.getName(0));
            } else if (bestScore >= this.minMultipleValue) {
                int max = Math.min(nbResults, this.maxNbValues);
                for (int p = 0; p < max && result.getScore(p) >= this.minMultipleValue; ++p) {
                    profileToAnnotate.add(result.getName(p));
                }
            }
        }
        // Nothing selected: score too low, empty ranking, or a non-comparable (NaN) score.
        // Never hand back an empty list, as it would produce an annotation without statement.
        boolean warn = profileToAnnotate.isEmpty();
        if (warn) {
            profileToAnnotate.add(UNKNOWN);
        }
        if (warn || LogFactory.getLog(this.getClass()).isDebugEnabled()) {
            StringBuilder sb = new StringBuilder();
            sb.append("Language detected for MediaUnit '").append(uri).append("' are: [");
            for (int p = 0; p < nbResults; ++p) {
                if (p > 0) {
                    sb.append(" --|-- ");
                }
                sb.append(result.getName(p));
                sb.append(" - ");
                sb.append(result.getScore(p));
            }
            sb.append("]");
            if (warn) {
                LogFactory.getLog(this.getClass()).warn(sb.toString());
                LogFactory.getLog(this.getClass()).warn("Unable to identify language for MediaUnit '" + uri + "'; " + profileToAnnotate + " will be annotated.");
            } else {
                LogFactory.getLog(this.getClass()).debug(sb.toString());
                LogFactory.getLog(this.getClass()).debug("Language to be annotated for MediaUnit '" + uri + "' are: " + profileToAnnotate);
            }
        }
        return profileToAnnotate;
    }

    /**
     * Validates the arguments and extracts the texts to process.
     *
     * @param processArg the arguments received by {@link #process(ProcessArgs)}
     * @return the selected {@link Text} sub-resources of a {@link ComposedUnit}, or a
     *         single-element list when the resource is itself a {@link Text}
     * @throws ProcessException (error id {@code E1}) if the arguments or the resource are
     *         null, or if the resource is neither a {@link ComposedUnit} nor a {@link Text}
     */
    private List<Text> checkArgs(ProcessArgs processArg) throws ProcessException {
        if (processArg == null) {
            throw new ProcessException("ProcessArgs was null.", this.createE1Exception());
        }
        Resource res = processArg.getResource();
        if (res == null) {
            throw new ProcessException("Resource in ProcessArgs was null.", this.createE1Exception());
        }
        if (!(res instanceof MediaUnit)) {
            throw new ProcessException("Resource in ProcessArgs was not an instance of MediaUnit but of '" + res.getClass().getCanonicalName() + "'.", this.createE1Exception());
        }
        if (res instanceof ComposedUnit) {
            return ResourceUtil.getSelectedSubResources(res, Text.class);
        }
        if (res instanceof Text) {
            List<Text> texts = new LinkedList<Text>();
            texts.add((Text)res);
            return texts;
        }
        throw new ProcessException("Resource in ProcessArgs was not neither an instance of ComposedUnit nor of Text but of '" + res.getClass().getCanonicalName() + "'.", this.createE1Exception());
    }

    /**
     * Builds the {@link WebLabException} payload used for every invalid-parameter error.
     *
     * @return a new exception bean with id {@code E1} and message {@code Invalid parameter}
     */
    private WebLabException createE1Exception() {
        WebLabException wle = new WebLabException();
        wle.setErrorId("E1");
        wle.setErrorMessage("Invalid parameter");
        return wle;
    }

    /**
     * Reads a property as a trimmed string.
     *
     * @param props the loaded properties
     * @param key the property name
     * @return the trimmed value, or {@code null} when absent or blank
     */
    private static String readProperty(Map<?, ?> props, String key) {
        Object raw = props.get(key);
        if (raw == null) {
            return null;
        }
        // Trailing blanks would otherwise break Integer.parseInt, Boolean.parseBoolean and paths.
        String value = raw.toString().trim();
        return value.isEmpty() ? null : value;
    }

    /**
     * Reads a double property, logging a warning and using the default when it is unparsable.
     *
     * @param props the loaded properties
     * @param key the property name
     * @param defaultValue the value used when the property is absent, blank or invalid
     * @return the parsed value or {@code defaultValue}
     */
    private double readDouble(Map<?, ?> props, String key, double defaultValue) {
        String value = LanguageExtraction.readProperty(props, key);
        if (value == null) {
            return defaultValue;
        }
        try {
            return Double.parseDouble(value);
        }
        catch (NumberFormatException nfe) {
            LogFactory.getLog(this.getClass()).warn("Unable to parse double for " + key + " property. Value was: '" + value + "'.");
            return defaultValue;
        }
    }

    /**
     * Reads an integer property, logging a warning and using the default when it is unparsable.
     *
     * @param props the loaded properties
     * @param key the property name
     * @param defaultValue the value used when the property is absent, blank or invalid
     * @return the parsed value or {@code defaultValue}
     */
    private int readInt(Map<?, ?> props, String key, int defaultValue) {
        String value = LanguageExtraction.readProperty(props, key);
        if (value == null) {
            return defaultValue;
        }
        try {
            return Integer.parseInt(value);
        }
        catch (NumberFormatException nfe) {
            LogFactory.getLog(this.getClass()).warn("Unable to parse integer for " + key + " property. Value was: '" + value + "'.");
            return defaultValue;
        }
    }

    /**
     * Loads the profiles from the configured folder, falling back to the default profiles
     * when the folder is not configured, unusable, or fails to load.
     *
     * @param profilesFolderPath the configured folder path, or {@code null} when not configured
     * @return the loaded profiles
     * @throws LanguageExtractionException if the default profiles cannot be created either
     */
    private NGramProfilesPatched loadProfiles(String profilesFolderPath) throws LanguageExtractionException {
        if (profilesFolderPath == null) {
            return this.createDefaultProfiles();
        }
        File folder = new File(profilesFolderPath);
        String problem = null;
        if (!folder.exists()) {
            problem = "does not exists";
        } else if (!folder.canRead()) {
            problem = "is not readable";
        } else if (!folder.isDirectory()) {
            problem = "is not a directory";
        }
        if (problem != null) {
            LogFactory.getLog(this.getClass()).warn("File '" + folder.getAbsolutePath() + "' " + problem + ". Creating LanguageExtraction with default configuration.");
            return this.createDefaultProfiles();
        }
        try {
            return new NGramProfilesPatched(folder);
        }
        catch (IOException ioe) {
            LogFactory.getLog(this.getClass()).warn("Unable to create NGramProfilesPatched using value of profilesFolderPath property. Value was: '" + folder.getAbsolutePath() + "'. Try to create default one.", ioe);
            return this.createDefaultProfiles();
        }
    }

    /**
     * Creates the profiles using the no-argument (default) configuration.
     *
     * @return the default profiles
     * @throws LanguageExtractionException wrapping the {@link IOException} raised on failure
     */
    private NGramProfilesPatched createDefaultProfiles() throws LanguageExtractionException {
        try {
            return new NGramProfilesPatched();
        }
        catch (IOException ioe) {
            throw new LanguageExtractionException("Unable to create NGramProfilesPatched using default value.", ioe);
        }
    }
}

