/*
 * Decompiled with CFR 0.152.
 */
package org.imixs.ml.service;

import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import javax.ejb.Stateless;
import javax.enterprise.event.Event;
import javax.inject.Inject;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import org.imixs.melman.RestAPIException;
import org.imixs.melman.WorkflowClient;
import org.imixs.ml.core.MLClient;
import org.imixs.ml.core.MLConfig;
import org.imixs.ml.core.MLContentBuilder;
import org.imixs.ml.core.MLEntity;
import org.imixs.ml.core.MLTrainingResult;
import org.imixs.ml.events.EntityObjectEvent;
import org.imixs.ml.service.TikaHelperService;
import org.imixs.ml.training.TrainingDataBuilder;
import org.imixs.ml.xml.XMLAnalyseResult;
import org.imixs.ml.xml.XMLTrainingData;
import org.imixs.workflow.FileData;
import org.imixs.workflow.ItemCollection;
import org.imixs.workflow.exceptions.PluginException;

/**
 * Stateless service that builds training data from workitems and sends it to an
 * ML service for training, validation or analysis.
 * <p>
 * All settings (endpoints, model names, OCR flags, filename patterns, entity
 * names and locales) are read from a configuration {@link ItemCollection}
 * passed to each method. If OCR verification is enabled and no file of the
 * workitem carries a {@value #FILE_ATTRIBUTE_TEXT} attribute yet, the snapshot
 * of the workitem is loaded and its files are processed by the
 * {@link TikaHelperService}.
 */
@Stateless
public class TrainingService {
    private static final Logger logger = Logger.getLogger(TrainingService.class.getName());
    public static final String FILE_ATTRIBUTE_TEXT = "text";
    public static final String ITEM_ML_ITEMS = "ml.items";
    public static final String ITEM_ML_DEFINITIONS = "ml.definitions";

    // Quality codes as reported by XMLTrainingData.getQuality(). The meaning of
    // the two values is taken from the log messages emitted for them below.
    private static final int QUALITY_BAD = 0;
    private static final int QUALITY_LOW = 4;

    private static final String LOG_START = "=======================START ======================================";
    private static final String LOG_FINISHED = "=======================FINISHED====================================";

    @Inject
    TikaHelperService tikaService;
    @Inject
    protected Event<EntityObjectEvent> entityObjectEvents;

    /**
     * Builds a training data set for the given workitem and posts it to the
     * training endpoint configured in {@code ml.training.endpoint}.
     * <p>
     * Documents without text, documents with quality BAD, and documents with
     * quality LOW while {@code ml.training.quality} is {@code GOOD} are not
     * sent to the ML service; in these cases the result carries quality 0 and
     * no data.
     *
     * @param config         configuration holding the {@code ml.training.*},
     *                       {@code ml.options}, {@code tika.options},
     *                       {@code workflow.entities} and
     *                       {@code workflow.locale} items
     * @param workitem       the workitem providing the document content
     * @param workflowClient client used to load the snapshot when OCR content
     *                       has to be created
     * @return the training result, or {@code null} if a
     *         {@link RestAPIException} or {@link PluginException} occurred (the
     *         error is logged)
     */
    public MLTrainingResult trainWorkitemData(ItemCollection config, ItemCollection workitem, WorkflowClient workflowClient) {
        boolean debug = logger.isLoggable(Level.FINE);
        MLTrainingResult trainingResult = null;
        logger.info(LOG_START);
        logger.info("...create new training data for: " + workitem.getUniqueID());

        String model = config.getItemValueString("ml.training.model");
        String mlOptions = config.getItemValueString("ml.options");
        String mlOCR = config.getItemValueString("ml.training.ocr");
        List<String> trainingItemNames = stringValues(config, "workflow.entities");
        List<String> tikaOptions = stringValues(config, "tika.options");
        String qualityLevel = config.getItemValueString("ml.training.quality");
        if (qualityLevel.isEmpty()) {
            qualityLevel = "LOW";
        }
        Pattern mlFilenamePattern = compileFilenamePattern(config.getItemValueString("ml.training.filepattern"), Level.FINE);
        List<Locale> locals = parseLocales(stringValues(config, "workflow.locale"));

        try {
            ItemCollection source = workitem;
            if ("true".equalsIgnoreCase(mlOCR)) {
                source = this.doVerifyOCRContent(source, mlFilenamePattern, workflowClient, tikaOptions);
            }
            String ocrText = new MLContentBuilder(source, null, false, mlFilenamePattern).build();
            if (ocrText == null || ocrText.isEmpty()) {
                logger.severe("...document '" + source.getUniqueID() + "' No text found!");
                logger.info(LOG_FINISHED);
                return new MLTrainingResult(0, null);
            }
            logger.fine("extracted text content to be analysed=");
            logger.fine(ocrText);

            XMLTrainingData trainingData = this.generateTraingDataSet(ocrText, source, trainingItemNames, locals);
            int qualityResult = trainingData.getQuality();
            if (QUALITY_BAD == qualityResult) {
                logger.severe("...document '" + source.getUniqueID() + "' TRAININGDATA_QUALITY_LEVEL=BAD - document will be ignored!");
            } else if (QUALITY_LOW == qualityResult && "GOOD".equalsIgnoreCase(qualityLevel)) {
                logger.severe("...document '" + source.getUniqueID() + "' TRAININGDATA_QUALITY_LOW but GOOD is required - document will be ignored!");
                // Downgrade so that the document is skipped below.
                qualityResult = QUALITY_BAD;
            } else {
                logger.info("...document '" + source.getUniqueID() + "' TRAININGDATA_QUALITY_LEVEL=" + qualityResult + "...");
            }

            if (qualityResult != QUALITY_BAD) {
                if (debug) {
                    this.printXML(trainingData);
                }
                String serviceEndpoint = config.getItemValueString("ml.training.endpoint");
                MLClient mlClient = new MLClient(serviceEndpoint);
                String resultData = mlClient.postTrainingData(trainingData, model, mlOptions);
                trainingResult = new MLTrainingResult(qualityResult, resultData);
            } else {
                trainingResult = new MLTrainingResult(qualityResult, null);
            }
        } catch (RestAPIException | PluginException e1) {
            logger.log(Level.SEVERE, "Error parsing documents: " + e1.getMessage(), e1);
        }
        logger.info(LOG_FINISHED);
        return trainingResult;
    }

    /**
     * Extracts the text content of the given workitem and posts it to the
     * endpoint configured in {@code ml.validation.endpoint} for analysis.
     *
     * @param config         configuration holding the {@code ml.validation.*}
     *                       and {@code tika.options} items
     * @param workitem       the workitem providing the document content
     * @param workflowClient client used to load the snapshot when OCR content
     *                       has to be created
     * @return the analysis result, or {@code null} if no text was found or a
     *         {@link RestAPIException} or {@link PluginException} occurred (the
     *         error is logged)
     */
    public XMLAnalyseResult analyzeWorkitemData(ItemCollection config, ItemCollection workitem, WorkflowClient workflowClient) {
        logger.info("......anaysing: " + workitem.getUniqueID());
        List<String> tikaOptions = stringValues(config, "tika.options");
        String serviceEndpoint = config.getItemValueString("ml.validation.endpoint");
        String model = config.getItemValueString("ml.validation.model");
        String mlOCR = config.getItemValueString("ml.validation.ocr");
        Pattern mlFilenamePattern = compileFilenamePattern(config.getItemValueString("ml.validation.filepattern"), Level.INFO);

        try {
            ItemCollection source = workitem;
            if ("true".equalsIgnoreCase(mlOCR)) {
                source = this.doVerifyOCRContent(source, mlFilenamePattern, workflowClient, tikaOptions);
            }
            String ocrText = new MLContentBuilder(source, null, false, mlFilenamePattern).build();
            if (ocrText != null && !ocrText.isEmpty()) {
                MLClient mlClient = new MLClient(serviceEndpoint);
                return mlClient.postAnalyseData(ocrText, model);
            }
        } catch (RestAPIException | PluginException e1) {
            logger.log(Level.SEVERE, "Error parsing documents: " + e1.getMessage(), e1);
        }
        return null;
    }

    /**
     * Builds a training data set for the given workitem and posts it to the
     * endpoint configured in {@code ml.training.endpoint} for validation.
     * <p>
     * Unlike {@link #trainWorkitemData}, the data is posted regardless of its
     * quality level.
     *
     * @param config         configuration holding the {@code ml.training.*},
     *                       {@code tika.options}, {@code workflow.entities} and
     *                       {@code workflow.locale} items
     * @param workitem       the workitem providing the document content
     * @param workflowClient client used to load the snapshot when OCR content
     *                       has to be created
     * @return the validation result (quality 0 and no data if the document
     *         contains no text), or {@code null} if a {@link RestAPIException}
     *         or {@link PluginException} occurred (the error is logged)
     */
    public MLTrainingResult validateWorkitemData(ItemCollection config, ItemCollection workitem, WorkflowClient workflowClient) {
        MLTrainingResult trainingResult = null;
        logger.info(LOG_START);
        logger.info("...validate training data for: " + workitem.getUniqueID());

        String model = config.getItemValueString("ml.training.model");
        String mlOCR = config.getItemValueString("ml.training.ocr");
        List<String> trainingItemNames = stringValues(config, "workflow.entities");
        List<String> tikaOptions = stringValues(config, "tika.options");
        Pattern mlFilenamePattern = compileFilenamePattern(config.getItemValueString("ml.training.filepattern"), Level.FINE);
        List<Locale> locals = parseLocales(stringValues(config, "workflow.locale"));

        try {
            ItemCollection source = workitem;
            if ("true".equalsIgnoreCase(mlOCR)) {
                source = this.doVerifyOCRContent(source, mlFilenamePattern, workflowClient, tikaOptions);
            }
            String ocrText = new MLContentBuilder(source, null, false, mlFilenamePattern).build();
            if (ocrText == null || ocrText.isEmpty()) {
                logger.severe("...document '" + source.getUniqueID() + "' No text found!");
                logger.info(LOG_FINISHED);
                return new MLTrainingResult(0, null);
            }
            logger.fine("extracted text content to be analysed=");
            logger.fine(ocrText);

            XMLTrainingData trainingData = this.generateTraingDataSet(ocrText, source, trainingItemNames, locals);
            int qualityResult = trainingData.getQuality();
            String serviceEndpoint = config.getItemValueString("ml.training.endpoint");
            MLClient mlClient = new MLClient(serviceEndpoint);
            String resultData = mlClient.postValidateData(trainingData, model);
            trainingResult = new MLTrainingResult(qualityResult, resultData);
        } catch (RestAPIException | PluginException e1) {
            logger.log(Level.SEVERE, "Error parsing documents: " + e1.getMessage(), e1);
        }
        logger.info(LOG_FINISHED);
        return trainingResult;
    }

    /**
     * Marshals the given training data to formatted XML and writes it to the
     * log at level INFO. Marshalling errors are logged and not propagated.
     *
     * @param trainingData the training data to print
     */
    public void printXML(XMLTrainingData trainingData) {
        try {
            JAXBContext context = JAXBContext.newInstance(XMLTrainingData.class);
            Marshaller marshaller = context.createMarshaller();
            marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);
            StringWriter out = new StringWriter();
            marshaller.marshal(trainingData, out);
            logger.info(out.toString());
        } catch (JAXBException e) {
            logger.log(Level.WARNING, "Failed to marshal training data: " + e.getMessage(), e);
        }
    }

    /**
     * Returns the ML definitions stored in the item
     * {@value #ITEM_ML_DEFINITIONS} of the given workitem, each wrapped in an
     * {@link ItemCollection}.
     *
     * @param workitem the workitem to read the definitions from
     * @return the list of definitions; empty (never {@code null}) if the item
     *         has no non-empty string value
     */
    public List<ItemCollection> getMLDefinitions(ItemCollection workitem) {
        List<ItemCollection> result = new ArrayList<>();
        if (!workitem.getItemValueString(ITEM_ML_DEFINITIONS).isEmpty()) {
            // getItemValue returns a raw list; the entries are expected to be
            // the attribute maps of the stored definitions.
            @SuppressWarnings("unchecked")
            List<Map<String, List<Object>>> mlDefinitions = workitem.getItemValue(ITEM_ML_DEFINITIONS);
            for (Map<String, List<Object>> aDef : mlDefinitions) {
                result.add(new ItemCollection(aDef));
            }
        }
        return result;
    }

    /**
     * Creates the training data set for the given text.
     * <p>
     * The entity definitions are taken from the first ML definition of the
     * workitem. If the workitem provides none, one plain entity is created per
     * configured training item name (the name {@value #ITEM_ML_DEFINITIONS}
     * itself is skipped).
     */
    @SuppressWarnings("unchecked")
    private XMLTrainingData generateTraingDataSet(String ocrText, ItemCollection workitem, List<String> trainingItemNames, List<Locale> locals) {
        List<MLEntity> mlEntities = null;
        List<ItemCollection> mlDefinitionList = this.getMLDefinitions(workitem);
        if (mlDefinitionList != null && !mlDefinitionList.isEmpty()) {
            ItemCollection mlDefinition = mlDefinitionList.get(0);
            mlEntities = MLConfig.explodeMLEntityList(mlDefinition.getItemValue(ITEM_ML_ITEMS));
        }
        if (mlEntities == null || mlEntities.isEmpty()) {
            logger.info("migrating to dummy mlEntity definition set from XML configuration!");
            mlEntities = new ArrayList<>();
            for (String aname : trainingItemNames) {
                if (ITEM_ML_DEFINITIONS.equals(aname)) {
                    continue;
                }
                mlEntities.add(new MLEntity(aname, null, null, 0, false));
            }
        }
        return new TrainingDataBuilder(ocrText, workitem, mlEntities, locals).setAnalyzerEntityEvents(this.entityObjectEvents).build();
    }

    /**
     * Ensures that text content is available for the files of the workitem.
     * <p>
     * The workitem is returned unchanged if it has no files, if at least one of
     * its files already has a non-empty {@value #FILE_ATTRIBUTE_TEXT}
     * attribute, or if no snapshot can be loaded. Otherwise the snapshot
     * referenced by {@code $snapshotid} is loaded, each of its files is
     * processed by the {@link TikaHelperService}, the result is stored in the
     * {@value #FILE_ATTRIBUTE_TEXT} attribute of the file, and the snapshot is
     * returned in place of the workitem.
     * <p>
     * An {@link IOException} raised while processing a single file is logged
     * and the remaining files are still processed.
     */
    private ItemCollection doVerifyOCRContent(ItemCollection workitem, Pattern mlFilenamePattern, WorkflowClient workflowClient, List<String> tikaOptions) throws RestAPIException, PluginException {
        List<FileData> files = workitem.getFileData();
        if (files == null || files.isEmpty()) {
            return workitem;
        }
        for (FileData file : files) {
            ItemCollection metadata = new ItemCollection(file.getAttributes());
            if (!metadata.getItemValueString(FILE_ATTRIBUTE_TEXT).isEmpty()) {
                // Text content already exists - nothing to do.
                return workitem;
            }
        }

        ItemCollection snapshot = null;
        String snapshotID = workitem.getItemValueString("$snapshotid");
        if (!snapshotID.isEmpty()) {
            snapshot = workflowClient.getDocument(snapshotID);
        }
        if (snapshot == null) {
            logger.warning("Unable to load snapshot for document " + workitem.getUniqueID());
            return workitem;
        }

        List<FileData> snapshotFiles = snapshot.getFileData();
        if (snapshotFiles == null) {
            return snapshot;
        }
        for (FileData fileData : snapshotFiles) {
            try {
                String ocrContent = this.tikaService.doORCProcessing(fileData, tikaOptions);
                List<Object> list = new ArrayList<>();
                list.add(ocrContent);
                fileData.setAttribute(FILE_ATTRIBUTE_TEXT, list);
            } catch (IOException e) {
                logger.log(Level.WARNING, "Failed to extract text content for a file of document " + workitem.getUniqueID() + ": " + e.getMessage(), e);
            }
        }
        return snapshot;
    }

    /**
     * Reads the values of an item as a list of strings. Confines the unchecked
     * conversion of the raw list returned by
     * {@link ItemCollection#getItemValue(String)} to a single place.
     */
    @SuppressWarnings("unchecked")
    private static List<String> stringValues(ItemCollection source, String itemName) {
        return source.getItemValue(itemName);
    }

    /**
     * Compiles the configured filename pattern and logs it at the given level.
     *
     * @return the compiled pattern, or {@code null} if no pattern is configured
     */
    private static Pattern compileFilenamePattern(String filenamePattern, Level logLevel) {
        if (filenamePattern == null || filenamePattern.isEmpty()) {
            return null;
        }
        logger.log(logLevel, "......apply filename.pattern=" + filenamePattern);
        return Pattern.compile(filenamePattern);
    }

    /**
     * Converts locale names of the form {@code language} or
     * {@code language_COUNTRY} into {@link Locale} objects.
     * <p>
     * A name that contains an underscore but does not yield both a language and
     * a country part (for example {@code "de_"} or {@code "_"}) is reported
     * with a warning and skipped. {@code null} entries are skipped as well.
     */
    private static List<Locale> parseLocales(List<String> localeNames) {
        List<Locale> result = new ArrayList<>();
        if (localeNames == null) {
            return result;
        }
        for (String name : localeNames) {
            if (name == null) {
                continue;
            }
            Locale locale;
            if (name.contains("_")) {
                // String.split drops trailing empty strings, so "de_" yields a
                // single element; both parts must exist before indexing [1].
                String[] parts = name.split("_");
                if (parts.length < 2) {
                    logger.warning("Wrong Locale Configuration: " + name);
                    continue;
                }
                locale = new Locale(parts[0], parts[1]);
            } else {
                locale = new Locale(name);
            }
            result.add(locale);
            logger.finest("......suporting locale " + locale);
        }
        return result;
    }
}

