/*
 * Decompiled with CFR 0.152.
 */
package org.imixs.ml.service;

import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import javax.ejb.Stateless;
import javax.enterprise.event.Event;
import javax.inject.Inject;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import org.imixs.melman.RestAPIException;
import org.imixs.melman.WorkflowClient;
import org.imixs.ml.core.MLClient;
import org.imixs.ml.core.MLContentBuilder;
import org.imixs.ml.events.EntityObjectEvent;
import org.imixs.ml.service.TikaHelperService;
import org.imixs.ml.training.TrainingDataBuilder;
import org.imixs.ml.xml.XMLTrainingData;
import org.imixs.ml.xml.XMLTrainingEntity;
import org.imixs.workflow.FileData;
import org.imixs.workflow.ItemCollection;
import org.imixs.workflow.exceptions.PluginException;

@Stateless
public class TrainingService {
    private static Logger logger = Logger.getLogger(TrainingService.class.getName());
    public static final String FILE_ATTRIBUTE_TEXT = "text";
    @Inject
    TikaHelperService tikaService;
    @Inject
    protected Event<EntityObjectEvent> entityObjectEvents;

    public int trainWorkitemData(ItemCollection config, ItemCollection workitem, WorkflowClient workflowClient) {
        String _FilenamePattern;
        boolean debug = logger.isLoggable(Level.FINE);
        int qualityResult = -1;
        Pattern mlFilenamePattern = null;
        logger.info("...create new training data for: " + workitem.getUniqueID());
        String model = config.getItemValueString("ml.analyse.model");
        List trainingItemNames = config.getItemValue("workflow.entities");
        List tikaOptions = config.getItemValue("tika.options");
        String ocrMode = config.getItemValueString("tika.ocrmode");
        String qualityLevel = config.getItemValueString("ml.training.quality");
        if (qualityLevel.isEmpty()) {
            qualityLevel = "FULL";
        }
        if ((_FilenamePattern = config.getItemValueString("filename.pattern")) != null && !_FilenamePattern.isEmpty()) {
            logger.info("......apply filename.pattern=" + _FilenamePattern);
            mlFilenamePattern = Pattern.compile(_FilenamePattern);
        }
        List sLocales = config.getItemValue("workflow.locale");
        ArrayList<Locale> locals = new ArrayList<Locale>();
        for (String _locale : sLocales) {
            Locale aLocale = new Locale(_locale);
            locals.add(aLocale);
            if (!debug) continue;
            logger.finest("......suporting locale " + aLocale);
        }
        logger.info("......model=" + model);
        logger.info("......qualityLevel=" + qualityLevel);
        logger.info("......ocrMode=" + ocrMode);
        logger.info("......locales=" + Arrays.toString(sLocales.toArray()));
        try {
            workitem = this.doVerifyOCRContent(workitem, mlFilenamePattern, workflowClient, tikaOptions);
            String ocrText = new MLContentBuilder(workitem, null, false, mlFilenamePattern).build();
            if (ocrText == null || ocrText.isEmpty()) {
                return 0;
            }
            logger.fine("extracted text content to be analysed=");
            logger.fine(ocrText);
            XMLTrainingData trainingData = new TrainingDataBuilder(ocrText, workitem, trainingItemNames, locals).setAnalyzerEntityEvents(this.entityObjectEvents).build();
            ArrayList<String> entitysFound = new ArrayList<String>();
            for (XMLTrainingEntity trainingEntity : trainingData.getEntities()) {
                if (entitysFound.contains(trainingEntity.getLabel())) continue;
                entitysFound.add(trainingEntity.getLabel());
            }
            qualityResult = trainingData.getQuality();
            if (0 == trainingData.getQuality()) {
                if ("REDUCED".equalsIgnoreCase(qualityLevel)) {
                    logger.info("...document '" + workitem.getUniqueID() + "' TRAININGDATA_QUALITY_LEVEL=BAD but REDUCED is accepted - document will be trained...");
                    qualityResult = 2;
                } else {
                    logger.severe("...document '" + workitem.getUniqueID() + "' TRAININGDATA_QUALITY_LEVEL=BAD - document will be ignored!");
                }
            } else if (2 == trainingData.getQuality() && "FULL".equalsIgnoreCase(qualityLevel)) {
                logger.severe("...document '" + workitem.getUniqueID() + "' TRAININGDATA_QUALITY_LEVEL=PARTIAL but FULL is required - document will be ignored!");
                qualityResult = 0;
            } else {
                logger.info("...document '" + workitem.getUniqueID() + "' TRAININGDATA_QUALITY_LEVEL=" + qualityResult + "...");
            }
            if (qualityResult == 2 || qualityResult == 1) {
                if (debug) {
                    this.printXML(trainingData);
                }
                String serviceEndpoint = config.getItemValueString("ml.training.endpoint");
                MLClient mlClient = new MLClient(serviceEndpoint);
                mlClient.postTrainingData(trainingData, model);
            }
        }
        catch (RestAPIException | PluginException e1) {
            logger.severe("Error parsing documents: " + e1.getMessage());
        }
        return qualityResult;
    }

    public void testWorkitemData(ItemCollection config, ItemCollection doc, WorkflowClient workflowClient) {
        logger.info("......anaysing: " + doc.getUniqueID());
        Pattern mlFilenamePattern = null;
        List tikaOptions = config.getItemValue("tika.options");
        String serviceEndpoint = config.getItemValueString("ml.analyse.endpoint");
        String model = config.getItemValueString("ml.analyse.model");
        String _FilenamePattern = config.getItemValueString("filename.pattern");
        if (_FilenamePattern != null && !_FilenamePattern.isEmpty()) {
            logger.info("......apply filename.pattern=" + _FilenamePattern);
            mlFilenamePattern = Pattern.compile(_FilenamePattern);
        }
        try {
            doc = this.doVerifyOCRContent(doc, mlFilenamePattern, workflowClient, tikaOptions);
            String ocrText = new MLContentBuilder(doc, null, false, mlFilenamePattern).build();
            if (ocrText != null && !ocrText.isEmpty()) {
                MLClient mlClient = new MLClient(serviceEndpoint);
                mlClient.postAnalyseData(ocrText, model);
            }
        }
        catch (RestAPIException | PluginException e1) {
            logger.severe("Error parsing documents: " + e1.getMessage());
        }
    }

    public void printXML(XMLTrainingData trainingData) {
        try {
            JAXBContext context = JAXBContext.newInstance((Class[])new Class[]{XMLTrainingData.class});
            Marshaller marshaller = context.createMarshaller();
            marshaller.setProperty("jaxb.formatted.output", (Object)true);
            StringWriter out = new StringWriter();
            marshaller.marshal((Object)trainingData, (Writer)out);
            String xml = out.toString();
            logger.info(xml);
        }
        catch (JAXBException e) {
            e.printStackTrace();
        }
    }

    private ItemCollection doVerifyOCRContent(ItemCollection workitem, Pattern mlFilenamePattern, WorkflowClient workflowClient, List<String> tikaOptions) throws RestAPIException, PluginException {
        List files = workitem.getFileData();
        if (files == null || files.size() == 0) {
            return workitem;
        }
        for (FileData file : files) {
            ItemCollection metadata = new ItemCollection(file.getAttributes());
            String _text = metadata.getItemValueString(FILE_ATTRIBUTE_TEXT);
            if (_text.isEmpty()) continue;
            return workitem;
        }
        ItemCollection snapshot = null;
        String snapshotID = workitem.getItemValueString("$snapshotid");
        if (!snapshotID.isEmpty()) {
            snapshot = workflowClient.getDocument(snapshotID);
        }
        if (snapshot == null) {
            logger.warning("Unable to load snapshot for document " + workitem.getUniqueID());
            return workitem;
        }
        workitem = snapshot;
        files = workitem.getFileData();
        for (FileData fileData : files) {
            try {
                String ocrContent = this.tikaService.doORCProcessing(fileData, tikaOptions);
                ArrayList<String> list = new ArrayList<String>();
                list.add(ocrContent);
                fileData.setAttribute(FILE_ATTRIBUTE_TEXT, list);
            }
            catch (IOException e) {
                e.printStackTrace();
            }
        }
        return workitem;
    }
}

