/*
 * Decompiled with CFR 0.152.
 */
package org.imixs.ml.service;

import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.ejb.Stateless;
import javax.enterprise.event.Event;
import javax.inject.Inject;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import org.imixs.archive.ocr.OCRService;
import org.imixs.melman.RestAPIException;
import org.imixs.melman.WorkflowClient;
import org.imixs.ml.core.MLClient;
import org.imixs.ml.events.EntityObjectEvent;
import org.imixs.ml.training.TrainingDataBuilder;
import org.imixs.ml.xml.XMLTrainingData;
import org.imixs.ml.xml.XMLTrainingEntity;
import org.imixs.workflow.FileData;
import org.imixs.workflow.ItemCollection;
import org.imixs.workflow.exceptions.PluginException;

@Stateless
public class TrainingService {
    private static Logger logger = Logger.getLogger(TrainingService.class.getName());
    @Inject
    OCRService ocrService;
    @Inject
    protected Event<EntityObjectEvent> entityObjectEvents;

    public int trainWorkitemData(ItemCollection config, ItemCollection workitem, WorkflowClient workflowClient) {
        boolean debug = logger.isLoggable(Level.FINE);
        int qualityResult = -1;
        logger.info("......create new training data for: " + workitem.getUniqueID());
        List trainingItemNames = config.getItemValue("workflow.entities");
        List tikaOptions = config.getItemValue("tika.options");
        String ocrMode = config.getItemValueString("tika.ocrmode");
        String qualityLevel = config.getItemValueString("ml.training.quality");
        if (qualityLevel.isEmpty()) {
            qualityLevel = "FULL";
        }
        List sLocales = config.getItemValue("workflow.locale");
        HashSet<Locale> locals = new HashSet<Locale>();
        for (String _locale : sLocales) {
            Locale aLocale = new Locale(_locale);
            locals.add(aLocale);
            if (!debug) continue;
            logger.finest("......suporting locale " + aLocale);
        }
        ItemCollection snapshot = null;
        try {
            String snapshotID = workitem.getItemValueString("$snapshotid");
            if (!snapshotID.isEmpty()) {
                snapshot = workflowClient.getDocument(snapshotID);
            }
            if (snapshot == null) {
                logger.warning("Unable to load snapshot for document " + workitem.getUniqueID());
                return 0;
            }
            this.ocrService.extractText(snapshot, null, ocrMode, tikaOptions);
            List files = snapshot.getFileData();
            if (files != null && files.size() > 0) {
                for (FileData file : files) {
                    ItemCollection metadata;
                    String content;
                    if (debug) {
                        logger.fine("...analyzing content of '" + file.getName() + "'.....");
                    }
                    if (!(content = (metadata = new ItemCollection(file.getAttributes())).getItemValueString("text")).isEmpty()) {
                        if (debug) {
                            logger.fine("extracted text content to be analysed=");
                            logger.fine(content);
                        }
                        XMLTrainingData trainingData = new TrainingDataBuilder(content, workitem, trainingItemNames, locals).setAnalyzerEntityEvents(this.entityObjectEvents).build();
                        ArrayList<String> entitysFound = new ArrayList<String>();
                        for (XMLTrainingEntity trainingEntity : trainingData.getEntities()) {
                            if (entitysFound.contains(trainingEntity.getLabel())) continue;
                            entitysFound.add(trainingEntity.getLabel());
                        }
                        if (0 == trainingData.getQuality()) {
                            logger.severe("...document '" + workitem.getUniqueID() + "' TRAININGDATA_QUALITY_LEVEL=BAD - document will be ignored!");
                            qualityResult = 0;
                            continue;
                        }
                        if (2 == trainingData.getQuality() && "FULL".equalsIgnoreCase(qualityLevel)) {
                            logger.severe("...document '" + workitem.getUniqueID() + "' TRAININGDATA_QUALITY_LEVEL=PARTIAL but FULL is required - document will be ignored!");
                            qualityResult = 0;
                            continue;
                        }
                        if (2 == trainingData.getQuality()) {
                            logger.warning("...document '" + workitem.getUniqueID() + "' TRAININGDATA_QUALITY_LEVEL=PARTIAL ...");
                            qualityResult = 2;
                        }
                        if (1 == trainingData.getQuality()) {
                            logger.info("...document '" + workitem.getUniqueID() + "' TRAININGDATA_QUALITY_LEVEL=FULL ...");
                            qualityResult = 1;
                        }
                        if (debug) {
                            this.printXML(trainingData);
                        }
                        String serviceEndpoint = config.getItemValueString("ml.training.endpoint");
                        MLClient mlClient = new MLClient();
                        mlClient.postTrainingData(trainingData, serviceEndpoint);
                        continue;
                    }
                    logger.severe("......no content found in '" + file.getName() + "' (" + workitem.getUniqueID() + ")");
                    qualityResult = 0;
                }
            } else {
                logger.severe("......no files found for " + workitem.getUniqueID());
                qualityResult = 0;
            }
        }
        catch (RestAPIException | PluginException e1) {
            logger.severe("Error parsing documents: " + e1.getMessage());
        }
        return qualityResult;
    }

    public void testWorkitemData(ItemCollection config, ItemCollection doc, WorkflowClient workflowClient) {
        boolean debug = logger.isLoggable(Level.FINE);
        logger.info("......anaysing: " + doc.getUniqueID());
        List tikaOptions = config.getItemValue("tika.options");
        String ocrMode = config.getItemValueString("tika.ocrmode");
        ItemCollection snapshot = null;
        try {
            String snapshotID = doc.getItemValueString("$snapshotid");
            if (!snapshotID.isEmpty()) {
                snapshot = workflowClient.getDocument(snapshotID);
            }
            if (snapshot == null) {
                logger.warning("Unable to load snapshot for document " + doc.getUniqueID());
                return;
            }
            this.ocrService.extractText(snapshot, null, ocrMode, tikaOptions);
            List files = snapshot.getFileData();
            if (files != null && files.size() > 0) {
                for (FileData file : files) {
                    if (debug) {
                        logger.fine("...analyzing content of '" + file.getName() + "'.....");
                    }
                    ItemCollection metadata = new ItemCollection(file.getAttributes());
                    String content = metadata.getItemValueString("text");
                    String serviceEndpoint = config.getItemValueString("ml.analyse.endpoint");
                    MLClient mlClient = new MLClient();
                    mlClient.postAnalyseData(content, serviceEndpoint);
                }
            } else {
                logger.severe("......no files found for " + doc.getUniqueID());
            }
        }
        catch (RestAPIException | PluginException e1) {
            logger.severe("Error parsing documents: " + e1.getMessage());
        }
    }

    public void printXML(XMLTrainingData trainingData) {
        try {
            JAXBContext context = JAXBContext.newInstance((Class[])new Class[]{XMLTrainingData.class});
            Marshaller marshaller = context.createMarshaller();
            marshaller.setProperty("jaxb.formatted.output", (Object)true);
            StringWriter out = new StringWriter();
            marshaller.marshal((Object)trainingData, (Writer)out);
            String xml = out.toString();
            logger.info(xml);
        }
        catch (JAXBException e) {
            e.printStackTrace();
        }
    }
}

