/*
 * Decompiled with CFR 0.152.
 */
package org.imixs.ml.service;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import javax.ejb.Stateless;
import javax.inject.Inject;
import org.eclipse.microprofile.config.inject.ConfigProperty;
import org.imixs.workflow.FileData;
import org.imixs.workflow.ItemCollection;
import org.imixs.workflow.exceptions.PluginException;

/**
 * Stateless service that extracts the textual content of file attachments by
 * sending each file with an HTTP PUT request to a Tika server endpoint.
 * <p>
 * The endpoint is read from the config property {@code ocr.service.endpoint};
 * the default PDF OCR strategy is read from {@code ocr.strategy} (default
 * {@code AUTO}). Options handed to the server are passed as {@code X-Tika...}
 * request headers.
 */
@Stateless
public class TikaHelperService {
    public static final String DEFAULT_ENCODING = "UTF-8";
    public static final String PLUGIN_ERROR = "PLUGIN_ERROR";
    public static final String ENV_OCR_SERVICE_ENDPOINT = "ocr.service.endpoint";
    public static final String ENV_OCR_SERVICE_MODE = "ocr.service.mode";
    public static final String ENV_OCR_STRATEGY = "ocr.strategy";
    public static final String OCR_STRATEGY_NO_OCR = "NO_OCR";
    public static final String OCR_STRATEGY_OCR_AND_TEXT_EXTRACTION = "OCR_AND_TEXT_EXTRACTION";
    public static final String OCR_STRATEGY_OCR_ONLY = "OCR_ONLY";
    public static final String OCR_STRATEGY_AUTO = "AUTO";

    /** Prefix of the option that selects the PDF OCR strategy. */
    private static final String OPTION_PDF_OCR_STRATEGY = "X-Tika-PDFOcrStrategy=";
    /** Name of the charset parameter inside a Content-Type header value. */
    private static final String CHARSET_PARAM = "charset=";

    private static final Logger logger = Logger.getLogger(TikaHelperService.class.getName());

    @Inject
    @ConfigProperty(name = ENV_OCR_SERVICE_ENDPOINT)
    Optional<String> serviceEndpoint;

    @Inject
    @ConfigProperty(name = ENV_OCR_STRATEGY, defaultValue = OCR_STRATEGY_AUTO)
    String ocrStategy;

    /**
     * Extracts the text of all files attached to the given snapshot and returns
     * the concatenated result.
     *
     * @param snapshot          the item collection providing the file data
     * @param mlFilenamePattern optional filename filter; when not {@code null}
     *                          only files whose name matches (via
     *                          {@code find()}) are processed
     * @param _ocrStategy       optional OCR strategy for this call; when
     *                          {@code null} or empty the configured default is
     *                          used
     * @param options           optional list of {@code key=value} Tika options;
     *                          the list is not modified
     * @return the extracted text of all processed files, each followed by a
     *         single space; an empty string if nothing was extracted
     * @throws PluginException if the OCR strategy is invalid or a file could not
     *                         be sent to / read from the server
     */
    public String extractText(ItemCollection snapshot, Pattern mlFilenamePattern, String _ocrStategy,
            List<String> options) throws PluginException {
        boolean debug = logger.isLoggable(Level.FINE);

        // Resolve the strategy per call. It must not be written back into the
        // injected field: the bean instance is shared between invocations and an
        // override would otherwise leak into unrelated later calls.
        String strategy = (_ocrStategy == null || _ocrStategy.isEmpty()) ? this.ocrStategy : _ocrStategy;
        if (!isValidOcrStrategy(strategy)) {
            throw new PluginException(TikaHelperService.class.getSimpleName(), PLUGIN_ERROR,
                    "Invalid TIKA_OCR_MODE '" + strategy
                            + "' - expected one of the following options: AUTO | NO_OCR | OCR_ONLY | OCR_AND_TEXT_EXTRACTION");
        }

        // Work on a private copy so the caller's list is never mutated (it may
        // be shared or immutable).
        List<String> tikaOptions = options == null ? new ArrayList<String>() : new ArrayList<String>(options);
        boolean hasPDFOcrStrategy = tikaOptions.stream().anyMatch(TikaHelperService::isPdfOcrStrategyOption);
        if (!hasPDFOcrStrategy) {
            // no explicit strategy option given - add the resolved one
            tikaOptions.add(OPTION_PDF_OCR_STRATEGY + strategy);
        }
        if (debug) {
            for (String opt : tikaOptions) {
                logger.info("......  Tika Option = " + opt);
            }
        }

        long start = System.currentTimeMillis();
        StringBuilder result = new StringBuilder();
        List<FileData> files = snapshot.getFileData();
        for (FileData fileData : files) {
            if (mlFilenamePattern != null && !mlFilenamePattern.matcher(fileData.getName()).find()) {
                continue; // filtered out by filename pattern
            }
            try {
                if (debug) {
                    logger.fine("...text extraction '" + fileData.getName() + "'...");
                }
                String textContent = this.doORCProcessing(fileData, tikaOptions);
                if (textContent == null || textContent.isEmpty()) {
                    logger.warning("Unable to extract text-content for '" + fileData.getName() + "'");
                    continue;
                }
                result.append(textContent).append(' ');
            } catch (IOException e) {
                throw new PluginException(TikaHelperService.class.getSimpleName(), PLUGIN_ERROR,
                        "Unable to scan attached document '" + fileData.getName() + "'", e);
            }
        }
        if (debug) {
            logger.fine("...extracted textual information in " + (System.currentTimeMillis() - start) + "ms");
        }
        return result.toString();
    }

    /**
     * Sends the content of a single file to the configured server endpoint and
     * returns the plain text answer.
     *
     * @param fileData the file to be processed
     * @param options  optional list of {@code key=value} options; only keys
     *                 starting with {@code X-Tika} are sent as request headers,
     *                 all others are logged and ignored
     * @return the response body for a 2xx response; {@code null} if no endpoint
     *         is configured, the content type is not accepted, the file has no
     *         content, or the server answered with a non-2xx status
     * @throws IOException if the communication with the server fails
     */
    public String doORCProcessing(FileData fileData, List<String> options) throws IOException {
        if (this.serviceEndpoint == null || !this.serviceEndpoint.isPresent()
                || this.serviceEndpoint.get().isEmpty()) {
            logger.severe("OCR_SERVICE_ENDPOINT is missing - OCR processing not supported without a valid tika server endpoint!");
            return null;
        }
        logger.fine("...ocr scanning....");
        String contentType = this.adaptContentType(fileData);
        if (!this.acceptContentType(contentType)) {
            logger.fine("contentType '" + contentType + " is not supported by Tika Server");
            return null;
        }
        byte[] content = fileData.getContent();
        if (content == null) {
            // nothing to send; the caller reports the missing text
            logger.fine("no content available for '" + fileData.getName() + "'");
            return null;
        }

        HttpURLConnection urlConnection = null;
        try {
            urlConnection = (HttpURLConnection) new URL(this.serviceEndpoint.get()).openConnection();
            urlConnection.setRequestMethod("PUT");
            urlConnection.setDoOutput(true);
            urlConnection.setDoInput(true);
            urlConnection.setAllowUserInteraction(false);
            urlConnection.setRequestProperty("Content-Type", contentType + "; charset=" + DEFAULT_ENCODING);
            urlConnection.setRequestProperty("Accept", "text/plain");
            applyTikaOptions(urlConnection, options);
            urlConnection.setRequestProperty("Content-Length", String.valueOf(content.length));

            // the request body is the raw file content; always close the stream
            try (OutputStream output = urlConnection.getOutputStream()) {
                output.write(content);
                output.flush();
            }

            int responseCode = urlConnection.getResponseCode();
            if (responseCode >= 200 && responseCode <= 299) {
                return this.readResponse(urlConnection, DEFAULT_ENCODING);
            }
            logger.warning("Tika server returned HTTP " + responseCode + " for '" + fileData.getName() + "'");
            return null;
        } finally {
            // release the connection on every path, including errors
            if (urlConnection != null) {
                urlConnection.disconnect();
            }
        }
    }

    /**
     * Copies all valid {@code X-Tika...=value} options into request headers of
     * the given connection. Invalid entries are logged and skipped.
     */
    private void applyTikaOptions(HttpURLConnection urlConnection, List<String> options) {
        if (options == null) {
            return;
        }
        for (String option : options) {
            int separator = option == null ? -1 : option.indexOf('=');
            if (separator < 0) {
                logger.warning("Invalid tika option : '" + option + "'  character '=' expeced!");
                continue;
            }
            String key = option.substring(0, separator);
            if (!key.startsWith("X-Tika")) {
                logger.warning("Invalid tika option : '" + option + "'  key must start with 'X-Tika'");
                continue;
            }
            urlConnection.setRequestProperty(key, option.substring(separator + 1));
        }
    }

    /**
     * Reads the response body line by line; every line is terminated with a
     * single {@code \n} in the result.
     * <p>
     * The character set is taken from the {@code charset} parameter of the
     * response Content-Type header. If the header does not declare one, the
     * given fallback encoding is used; if that is empty too, the platform
     * default applies.
     *
     * @param urlConnection the connection to read from
     * @param encoding      fallback character set name, may be {@code null}
     * @return the response body
     * @throws IOException if reading fails or the character set is not supported
     */
    private String readResponse(URLConnection urlConnection, String encoding) throws IOException {
        boolean trace = logger.isLoggable(Level.FINEST);
        if (trace) {
            logger.finest("......readResponse....");
        }
        // Note: the Content-Encoding header describes a transfer coding such as
        // gzip, not a character set, so it must not be used here.
        String charset = charsetOf(urlConnection.getContentType());
        if (charset == null && encoding != null && !encoding.isEmpty()) {
            charset = encoding;
        }

        StringBuilder response = new StringBuilder();
        try (BufferedReader in = new BufferedReader(charset != null
                ? new InputStreamReader(urlConnection.getInputStream(), charset)
                : new InputStreamReader(urlConnection.getInputStream()))) {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                if (trace) {
                    logger.finest("......" + inputLine);
                }
                response.append(inputLine).append('\n');
            }
        }
        return response.toString();
    }

    /**
     * Returns the value of the {@code charset} parameter of a Content-Type
     * header value, or {@code null} if the header is missing or declares none.
     */
    private static String charsetOf(String contentTypeHeader) {
        if (contentTypeHeader == null) {
            return null;
        }
        for (String part : contentTypeHeader.split(";")) {
            String param = part.trim();
            if (param.regionMatches(true, 0, CHARSET_PARAM, 0, CHARSET_PARAM.length())) {
                String value = param.substring(CHARSET_PARAM.length()).trim();
                // the value may be a quoted string
                if (value.length() >= 2 && value.startsWith("\"") && value.endsWith("\"")) {
                    value = value.substring(1, value.length() - 1);
                }
                return value.isEmpty() ? null : value;
            }
        }
        return null;
    }

    /** Returns true if the value is exactly one of the OCR_STRATEGY_* constants. */
    private static boolean isValidOcrStrategy(String strategy) {
        return OCR_STRATEGY_AUTO.equals(strategy)
                || OCR_STRATEGY_NO_OCR.equals(strategy)
                || OCR_STRATEGY_OCR_ONLY.equals(strategy)
                || OCR_STRATEGY_OCR_AND_TEXT_EXTRACTION.equals(strategy);
    }

    /** Returns true if the option starts (ignoring case) with the PDF OCR strategy key. */
    private static boolean isPdfOcrStrategyOption(String option) {
        return option != null
                && option.regionMatches(true, 0, OPTION_PDF_OCR_STRATEGY, 0, OPTION_PDF_OCR_STRATEGY.length());
    }

    /**
     * Returns true for every non-empty content type except
     * {@code application/octet-stream}.
     */
    private boolean acceptContentType(String contentType) {
        if (contentType == null || contentType.isEmpty()) {
            return false;
        }
        return !"application/octet-stream".equalsIgnoreCase(contentType);
    }

    /**
     * Returns the content type of the file. If the file declares none (or the
     * wildcard type), the type is derived from the file name:
     * {@code application/pdf} for names ending with {@code .pdf} (ignoring
     * case), otherwise {@code application/xml}.
     */
    private String adaptContentType(FileData fileData) {
        String contentType = fileData.getContentType();
        if (contentType == null || contentType.isEmpty() || "*/*".equals(contentType)) {
            String name = fileData.getName();
            // locale independent, case-insensitive suffix check; a negative
            // offset (name shorter than the suffix) simply yields false
            boolean pdf = name != null && name.regionMatches(true, name.length() - 4, ".pdf", 0, 4);
            contentType = pdf ? "application/pdf" : "application/xml";
        }
        return contentType;
    }
}

