/*
 * Decompiled with CFR 0.152.
 */
package org.imixs.archive.documents;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import javax.ejb.EJB;
import javax.xml.bind.JAXBException;
import javax.xml.transform.TransformerException;
import org.apache.pdfbox.cos.COSInputStream;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode;
import org.apache.pdfbox.pdmodel.common.PDNameTreeNode;
import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
import org.imixs.workflow.FileData;
import org.imixs.workflow.ItemCollection;
import org.imixs.workflow.engine.ReportService;
import org.imixs.workflow.engine.plugins.AbstractPlugin;
import org.imixs.workflow.exceptions.PluginException;
import org.imixs.workflow.util.XMLParser;
import org.imixs.workflow.xml.XMLDocumentAdapter;
import org.imixs.workflow.xml.XSLHandler;

/**
 * Workflow plugin that extracts an XML file embedded in a PDF attachment of the
 * current document, transforms it with the XSL of a configured report and
 * replaces the document items with the transformation result.
 *
 * <p>The plugin is driven by the {@code PDFXMLExtractor} item of the evaluated
 * workflow result. That item is parsed as an item structure providing:
 * <ul>
 * <li>{@code report} - name of the report whose {@code XSL} (and optional
 * {@code encoding}) item is used for the transformation</li>
 * <li>{@code filename} - regular expression selecting the attachment to
 * inspect</li>
 * </ul>
 */
public class PDFXMLExtractorPlugin extends AbstractPlugin {
    public static final String PDFXMLEXTRACTOR = "PDFXMLExtractor";
    public static final String PARSING_EXCEPTION = "PARSING_EXCEPTION";
    public static final String PLUGIN_ERROR = "PLUGIN_ERROR";
    public static final String REPORT_ERROR = "REPORT_ERROR";
    // NOTE(review): the leading '.' in both patterns is an unescaped regex wildcard and
    // the patterns are applied with find(), so they match anywhere in a name - confirm
    // whether a literal file-extension match was intended before tightening them.
    public static final String FILE_PATTERN_PDF = ".[pP][dD][fF]";
    public static final String FILE_PATTERN_XML = ".[xX][mM][lL]";

    private static final Logger logger = Logger.getLogger(PDFXMLExtractorPlugin.class.getName());

    /** Compiled once instead of once per embedded file entry. */
    private static final Pattern XML_FILE_PATTERN = Pattern.compile(FILE_PATTERN_XML);

    @EJB
    ReportService reportService;

    /**
     * Evaluates the workflow result of the event and, if a {@code PDFXMLExtractor}
     * definition is present, merges the transformed embedded XML into the document.
     *
     * @param document the current workitem
     * @param event    the event being processed
     * @return the (possibly updated) document; the same instance that was passed in
     * @throws PluginException if the configured report cannot be found
     */
    public ItemCollection run(ItemCollection document, ItemCollection event) throws PluginException {
        ItemCollection evalItemCollection = this.getWorkflowService().evalWorkflowResult(event, document, false);
        if (evalItemCollection == null) {
            return document;
        }
        String processValue = evalItemCollection.getItemValueString(PDFXMLEXTRACTOR);
        if (processValue.isEmpty()) {
            // no extractor definition for this event
            return document;
        }

        ItemCollection processData = XMLParser.parseItemStructure(processValue);
        String reportName = processData.getItemValueString("report");
        String filePattern = processData.getItemValueString("filename");

        byte[] xmlData = getXMLFile(document, filePattern);
        if (xmlData == null) {
            // no matching attachment or no embedded XML found
            return document;
        }

        logger.info("...do something with the xml file.." + reportName);
        ItemCollection report = this.reportService.findReport(reportName);
        if (report == null) {
            throw new PluginException(PDFXMLExtractorPlugin.class.getSimpleName(), REPORT_ERROR,
                    "unable to load report '" + reportName + "'. Please check  model configuration");
        }
        String xsl = report.getItemValueString("XSL").trim();
        String encoding = report.getItemValueString("encoding");
        if ("".equals(encoding)) {
            encoding = "UTF-8";
        }

        ItemCollection resultItemCol = transformXML(xmlData, xsl, encoding);
        if (resultItemCol != null) {
            document.replaceAllItems(resultItemCol.getAllItems());
        }
        return document;
    }

    /**
     * Applies the XSL to the XML data and reads the output as an ItemCollection.
     *
     * <p>Failures are logged and reported as {@code null}; the caller then leaves
     * the document untouched (best-effort behaviour).
     *
     * @return the transformed ItemCollection or {@code null} if the transformation failed
     */
    private static ItemCollection transformXML(byte[] xmlData, String xsl, String encoding) {
        try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
            // NOTE(review): the bytes are decoded with the platform default charset, not
            // with 'encoding' - confirm how XSLHandler re-encodes the string before changing.
            String xml = new String(xmlData);
            XSLHandler.transform(xml, xsl, encoding, outputStream);
            return XMLDocumentAdapter.readItemCollection(outputStream.toByteArray());
        } catch (IOException | JAXBException | TransformerException e) {
            logger.log(Level.WARNING, "unable to transform embedded xml : " + e.getMessage(), e);
            return null;
        }
    }

    /**
     * Returns the first embedded XML file of the first attachment whose file name
     * matches the given regular expression.
     *
     * @param document     the workitem holding the attachments
     * @param file_pattern regular expression applied to each attachment name with
     *                     {@code find()}
     * @return the XML content, or {@code null} if no attachment matches or the
     *         matching attachment contains no embedded XML
     * @throws PluginException declared for API compatibility
     */
    public static byte[] getXMLFile(ItemCollection document, String file_pattern) throws PluginException {
        List<String> filenames = document.getFileNames();
        if (filenames == null || filenames.isEmpty()) {
            return null;
        }
        // compile once for all file names instead of once per iteration
        Pattern pattern = Pattern.compile(file_pattern);
        for (String filename : filenames) {
            if (!pattern.matcher(filename).find()) {
                continue;
            }
            FileData pdfFileData = document.getFileData(filename);
            if (pdfFileData == null || pdfFileData.getContent() == null) {
                // nothing to parse for this name - try the next attachment
                continue;
            }
            logger.info("...extract embedded XML from '" + filename + "'");
            return getFirstEmbeddedXML(pdfFileData.getContent());
        }
        return null;
    }

    /**
     * Reads the given stream until end-of-stream and returns its content.
     * The stream is not closed by this method.
     *
     * @param ins the stream to read
     * @return all bytes read from the stream
     * @throws IOException if reading fails
     */
    public static byte[] streamToByteArray(InputStream ins) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        byte[] byteBuffer = new byte[1024];
        int len;
        while ((len = ins.read(byteBuffer)) > -1) {
            baos.write(byteBuffer, 0, len);
        }
        baos.flush();
        return baos.toByteArray();
    }

    /**
     * Loads the PDF and returns the content of the first embedded file whose name
     * matches {@link #FILE_PATTERN_XML}.
     *
     * <p>The names of the embedded-files tree root are searched if present;
     * otherwise the names of its direct kids are searched in order and the first
     * hit wins.
     *
     * @return the XML content, or {@code null} if none is found or the PDF cannot be read
     */
    private static byte[] getFirstEmbeddedXML(byte[] content) {
        PDDocument doc = null;
        try {
            doc = PDDocument.load(content);
            PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(doc.getDocumentCatalog());
            PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles();
            if (efTree == null) {
                return null;
            }
            Map<String, PDComplexFileSpecification> names = efTree.getNames();
            if (names != null) {
                return extractFirstXMLFile(names);
            }
            List<PDNameTreeNode<PDComplexFileSpecification>> kids = efTree.getKids();
            if (kids == null) {
                return null;
            }
            for (PDNameTreeNode<PDComplexFileSpecification> node : kids) {
                byte[] result = extractFirstXMLFile(node.getNames());
                if (result != null) {
                    // stop at the first hit so a later kid cannot overwrite it with null
                    return result;
                }
            }
            return null;
        } catch (IOException e) {
            logger.warning("unable to load embedded xml : " + e.getMessage());
            return null;
        } finally {
            if (doc != null) {
                try {
                    doc.close();
                } catch (IOException e) {
                    logger.log(Level.WARNING, "unable to close pdf document : " + e.getMessage(), e);
                }
            }
        }
    }

    /**
     * Returns the content of the first file specification whose file name matches
     * {@link #FILE_PATTERN_XML} and which provides an embedded file.
     *
     * @param names the name map of a name tree node; may be {@code null}
     * @return the embedded file content or {@code null} if no entry qualifies
     * @throws IOException if the embedded stream cannot be read
     */
    private static byte[] extractFirstXMLFile(Map<String, PDComplexFileSpecification> names) throws IOException {
        if (names == null) {
            return null;
        }
        for (PDComplexFileSpecification fileSpec : names.values()) {
            if (fileSpec == null) {
                continue;
            }
            String filename = fileSpec.getFile();
            if (filename == null || !XML_FILE_PATTERN.matcher(filename).find()) {
                continue;
            }
            PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpec);
            if (embeddedFile == null) {
                // file specification without embedded content - try the next entry
                continue;
            }
            // close the stream once its content has been copied
            try (COSInputStream inStream = embeddedFile.createInputStream()) {
                return streamToByteArray(inStream);
            }
        }
        return null;
    }

    /**
     * Resolves the embedded file of a file specification, trying the Unicode, DOS,
     * Mac and Unix variants before the generic one.
     *
     * @return the first variant that is not {@code null}, or {@code null} if
     *         {@code fileSpec} is {@code null} or carries no embedded file
     */
    private static PDEmbeddedFile getEmbeddedFile(PDComplexFileSpecification fileSpec) {
        if (fileSpec == null) {
            return null;
        }
        PDEmbeddedFile embeddedFile = fileSpec.getEmbeddedFileUnicode();
        if (embeddedFile == null) {
            embeddedFile = fileSpec.getEmbeddedFileDos();
        }
        if (embeddedFile == null) {
            embeddedFile = fileSpec.getEmbeddedFileMac();
        }
        if (embeddedFile == null) {
            embeddedFile = fileSpec.getEmbeddedFileUnix();
        }
        if (embeddedFile == null) {
            embeddedFile = fileSpec.getEmbeddedFile();
        }
        return embeddedFile;
    }
}

