/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.ocr;

import java.awt.Image;
import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import javax.imageio.ImageIO;
import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.DefaultExecutor;
import org.apache.commons.exec.ExecuteStreamHandler;
import org.apache.commons.exec.PumpStreamHandler;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.SystemUtils;
import org.apache.tika.config.Field;
import org.apache.tika.config.Initializable;
import org.apache.tika.config.InitializableProblemHandler;
import org.apache.tika.config.Param;
import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.external.ExternalParser;
import org.apache.tika.parser.image.ImageParser;
import org.apache.tika.parser.image.TiffParser;
import org.apache.tika.parser.jpeg.JpegParser;
import org.apache.tika.parser.ocr.TesseractOCRConfig;
import org.apache.tika.sax.OfflineContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.tika.utils.XMLReaderUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

public class TesseractOCRParser
extends AbstractParser
implements Initializable {
    /** Class-wide logger. */
    private static final Logger LOG = LoggerFactory.getLogger(TesseractOCRParser.class);
    // Set to true by warn() and read by hasWarned(); static, so shared by every parser instance.
    private static volatile boolean HAS_WARNED = false;
    // Monitor object that hasWarned() synchronizes on.
    private static final Object[] LOCK = new Object[0];
    private static final long serialVersionUID = -8167538283213097265L;
    // Image media types returned by getSupportedTypes() when tesseract is available.
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(MediaType.image((String)"png"), MediaType.image((String)"jpeg"), MediaType.image((String)"tiff"), MediaType.image((String)"bmp"), MediaType.image((String)"gif"), MediaType.image((String)"jp2"), MediaType.image((String)"jpx"), MediaType.image((String)"x-portable-pixmap"))));
    // Configuration used when the ParseContext does not carry a TesseractOCRConfig; populated through the @Field setters.
    private final TesseractOCRConfig defaultConfig = new TesseractOCRConfig();
    // Cache of availability checks keyed by the full tesseract program string (see hasTesseract()); cleared once it exceeds 100 entries.
    // NOTE(review): plain HashMap mutated from instance methods with no locking visible in this file -- confirm whether concurrent use is expected.
    private static Map<String, Boolean> TESSERACT_PRESENT = new HashMap<String, Boolean>();
    // Same caching scheme for the ImageMagick program string (see hasImageMagick()); the same review note applies.
    private static Map<String, Boolean> IMAGE_MAGICK_PRESENT = new HashMap<String, Boolean>();
    // Parser run over the image by parse(InputStream, ...) before OCR, with its content output sent to a throw-away handler.
    private static Parser _TMP_IMAGE_METADATA_PARSER = new CompositeImageParser();

    /**
     * Reports which media types this parser will handle for the given context.
     *
     * @param context parse context; a {@code TesseractOCRConfig} found in it takes
     *                precedence over this parser's default config
     * @return the supported image types when tesseract is available for the
     *         effective config, otherwise an empty set
     */
    public Set<MediaType> getSupportedTypes(ParseContext context) {
        TesseractOCRConfig effectiveConfig = context.get(TesseractOCRConfig.class, this.defaultConfig);
        return this.hasTesseract(effectiveConfig) ? SUPPORTED_TYPES : Collections.<MediaType>emptySet();
    }

    /**
     * Exports {@code TESSDATA_PREFIX} into the environment of the process about to be started.
     * The tessdata path is preferred; the tesseract path is the fallback; when both
     * are empty the environment is left untouched.
     *
     * @param config source of the tessdata / tesseract paths
     * @param pb     builder whose environment map is modified in place
     */
    private void setEnv(TesseractOCRConfig config, ProcessBuilder pb) {
        Map<String, String> environment = pb.environment();
        String prefixValue = config.getTessdataPath();
        if (prefixValue.isEmpty()) {
            prefixValue = config.getTesseractPath();
        }
        if (!prefixValue.isEmpty()) {
            environment.put("TESSDATA_PREFIX", prefixValue);
        }
    }

    /**
     * Checks whether the tesseract program for the given config can be run.
     * Results are cached per program string; the cache is wiped once it holds
     * more than 100 entries.
     *
     * @param config supplies the tesseract path that is prefixed to the program name
     * @return {@code true} if the external check succeeded; {@code false} if it failed
     *         or a non-empty tesseract path is not a directory
     */
    public boolean hasTesseract(TesseractOCRConfig config) {
        String installDir = config.getTesseractPath();
        String executable = installDir + TesseractOCRParser.getTesseractProg();

        Boolean known = TESSERACT_PRESENT.get(executable);
        if (known != null) {
            return known;
        }
        if (TESSERACT_PRESENT.size() > 100) {
            TESSERACT_PRESENT.clear();
        }

        boolean available;
        if (!installDir.isEmpty() && !Files.isDirectory(Paths.get(installDir))) {
            // A configured path that is not a directory can never contain the program.
            available = false;
        } else {
            available = ExternalParser.check(new String[]{executable}, new int[0]);
        }
        TESSERACT_PRESENT.put(executable, available);
        return available;
    }

    /**
     * Checks whether ImageMagick's convert program for the given config can be run.
     * Results are cached per program string; the cache is wiped once it holds
     * more than 100 entries.
     *
     * @param config supplies the ImageMagick path
     * @return {@code true} only if the external check succeeded; {@code false} when a
     *         non-empty path is not a directory, or when running on Windows without
     *         an explicit path
     */
    private boolean hasImageMagick(TesseractOCRConfig config) {
        String convertCommand = this.getImageMagickPath(config);

        Boolean known = IMAGE_MAGICK_PRESENT.get(convertCommand);
        if (known != null) {
            return known;
        }
        if (IMAGE_MAGICK_PRESENT.size() > 100) {
            IMAGE_MAGICK_PRESENT.clear();
        }

        String installDir = config.getImageMagickPath();
        boolean available;
        if (!installDir.isEmpty() && !Files.isDirectory(Paths.get(installDir))) {
            available = false;
        } else if (SystemUtils.IS_OS_WINDOWS && installDir.isEmpty()) {
            LOG.warn("Must specify path for imagemagick on Windows OS to avoid accidental confusion with convert.exe");
            available = false;
        } else {
            available = ExternalParser.check(new String[]{convertCommand}, new int[0]);
        }
        IMAGE_MAGICK_PRESENT.put(convertCommand, available);
        return available;
    }

    /**
     * Builds the program string used to invoke ImageMagick: the configured
     * ImageMagick path immediately followed by the platform's convert program name.
     *
     * @param config supplies the ImageMagick path
     * @return the concatenated path and program name
     */
    private String getImageMagickPath(TesseractOCRConfig config) {
        String installDir = config.getImageMagickPath();
        String program = TesseractOCRParser.getImageMagickProg();
        return installDir + program;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Probes for a usable {@code python} by running a one-line script that imports
     * numpy, matplotlib, skimage and _tkinter.
     *
     * @return {@code true} only if the interpreter started and exited with status 0;
     *         any failure to create the script or launch the interpreter yields {@code false}
     */
    static boolean hasPython() {
        TemporaryResources tmp = new TemporaryResources();
        Process process = null;
        try {
            File importCheck = tmp.createTemporaryFile();
            // try-with-resources so the writer is closed even if the write fails.
            try (OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(importCheck), StandardCharsets.UTF_8)) {
                out.write("import numpy, matplotlib, skimage, _tkinter");
            }
            // Pass the script path as its own argument: Runtime.exec(String) splits on
            // whitespace and would break on a temp directory containing spaces.
            process = new ProcessBuilder("python", importCheck.getAbsolutePath()).start();
            return process.waitFor() == 0;
        }
        catch (InterruptedException e) {
            // Preserve the interrupt for callers and do not leave the probe running.
            Thread.currentThread().interrupt();
            if (process != null) {
                process.destroy();
            }
            return false;
        }
        catch (Exception ignored) {
            // Best-effort probe: any failure simply means "python is not usable".
            return false;
        }
        finally {
            IOUtils.closeQuietly((Closeable)tmp);
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Runs OCR over an in-memory image by rendering it to a temporary PNG and
     * delegating to {@link #parse(InputStream, ContentHandler, Metadata, ParseContext)}.
     *
     * @param image    image to OCR; its dimensions must already be available
     * @param handler  receiver of the extracted content
     * @param metadata metadata object passed through to the stream-based parse
     * @param context  parse context passed through to the stream-based parse
     * @throws IOException   if the temporary PNG cannot be written or read
     * @throws SAXException  if the handler rejects an event
     * @throws TikaException if the image reports a non-positive width or height,
     *                       or the delegated parse fails
     */
    public void parse(Image image, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
        TemporaryResources tmp = new TemporaryResources();
        try {
            int w = image.getWidth(null);
            int h = image.getHeight(null);
            if (w <= 0 || h <= 0) {
                // getWidth/getHeight return -1 while the size is unknown; fail with a
                // declared exception instead of BufferedImage's IllegalArgumentException.
                throw new TikaException("Image dimensions are not available: " + w + "x" + h);
            }
            BufferedImage bImage = new BufferedImage(w, h, BufferedImage.TYPE_INT_RGB);
            // Copy the source pixels into the RGB buffer; without this step the PNG
            // handed to tesseract would be an empty (all-black) image.
            java.awt.Graphics2D graphics = bImage.createGraphics();
            try {
                graphics.drawImage(image, 0, 0, null);
            }
            finally {
                graphics.dispose();
            }
            File file = tmp.createTemporaryFile();
            try (FileOutputStream fos = new FileOutputStream(file)) {
                ImageIO.write((RenderedImage)bImage, "png", fos);
            }
            try (TikaInputStream tis = TikaInputStream.get(file)) {
                this.parse((InputStream)tis, handler, metadata, context);
            }
        }
        finally {
            tmp.dispose();
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * OCRs the image in {@code stream} and writes the result to {@code handler}
     * as an XHTML document. Does nothing at all when tesseract is unavailable
     * for the effective config.
     *
     * @param stream       image data
     * @param handler      receiver of the XHTML output
     * @param metadata     filled by the image metadata parser and handed to the XHTML handler
     * @param parseContext may carry a {@code TesseractOCRConfig} overriding the default config
     * @throws IOException   on I/O failure
     * @throws SAXException  if the handler rejects an event
     * @throws TikaException if metadata extraction or OCR fails
     */
    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        TesseractOCRConfig ocrConfig = parseContext.get(TesseractOCRConfig.class, this.defaultConfig);
        if (this.hasTesseract(ocrConfig)) {
            TemporaryResources scratch = new TemporaryResources();
            try {
                TikaInputStream tis = TikaInputStream.get(stream, scratch);
                // NOTE(review): return value unused; presumably called so the stream is backed by a file before it is read twice -- confirm.
                tis.getPath();
                File ocrOutputBase = scratch.createTemporaryFile();

                // First pass: image metadata only, content events are discarded.
                ContentHandler discard = new DefaultHandler();
                _TMP_IMAGE_METADATA_PARSER.parse(tis, discard, metadata, parseContext);

                // Second pass: the actual OCR, wrapped in an XHTML document.
                XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
                xhtml.startDocument();
                this.parse(tis, ocrOutputBase, parseContext, xhtml, ocrConfig);
                xhtml.endDocument();
            }
            finally {
                scratch.dispose();
            }
        }
    }

    /**
     * Convenience overload of the four-argument {@code parseInline} that supplies
     * a fresh, empty {@code ParseContext}.
     *
     * @param stream image data
     * @param xhtml  already-started XHTML handler that receives the OCR output
     * @param config OCR configuration to use
     */
    public void parseInline(InputStream stream, XHTMLContentHandler xhtml, TesseractOCRConfig config) throws IOException, SAXException, TikaException {
        ParseContext emptyContext = new ParseContext();
        this.parseInline(stream, xhtml, emptyContext, config);
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * OCRs the image in {@code stream} and emits the result into an existing XHTML
     * handler; unlike {@code parse}, no document start/end events are generated here.
     * Does nothing when tesseract is unavailable for {@code config}.
     *
     * @param stream       image data
     * @param xhtml        handler that receives the OCR output
     * @param parseContext context passed on to the OCR step
     * @param config       OCR configuration to use
     */
    public void parseInline(InputStream stream, XHTMLContentHandler xhtml, ParseContext parseContext, TesseractOCRConfig config) throws IOException, SAXException, TikaException {
        if (this.hasTesseract(config)) {
            TemporaryResources scratch = new TemporaryResources();
            try {
                TikaInputStream tis = TikaInputStream.get(stream, scratch);
                File ocrOutputBase = scratch.createTemporaryFile();
                this.parse(tis, ocrOutputBase, parseContext, xhtml, config);
            }
            finally {
                scratch.dispose();
            }
        }
    }

    /**
     * Pre-processes an image in place with ImageMagick before OCR. When rotation is
     * enabled and python is usable, the bundled {@code rotation.py} script is run
     * first to obtain the angle passed to {@code -rotate}; otherwise the angle is 0.
     * Failures of either external program are tolerated (best effort), in which
     * case the file is left as the program left it.
     *
     * @param scratchFile image file that is both input and output of the conversion
     * @param config      supplies density, depth, colorspace, filter, resize and rotation settings
     * @throws IOException   if the rotation script cannot be copied to a temporary
     *                       file or the temporary resources cannot be released
     * @throws TikaException declared for callers; not thrown directly here
     */
    private void processImage(File scratchFile, TesseractOCRConfig config) throws IOException, TikaException {
        DefaultExecutor executor = new DefaultExecutor();
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        executor.setStreamHandler((ExecuteStreamHandler)new PumpStreamHandler((OutputStream)outputStream));
        String angle = "0";
        // try-with-resources: the temporary script is released on every exit path.
        try (TemporaryResources tmp = new TemporaryResources()) {
            if (config.getApplyRotation() && TesseractOCRParser.hasPython()) {
                // The script is only materialised when it is actually going to be run.
                try (InputStream in = this.getClass().getResourceAsStream("rotation.py")) {
                    if (in != null) {
                        File rotationScript = tmp.createTemporaryFile();
                        Files.copy(in, rotationScript.toPath(), StandardCopyOption.REPLACE_EXISTING);
                        CommandLine rotationCommand = new CommandLine("python");
                        rotationCommand.addArguments(new String[]{"-W", "ignore", rotationScript.getAbsolutePath(), "-f", scratchFile.getAbsolutePath()}, true);
                        try {
                            executor.execute(rotationCommand);
                            String tmpAngle = outputStream.toString("UTF-8").trim();
                            // Only accept output that parses as a number.
                            Double.parseDouble(tmpAngle);
                            angle = tmpAngle;
                        }
                        catch (IOException | RuntimeException ignored) {
                            // Best effort: keep the default angle when detection fails.
                        }
                    }
                    // A missing script resource means rotation detection is skipped.
                }
            }
            CommandLine convertCommand = new CommandLine(this.getImageMagickPath(config));
            // "-depth" must not carry trailing whitespace or it is not a valid option name.
            String[] args = new String[]{"-density", Integer.toString(config.getDensity()), "-depth", Integer.toString(config.getDepth()), "-colorspace", config.getColorspace(), "-filter", config.getFilter(), "-resize", config.getResize() + "%", "-rotate", angle, scratchFile.getAbsolutePath(), scratchFile.getAbsolutePath()};
            convertCommand.addArguments(args, true);
            try {
                executor.execute(convertCommand);
            }
            catch (IOException | RuntimeException ignored) {
                // Best effort: OCR proceeds with whatever is in scratchFile.
            }
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Core OCR step: runs tesseract over the file behind {@code tikaInputStream}
     * (optionally after ImageMagick pre-processing on a copy) and feeds the
     * produced output file into {@code xhtml}.
     * <p>
     * Nothing happens when the input length is outside the configured min/max
     * range. The output file written by tesseract is deleted on every exit path,
     * including when OCR fails or times out.
     *
     * @param tikaInputStream  input image; must be file-backed
     * @param tmpOCROutputFile base path handed to tesseract; the real output file is
     *                         this path plus "." and the lower-cased output type
     * @param parseContext     context used when parsing hOCR output
     * @param xhtml            receiver of the extracted content
     * @param config           OCR configuration
     */
    private void parse(TikaInputStream tikaInputStream, File tmpOCROutputFile, ParseContext parseContext, XHTMLContentHandler xhtml, TesseractOCRConfig config) throws IOException, SAXException, TikaException {
        File input = tikaInputStream.getFile();
        long size = tikaInputStream.getLength();
        if (size < config.getMinFileSizeToOcr() || size > config.getMaxFileSizeToOcr()) {
            return;
        }
        // Resolve the output file BEFORE running OCR so the finally block can remove
        // it even when doOCR throws after tesseract has started writing.
        File tmpTxtOutput = new File(tmpOCROutputFile.getAbsolutePath() + "." + config.getOutputType().toString().toLowerCase(Locale.US));
        try {
            if (config.isEnableImageProcessing() == 1 && this.hasImageMagick(config)) {
                TemporaryResources tmp = new TemporaryResources();
                try {
                    // Pre-process a copy so the caller's input file is never modified.
                    File tmpFile = tmp.createTemporaryFile();
                    FileUtils.copyFile((File)input, (File)tmpFile);
                    this.processImage(tmpFile, config);
                    this.doOCR(tmpFile, tmpOCROutputFile, config);
                }
                finally {
                    tmp.dispose();
                }
            } else {
                this.doOCR(input, tmpOCROutputFile, config);
            }
            if (!tmpTxtOutput.exists()) {
                return;
            }
            try (FileInputStream is = new FileInputStream(tmpTxtOutput)) {
                if (config.getOutputType().equals((Object)TesseractOCRConfig.OUTPUT_TYPE.HOCR)) {
                    this.extractHOCROutput(is, parseContext, xhtml);
                } else {
                    this.extractOutput(is, xhtml);
                }
            }
        }
        finally {
            tmpTxtOutput.delete();
        }
    }

    /**
     * Intentionally a no-op: this parser ignores the supplied params; its settings
     * are applied through the {@code @Field} setters instead.
     *
     * @param params unused
     */
    public void initialize(Map<String, Param> params) throws TikaConfigException {
    }

    /**
     * Reports, at most until {@link #warn()} has been recorded, that tesseract is
     * installed and will be applied automatically.
     *
     * @param problemHandler receives the notice when tesseract is available for the default config
     * @throws TikaConfigException if the problem handler raises it
     */
    public void checkInitialization(InitializableProblemHandler problemHandler) throws TikaConfigException {
        if (this.hasWarned()) {
            return;
        }
        if (!this.hasTesseract(this.defaultConfig)) {
            return;
        }
        String parserName = this.getClass().getName();
        problemHandler.handleInitializableProblem(parserName, "Tesseract OCR is installed and will be automatically applied to image files unless\nyou've excluded the TesseractOCRParser from the default parser.\nTesseract may dramatically slow down content extraction (TIKA-2359).\nAs of Tika 1.15 (and prior versions), Tesseract is automatically called.\nIn future versions of Tika, users may need to turn the TesseractOCRParser on via TikaConfig.");
        this.warn();
    }

    /**
     * Launches tesseract on {@code input}, writing to the base path {@code output},
     * and waits for it up to the configured timeout (in seconds).
     * <p>
     * A non-zero exit status or a failure of the wait task is logged but does not
     * abort the parse; the caller decides what to do when no output file exists.
     *
     * @param input  image file to OCR
     * @param output base path of the output file (tesseract appends the type suffix)
     * @param config language, page segmentation mode, extra options, timeout, etc.
     * @throws IOException   if the process cannot be started
     * @throws TikaException if the wait is interrupted or the timeout elapses; the
     *                       process is destroyed in both cases
     */
    private void doOCR(File input, File output, TesseractOCRConfig config) throws IOException, TikaException {
        List<String> cmd = new ArrayList<String>(Arrays.asList(config.getTesseractPath() + TesseractOCRParser.getTesseractProg(), input.getPath(), output.getPath(), "-l", config.getLanguage(), "--psm", config.getPageSegMode()));
        for (Map.Entry<String, String> entry : config.getOtherTesseractConfig().entrySet()) {
            cmd.add("-c");
            cmd.add(entry.getKey() + "=" + entry.getValue());
        }
        cmd.addAll(Arrays.asList("-c", "page_separator=" + config.getPageSeparator(), "-c", config.getPreserveInterwordSpacing() ? "preserve_interword_spaces=1" : "preserve_interword_spaces=0", config.getOutputType().name().toLowerCase(Locale.US)));
        LOG.debug("Tesseract command: {}", String.join(" ", cmd));
        ProcessBuilder pb = new ProcessBuilder(cmd);
        this.setEnv(config, pb);
        final Process process = pb.start();
        // Tesseract gets no stdin; close it right away.
        process.getOutputStream().close();
        // Drain stdout/stderr on background threads so the process cannot block on full pipes.
        this.logStream("OCR MSG", process.getInputStream(), input);
        this.logStream("OCR ERROR", process.getErrorStream(), input);
        // Wait on a helper thread so the timeout can be enforced via Future.get.
        FutureTask<Integer> waitTask = new FutureTask<Integer>(process::waitFor);
        Thread waitThread = new Thread(waitTask);
        waitThread.start();
        try {
            int exitCode = waitTask.get(config.getTimeout(), TimeUnit.SECONDS);
            if (exitCode != 0) {
                LOG.warn("Tesseract exited with status {} for {}", exitCode, input);
            }
        }
        catch (InterruptedException e) {
            waitThread.interrupt();
            process.destroy();
            Thread.currentThread().interrupt();
            throw new TikaException("TesseractOCRParser interrupted", (Throwable)e);
        }
        catch (ExecutionException e) {
            // Previously swallowed silently; keep going as before but leave a trace.
            LOG.warn("Waiting for tesseract failed for {}", input, e.getCause());
        }
        catch (TimeoutException e) {
            waitThread.interrupt();
            process.destroy();
            throw new TikaException("TesseractOCRParser timeout", (Throwable)e);
        }
    }

    /**
     * Copies plain-text OCR output into the XHTML stream, wrapped in
     * {@code <div class="ocr">}. The input is decoded as UTF-8 and closed afterwards.
     *
     * @param stream tesseract's text output
     * @param xhtml  receiver of the character data
     * @throws SAXException if the handler rejects an event
     * @throws IOException  if reading the stream fails
     */
    private void extractOutput(InputStream stream, XHTMLContentHandler xhtml) throws SAXException, IOException {
        xhtml.startElement("div", "class", "ocr");
        try (InputStreamReader reader = new InputStreamReader(stream, StandardCharsets.UTF_8)) {
            char[] chunk = new char[1024];
            int count;
            while ((count = reader.read(chunk)) != -1) {
                if (count == 0) {
                    continue;
                }
                xhtml.characters(chunk, 0, count);
            }
        }
        xhtml.endElement("div");
    }

    /**
     * Parses hOCR (XML) output and forwards it into the XHTML stream inside
     * {@code <div class="ocr">}, dropping the document-level wrapper elements
     * listed in {@code HOCRPassThroughHandler.IGNORE}.
     *
     * @param is           tesseract's hOCR output
     * @param parseContext context for the SAX parse; a new one is created when {@code null}
     * @param xhtml        receiver of the forwarded events
     */
    private void extractHOCROutput(InputStream is, ParseContext parseContext, XHTMLContentHandler xhtml) throws TikaException, IOException, SAXException {
        ParseContext effectiveContext = parseContext != null ? parseContext : new ParseContext();
        xhtml.startElement("div", "class", "ocr");
        ContentHandler passThrough = new HOCRPassThroughHandler(xhtml);
        DefaultHandler offlineHandler = new OfflineContentHandler(passThrough);
        XMLReaderUtils.parseSAX(is, offlineHandler, effectiveContext);
        xhtml.endElement("div");
    }

    /**
     * Drains {@code stream} on a new thread and writes what was read to the debug
     * log, tagged with {@code logType} and the file being processed. The stream is
     * closed when the end is reached or reading fails.
     *
     * @param logType label identifying the stream (callers pass "OCR MSG" / "OCR ERROR")
     * @param stream  process output to consume
     * @param file    file the process is working on; used only in the log line
     */
    private void logStream(final String logType, final InputStream stream, final File file) {
        new Thread(){

            @Override
            public void run() {
                StringBuilder out = new StringBuilder();
                char[] buffer = new char[1024];
                try (InputStreamReader reader = new InputStreamReader(stream, StandardCharsets.UTF_8)) {
                    int n;
                    while ((n = reader.read(buffer)) != -1) {
                        out.append(buffer, 0, n);
                    }
                }
                catch (IOException e) {
                    // Expected when the process is destroyed mid-read; keep what was captured.
                    LOG.debug("{}: reading process output for {} failed", logType, file, e);
                }
                if (out.length() > 0) {
                    LOG.debug("{} ({}): {}", logType, file, out);
                }
            }
        }.start();
    }

    /**
     * Returns the tesseract program file name for the current platform, decided by
     * whether the {@code os.name} system property starts with "Windows".
     *
     * @return "tesseract.exe" on Windows, otherwise "tesseract"
     */
    static String getTesseractProg() {
        boolean onWindows = System.getProperty("os.name").startsWith("Windows");
        if (onWindows) {
            return "tesseract.exe";
        }
        return "tesseract";
    }

    /**
     * Returns the ImageMagick convert program file name for the current platform,
     * decided by whether the {@code os.name} system property starts with "Windows".
     *
     * @return "convert.exe" on Windows, otherwise "convert"
     */
    static String getImageMagickProg() {
        boolean onWindows = System.getProperty("os.name").startsWith("Windows");
        if (onWindows) {
            return "convert.exe";
        }
        return "convert";
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Tells whether {@link #warn()} has already been called on any instance.
     * The flag is read once without locking and, if still unset, read again
     * while holding {@code LOCK}.
     *
     * @return the current value of the shared warned flag
     */
    protected boolean hasWarned() {
        boolean warned = HAS_WARNED;
        if (!warned) {
            synchronized (LOCK) {
                warned = HAS_WARNED;
            }
        }
        return warned;
    }

    /**
     * Records that the tesseract notice has been issued by setting the shared
     * static flag that {@link #hasWarned()} reads.
     * NOTE(review): the write is not performed under LOCK -- confirm that is intended.
     */
    protected void warn() {
        HAS_WARNED = true;
    }

    /**
     * Sets the tesseract path on the default config. Within this class the value is
     * prefixed directly to the program name, must be a directory when non-empty, and
     * is the fallback for {@code TESSDATA_PREFIX} when no tessdata path is set.
     *
     * @param tesseractPath path forwarded to the default config
     */
    @Field
    public void setTesseractPath(String tesseractPath) {
        this.defaultConfig.setTesseractPath(tesseractPath);
    }

    /**
     * Sets the tessdata path on the default config. When non-empty it is exported
     * as {@code TESSDATA_PREFIX} to the tesseract process.
     *
     * @param tessdataPath path forwarded to the default config
     */
    @Field
    public void setTessdataPath(String tessdataPath) {
        this.defaultConfig.setTessdataPath(tessdataPath);
    }

    /**
     * Sets the language on the default config; it is passed to tesseract after {@code -l}.
     *
     * @param language value forwarded to the default config
     */
    @Field
    public void setLanguage(String language) {
        this.defaultConfig.setLanguage(language);
    }

    /**
     * Sets the page segmentation mode on the default config; it is passed to
     * tesseract after {@code --psm}.
     *
     * @param pageSegMode value forwarded to the default config
     */
    @Field
    public void setPageSegMode(String pageSegMode) {
        this.defaultConfig.setPageSegMode(pageSegMode);
    }

    /**
     * Sets the upper size limit on the default config; inputs whose stream length
     * exceeds it are skipped by the OCR step.
     *
     * @param maxFileSizeToOcr limit compared against the input stream length (presumably bytes -- confirm)
     */
    @Field
    public void setMaxFileSizeToOcr(long maxFileSizeToOcr) {
        this.defaultConfig.setMaxFileSizeToOcr(maxFileSizeToOcr);
    }

    /**
     * Sets the lower size limit on the default config; inputs whose stream length
     * is below it are skipped by the OCR step.
     *
     * @param minFileSizeToOcr limit compared against the input stream length (presumably bytes -- confirm)
     */
    @Field
    public void setMinFileSizeToOcr(long minFileSizeToOcr) {
        this.defaultConfig.setMinFileSizeToOcr(minFileSizeToOcr);
    }

    /**
     * Sets the OCR timeout on the default config. The wait for the tesseract
     * process uses this value in seconds.
     *
     * @param timeout timeout forwarded to the default config
     */
    @Field
    public void setTimeout(int timeout) {
        this.defaultConfig.setTimeout(timeout);
    }

    /**
     * Sets the output type on the default config. The resulting type selects the
     * tesseract output format and whether the result is handled as hOCR or plain text.
     *
     * @param outputType value forwarded to the default config; accepted values are defined by the config class
     */
    @Field
    public void setOutputType(String outputType) {
        this.defaultConfig.setOutputType(outputType);
    }

    /**
     * Sets the inter-word spacing flag on the default config; it controls whether
     * tesseract is run with {@code preserve_interword_spaces=1} or {@code =0}.
     *
     * @param preserveInterwordSpacing value forwarded to the default config
     */
    @Field
    public void setPreserveInterwordSpacing(boolean preserveInterwordSpacing) {
        this.defaultConfig.setPreserveInterwordSpacing(preserveInterwordSpacing);
    }

    /**
     * Sets the image-processing switch on the default config. The OCR step only
     * pre-processes images when the config reports the value 1 and ImageMagick is available.
     *
     * @param enableImageProcessing value forwarded to the default config
     */
    @Field
    public void setEnableImageProcessing(int enableImageProcessing) {
        this.defaultConfig.setEnableImageProcessing(enableImageProcessing);
    }

    /**
     * Sets the ImageMagick path on the default config. Within this class the value
     * is prefixed directly to the convert program name and must be a directory when
     * non-empty; on Windows an empty path disables ImageMagick use.
     *
     * @param imageMagickPath path forwarded to the default config
     */
    @Field
    public void setImageMagickPath(String imageMagickPath) {
        this.defaultConfig.setImageMagickPath(imageMagickPath);
    }

    /**
     * Sets the density on the default config; it is passed to ImageMagick after {@code -density}.
     *
     * @param density value forwarded to the default config
     */
    @Field
    public void setDensity(int density) {
        this.defaultConfig.setDensity(density);
    }

    /**
     * Sets the depth on the default config; it is passed to ImageMagick as the
     * value of its depth option during image pre-processing.
     *
     * @param depth value forwarded to the default config
     */
    @Field
    public void setDepth(int depth) {
        this.defaultConfig.setDepth(depth);
    }

    /**
     * Sets the colorspace on the default config; it is passed to ImageMagick after {@code -colorspace}.
     *
     * @param colorspace value forwarded to the default config
     */
    @Field
    public void setColorspace(String colorspace) {
        this.defaultConfig.setColorspace(colorspace);
    }

    /**
     * Sets the filter on the default config; it is passed to ImageMagick after {@code -filter}.
     *
     * @param filter value forwarded to the default config
     */
    @Field
    public void setFilter(String filter) {
        this.defaultConfig.setFilter(filter);
    }

    /**
     * Sets the resize factor on the default config; it is passed to ImageMagick
     * after {@code -resize} with a "%" suffix appended, i.e. as a percentage.
     *
     * @param resize value forwarded to the default config
     */
    @Field
    public void setResize(int resize) {
        this.defaultConfig.setResize(resize);
    }

    /**
     * Sets the rotation flag on the default config. When it is set and python is
     * usable, image pre-processing runs the rotation script to pick the rotate angle.
     *
     * @param applyRotation value forwarded to the default config
     */
    @Field
    public void setApplyRotation(boolean applyRotation) {
        this.defaultConfig.setApplyRotation(applyRotation);
    }

    /**
     * Returns the config used when a parse context supplies none.
     *
     * @return the live default config instance (not a copy), so changes made to it affect this parser
     */
    public TesseractOCRConfig getDefaultConfig() {
        return this.defaultConfig;
    }

    /**
     * SAX handler that relays hOCR events to a wrapped handler while suppressing
     * the start/end events of the document-level elements named in {@link #IGNORE}.
     * Character data is always relayed, including text inside ignored elements.
     */
    private static class HOCRPassThroughHandler
    extends DefaultHandler {
        /** Qualified element names whose start and end events are not forwarded. */
        public static final Set<String> IGNORE = Collections.unmodifiableSet(new HashSet<String>(Arrays.asList("html", "head", "title", "meta", "body")));
        private final ContentHandler xhtml;

        /**
         * @param xhtml handler that receives the forwarded events
         */
        public HOCRPassThroughHandler(ContentHandler xhtml) {
            this.xhtml = xhtml;
        }

        @Override
        public void startElement(String uri, String local, String name, Attributes attributes) throws SAXException {
            if (IGNORE.contains(name)) {
                return;
            }
            this.xhtml.startElement(uri, local, name, attributes);
        }

        @Override
        public void endElement(String uri, String local, String name) throws SAXException {
            if (IGNORE.contains(name)) {
                return;
            }
            this.xhtml.endElement(uri, local, name);
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            this.xhtml.characters(ch, start, length);
        }
    }

    /**
     * Composite parser built from a fresh media type registry and the image, JPEG
     * and TIFF parsers; used by the enclosing class to collect image metadata.
     */
    private static class CompositeImageParser
    extends CompositeParser {
        private static final long serialVersionUID = -2398203346206381382L;
        /** Delegate parsers handed to the composite. */
        private static List<Parser> imageParsers = Arrays.<Parser>asList(
                new ImageParser(),
                new JpegParser(),
                new TiffParser());

        CompositeImageParser() {
            super(new MediaTypeRegistry(), imageParsers);
        }
    }
}

