/*
 * Decompiled with CFR 0.152.
 */
package org.dspace.app.mediafilter;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import org.apache.commons.lang.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.poi.util.IOUtils;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ContentHandlerDecorator;
import org.dspace.app.mediafilter.MediaFilter;
import org.dspace.content.Item;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

public class TikaTextExtractionFilter
extends MediaFilter {
    private static final Logger log = LogManager.getLogger();
    private static final int DEFAULT_MAX_CHARS = 100000;
    private static final int DEFAULT_MAX_ARRAY = 1000000;

    @Override
    public String getFilteredName(String oldFilename) {
        return oldFilename + ".txt";
    }

    @Override
    public String getBundleName() {
        return "TEXT";
    }

    @Override
    public String getFormatString() {
        return "Text";
    }

    @Override
    public String getDescription() {
        return "Extracted text";
    }

    @Override
    public InputStream getDestinationStream(Item currentItem, InputStream source, boolean verbose) throws Exception {
        String extractedText;
        ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService();
        boolean useTemporaryFile = configurationService.getBooleanProperty("textextractor.use-temp-file", false);
        if (useTemporaryFile) {
            return this.extractUsingTempFile(source, verbose);
        }
        int maxChars = configurationService.getIntProperty("textextractor.max-chars", 100000);
        int maxArray = configurationService.getIntProperty("textextractor.max-array", 1000000);
        IOUtils.setByteArrayMaxOverride((int)maxArray);
        try {
            Tika tika = new Tika();
            tika.setMaxStringLength(maxChars);
            extractedText = tika.parseToString(source);
        }
        catch (IOException e) {
            System.err.format("Unable to extract text from bitstream in Item %s%n", currentItem.getID().toString());
            e.printStackTrace(System.err);
            log.error("Unable to extract text from bitstream in Item {}", (Object)currentItem.getID().toString(), (Object)e);
            throw e;
        }
        catch (OutOfMemoryError oe) {
            System.err.format("OutOfMemoryError occurred when extracting text from bitstream in Item %s. You may wish to enable 'textextractor.use-temp-file'.%n", currentItem.getID().toString());
            oe.printStackTrace(System.err);
            log.error("OutOfMemoryError occurred when extracting text from bitstream in Item {}. You may wish to enable 'textextractor.use-temp-file'.", (Object)currentItem.getID().toString(), (Object)oe);
            throw oe;
        }
        if (StringUtils.isNotEmpty((String)extractedText)) {
            if (verbose) {
                System.out.println("(Verbose mode) Extracted text:");
                System.out.println(extractedText);
            }
            return new ByteArrayInputStream(extractedText.getBytes(StandardCharsets.UTF_8));
        }
        return null;
    }

    private InputStream extractUsingTempFile(InputStream source, boolean verbose) throws IOException, TikaException, SAXException {
        final File tempExtractedTextFile = File.createTempFile("dspacetextextract" + source.hashCode(), ".txt");
        if (verbose) {
            System.out.println("(Verbose mode) Extracted text was written to temporary file at " + tempExtractedTextFile.getAbsolutePath());
        } else {
            tempExtractedTextFile.deleteOnExit();
        }
        try (final FileWriter writer = new FileWriter(tempExtractedTextFile, StandardCharsets.UTF_8);){
            BodyContentHandler handler = new BodyContentHandler((ContentHandler)new ContentHandlerDecorator(){

                public void characters(char[] ch, int start, int length) throws SAXException {
                    try {
                        writer.append(new String(ch, start, length));
                    }
                    catch (IOException e) {
                        String errorMsg = String.format("Could not append to temporary file at %s when performing text extraction", tempExtractedTextFile.getAbsolutePath());
                        log.error(errorMsg, (Throwable)e);
                        throw new SAXException(errorMsg, e);
                    }
                }

                public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
                    try {
                        writer.append(new String(ch, start, length));
                    }
                    catch (IOException e) {
                        String errorMsg = String.format("Could not append to temporary file at %s when performing text extraction", tempExtractedTextFile.getAbsolutePath());
                        log.error(errorMsg, (Throwable)e);
                        throw new SAXException(errorMsg, e);
                    }
                }
            });
            ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService();
            int maxArray = configurationService.getIntProperty("textextractor.max-array", 1000000);
            IOUtils.setByteArrayMaxOverride((int)maxArray);
            AutoDetectParser parser = new AutoDetectParser();
            Metadata metadata = new Metadata();
            parser.parse(source, (ContentHandler)handler, metadata);
        }
        return new FileInputStream(tempExtractedTextFile);
    }
}

