/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.html;

import de.l3s.boilerpipe.BoilerpipeExtractor;
import de.l3s.boilerpipe.extractors.ArticleExtractor;
import de.l3s.boilerpipe.extractors.KeepEverythingExtractor;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.html.BoilerpipeContentHandler;
import org.apache.tika.parser.html.HtmlParser;
import org.ow2.weblab.core.extended.properties.PropertiesLoader;
import org.ow2.weblab.services.normaliser.tika.TikaExtractorService;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

public class CustomBoilerpipeHtmlParser
implements Parser {
    private static final Log logger = LogFactory.getLog(TikaExtractorService.class);
    private static final String HTML_PARSER_PROPERTIES_FILE_NAME = "html-parser.properties";
    private static final String BOILERPIPE_EXTRACTOR = "boilerpipeExtractor";
    private static final String BOILERPIPE_ARTICLE_EXTRACTOR = "Article";
    private static final String BOILERPIPE_DEFAULT_EXTRACTOR = "Default";
    private static final String BOILERPIPE_KEEP_EVERYTHING_EXTRACTOR = "KeepEverything";

    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
        try {
            Map props = PropertiesLoader.loadProperties((String)HTML_PARSER_PROPERTIES_FILE_NAME);
            if (((String)props.get(BOILERPIPE_EXTRACTOR)).equals(BOILERPIPE_DEFAULT_EXTRACTOR)) {
                this.parseUsingDefaultExtractor(stream, metadata, handler, context);
            } else if (((String)props.get(BOILERPIPE_EXTRACTOR)).equals(BOILERPIPE_ARTICLE_EXTRACTOR)) {
                this.parseUsingArticleExtractor(stream, metadata, handler, context);
            } else if (((String)props.get(BOILERPIPE_EXTRACTOR)).equals(BOILERPIPE_KEEP_EVERYTHING_EXTRACTOR)) {
                this.parseUsingKeepEverythingExtractor(stream, metadata, handler, context);
            } else {
                this.parseUsingTikaParser(stream, metadata, handler, context);
            }
        }
        catch (Exception e) {
            logger.error((Object)"Document could not be parsed.");
            throw new TikaException("Failed to parse HTML document", (Throwable)e);
        }
    }

    private void parseUsingKeepEverythingExtractor(InputStream stream, Metadata metadata, ContentHandler handler, ParseContext context) throws IOException, SAXException, TikaException {
        HtmlParser parser = new HtmlParser();
        logger.info((Object)"Using boilerpipe extractor filter : KeepEverything");
        try {
            BoilerpipeContentHandler boilerpipeContentHandler = new BoilerpipeContentHandler(handler, (BoilerpipeExtractor)KeepEverythingExtractor.INSTANCE);
            boilerpipeContentHandler.setIncludeMarkup(true);
            parser.parse(stream, (ContentHandler)boilerpipeContentHandler, metadata, context);
        }
        catch (Exception e) {
            logger.error((Object)"Extractor failed to extract content... Try to use another extractor.");
            this.parseUsingTikaParser(stream, metadata, handler, context);
        }
    }

    private void parseUsingArticleExtractor(InputStream stream, Metadata metadata, ContentHandler handler, ParseContext context) throws IOException, SAXException, TikaException {
        HtmlParser parser = new HtmlParser();
        logger.info((Object)"Using boilerpipe extractor filter : Article");
        try {
            BoilerpipeContentHandler boilerpipeContentHandler = new BoilerpipeContentHandler(handler, (BoilerpipeExtractor)ArticleExtractor.INSTANCE);
            boilerpipeContentHandler.setIncludeMarkup(true);
            parser.parse(stream, (ContentHandler)boilerpipeContentHandler, metadata, context);
        }
        catch (Exception e) {
            logger.error((Object)"Extractor failed to extract content... Try to use another extractor.");
            this.parseUsingDefaultExtractor(stream, metadata, handler, context);
        }
    }

    private void parseUsingDefaultExtractor(InputStream stream, Metadata metadata, ContentHandler handler, ParseContext context) throws IOException, SAXException, TikaException {
        HtmlParser parser = new HtmlParser();
        logger.info((Object)"Using boilerpipe extractor filter : Default");
        try {
            BoilerpipeContentHandler boilerpipeContentHandler = new BoilerpipeContentHandler(handler);
            boilerpipeContentHandler.setIncludeMarkup(true);
            parser.parse(stream, (ContentHandler)boilerpipeContentHandler, metadata, context);
        }
        catch (Exception e) {
            logger.error((Object)"Extractor failed to extract content... Try to use another extractor.");
            this.parseUsingKeepEverythingExtractor(stream, metadata, handler, context);
        }
    }

    private void parseUsingTikaParser(InputStream stream, Metadata metadata, ContentHandler handler, ParseContext context) throws IOException, SAXException, TikaException {
        HtmlParser parser = new HtmlParser();
        logger.info((Object)"Using HTML Tika parser.");
        parser.parse(stream, handler, metadata, context);
    }

    public void parse(InputStream stream, ContentHandler handler, Metadata metadata) throws IOException, SAXException, TikaException {
        this.parse(stream, handler, metadata, new ParseContext());
    }

    public Set<MediaType> getSupportedTypes(ParseContext context) {
        return null;
    }
}

