/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.microsoft;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.CustomOutlookExtractor;
import org.apache.tika.parser.microsoft.ExcelExtractor;
import org.apache.tika.parser.microsoft.SummaryExtractor;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
 * Tika parser for OLE2-based Microsoft Office documents.
 *
 * <p>The input stream is loaded into a {@link POIFSFileSystem}; summary
 * metadata is extracted first, then the root entries of the file system are
 * scanned. The name of each root entry decides which text extractor is run
 * and which {@code Content-Type} value is written to the metadata:
 *
 * <ul>
 *   <li>directory {@code Quill} - Publisher</li>
 *   <li>document {@code WordDocument} - Word</li>
 *   <li>document {@code PowerPoint Document} - PowerPoint</li>
 *   <li>document {@code Workbook} - Excel</li>
 *   <li>document {@code VisioDocument} - Visio</li>
 *   <li>document whose name starts with {@code __substg1.0_} - Outlook
 *       (handled at most once per parse)</li>
 * </ul>
 *
 * <p>Extracted text is emitted as XHTML through an {@link XHTMLContentHandler}.
 */
public class CustomOfficeParser
implements Parser {
    private static final long serialVersionUID = 1L;

    /** Media types this parser declares support for. */
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(
            new HashSet<MediaType>(Arrays.asList(
                    MediaType.application("x-tika-msoffice"),
                    MediaType.application("vnd.visio"),
                    MediaType.application("vnd.ms-powerpoint"),
                    MediaType.application("vnd.ms-excel"),
                    MediaType.application("vnd.ms-excel.sheet.binary.macroenabled.12"),
                    MediaType.application("msword"),
                    MediaType.application("vnd.ms-outlook"))));

    /**
     * Returns the media types handled by this parser.
     *
     * @param context the parse context (not consulted)
     * @return an unmodifiable set of supported media types
     */
    public Set<MediaType> getSupportedTypes(ParseContext context) {
        return SUPPORTED_TYPES;
    }

    /**
     * Parses an OLE2 document and writes its text content as XHTML.
     *
     * @param stream   the document stream, read into a {@link POIFSFileSystem}
     * @param handler  receiver of the generated XHTML SAX events
     * @param metadata receives summary properties and the detected {@code Content-Type}
     * @param context  parse context; a {@link Locale} registered here is passed to
     *                 the Excel extractor, otherwise the default locale is used
     * @throws IOException   if the stream cannot be read
     * @throws SAXException  if the content handler rejects an event
     * @throws TikaException if one of the extractors fails
     */
    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        POIFSFileSystem filesystem = new POIFSFileSystem(stream);
        new SummaryExtractor(metadata).parseSummaries(filesystem);

        // Outlook messages contain many "__substg1.0_*" entries; extract only once.
        boolean outlookExtracted = false;

        // Wildcard iterator plus explicit cast works whether or not the POI
        // version in use declares a typed iterator.
        Iterator<?> entries = filesystem.getRoot().getEntries();
        while (entries.hasNext()) {
            Entry entry = (Entry) entries.next();
            String name = entry.getName();

            if (entry instanceof DirectoryEntry) {
                if ("Quill".equals(name)) {
                    setType(metadata, "application/x-mspublisher");
                    PublisherTextExtractor publisher = new PublisherTextExtractor(filesystem);
                    xhtml.element("p", publisher.getText());
                }
                continue;
            }
            if (!(entry instanceof DocumentEntry)) {
                continue;
            }

            if ("WordDocument".equals(name)) {
                setType(metadata, "application/msword");
                WordExtractor word = new WordExtractor(filesystem);
                addTextIfAny(xhtml, "header", word.getHeaderText());
                addParagraphs(xhtml, word.getParagraphText());
                addParagraphs(xhtml, word.getFootnoteText());
                addParagraphs(xhtml, word.getCommentsText());
                addParagraphs(xhtml, word.getEndnoteText());
                addTextIfAny(xhtml, "footer", word.getFooterText());
            } else if ("PowerPoint Document".equals(name)) {
                setType(metadata, "application/vnd.ms-powerpoint");
                PowerPointExtractor powerPoint = new PowerPointExtractor(filesystem);
                xhtml.element("p", powerPoint.getText(true, true));
            } else if ("Workbook".equals(name)) {
                setType(metadata, "application/vnd.ms-excel");
                Locale locale = context.get(Locale.class, Locale.getDefault());
                new ExcelExtractor(context).parse(filesystem, xhtml, locale);
            } else if ("VisioDocument".equals(name)) {
                setType(metadata, "application/vnd.visio");
                VisioTextExtractor visio = new VisioTextExtractor(filesystem);
                addParagraphs(xhtml, visio.getAllText());
            } else if (!outlookExtracted && name.startsWith("__substg1.0_")) {
                outlookExtracted = true;
                setType(metadata, "application/vnd.ms-outlook");
                new CustomOutlookExtractor(filesystem, context).parse(xhtml, metadata);
            }
        }
        xhtml.endDocument();
    }

    /**
     * Parses the document using a fresh, empty {@link ParseContext}.
     *
     * @param stream   the document stream
     * @param handler  receiver of the generated XHTML SAX events
     * @param metadata receives summary properties and the detected {@code Content-Type}
     * @throws IOException   if the stream cannot be read
     * @throws SAXException  if the content handler rejects an event
     * @throws TikaException if one of the extractors fails
     */
    public void parse(InputStream stream, ContentHandler handler, Metadata metadata) throws IOException, SAXException, TikaException {
        parse(stream, handler, metadata, new ParseContext());
    }

    /** Stores {@code type} under the {@code Content-Type} metadata key. */
    private void setType(Metadata metadata, String type) {
        metadata.set("Content-Type", type);
    }

    /** Emits every string in {@code paragraphs} as its own {@code <p>} element. */
    private void addParagraphs(XHTMLContentHandler xhtml, String[] paragraphs) throws SAXException {
        for (String paragraph : paragraphs) {
            xhtml.element("p", paragraph);
        }
    }

    /**
     * Emits {@code text} as a {@code <p>} wrapped in {@code <div class="section">},
     * or nothing at all when the text is {@code null} or empty.
     */
    private void addTextIfAny(XHTMLContentHandler xhtml, String section, String text) throws SAXException {
        if (text != null && text.length() > 0) {
            xhtml.startElement("div", "class", section);
            xhtml.element("p", text);
            xhtml.endElement("div");
        }
    }
}

