/*
 * Decompiled with CFR 0.152.
 */
package org.ow2.weblab.service.normaliser.tika.handlers;

import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.ow2.weblab.core.extended.factory.MediaUnitFactory;
import org.ow2.weblab.core.model.Document;
import org.ow2.weblab.core.model.Text;
import org.ow2.weblab.service.normaliser.tika.TikaConfiguration;
import org.ow2.weblab.service.normaliser.tika.handlers.WebLabHandlerDecorator;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;

/**
 * SAX handler that flattens everything found inside the {@code body} element into plain text
 * and stores the result in a {@link Text} unit linked to the current {@link Document}.
 *
 * <p>Character data is appended with every run of whitespace collapsed to a single space.
 * Element boundaries are turned into separators: a line break for the block-like elements
 * listed in {@link #NEWLINE_ELEMENTS} (on both start and end), a tab at the end of the
 * elements in {@link #TAB_ELEMENTS}, and a single space at the end of any other element.
 * When {@code body} ends, the trimmed buffer becomes the content of the text unit; a unit
 * that ends up empty is removed from the document again.
 *
 * <p>Every SAX event is forwarded to the superclass before being processed here.
 * {@link #setDocument(Document)} must be called before the end of {@code body} is reached.
 * Instances hold mutable per-document state and are not written for concurrent use.
 */
public class SimpleTextContentHandler extends WebLabHandlerDecorator {
    /**
     * Elements producing a line break when they start and when they end. Names are compared
     * exactly (case-sensitively), unlike {@code body} which is matched ignoring case.
     */
    private static final List<String> NEWLINE_ELEMENTS = Arrays.asList("ol", "dt", "dl", "ul", "li", "br", "p", "div", "table", "tr");

    /** Elements producing a tab when they end. Names are compared exactly. */
    private static final List<String> TAB_ELEMENTS = Arrays.asList("td");

    /** Compiled once: {@link #characters(char[], int, int)} runs for every text chunk. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /** {@code true} between the start and the end of the {@code body} element. */
    private boolean isInBody = false;

    /** Document receiving the extracted text; set by {@link #setDocument(Document)}. */
    private Document document;

    /** Text unit created for {@link #document}; filled when {@code body} ends. */
    private Text createdText;

    /** Accumulates the flattened body text. */
    private final StringBuilder sb = new StringBuilder();

    private final Log logger = LogFactory.getLog(this.getClass());

    /** Creates the handler; a document has to be supplied through {@link #setDocument(Document)}. */
    public SimpleTextContentHandler() {
        this.logger.debug("SimpleTextContentHandler initialised.");
    }

    /**
     * Forwards the event to the superclass, then starts collecting when {@code body} opens or
     * appends a line break when a {@link #NEWLINE_ELEMENTS} element opens inside the body.
     *
     * @param uri       namespace URI of the element (only forwarded)
     * @param localName local name of the element (only forwarded)
     * @param name      qualified name, used for all matching done here
     * @param atts      attributes of the element (only forwarded)
     * @throws SAXException if the superclass rejects the event
     */
    public void startElement(String uri, String localName, String name, Attributes atts) throws SAXException {
        super.startElement(uri, localName, name, atts);
        this.logger.trace("Start element: " + name);
        if ("body".equalsIgnoreCase(name)) {
            this.isInBody = true;
        } else if (this.isInBody && NEWLINE_ELEMENTS.contains(name)) {
            this.sb.append("\n");
        }
    }

    /**
     * Forwards the event to the superclass, then either finalises the text unit (end of
     * {@code body}) or appends the separator matching the closed element.
     *
     * @param uri       namespace URI of the element (only forwarded)
     * @param localName local name of the element (only forwarded)
     * @param name      qualified name, used for all matching done here
     * @throws SAXException          if the superclass rejects the event
     * @throws IllegalStateException if {@code body} ends before {@link #setDocument(Document)}
     *                               was called
     */
    public void endElement(String uri, String localName, String name) throws SAXException {
        super.endElement(uri, localName, name);
        this.logger.trace("End element: " + name);
        if ("body".equalsIgnoreCase(name)) {
            this.finishBody();
        } else if (this.isInBody) {
            this.sb.append(separatorAfter(name));
        }
    }

    /**
     * Forwards the event to the superclass and, inside the body, appends the characters with
     * each whitespace run collapsed to one space.
     *
     * @param ch     buffer holding the characters
     * @param start  index of the first character to read
     * @param length number of characters to read
     * @throws SAXException if the superclass rejects the event
     */
    public void characters(char[] ch, int start, int length) throws SAXException {
        super.characters(ch, start, length);
        if (!this.isInBody) {
            return;
        }
        final String collapsed = WHITESPACE_RUN.matcher(new String(ch, start, length)).replaceAll(" ");
        if (!collapsed.isEmpty()) {
            this.sb.append(collapsed);
        }
    }

    /**
     * Sets the document to enrich and creates the text unit that will receive the body text.
     * Any text or parsing state left over from a previous use of this instance is discarded,
     * so that it cannot leak into the new unit.
     *
     * @param document the document to which a new {@link Text} unit is linked
     */
    public void setDocument(Document document) {
        this.document = document;
        this.sb.setLength(0);
        this.isInBody = false;
        this.createdText = (Text)MediaUnitFactory.createAndLinkMediaUnit(this.document, Text.class);
    }

    /**
     * Does nothing: the configuration is not used by this handler.
     *
     * @param tikaConfiguration ignored
     */
    public void setTikaConfiguration(TikaConfiguration tikaConfiguration) {
    }

    /** Stores the trimmed buffer in the text unit and drops the unit when it is empty. */
    private void finishBody() {
        if (this.createdText == null) {
            // Fail with an explicit message instead of an anonymous NullPointerException.
            throw new IllegalStateException("setDocument(Document) must be called before the end of the body element is reached.");
        }
        final String content = this.sb.toString().trim();
        this.createdText.setContent(content);
        this.isInBody = false;
        if (content.isEmpty()) {
            this.logger.warn("The MediaUnit " + this.createdText.getUri() + " will be removed since it is empty.");
            this.document.getMediaUnit().remove(this.createdText);
        }
    }

    /** Returns the separator appended after the end of the given non-body element. */
    private static String separatorAfter(String name) {
        if (NEWLINE_ELEMENTS.contains(name)) {
            return "\n";
        }
        if (TAB_ELEMENTS.contains(name)) {
            return "\t";
        }
        return " ";
    }
}

