/*
 * Decompiled with CFR 0.152.
 */
package org.ow2.weblab.service.normaliser.tika.handlers;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.List;
import javax.imageio.ImageIO;
import javax.xml.bind.DatatypeConverter;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.language.ProfilingWriter;
import org.ow2.weblab.content.api.ContentManager;
import org.ow2.weblab.core.extended.exception.WebLabCheckedException;
import org.ow2.weblab.core.extended.factory.AnnotationFactory;
import org.ow2.weblab.core.extended.factory.MediaUnitFactory;
import org.ow2.weblab.core.helper.impl.JenaPoKHelper;
import org.ow2.weblab.core.model.Annotation;
import org.ow2.weblab.core.model.Document;
import org.ow2.weblab.core.model.Image;
import org.ow2.weblab.core.model.PieceOfKnowledge;
import org.ow2.weblab.core.model.Resource;
import org.ow2.weblab.core.model.Text;
import org.ow2.weblab.service.normaliser.tika.TikaConfiguration;
import org.ow2.weblab.service.normaliser.tika.handlers.WebLabHandlerDecorator;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;

/**
 * SAX handler decorator that builds WebLab media units on a {@link Document} from the
 * element and character events it receives.
 *
 * <p>Every event is first forwarded to the decorated handler through {@code super}. On top of
 * that, this class:
 * <ul>
 * <li>groups character data into {@link Text} units, using title elements ({@code h1}-{@code h6})
 * and block elements ({@code div}, {@code p}) as boundaries;</li>
 * <li>collects everything found between {@code <table>} and {@code </table>} into a single
 * {@link Text} unit, one line per {@code tr} and one tab per {@code td};</li>
 * <li>creates an {@link Image} unit for {@code img} elements whose {@code src} is an
 * {@code http://} URL ending in {@code .jpg}, {@code .png} or {@code .bmp} and whose decoded
 * picture is larger than {@value #MIN_IMAGE_WIDTH}x{@value #MIN_IMAGE_HEIGHT} pixels;</li>
 * <li>annotates each kept text/table unit with the language reported by a
 * {@link ProfilingWriter}, or {@code "unk"} when the detection is not trusted.</li>
 * </ul>
 *
 * <p>{@link #setDocument(Document)} must be called before any event is received, since media
 * units are attached to that document. {@link #setTikaConfiguration(TikaConfiguration)} is
 * optional: without it only the language statement is written on annotations.
 *
 * <p>The class keeps per-parse state in instance fields; nothing in it provides
 * synchronisation.
 */
@Deprecated
public class MediaUnitContentHandler extends WebLabHandlerDecorator {
    private final Log logger = LogFactory.getLog(this.getClass());

    /** An image is kept only when its width is strictly greater than this value (pixels). */
    private static final int MIN_IMAGE_WIDTH = 100;
    /** An image is kept only when its height is strictly greater than this value (pixels). */
    private static final int MIN_IMAGE_HEIGHT = 100;
    /** Text units whose stripped content is shorter than this are removed from the document. */
    private static final int MIN_TEXT_LENGTH = 4;
    /** Table units whose stripped content is shorter than this (i.e. empty) are removed. */
    private static final int MIN_TABLE_LENGTH = 1;
    /** Above this stripped length the detected language is used even if not "reasonably certain". */
    private static final int LONG_CONTENT_LENGTH = 200;
    /** Language code written when the detected language is not trusted. */
    private static final String UNKNOWN_LANGUAGE = "unk";
    /** Prefix of the predicates created by this handler for image properties. */
    private static final String BASE_URI = "http://weblab.ow2.org/services/tika/";

    /** Elements that add a line separator to the current text unit when they start. */
    private static final List<String> NEW_LINE_ELEMENTS = immutableList("ol", "dt", "dl", "li");
    /** Elements that add a tab to the current text unit when they start. */
    private static final List<String> TAB_LIST_ELEMENTS = immutableList("li", "dd");
    /** Title elements; they open (or split) text units. */
    private static final List<String> TITLE_LIST_ELEMENTS = immutableList("h1", "h2", "h3", "h4", "h5", "h6");
    /** Block elements; they open (or split) text units. */
    private static final List<String> BLOC_LIST_ELEMENTS = immutableList("div", "p");

    private Document document;
    /** Language profiler fed with the characters of {@link #textInProcess}. */
    private ProfilingWriter pWriter;
    /** Language profiler fed with the characters of {@link #tableInProcess}. */
    private ProfilingWriter pTableWriter;
    /** Text unit currently being filled, or {@code null} when none is open. */
    private Text textInProcess = null;
    /** Table unit currently being filled, or {@code null} when not inside a table. */
    private Text tableInProcess = null;
    /** Name of the element that opened the current text unit; empty until {@code body} is seen. */
    private String muHtmlElement = "";
    private TikaConfiguration configuration;

    /**
     * Builds an unmodifiable list holding the given values in order.
     */
    private static List<String> immutableList(String... values) {
        final List<String> list = new ArrayList<String>(values.length);
        Collections.addAll(list, values);
        return Collections.unmodifiableList(list);
    }

    /**
     * Removes tabs, line feeds and carriage returns, then trims the result.
     */
    private static String stripLayoutCharacters(String value) {
        return value.replace("\t", "").replace("\n", "").replace("\r", "").trim();
    }

    /**
     * Tells whether {@code src} ends with one of the image extensions this handler accepts.
     * The comparison is case-sensitive.
     */
    private static boolean hasSupportedImageExtension(String src) {
        return src.endsWith(".jpg") || src.endsWith(".png") || src.endsWith(".bmp");
    }

    /** Appends {@code suffix} to the content of the text unit in process (which must be open). */
    private void appendToText(String suffix) {
        this.textInProcess.setContent(this.textInProcess.getContent() + suffix);
    }

    /** Appends {@code suffix} to the content of the table unit in process (which must be open). */
    private void appendToTable(String suffix) {
        this.tableInProcess.setContent(this.tableInProcess.getContent() + suffix);
    }

    /** Appends the platform line separator to the text unit in process. */
    private void appendLineSeparatorToText() {
        this.appendToText(System.getProperty("line.separator"));
    }

    /** Creates an empty text unit on the document together with a fresh language profiler. */
    private void openTextMediaUnit() {
        this.textInProcess = (Text) MediaUnitFactory.createAndLinkMediaUnit(this.document, Text.class);
        this.textInProcess.setContent("");
        this.pWriter = new ProfilingWriter();
    }

    /** Creates an empty table unit on the document together with a fresh language profiler. */
    private void openTableMediaUnit() {
        this.tableInProcess = (Text) MediaUnitFactory.createAndLinkMediaUnit(this.document, Text.class);
        this.tableInProcess.setContent("");
        this.pTableWriter = new ProfilingWriter();
    }

    /**
     * Finalises a text or table unit: it is removed from the document when its stripped content
     * is shorter than {@code minLength}, otherwise it receives a language annotation.
     *
     * @param unit the unit being closed
     * @param writer the profiler that received the unit's characters
     * @param minLength minimum stripped length for the unit to be kept
     */
    private void finishMediaUnit(Text unit, ProfilingWriter writer, int minLength) {
        final String content = stripLayoutCharacters(unit.getContent());
        if (content.length() < minLength) {
            this.document.getMediaUnit().remove(unit);
        } else if (writer.getLanguage().isReasonablyCertain() || content.length() > LONG_CONTENT_LENGTH) {
            this.annotate((Resource) unit, writer.getLanguage().getLanguage());
        } else {
            this.annotate((Resource) unit, UNKNOWN_LANGUAGE);
        }
    }

    /** Closes the text unit in process (which must be open) and resets the related state. */
    private void closeTextMediaUnit() {
        this.finishMediaUnit(this.textInProcess, this.pWriter, MIN_TEXT_LENGTH);
        this.textInProcess = null;
        this.pWriter = null;
    }

    /** Closes the table unit in process (which must be open) and resets the related state. */
    private void closeTableMediaUnit() {
        this.finishMediaUnit(this.tableInProcess, this.pTableWriter, MIN_TABLE_LENGTH);
        this.tableInProcess = null;
        this.pTableWriter = null;
    }

    /**
     * Creates an image unit for an {@code img} element when its picture can be loaded and is
     * large enough, and annotates it with its {@code alt} or {@code title} text and its size.
     *
     * <p>Nothing is done when the element has no {@code src}, when the extension is not
     * supported, or when the picture cannot be read; read failures are logged at info level.
     *
     * @param atts attributes of the {@code img} element
     */
    private void addImageMediaUnit(Attributes atts) {
        final String src = atts.getValue("src");
        // An <img> without src used to trigger a NullPointerException here.
        if (src == null || !hasSupportedImageExtension(src)) {
            return;
        }
        String contentURI = "";
        BufferedImage image = null;
        try {
            if (src.startsWith("http://")) {
                image = ImageIO.read(new URL(src));
                contentURI = src;
            }
            if (image == null || image.getWidth() <= MIN_IMAGE_WIDTH || image.getHeight() <= MIN_IMAGE_HEIGHT) {
                return;
            }
            final Image imageMU = (Image) MediaUnitFactory.createAndLinkMediaUnit(this.document, Image.class);
            // NOTE(review): an image is only ever loaded for http:// sources, in which case
            // contentURI is non-empty, so this local-copy branch is currently unreachable.
            // It is kept as-is; enabling it would read local paths named by the parsed markup.
            if (contentURI.isEmpty()) {
                final InputStream fis = new FileInputStream(new File(src));
                final ContentManager contentManager = ContentManager.getInstance();
                try {
                    contentManager.writeNativeContent(fis, (Resource) imageMU);
                } catch (WebLabCheckedException wlce) {
                    this.logger.warn("Unable to copy image.", wlce);
                } finally {
                    IOUtils.closeQuietly(fis);
                }
            }
            this.annotateImageDescription(imageMU, atts);
            this.annotateImageSize(imageMU, image);
        } catch (MalformedURLException e) {
            this.logger.info("Unable to read image. " + src + " can not be transformed to URI.");
        } catch (IOException e) {
            this.logger.info("Unable to convert to image : " + src);
        }
    }

    /**
     * Annotates the image with the value of the first {@code alt} or {@code title} attribute
     * found (whichever comes first); later ones are ignored.
     */
    private void annotateImageDescription(Image imageMU, Attributes atts) {
        for (int i = 0; i < atts.getLength(); ++i) {
            final String attributeName = atts.getLocalName(i);
            final boolean isAlt = "alt".equals(attributeName);
            if (!isAlt && !"title".equals(attributeName)) {
                continue;
            }
            final Annotation annotation = AnnotationFactory.createAndLinkAnnotation((Resource) imageMU);
            final JenaPoKHelper pokH = new JenaPoKHelper((PieceOfKnowledge) annotation);
            final String predicate = isAlt ? BASE_URI + "alt" : "http://purl.org/dc/elements/1.1/title";
            pokH.createLitStat(imageMU.getUri(), predicate, atts.getValue(i).replace("<br/>", ""));
            break;
        }
    }

    /**
     * Annotates the image with its pixel width and height. Failures to build the predicate
     * URIs are logged as warnings and leave the image without size statements.
     */
    private void annotateImageSize(Image imageMU, BufferedImage image) {
        try {
            final Annotation annotation = AnnotationFactory.createAndLinkAnnotation((Resource) imageMU);
            final JenaPoKHelper pokH = new JenaPoKHelper((PieceOfKnowledge) annotation);
            pokH.setAutoCommitMode(false);
            pokH.createLitStat(imageMU.getUri(), new URL(BASE_URI + "width").toURI().toString(), String.valueOf(image.getWidth()));
            pokH.createLitStat(imageMU.getUri(), new URL(BASE_URI + "height").toURI().toString(), String.valueOf(image.getHeight()));
            pokH.commit();
        } catch (MalformedURLException murle) {
            this.logger.warn("Malformed URL : " + BASE_URI + "width" + ")", murle);
        } catch (URISyntaxException urise) {
            this.logger.warn("Unable to transform the property 'width' into a predicate (" + BASE_URI + "width" + ")", urise);
        }
    }

    /**
     * Forwards the event, then opens, splits or extends the current text/table/image units
     * depending on the element name.
     */
    @Override
    public void startElement(String uri, String localName, String name, Attributes atts) throws SAXException {
        super.startElement(uri, localName, name, atts);
        if (name.equals("body")) {
            this.muHtmlElement = name;
        }
        // Inside a table, title/block/table starts do not create new units.
        if (this.tableInProcess == null) {
            if (TITLE_LIST_ELEMENTS.contains(name)) {
                if (this.textInProcess == null) {
                    this.openTextMediaUnit();
                    this.muHtmlElement = name;
                } else if (this.muHtmlElement.equals(name) || this.muHtmlElement.equals("body")) {
                    this.closeTextMediaUnit();
                    this.muHtmlElement = name;
                    this.openTextMediaUnit();
                } else {
                    this.appendLineSeparatorToText();
                }
            }
            if (BLOC_LIST_ELEMENTS.contains(name)) {
                if (this.textInProcess == null) {
                    this.openTextMediaUnit();
                    this.muHtmlElement = name;
                } else if (TITLE_LIST_ELEMENTS.contains(this.muHtmlElement) || BLOC_LIST_ELEMENTS.contains(this.muHtmlElement)) {
                    this.appendLineSeparatorToText();
                } else {
                    this.closeTextMediaUnit();
                    this.muHtmlElement = name;
                    this.openTextMediaUnit();
                }
            }
            if (name.equals("table")) {
                this.openTableMediaUnit();
                if (this.textInProcess != null) {
                    this.closeTextMediaUnit();
                }
            }
        }
        if (name.equals("img")) {
            this.addImageMediaUnit(atts);
            if (this.tableInProcess == null && this.textInProcess != null) {
                if (BLOC_LIST_ELEMENTS.contains(this.muHtmlElement)) {
                    this.appendLineSeparatorToText();
                } else {
                    this.closeTextMediaUnit();
                }
            }
        }
        if (this.textInProcess != null) {
            if (NEW_LINE_ELEMENTS.contains(name)) {
                this.appendLineSeparatorToText();
            }
            if (TAB_LIST_ELEMENTS.contains(name)) {
                this.appendToText("\t");
            }
            if (name.equals("a") || name.equals("span")) {
                this.appendToText(" ");
            }
        }
    }

    /**
     * Forwards the event, then adds the matching separator to the unit in process or closes it.
     */
    @Override
    public void endElement(String uri, String localName, String name) throws SAXException {
        super.endElement(uri, localName, name);
        if (this.tableInProcess != null) {
            if (name.equals("tr")) {
                this.appendToTable("\n");
            } else if (name.equals("td")) {
                this.appendToTable("\t");
            } else if (name.equals("table")) {
                this.closeTableMediaUnit();
            }
        } else if (this.textInProcess != null) {
            if (name.equals("br")) {
                // No table is open in this branch: the line break belongs to the text unit.
                // (Writing to tableInProcess here always threw a NullPointerException.)
                this.appendToText("\n");
            }
            final boolean closesOpeningElement = name.equals(this.muHtmlElement) && !TITLE_LIST_ELEMENTS.contains(this.muHtmlElement);
            if (closesOpeningElement || name.equals("body")) {
                this.closeTextMediaUnit();
                this.muHtmlElement = "body";
            } else if (BLOC_LIST_ELEMENTS.contains(name) || NEW_LINE_ELEMENTS.contains(name) || TITLE_LIST_ELEMENTS.contains(name)) {
                this.appendLineSeparatorToText();
            } else {
                this.appendToText(" ");
            }
        }
    }

    /**
     * Forwards the event, then appends the characters to the table unit if one is open,
     * otherwise to the text unit (opening one if needed). Characters received before any
     * {@code body} or title/block element has been seen, and chunks made only of whitespace,
     * are ignored.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        super.characters(ch, start, length);
        if (this.muHtmlElement.isEmpty()) {
            return;
        }
        final String raw = new String(ch, start, length);
        if (stripLayoutCharacters(raw).isEmpty()) {
            return;
        }
        // Layout characters become spaces in the stored content; the profiler gets the raw text.
        final String content = raw.replace("\t", " ").replace("\n", " ").replace("\r", " ").replace("  ", " ");
        if (this.textInProcess == null && this.tableInProcess == null) {
            this.openTextMediaUnit();
        }
        if (this.tableInProcess != null) {
            this.appendToTable(content);
            try {
                this.pTableWriter.append(raw);
            } catch (IOException e) {
                this.logger.error(e.getMessage(), e);
            }
        } else {
            this.appendToText(content);
            try {
                this.pWriter.append(raw);
            } catch (IOException e) {
                this.logger.error(e.getMessage(), e);
            }
        }
    }

    /**
     * Adds an annotation carrying the language of {@code mu}. When a configuration with a
     * service URI is available, the annotation also records that service as producer and the
     * creation date; otherwise only the language statement is written.
     *
     * @param mu the resource to annotate
     * @param language the language code to record
     */
    private void annotate(Resource mu, String language) {
        final Annotation muLangAnnot = AnnotationFactory.createAndLinkAnnotation(mu);
        final JenaPoKHelper pokH = new JenaPoKHelper((PieceOfKnowledge) muLangAnnot);
        // The configuration is injected through a setter and may never have been provided.
        if (this.configuration != null && this.configuration.getServiceUri() != null) {
            pokH.setAutoCommitMode(false);
            pokH.setNSPrefix("dct", "http://purl.org/dc/terms/");
            pokH.setNSPrefix("wlp", "http://weblab.ow2.org/core/1.2/ontology/processing#");
            pokH.createLitStat(mu.getUri(), "http://purl.org/dc/elements/1.1/language", language);
            pokH.createResStat(muLangAnnot.getUri(), "http://weblab.ow2.org/core/1.2/ontology/processing#isProducedBy", this.configuration.getServiceUri());
            pokH.createLitStat(muLangAnnot.getUri(), "http://purl.org/dc/terms/created", DatatypeConverter.printDateTime(Calendar.getInstance()));
            pokH.commit();
        } else {
            pokH.createLitStat(mu.getUri(), "http://purl.org/dc/elements/1.1/language", language);
        }
    }

    /**
     * Sets the document that will receive the media units created by this handler.
     *
     * @param document the target document
     */
    public void setDocument(Document document) {
        this.document = document;
    }

    /**
     * Sets the configuration whose service URI is recorded on language annotations.
     *
     * @param tikaConfiguration the configuration to use; may be {@code null}
     */
    public void setTikaConfiguration(TikaConfiguration tikaConfiguration) {
        this.configuration = tikaConfiguration;
    }
}

