/*
 * Decompiled with CFR 0.152.
 */
package org.sakaiproject.search.component.adapter.contenthosting;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.sakaiproject.content.api.ContentResource;
import org.sakaiproject.exception.ServerOverloadException;
import org.sakaiproject.search.api.SearchUtils;
import org.sakaiproject.search.component.adapter.contenthosting.BaseContentDigester;
import org.sakaiproject.search.component.adapter.util.DigestHtml;
import org.sakaiproject.search.util.HTMLParser;
import org.w3c.tidy.Tidy;

/**
 * Content digester that turns the HTML body of a {@link ContentResource} into a
 * single string of text.
 *
 * <p>Two strategies are implemented. The one in use is chosen by
 * {@link #useDirectParser}:
 * <ul>
 *   <li>direct: the resource bytes are decoded as UTF-8, tokenised with
 *       {@link HTMLParser}, and every non-empty token is appended through
 *       {@link SearchUtils#appendCleanString};</li>
 *   <li>Tidy: the resource stream is run through JTidy, and the tidied markup is
 *       handed to {@link DigestHtml#digest}.</li>
 * </ul>
 */
public class HtmlContentDigester
extends BaseContentDigester {
    private static final Log log = LogFactory.getLog(HtmlContentDigester.class);

    /**
     * Selects the parsing strategy: {@code true} uses {@link HTMLParser} directly,
     * {@code false} uses the JTidy based path. Nothing in this class changes the
     * value after initialisation.
     */
    private boolean useDirectParser = true;

    /**
     * Extracts the text of an HTML resource.
     *
     * @param contentResource the resource to digest; must not be {@code null}
     * @return the digested text; an empty string when the direct parser is in use
     *         and the resource reports no content bytes
     * @throws RuntimeException if {@code contentResource} is {@code null}, or if
     *         reading the content fails (the original exception is kept as the cause)
     */
    @Override
    public String getContent(ContentResource contentResource) {
        if (contentResource == null) {
            throw new RuntimeException("null contentResource passed to getContent");
        }
        if (this.useDirectParser) {
            return this.parseHtmlDirectly(contentResource);
        }
        return this.parseHtmlWithTidy(contentResource);
    }

    /**
     * Decodes the resource bytes as UTF-8 and collects every non-empty token produced by HTMLParser.
     */
    private String parseHtmlDirectly(ContentResource contentResource) {
        try {
            byte[] rawContent = contentResource.getContent();
            if (rawContent == null) {
                // Without this guard new String(null, ...) fails with an opaque
                // NullPointerException; a resource without a body has no text.
                return "";
            }
            String content = new String(rawContent, "UTF-8");
            StringBuilder sb = new StringBuilder();
            HTMLParser parser = new HTMLParser(content);
            while (parser.hasNext()) {
                String token = (String)parser.next();
                if (token.length() > 0) {
                    SearchUtils.appendCleanString(token, sb);
                }
            }
            return sb.toString();
        }
        catch (ServerOverloadException e) {
            throw new RuntimeException("Failed get Resource Content ", e);
        }
        catch (UnsupportedEncodingException e) {
            throw new RuntimeException("Failed get Resource Content ", e);
        }
    }

    /**
     * Runs the resource stream through JTidy and digests the tidied markup with DigestHtml.
     */
    private String parseHtmlWithTidy(ContentResource contentResource) {
        InputStream contentStream = null;
        try {
            contentStream = contentResource.streamContent();
            if (log.isDebugEnabled()) {
                log.debug("Raw Content was " + contentStream);
            }
            Tidy tidy = new Tidy();
            tidy.setQuiet(true);
            tidy.setShowWarnings(false);
            tidy.setOnlyErrors(true);
            // ByteArrayOutputStream.close() has no effect, so this buffer needs no cleanup.
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            tidy.parse(contentStream, baos);
            String tidyOut = SearchUtils.appendCleanString(new String(baos.toByteArray(), "UTF-8"), null).toString();
            // The tidied document can be arbitrarily large: log it once, at debug
            // level only, and skip building the message when debug is disabled.
            if (log.isDebugEnabled()) {
                log.debug(contentResource.getReference() + " Tidy Output was " + tidyOut);
            }
            return DigestHtml.digest(tidyOut);
        }
        catch (ServerOverloadException e) {
            throw new RuntimeException("Failed get Resource Content ", e);
        }
        catch (UnsupportedEncodingException e) {
            throw new RuntimeException("Failed get Resource Content ", e);
        }
        finally {
            if (contentStream != null) {
                try {
                    contentStream.close();
                }
                catch (IOException e) {
                    // Best effort: a failed close must not mask the result or the
                    // primary exception, but keep the stack trace for diagnosis.
                    log.debug("Failed to close content stream", e);
                }
            }
        }
    }

    /**
     * Returns the digested text of the resource wrapped in a reader.
     *
     * @param contentResource the resource to digest; must not be {@code null}
     * @return a {@link StringReader} over the result of {@link #getContent(ContentResource)}
     * @throws RuntimeException under the same conditions as {@link #getContent(ContentResource)}
     */
    @Override
    public Reader getContentReader(ContentResource contentResource) {
        return new StringReader(this.getContent(contentResource));
    }
}

