/*
 * Decompiled with CFR 0.152.
 */
package org.apache.stanbol.enhancer.engines.htmlextractor.impl;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.stanbol.enhancer.engines.htmlextractor.impl.DOMBuilder;
import org.apache.stanbol.enhancer.engines.htmlextractor.impl.DOMUtils;
import org.jsoup.Jsoup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;

/**
 * Parses HTML into a W3C {@link Document} by running it through Jsoup and
 * converting the result with {@link DOMBuilder#jsoup2DOM}.
 *
 * <p>Parse failures are logged and reported to the caller as a {@code null}
 * return value rather than as an exception.
 */
public class HtmlParser {
    private static final Logger LOG = LoggerFactory.getLogger(HtmlParser.class);

    /** Base URI handed to Jsoup for every parse; defaults to the empty string. */
    private String baseURI = "";

    /**
     * @return the base URI currently passed to Jsoup when parsing
     */
    public String getBaseURI() {
        return this.baseURI;
    }

    /**
     * @param baseURI the base URI to pass to Jsoup for subsequent parses
     */
    public void setBaseURI(String baseURI) {
        this.baseURI = baseURI;
    }

    /**
     * Parses HTML that is already available as a string.
     *
     * <p>The text is given to Jsoup directly instead of being re-encoded to
     * bytes, so the outcome does not depend on the platform default charset
     * or on any charset declaration inside the markup.
     *
     * @param html the HTML text, may be {@code null}
     * @return the DOM, or {@code null} if {@code html} is {@code null} or
     *         parsing failed
     */
    public Document getDOM(String html) {
        if (html == null) {
            return null;
        }
        try {
            return DOMBuilder.jsoup2DOM(Jsoup.parse(html, this.baseURI));
        }
        catch (RuntimeException e) {
            LOG.error("Unable to parse HTML string into a DOM", e);
            return null;
        }
    }

    /**
     * Parses HTML read from a stream. The stream is not closed by this method.
     *
     * @param html    the stream to read the HTML from
     * @param charset name of the stream's character encoding; passed to Jsoup
     *                unchanged, so {@code null} leaves the choice of encoding
     *                to Jsoup
     * @return the DOM, or {@code null} if reading or parsing failed
     */
    public Document getDOM(InputStream html, String charset) {
        try {
            return DOMBuilder.jsoup2DOM(Jsoup.parse(html, charset, this.baseURI));
        }
        catch (RuntimeException e) {
            LOG.error("Unable to parse HTML stream into a DOM", e);
        }
        catch (IOException e) {
            LOG.error("Unable to read HTML stream", e);
        }
        return null;
    }

    /**
     * Command-line entry point: converts each given HTML file to XML.
     *
     * <p>Usage: {@code [-enc <charset>] file...}. Leading arguments starting
     * with {@code -} are treated as options; only {@code -enc} is interpreted.
     * For every remaining argument the file is parsed and the DOM is written
     * as UTF-8 to {@code <file name>.xml} in the current working directory.
     * Files that cannot be parsed are skipped with a warning.
     *
     * @param args command-line arguments
     * @throws IllegalArgumentException if {@code -enc} is given without a value
     * @throws Exception if a file cannot be opened or the XML cannot be written
     */
    public static void main(String[] args) throws Exception {
        String encoding = null;
        // Declared outside the loop because the file loop below starts from it.
        int argv = 0;
        for (; argv < args.length && args[argv].startsWith("-"); ++argv) {
            if (!args[argv].equals("-enc")) {
                continue;
            }
            if (argv + 1 >= args.length) {
                throw new IllegalArgumentException("Option -enc requires a charset name");
            }
            encoding = args[++argv];
        }
        HtmlParser parser = new HtmlParser();
        for (int i = argv; i < args.length; ++i) {
            Document doc;
            try (FileInputStream is = new FileInputStream(args[i])) {
                doc = parser.getDOM(is, encoding);
            }
            if (doc == null) {
                // getDOM has already logged the cause; do not create an output file.
                LOG.warn("Skipping {}: no DOM could be built", args[i]);
                continue;
            }
            try (FileOutputStream out = new FileOutputStream(new File(args[i]).getName() + ".xml")) {
                DOMUtils.writeXml(doc, "UTF-8", null, out);
            }
        }
    }
}

