/*
 * Decompiled with CFR 0.152.
 */
package edu.harvard.hul.ois.jhove.module;

import edu.harvard.hul.ois.jhove.Agent;
import edu.harvard.hul.ois.jhove.AgentType;
import edu.harvard.hul.ois.jhove.Checksum;
import edu.harvard.hul.ois.jhove.ChecksumInputStream;
import edu.harvard.hul.ois.jhove.ChecksumType;
import edu.harvard.hul.ois.jhove.Checksummer;
import edu.harvard.hul.ois.jhove.Document;
import edu.harvard.hul.ois.jhove.DocumentType;
import edu.harvard.hul.ois.jhove.ErrorMessage;
import edu.harvard.hul.ois.jhove.ExternalSignature;
import edu.harvard.hul.ois.jhove.Identifier;
import edu.harvard.hul.ois.jhove.IdentifierType;
import edu.harvard.hul.ois.jhove.InfoMessage;
import edu.harvard.hul.ois.jhove.JhoveBase;
import edu.harvard.hul.ois.jhove.ModuleBase;
import edu.harvard.hul.ois.jhove.Property;
import edu.harvard.hul.ois.jhove.RepInfo;
import edu.harvard.hul.ois.jhove.SignatureType;
import edu.harvard.hul.ois.jhove.SignatureUseType;
import edu.harvard.hul.ois.jhove.TextMDMetadata;
import edu.harvard.hul.ois.jhove.module.XmlModule;
import edu.harvard.hul.ois.jhove.module.html.Html3_2DocDesc;
import edu.harvard.hul.ois.jhove.module.html.Html4_01FrameDocDesc;
import edu.harvard.hul.ois.jhove.module.html.Html4_01StrictDocDesc;
import edu.harvard.hul.ois.jhove.module.html.Html4_01TransDocDesc;
import edu.harvard.hul.ois.jhove.module.html.Html4_0FrameDocDesc;
import edu.harvard.hul.ois.jhove.module.html.Html4_0StrictDocDesc;
import edu.harvard.hul.ois.jhove.module.html.Html4_0TransDocDesc;
import edu.harvard.hul.ois.jhove.module.html.HtmlCharStream;
import edu.harvard.hul.ois.jhove.module.html.HtmlDocDesc;
import edu.harvard.hul.ois.jhove.module.html.HtmlMetadata;
import edu.harvard.hul.ois.jhove.module.html.JHDoctype;
import edu.harvard.hul.ois.jhove.module.html.JHElement;
import edu.harvard.hul.ois.jhove.module.html.JHOpenTag;
import edu.harvard.hul.ois.jhove.module.html.JHXmlDecl;
import edu.harvard.hul.ois.jhove.module.html.ParseException;
import edu.harvard.hul.ois.jhove.module.html.ParseHtml;
import edu.harvard.hul.ois.jhove.module.html.Token;
import edu.harvard.hul.ois.jhove.module.html.TokenMgrError;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.List;
import java.util.Locale;

public class HtmlModule
extends ModuleBase {
    /* Profile names reported for the DTD flavours. */
    private static final String TRANSITIONAL = "Transitional";
    private static final String STRICT = "Strict";
    private static final String FRAMESET = "Frameset";

    /* Version names reported for the DTD families. */
    private static final String HTML_4_0 = "HTML 4.0";
    private static final String HTML_4_01 = "HTML 4.01";
    private static final String XHTML_1_0 = "XHTML 1.0";

    /* Module identification passed to the ModuleBase constructor. */
    private static final String NAME = "HTML-hul";
    private static final String RELEASE = "1.3";
    private static final int[] DATE = new int[]{2006, 9, 5};
    private static final String[] FORMAT = new String[]{"HTML"};
    private static final String COVERAGE = "HTML 3.2, HTML 4.0 Strict, HTML 4.0 Transitional, HTML 4.0 Frameset, HTML 4.01 Strict, HTML 4.01 Transitional, HTML 4.01 Frameset, XHTML 1.0 Strict, XHTML 1.0 Transitional, XHTML 1.0 Frameset, XHTML 1.1";
    private static final String[] MIMETYPE = new String[]{"text/html"};
    private static final String WELLFORMED = "An HTML file is well-formed if it meets the criteria defined in the HTML 3.2 specification (W3C Recommendation, 14-Jan-1997), the HTML 4.0 specification (W3C Recommendation, 24-Apr-1998), the HTML 4.01 specification (W3C Recommendation, 24-Dec-1999), the XHTML 1.0 specification (W3C Recommendation, 26-Jan-2000, revised 1-Aug-2002), or the XHTML 1.1 specification (W3C Recommendation, 31-May-2001)";
    private static final String VALIDITY = "An HTML file is valid if it is well-formed and has a valid DOCTYPE declaration.";
    private static final String REPINFO = "Languages, title, META tags, frames, links, scripts, images, citations, defined terms, abbreviations, entities, Unicode entity blocks";
    private static final String NOTE = "";
    private static final String RIGHTS = "Copyright 2004-2007 by JSTOR and the President and Fellows of Harvard College. Released under the GNU Lesser General Public License.";

    /** Checksumming wrapper around the input stream; only assigned when checksums are requested. */
    protected ChecksumInputStream _cstream;
    /** Buffered data stream the HTML tokenizer reads from. */
    protected DataInputStream _dstream;
    /** Upper-cased, unquoted public identifier of the last DOCTYPE seen, or null. */
    protected String _doctype;

    /*
     * Document type codes returned by checkDoctype(). They double as indices
     * into profileNames and versionNames, so the three must stay in step.
     */
    public static final int HTML_3_2 = 1;
    public static final int HTML_4_0_STRICT = 2;
    public static final int HTML_4_0_FRAMESET = 3;
    public static final int HTML_4_0_TRANSITIONAL = 4;
    public static final int HTML_4_01_STRICT = 5;
    public static final int HTML_4_01_FRAMESET = 6;
    public static final int HTML_4_01_TRANSITIONAL = 7;
    public static final int XHTML_1_0_STRICT = 8;
    public static final int XHTML_1_0_TRANSITIONAL = 9;
    public static final int XHTML_1_0_FRAMESET = 10;
    public static final int XHTML_1_1 = 11;

    /** Profile name per document type code; null where no profile applies. */
    private static final String[] profileNames = new String[]{
        null,          // 0: unknown
        null,          // HTML_3_2
        STRICT,        // HTML_4_0_STRICT
        FRAMESET,      // HTML_4_0_FRAMESET
        TRANSITIONAL,  // HTML_4_0_TRANSITIONAL
        STRICT,        // HTML_4_01_STRICT
        FRAMESET,      // HTML_4_01_FRAMESET
        TRANSITIONAL,  // HTML_4_01_TRANSITIONAL
        STRICT,        // XHTML_1_0_STRICT
        TRANSITIONAL,  // XHTML_1_0_TRANSITIONAL
        FRAMESET,      // XHTML_1_0_FRAMESET
        null           // XHTML_1_1
    };

    /** Version name per document type code. */
    private static final String[] versionNames = new String[]{
        null,          // 0: unknown
        "HTML 3.2",    // HTML_3_2
        HTML_4_0,      // HTML_4_0_STRICT
        HTML_4_0,      // HTML_4_0_FRAMESET
        HTML_4_0,      // HTML_4_0_TRANSITIONAL
        HTML_4_01,     // HTML_4_01_STRICT
        HTML_4_01,     // HTML_4_01_FRAMESET
        HTML_4_01,     // HTML_4_01_TRANSITIONAL
        XHTML_1_0,     // XHTML_1_0_STRICT
        XHTML_1_0,     // XHTML_1_0_TRANSITIONAL
        XHTML_1_0,     // XHTML_1_0_FRAMESET
        "XHTML 1.1"    // XHTML_1_1
    };

    /** Set once a "withtextmd=true" default parameter has been seen by parse(). */
    protected boolean _withTextMD = false;
    /** textMD metadata collected during the current parse. */
    protected TextMDMetadata _textMD;

    /**
     * Creates the module and registers its vendor, the five W3C
     * specifications it covers, and the ".html" / ".htm" extension
     * signatures.
     */
    public HtmlModule() {
        super(NAME, RELEASE, DATE, FORMAT, COVERAGE, MIMETYPE, WELLFORMED, VALIDITY, REPINFO, NOTE, RIGHTS, false);
        this._vendor = Agent.harvardInstance();

        Agent w3cAgent = Agent.newW3CInstance();
        Agent dRaggett = new Agent.Builder("Dave Raggett", AgentType.OTHER).build();
        Agent leHors = new Agent.Builder("Arnaud Le Hors", AgentType.OTHER).build();
        Agent jacobs = new Agent.Builder("Ian Jacobs", AgentType.OTHER).build();

        // HTML 3.2
        Document doc = new Document("HTML 3.2 Reference Specification", DocumentType.REPORT);
        doc.setPublisher(w3cAgent);
        doc.setAuthor(dRaggett);
        doc.setDate("1997-01-14");
        doc.setIdentifier(new Identifier("http://www.w3c.org/TR/REC-html32-19970114", IdentifierType.URL));
        this._specification.add(doc);

        // HTML 4.0
        doc = new Document("HTML 4.0 Specification", DocumentType.REPORT);
        doc.setPublisher(w3cAgent);
        doc.setAuthor(dRaggett);
        doc.setAuthor(leHors);
        doc.setAuthor(jacobs);
        doc.setDate("1998-04-24");
        doc.setIdentifier(new Identifier("http://www.w3.org/TR/1998/REC-html40-19980424/", IdentifierType.URL));
        this._specification.add(doc);

        // HTML 4.01
        doc = new Document("HTML 4.01 Specification", DocumentType.REPORT);
        doc.setPublisher(w3cAgent);
        doc.setAuthor(dRaggett);
        doc.setAuthor(leHors);
        doc.setAuthor(jacobs);
        doc.setDate("1999-12-24");
        doc.setIdentifier(new Identifier("http://www.w3.org/TR/1999/REC-html401-19991224/", IdentifierType.URL));
        this._specification.add(doc);

        // XHTML 1.0 (second edition). The date is written year-first like
        // the HTML specifications above so that all entries share one format.
        doc = new Document("XHTML(TM) 1.0 The Extensible HyperText Markup Language (Second Edition)", DocumentType.REPORT);
        doc.setPublisher(w3cAgent);
        doc.setDate("2002-08-01");
        doc.setIdentifier(new Identifier("http://www.w3.org/TR/xhtml1/", IdentifierType.URL));
        this._specification.add(doc);

        // XHTML 1.1
        doc = new Document("XHTML(TM) 1.1 - Module-based XHTML", DocumentType.REPORT);
        doc.setPublisher(w3cAgent);
        doc.setDate("2001-05-31");
        doc.setIdentifier(new Identifier("http://www.w3.org/TR/2001/REC-xhtml11-20010531/", IdentifierType.URL));
        this._specification.add(doc);

        // File-extension signatures; both are optional hints.
        ExternalSignature sig = new ExternalSignature(".html", SignatureType.EXTENSION, SignatureUseType.OPTIONAL);
        this._signature.add(sig);
        sig = new ExternalSignature(".htm", SignatureType.EXTENSION, SignatureUseType.OPTIONAL);
        this._signature.add(sig);
    }

    /**
     * Parses the stream as HTML and records the findings in {@code info}.
     *
     * <p>With {@code parseIndex == 0} the content is tokenized as HTML, its
     * DOCTYPE is classified, and the matching document description validates
     * the elements. If the document turns out to be XHTML the method returns
     * 100 without validating. When called with a non-zero {@code parseIndex}
     * the work is delegated to a fresh {@link XmlModule}, with an index of
     * 100 translated to 0 for that module.
     *
     * @param stream     the content to parse
     * @param info       receives format, messages, well-formedness, validity,
     *                   profile, version, properties and checksums
     * @param parseIndex 0 for the HTML pass; any other value selects the
     *                   XML delegation
     * @return 0 when this method is finished with the document, 100 when the
     *         document looks like XHTML, or whatever the XML module returns
     *         when delegating
     * @throws IOException if reading the stream fails
     */
    @Override
    public int parse(InputStream stream, RepInfo info, int parseIndex) throws IOException {
        if (parseIndex != 0) {
            if (!HtmlModule.isXmlAvailable()) {
                info.setMessage(new ErrorMessage("XML-HUL module required to validate XHTML documents"));
                info.setWellFormed(false);
                return 0;
            }
            XmlModule xmlMod = new XmlModule();
            xmlMod.setApp(this._app);
            xmlMod.setBase(this._je);
            xmlMod.setDefaultParams(this._defaultParams);
            try {
                xmlMod.applyDefaultParams();
            } catch (Exception ignored) {
                // Deliberately best effort: a failure to apply the default
                // parameters must not prevent the XML module from parsing.
            }
            xmlMod.setXhtmlDoctype(this._doctype);
            // 100 is this module's own "is XHTML" marker; the XML module starts at 0.
            int xmlParseIndex = parseIndex == 100 ? 0 : parseIndex;
            return xmlMod.parse(stream, info, xmlParseIndex);
        }

        this._doctype = null;
        if (this._defaultParams != null) {
            for (String param : this._defaultParams) {
                if ("withtextmd=true".equalsIgnoreCase(param)) {
                    this._withTextMD = true;
                }
            }
        }
        this.initParse();
        info.setFormat(this._format[0]);
        info.setMimeType(this._mimeType[0]);
        info.setModule(this);
        // parseIndex is always 0 here, so every HTML pass starts with fresh textMD metadata.
        this._textMD = new TextMDMetadata();

        // Only compute checksums when requested and not already present.
        Checksummer ckSummer = null;
        if (this._je != null && this._je.getChecksumFlag() && info.getChecksum().isEmpty()) {
            ckSummer = new Checksummer();
            this._cstream = new ChecksumInputStream(stream, ckSummer);
            this._dstream = HtmlModule.getBufferedDataStream(this._cstream, this._je != null ? this._je.getBufferSize() : 0);
        } else {
            this._dstream = HtmlModule.getBufferedDataStream(stream, this._je != null ? this._je.getBufferSize() : 0);
        }

        ParseHtml parser;
        HtmlCharStream cstream;
        try {
            cstream = new HtmlCharStream(this._dstream, "ISO-8859-1");
            parser = new ParseHtml(cstream);
        } catch (UnsupportedEncodingException e) {
            info.setMessage(new ErrorMessage("Internal error: " + e.getMessage()));
            info.setWellFormed(false);
            return 0;
        }

        HtmlMetadata metadata = null;
        int type = 0;
        try {
            List elements = parser.HtmlDoc();
            if (elements.isEmpty()) {
                info.setWellFormed(false);
                info.setMessage(new ErrorMessage("Document is empty"));
                return 0;
            }
            type = this.checkDoctype(elements);
            if (type < 0) {
                info.setWellFormed(false);
                info.setMessage(new ErrorMessage("DOCTYPE is not HTML"));
                return 0;
            }

            // Only the first open tag is inspected: it must be one of the
            // structural tags for the document to be accepted as HTML.
            boolean hasElements = false;
            for (Object o : elements) {
                if (o instanceof JHOpenTag) {
                    String name = ((JHOpenTag) o).getName();
                    hasElements = "html".equals(name) || "head".equals(name)
                            || "body".equals(name) || "title".equals(name);
                    break;
                }
            }
            if (!hasElements) {
                info.setMessage(new ErrorMessage("Document contains no html, head, body or title tags"));
                info.setWellFormed(false);
                return 0;
            }

            String lineEnd = cstream.getKindOfLineEnd();
            if (lineEnd == null) {
                info.setMessage(new InfoMessage("Not able to determine type of end of line"));
                this._textMD.setLinebreak(-1);
            } else if ("CR".equalsIgnoreCase(lineEnd)) {
                this._textMD.setLinebreak(0);
            } else if ("LF".equalsIgnoreCase(lineEnd)) {
                this._textMD.setLinebreak(1);
            } else if ("CRLF".equalsIgnoreCase(lineEnd)) {
                this._textMD.setLinebreak(2);
            }

            if (type == 0) {
                // No recognized DOCTYPE: see whether this is really XML/XHTML.
                switch (this.seemsToBeXHTML(elements)) {
                    case 1: {
                        info.setMessage(new ErrorMessage("Document has XML declaration but no DOCTYPE; probably XML rather than HTML"));
                        info.setWellFormed(false);
                        return 0;
                    }
                    case 2: {
                        return 100;
                    }
                    default: {
                        break;
                    }
                }
                info.setMessage(new ErrorMessage("Unrecognized or missing DOCTYPE declaration; validation continuing as HTML 3.2"));
                info.setValid(false);
            }

            HtmlDocDesc docDesc;
            String basisVersion;
            switch (type) {
                case HTML_4_0_STRICT: {
                    docDesc = new Html4_0StrictDocDesc();
                    basisVersion = "4.0";
                    break;
                }
                case HTML_4_0_FRAMESET: {
                    docDesc = new Html4_0FrameDocDesc();
                    basisVersion = "4.0";
                    break;
                }
                case HTML_4_0_TRANSITIONAL: {
                    docDesc = new Html4_0TransDocDesc();
                    basisVersion = "4.0";
                    break;
                }
                case HTML_4_01_STRICT: {
                    docDesc = new Html4_01StrictDocDesc();
                    basisVersion = "4.01";
                    break;
                }
                case HTML_4_01_FRAMESET: {
                    docDesc = new Html4_01FrameDocDesc();
                    basisVersion = "4.01";
                    break;
                }
                case HTML_4_01_TRANSITIONAL: {
                    docDesc = new Html4_01TransDocDesc();
                    basisVersion = "4.01";
                    break;
                }
                case XHTML_1_0_STRICT:
                case XHTML_1_0_TRANSITIONAL:
                case XHTML_1_0_FRAMESET:
                case XHTML_1_1: {
                    return 100;
                }
                default: {
                    // HTML 3.2 and anything unrecognized are validated as HTML 3.2.
                    docDesc = new Html3_2DocDesc();
                    basisVersion = "3.2";
                    break;
                }
            }
            this._textMD.setMarkup_basis("HTML");
            this._textMD.setMarkup_basis_version(basisVersion);
            this._textMD.setMarkup_language(this._doctype);

            docDesc.validate(elements, info);
            metadata = docDesc.getMetadata();

            String declaredCharset = metadata.getCharset();
            this._textMD.setCharset(declaredCharset != null ? declaredCharset : "ISO-8859-1");
            boolean isUtf = this._textMD.getCharset().indexOf("UTF") != -1;
            this._textMD.setByte_order(this._bigEndian ? 0 : 1);
            this._textMD.setByte_size("8");
            this._textMD.setCharacter_size(isUtf ? "variable" : "1");
        } catch (ParseException e) {
            // The token is not guaranteed to be set on every ParseException;
            // report the position only when it is known.
            Token t = e.currentToken;
            if (t != null) {
                info.setMessage(new ErrorMessage("Parse error", "Line = " + t.beginLine + ", column = " + t.beginColumn));
            } else {
                info.setMessage(new ErrorMessage("Parse error"));
            }
            info.setWellFormed(false);
        } catch (TokenMgrError f) {
            info.setMessage(new ErrorMessage("TokenMgrError: " + f.getLocalizedMessage()));
            info.setWellFormed(false);
        }

        if (info.getWellFormed() == 0) {
            return 0;
        }
        if (type != 0) {
            if (profileNames[type] != null) {
                info.setProfile(profileNames[type]);
            }
            info.setVersion(versionNames[type]);
        }
        if (metadata != null) {
            Property property = metadata.toProperty(this._withTextMD ? this._textMD : null);
            if (property != null) {
                info.setProperty(property);
            }
        }
        if (ckSummer != null) {
            info.setSize(this._cstream.getNBytes());
            info.setChecksum(new Checksum(ckSummer.getCRC32(), ChecksumType.CRC32));
            String md5 = ckSummer.getMD5();
            if (md5 != null) {
                info.setChecksum(new Checksum(md5, ChecksumType.MD5));
            }
            String sha1 = ckSummer.getSHA1();
            if (sha1 != null) {
                info.setChecksum(new Checksum(sha1, ChecksumType.SHA1));
            }
        }
        return 0;
    }

    /**
     * Looks for an HTML signature near the start of the stream.
     *
     * <p>Up to {@code getSigBytes()} bytes are read, upper-cased, and matched
     * against {@code <!DOCTYPE HTML}, {@code <HTML} and {@code <TITLE}.
     * Whitespace in the input is ignored. On the first complete match the
     * module name is recorded via {@code info.setSigMatch}; if nothing
     * matches, {@code info} is marked not well-formed.
     *
     * @param file   the file being checked (not used by this implementation)
     * @param stream the content to scan
     * @param info   receives format, MIME type, module and the match result
     * @throws IOException if reading the stream fails
     */
    @Override
    public void checkSignatures(File file, InputStream stream, RepInfo info) throws IOException {
        info.setFormat(this._format[0]);
        info.setMimeType(this._mimeType[0]);
        info.setModule(this);
        // Whitespace is dropped from the input before matching, so the
        // patterns themselves must not contain any: a space inside
        // "<!DOCTYPE HTML" could never be matched.
        char[][] sigtext = new char[][]{"<!DOCTYPEHTML".toCharArray(), "<HTML".toCharArray(), "<TITLE".toCharArray()};
        int[] sigstate = new int[sigtext.length];
        JhoveBase jb = this.getBase();
        int sigBytes = jb.getSigBytes();
        int bytesRead = 0;
        DataInputStream dstream = new DataInputStream(stream);
        while (bytesRead < sigBytes) {
            int ch;
            try {
                ch = HtmlModule.readUnsignedByte(dstream, this);
            } catch (EOFException e) {
                break;
            }
            ++bytesRead;
            char chr = Character.toUpperCase((char) ch);
            if (Character.isWhitespace(chr)) {
                continue;
            }
            for (int i = 0; i < sigtext.length; ++i) {
                char[] st = sigtext[i];
                int state = sigstate[i];
                if (chr == st[state]) {
                    ++state;
                } else {
                    // Restart, but let the current character open a new
                    // match so that input such as "<<HTML" is still found.
                    state = chr == st[0] ? 1 : 0;
                }
                sigstate[i] = state;
                if (state == st.length) {
                    info.setSigMatch(this._name);
                    return;
                }
            }
        }
        info.setWellFormed(false);
    }

    /**
     * Classifies the document by its DOCTYPE declaration.
     *
     * <p>The DOCTYPE is expected as the first element, or as the second when
     * the first is an XML declaration. As a side effect, {@code _doctype} is
     * set to the upper-cased, unquoted public identifier whenever one is
     * found.
     *
     * @param elements the parsed document elements
     * @return one of the {@code HTML_*} type codes for a recognized HTML
     *         public identifier; 0 when there is no DOCTYPE, it is
     *         incomplete, it is not PUBLIC, or the identifier is not
     *         recognized; -1 when the DOCTYPE names something other than HTML
     */
    protected int checkDoctype(List elements) {
        if (elements.isEmpty()) {
            return 0;
        }
        JHElement firstElem = (JHElement) elements.get(0);
        if (firstElem instanceof JHXmlDecl && elements.size() >= 2) {
            firstElem = (JHElement) elements.get(1);
        }
        if (!(firstElem instanceof JHDoctype)) {
            return 0;
        }
        List dt = ((JHDoctype) firstElem).getDoctypeElements();
        if (dt.size() < 3) {
            return 0;
        }
        try {
            // Locale.ROOT keeps the comparison independent of the default
            // locale; a Turkish locale, for example, upper-cases 'i' to a
            // dotted capital and would break "PUBLIC" and "TRANSITIONAL".
            String str = ((String) dt.get(0)).toUpperCase(Locale.ROOT);
            if (!"HTML".equals(str)) {
                return -1;
            }
            str = ((String) dt.get(1)).toUpperCase(Locale.ROOT);
            if (!"PUBLIC".equals(str)) {
                return 0;
            }
            str = this.stripQuotes(((String) dt.get(2)).toUpperCase(Locale.ROOT));
            this._doctype = str;
            if ("-//W3C//DTD HTML 3.2 FINAL//EN".equals(str) || "-//W3C//DTD HTML 3.2//EN".equals(str)) {
                return HTML_3_2;
            }
            if ("-//W3C//DTD HTML 4.0//EN".equals(str)) {
                return HTML_4_0_STRICT;
            }
            if ("-//W3C//DTD HTML 4.0 TRANSITIONAL//EN".equals(str)) {
                return HTML_4_0_TRANSITIONAL;
            }
            if ("-//W3C//DTD HTML 4.0 FRAMESET//EN".equals(str)) {
                return HTML_4_0_FRAMESET;
            }
            if ("-//W3C//DTD HTML 4.01//EN".equals(str)) {
                return HTML_4_01_STRICT;
            }
            if ("-//W3C//DTD HTML 4.01 TRANSITIONAL//EN".equals(str)) {
                return HTML_4_01_TRANSITIONAL;
            }
            if ("-//W3C//DTD HTML 4.01 FRAMESET//EN".equals(str)) {
                return HTML_4_01_FRAMESET;
            }
        } catch (Exception e) {
            // A malformed DOCTYPE (non-string or null component) is treated
            // the same as an unrecognized one.
            return 0;
        }
        return 0;
    }

    /**
     * Guesses whether a document without a recognized DOCTYPE is XHTML.
     *
     * @param elements the parsed document elements
     * @return 0 when the first element is not an XML declaration (or the
     *         list cannot be inspected); 2 when there is an XML declaration
     *         and the first open tag is {@code html}; 1 when there is an XML
     *         declaration but the first open tag is something else or there
     *         is no open tag at all
     */
    protected int seemsToBeXHTML(List elements) {
        try {
            if (elements.isEmpty() || !(elements.get(0) instanceof JHXmlDecl)) {
                return 0;
            }
            // Only the first open tag decides the outcome.
            for (Object elem : elements) {
                if (elem instanceof JHOpenTag) {
                    JHOpenTag tag = (JHOpenTag) elem;
                    return "html".equals(tag.getName()) ? 2 : 1;
                }
            }
        } catch (Exception e) {
            return 0;
        }
        return 1;
    }

    /**
     * Removes one pair of enclosing double quotes from a string.
     *
     * @param str the string to unquote; must not be null
     * @return {@code str} without its first and last character when both are
     *         double quotes; otherwise {@code str} unchanged. Strings shorter
     *         than two characters are always returned unchanged.
     */
    protected String stripQuotes(String str) {
        int len = str.length();
        // Two characters are the minimum for an opening and a closing quote;
        // without this check "" and a lone quote would throw.
        if (len >= 2 && str.charAt(0) == '"' && str.charAt(len - 1) == '"') {
            return str.substring(1, len - 1);
        }
        return str;
    }

    /**
     * Reports whether the XML module class can be loaded.
     *
     * @return true when {@code edu.harvard.hul.ois.jhove.module.XmlModule}
     *         loads successfully, false when it is missing or cannot be
     *         linked or initialized
     */
    protected static boolean isXmlAvailable() {
        try {
            Class.forName("edu.harvard.hul.ois.jhove.module.XmlModule");
            return true;
        } catch (Exception e) {
            return false;
        } catch (LinkageError e) {
            // Class.forName signals a present-but-unusable class (missing
            // dependency, failed static initializer) with an Error rather
            // than an Exception; treat that as "not available" as well.
            return false;
        }
    }
}

