/*
 * Copyright 2005-2010 the original author or authors.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.wamblee.xml;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;

import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

/**
 * Some basic XML utilities for common recurring tasks on DOM documents:
 * parsing, schema validation, serialization and cleanup of duplicate
 * attributes.
 * 
 * <p>
 * The parsers are created from a default-configured
 * {@link DocumentBuilderFactory}; no DTD or external entity restrictions are
 * applied here, so callers handling untrusted XML should take that into
 * account.
 * </p>
 * 
 * @author Erik Brakkee
 */
public final class DomUtils {
    private static final Logger LOG = Logger
        .getLogger(DomUtils.class.getName());

    /**
     * Disabled default constructor.
     * 
     */
    private DomUtils() {
        // Empty.
    }

    /**
     * Parses an XML document from a string.
     * 
     * @param aDocument
     *            XML text to parse.
     * 
     * @return Parsed document.
     * 
     * @throws XMLException
     *             If the text cannot be parsed.
     */
    public static Document read(String aDocument) throws XMLException {
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance()
                .newDocumentBuilder();

            // Parse from characters instead of platform-encoded bytes: a
            // byte round trip through the default charset corrupts
            // non-ASCII content whenever that charset differs from the
            // encoding the parser assumes for the byte stream.
            return builder.parse(new InputSource(new StringReader(aDocument)));
        } catch (SAXException e) {
            throw new XMLException(e.getMessage(), e);
        } catch (IOException e) {
            throw new XMLException(e.getMessage(), e);
        } catch (ParserConfigurationException e) {
            throw new XMLException(e.getMessage(), e);
        }
    }

    /**
     * Parses an XML document from a stream. The stream is always closed
     * before this method returns, whether or not parsing succeeded.
     * 
     * @param aIs
     *            Input stream.
     * 
     * @return Parsed document.
     * 
     * @throws XMLException
     *             If the stream cannot be read or parsed.
     */
    public static Document read(InputStream aIs) throws XMLException {
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance()
                .newDocumentBuilder();

            return builder.parse(aIs);
        } catch (SAXException e) {
            throw new XMLException(e.getMessage(), e);
        } catch (IOException e) {
            throw new XMLException(e.getMessage(), e);
        } catch (ParserConfigurationException e) {
            throw new XMLException(e.getMessage(), e);
        } finally {
            closeQuietly(aIs, "Error closing XML file");
        }
    }

    /**
     * Reads and validates a document against a W3C XML schema. Both streams
     * are always closed before this method returns.
     * 
     * @param aIs
     *            Input stream.
     * @param aSchema
     *            Schema.
     * 
     * @return Parsed and validated document.
     * 
     * @throws XMLException
     *             If the schema or the document cannot be read or parsed, or
     *             if the document does not conform to the schema.
     */
    public static Document readAndValidate(InputStream aIs, InputStream aSchema)
        throws XMLException {
        try {
            final Schema schema = SchemaFactory.newInstance(
                XMLConstants.W3C_XML_SCHEMA_NS_URI).newSchema(
                new StreamSource(aSchema));

            final DocumentBuilderFactory factory = DocumentBuilderFactory
                .newInstance();
            // setValidating(true) would switch on DTD validation; for
            // schema validation the factory must stay non-validating and
            // only have the schema attached.
            factory.setNamespaceAware(true);
            factory.setSchema(schema);

            final DocumentBuilder builder = factory.newDocumentBuilder();
            // Validation problems are reported as recoverable errors; without
            // a handler that rethrows them an invalid document would be
            // returned as if it were valid.
            builder.setErrorHandler(new StrictErrorHandler());

            return builder.parse(aIs);
        } catch (SAXException e) {
            throw new XMLException(e.getMessage(), e);
        } catch (IOException e) {
            throw new XMLException(e.getMessage(), e);
        } catch (ParserConfigurationException e) {
            throw new XMLException(e.getMessage(), e);
        } finally {
            closeQuietly(aSchema, "Error closing schema");
            closeQuietly(aIs, "Error closing XML file");
        }
    }

    /**
     * Serializes an XML document to a stream using an identity transform.
     * The stream is not closed by this method.
     * 
     * @param aDocument
     *            Document to serialize.
     * @param aOs
     *            Output stream.
     * 
     * @throws IOException
     *             If the transformation fails.
     */
    public static void serialize(Document aDocument, OutputStream aOs)
        throws IOException {
        writeTo(aDocument, new StreamResult(aOs));
    }

    /**
     * Serializes an XML document to a string.
     * 
     * @param aDocument
     *            Document to serialize.
     * 
     * @return Serialized document.
     * 
     * @throws IOException
     *             If the transformation fails.
     */
    public static String serialize(Document aDocument) throws IOException {
        // Write characters directly: decoding the transformer's bytes with
        // the platform default charset garbles non-ASCII content when that
        // charset differs from the transformer's output encoding.
        StringWriter writer = new StringWriter();
        writeTo(aDocument, new StreamResult(writer));

        return writer.toString();
    }

    /**
     * Removes duplicate attributes from a DOM tree. This is useful for
     * postprocessing the output of JTidy as a workaround for a bug in JTidy.
     * 
     * @param aNode
     *            Node to remove duplicate attributes from (recursively).
     *            Attributes of the node itself are not dealt with. Only the
     *            child nodes are dealt with.
     */
    public static void removeDuplicateAttributes(Node aNode) {
        NodeList children = aNode.getChildNodes();

        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);

            if (child instanceof Element) {
                // First clean the child element itself, then descend into
                // its own children.
                removeDuplicateAttributes((Element) child);
                removeDuplicateAttributes(child);
            }
        }
    }

    /**
     * Removes duplicate attributes from an element. All attributes are
     * detached and one attribute per name is attached again, in sorted name
     * order. When a name occurs more than once the last occurrence is kept.
     * 
     * @param aElement
     *            Element.
     */
    private static void removeDuplicateAttributes(Element aElement) {
        NamedNodeMap attributes = aElement.getAttributes();
        Map<String, Attr> uniqueAttributes = new TreeMap<String, Attr>();
        List<Attr> allAttributes = new ArrayList<Attr>();

        // Take a snapshot first; the attribute map must not be modified
        // while it is being indexed.
        for (int i = 0; i < attributes.getLength(); i++) {
            Attr attribute = (Attr) attributes.item(i);
            String name = attribute.getNodeName();

            if (uniqueAttributes.containsKey(name)) {
                LOG.info("Detected duplicate attribute (will be removed)'" +
                    name + "'");
            }

            uniqueAttributes.put(name, attribute);
            allAttributes.add(attribute);
        }

        // Remove all attributes from the element.
        for (Attr attribute : allAttributes) {
            aElement.removeAttributeNode(attribute);
        }

        // Add the unique attributes back to the element.
        for (Attr attribute : uniqueAttributes.values()) {
            aElement.setAttributeNode(attribute);
        }
    }

    /**
     * Runs the identity transform from a document to the given result.
     * 
     * @param aDocument
     *            Document to serialize.
     * @param aResult
     *            Destination of the serialized document.
     * 
     * @throws IOException
     *             If the transformation fails.
     */
    private static void writeTo(Document aDocument, StreamResult aResult)
        throws IOException {
        try {
            Transformer identityTransform = TransformerFactory.newInstance()
                .newTransformer();
            identityTransform.transform(new DOMSource(aDocument), aResult);
        } catch (TransformerException e) {
            throw new IOException(e.getMessage(), e);
        }
    }

    /**
     * Closes a stream, logging instead of propagating any failure.
     * 
     * @param aStream
     *            Stream to close.
     * @param aMessage
     *            Message to log when closing fails.
     */
    private static void closeQuietly(InputStream aStream, String aMessage) {
        try {
            aStream.close();
        } catch (Exception e) {
            // Deliberately broad: a failure to close (including a null
            // stream) must never mask the outcome of the parse itself.
            LOG.log(Level.WARNING, aMessage, e);
        }
    }

    /**
     * Error handler that turns every reported error into a parse failure and
     * logs warnings.
     */
    private static final class StrictErrorHandler implements ErrorHandler {
        @Override
        public void warning(SAXParseException aException) {
            LOG.log(Level.WARNING, "XML validation warning", aException);
        }

        @Override
        public void error(SAXParseException aException) throws SAXException {
            throw aException;
        }

        @Override
        public void fatalError(SAXParseException aException)
            throws SAXException {
            throw aException;
        }
    }
}
