/*
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 */
package org.openbp.common.markup;

import java.util.HashMap;
import java.util.Map;

/**
 * HTML-related utilities, e.g. to convert regular text to HTML text.
 */
public final class HTMLEscapeHelper
{
	//////////////////////////////////////////////////
	// @@ Special character data
	//////////////////////////////////////////////////

	/**
	 * Table of special characters like umlaut characters that will be
	 * converted to their respective HTML representation.
	 * Each row is a pair of entity name (String, without the leading ampersand
	 * and the trailing semicolon) and character code (Integer).
	 * Characters that have no row in this table are not given a named entity;
	 * {@link #htmlescape} handles them separately.
	 * Based on http://hotwired.lycos.com/webmonkey/reference/special_characters
	 */
	static Object [][] entities = {
		{ "#39", Integer.valueOf(39) }, // apostrophe
		{ "#47", Integer.valueOf(47) }, // slash
		{ "#92", Integer.valueOf(92) }, // backslash
		{ "quot", Integer.valueOf(34) }, // double-quote
		{ "amp", Integer.valueOf(38) }, // ampersand
		{ "lt", Integer.valueOf(60) }, // less-than
		{ "gt", Integer.valueOf(62) }, // greater-than
		{ "nbsp", Integer.valueOf(160) }, // non-breaking space
		{ "copy", Integer.valueOf(169) }, // copyright
		{ "reg", Integer.valueOf(174) }, // registered trademark
		{ "Agrave", Integer.valueOf(192) }, // uppercase A, grave accent
		{ "Aacute", Integer.valueOf(193) }, // uppercase A, acute accent
		{ "Acirc", Integer.valueOf(194) }, // uppercase A, circumflex accent
		{ "Atilde", Integer.valueOf(195) }, // uppercase A, tilde
		{ "Auml", Integer.valueOf(196) }, // uppercase A, umlaut
		{ "Aring", Integer.valueOf(197) }, // uppercase A, ring
		{ "AElig", Integer.valueOf(198) }, // uppercase AE
		{ "Ccedil", Integer.valueOf(199) }, // uppercase C, cedilla
		{ "Egrave", Integer.valueOf(200) }, // uppercase E, grave accent
		{ "Eacute", Integer.valueOf(201) }, // uppercase E, acute accent
		{ "Ecirc", Integer.valueOf(202) }, // uppercase E, circumflex accent
		{ "Euml", Integer.valueOf(203) }, // uppercase E, umlaut
		{ "Igrave", Integer.valueOf(204) }, // uppercase I, grave accent
		{ "Iacute", Integer.valueOf(205) }, // uppercase I, acute accent
		{ "Icirc", Integer.valueOf(206) }, // uppercase I, circumflex accent
		{ "Iuml", Integer.valueOf(207) }, // uppercase I, umlaut
		{ "ETH", Integer.valueOf(208) }, // uppercase Eth, Icelandic
		{ "Ntilde", Integer.valueOf(209) }, // uppercase N, tilde
		{ "Ograve", Integer.valueOf(210) }, // uppercase O, grave accent
		{ "Oacute", Integer.valueOf(211) }, // uppercase O, acute accent
		{ "Ocirc", Integer.valueOf(212) }, // uppercase O, circumflex accent
		{ "Otilde", Integer.valueOf(213) }, // uppercase O, tilde
		{ "Ouml", Integer.valueOf(214) }, // uppercase O, umlaut
		{ "Oslash", Integer.valueOf(216) }, // uppercase O, slash
		{ "Ugrave", Integer.valueOf(217) }, // uppercase U, grave accent
		{ "Uacute", Integer.valueOf(218) }, // uppercase U, acute accent
		{ "Ucirc", Integer.valueOf(219) }, // uppercase U, circumflex accent
		{ "Uuml", Integer.valueOf(220) }, // uppercase U, umlaut
		{ "Yacute", Integer.valueOf(221) }, // uppercase Y, acute accent
		{ "THORN", Integer.valueOf(222) }, // uppercase THORN, Icelandic
		{ "szlig", Integer.valueOf(223) }, // lowercase sharp s, German
		{ "agrave", Integer.valueOf(224) }, // lowercase a, grave accent
		{ "aacute", Integer.valueOf(225) }, // lowercase a, acute accent
		{ "acirc", Integer.valueOf(226) }, // lowercase a, circumflex accent
		{ "atilde", Integer.valueOf(227) }, // lowercase a, tilde
		{ "auml", Integer.valueOf(228) }, // lowercase a, umlaut
		{ "aring", Integer.valueOf(229) }, // lowercase a, ring
		{ "aelig", Integer.valueOf(230) }, // lowercase ae
		{ "ccedil", Integer.valueOf(231) }, // lowercase c, cedilla
		{ "egrave", Integer.valueOf(232) }, // lowercase e, grave accent
		{ "eacute", Integer.valueOf(233) }, // lowercase e, acute accent
		{ "ecirc", Integer.valueOf(234) }, // lowercase e, circumflex accent
		{ "euml", Integer.valueOf(235) }, // lowercase e, umlaut
		{ "igrave", Integer.valueOf(236) }, // lowercase i, grave accent
		{ "iacute", Integer.valueOf(237) }, // lowercase i, acute accent
		{ "icirc", Integer.valueOf(238) }, // lowercase i, circumflex accent
		{ "iuml", Integer.valueOf(239) }, // lowercase i, umlaut
		{ "eth", Integer.valueOf(240) }, // lowercase eth, Icelandic
		{ "ntilde", Integer.valueOf(241) }, // lowercase n, tilde
		{ "ograve", Integer.valueOf(242) }, // lowercase o, grave accent
		{ "oacute", Integer.valueOf(243) }, // lowercase o, acute accent
		{ "ocirc", Integer.valueOf(244) }, // lowercase o, circumflex accent
		{ "otilde", Integer.valueOf(245) }, // lowercase o, tilde
		{ "ouml", Integer.valueOf(246) }, // lowercase o, umlaut
		{ "oslash", Integer.valueOf(248) }, // lowercase o, slash
		{ "ugrave", Integer.valueOf(249) }, // lowercase u, grave accent
		{ "uacute", Integer.valueOf(250) }, // lowercase u, acute accent
		{ "ucirc", Integer.valueOf(251) }, // lowercase u, circumflex accent
		{ "uuml", Integer.valueOf(252) }, // lowercase u, umlaut
		{ "yacute", Integer.valueOf(253) }, // lowercase y, acute accent
		{ "thorn", Integer.valueOf(254) }, // lowercase thorn, Icelandic
		{ "yuml", Integer.valueOf(255) }, // lowercase y, umlaut
		{ "euro", Integer.valueOf(8364) }, // Euro symbol
	};

	/**
	 * Lookup table that maps a character code to its entity name
	 * (without the leading ampersand and the trailing semicolon).
	 * Filled once from {@link #entities} when the class is initialized.
	 */
	private static final Map<Integer, String> i2e = new HashMap<Integer, String>();

	static
	{
		for (int i = 0; i < entities.length; ++i)
		{
			i2e.put((Integer) entities [i] [1], (String) entities [i] [0]);
		}
	}

	/**
	 * Private constructor prevents instantiation.
	 */
	private HTMLEscapeHelper()
	{
	}

	//////////////////////////////////////////////////
	// @@ Escaped character data
	//////////////////////////////////////////////////

	/**
	 * Lookup table of supported control characters like "\n" that will be
	 * converted to their HTML equivalents (a piece of markup, not an entity name).
	 */
	private static final Map<Character, String> masked2html = new HashMap<Character, String>();

	static
	{
		masked2html.put(Character.valueOf('\n'), "<br>");
		masked2html.put(Character.valueOf('\t'), "&nbsp;&nbsp;&nbsp;&nbsp;");
	}

	/**
	 * Turns special characters into their HTML entity equivalents,
	 * e.g. {@code "bread" & "butter"} becomes
	 * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
	 *
	 * <p>The conversion rules are applied per character in this order:
	 * <ol>
	 * <li>Characters listed in the entity table are replaced by their entity
	 * reference (quotes, ampersand, angle brackets, slash, backslash,
	 * accented Latin-1 letters, the Euro symbol etc.).</li>
	 * <li>"\n" characters are converted to HTML line breaks and "\t" to
	 * 4 non-breaking spaces.</li>
	 * <li>Any other character outside the 7-bit ASCII range is replaced by a
	 * decimal numeric character reference. A valid surrogate pair is combined
	 * into a single reference for the supplementary code point it encodes.</li>
	 * <li>Everything else is copied unchanged.</li>
	 * </ol>
	 *
	 * @param s String to escape; may be null
	 * @return Escaped string; the empty string if the argument is null
	 */
	public static String htmlescape(String s)
	{
		if (s == null)
			return "";

		int n = s.length();
		StringBuilder buf = new StringBuilder(n + 16);

		for (int i = 0; i < n; ++i)
		{
			char ch = s.charAt(i);

			// Named (or explicitly listed numeric) entity from the table
			String entity = i2e.get(Integer.valueOf((int) ch));
			if (entity != null)
			{
				buf.append('&').append(entity).append(';');
				continue;
			}

			// Control characters that translate to HTML markup
			String html = masked2html.get(Character.valueOf(ch));
			if (html != null)
			{
				buf.append(html);
				continue;
			}

			// Plain 7-bit ASCII is passed through as is
			if (ch < 128)
			{
				buf.append(ch);
				continue;
			}

			// Everything else becomes a numeric character reference.
			// A surrogate pair must be emitted as ONE reference to the code point it
			// encodes; references to the individual surrogates are not valid HTML.
			int codePoint = ch;
			if (Character.isHighSurrogate(ch) && i + 1 < n)
			{
				char low = s.charAt(i + 1);
				if (Character.isLowSurrogate(low))
				{
					codePoint = Character.toCodePoint(ch, low);

					// The low surrogate has been consumed together with the high one
					++i;
				}
			}
			buf.append("&#").append(codePoint).append(';');
		}
		return buf.toString();
	}
}
