001/*
002 * ModeShape (http://www.modeshape.org)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *       http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.modeshape.common.text;
017
018import java.text.CharacterIterator;
019import java.text.StringCharacterIterator;
020import java.util.HashMap;
021import java.util.Map;
022import org.modeshape.common.annotation.Immutable;
023
024/**
025 * An encoder useful for converting text to be used within XML attribute values. The following translations will be performed:
026 * <table cellspacing="0" cellpadding="1" border="1">
027 * <tr>
028 * <th>Raw (Unencoded)<br/>
029 * Character</th>
030 * <th>Translated (Encoded)<br/>
031 * Entity</th>
032 * </tr>
033 * <tr>
034 * <td>&amp;</td>
035 * <td>&amp;amp;</td>
036 * </tr>
037 * <tr>
038 * <td>&lt;</td>
039 * <td>&amp;lt;</td>
040 * </tr>
041 * <tr>
042 * <td>&gt;</td>
043 * <td>&amp;gt;</td>
044 * </tr>
045 * <tr>
046 * <td>&quot;</td>
047 * <td>&amp;quot;</td>
048 * </tr>
049 * <tr>
050 * <td>&#039;</td>
051 * <td>&amp;#039;</td>
052 * </tr>
053 * <tr>
054 * <td>All Others</td>
055 * <td>No Translation</td>
056 * </tr>
057 * </table>
058 * </p>
059 */
060@Immutable
061public class XmlValueEncoder implements TextEncoder, TextDecoder {
062
063    private static final Map<String, Character> SPECIAL_ENTITIES;
064
065    static {
066        SPECIAL_ENTITIES = new HashMap<String, Character>();
067
068        SPECIAL_ENTITIES.put("quot", '"');
069        SPECIAL_ENTITIES.put("gt", '>');
070        SPECIAL_ENTITIES.put("lt", '<');
071        SPECIAL_ENTITIES.put("amp", '&');
072
073    }
074
075    /**
076     * {@inheritDoc}
077     * 
078     * @see org.modeshape.common.text.TextEncoder#encode(java.lang.String)
079     */
080    @Override
081    public String encode( String text ) {
082        if (text == null) return null;
083        StringBuilder sb = new StringBuilder();
084        CharacterIterator iter = new StringCharacterIterator(text);
085        for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
086            switch (c) {
087                case '&':
088                    sb.append("&amp;");
089                    break;
090                case '"':
091                    sb.append("&quot;");
092                    break;
093                case '<':
094                    sb.append("&lt;");
095                    break;
096                case '>':
097                    sb.append("&gt;");
098                    break;
099                case '\'':
100                    sb.append("&#039;");
101                    break;
102                default:
103                    sb.append(c);
104            }
105        }
106        return sb.toString();
107    }
108
109    /**
110     * {@inheritDoc}
111     * 
112     * @see org.modeshape.common.text.TextDecoder#decode(java.lang.String)
113     */
114    @Override
115    public String decode( String encodedText ) {
116        if (encodedText == null) return null;
117        StringBuilder sb = new StringBuilder();
118        CharacterIterator iter = new StringCharacterIterator(encodedText);
119        for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
120            if (c == '&') {
121                int index = iter.getIndex();
122
123                do {
124                    // CHECKSTYLE IGNORE check FOR NEXT 1 LINES
125                    c = iter.next();
126                } while (c != CharacterIterator.DONE && c != ';');
127
128                // We found a closing semicolon
129                if (c == ';') {
130                    String s = encodedText.substring(index + 1, iter.getIndex());
131
132                    if (SPECIAL_ENTITIES.containsKey(s)) {
133                        sb.append(SPECIAL_ENTITIES.get(s));
134                        continue;
135
136                    }
137
138                    if (s.length() > 0 && s.charAt(0) == '#') {
139                        try {
140                            sb.append((char)Short.parseShort(s.substring(1, s.length())));
141                            continue;
142                        } catch (NumberFormatException nfe) {
143                            // This is possible in malformed encodings, but let it fall through
144                        }
145                    }
146                }
147
148                // Malformed encoding, restore state and pass poorly encoded data back
149                // CHECKSTYLE IGNORE check FOR NEXT 1 LINES
150                c = '&';
151                iter.setIndex(index);
152            }
153
154            sb.append(c);
155
156        }
157        return sb.toString();
158    }
159}