/*
 * ModeShape (http://www.modeshape.org)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.modeshape.common.text;

import java.util.HashMap;
import java.util.Map;
import org.modeshape.common.annotation.Immutable;

/**
 * An encoder useful for converting text to be used within XML attribute values. The following translations will be performed
 * by {@link #encode(String)}:
 * <table cellspacing="0" cellpadding="1" border="1">
 * <tr>
 * <th>Raw (Unencoded)<br/>
 * Character</th>
 * <th>Translated (Encoded)<br/>
 * Entity</th>
 * </tr>
 * <tr>
 * <td>&amp;</td>
 * <td>&amp;amp;</td>
 * </tr>
 * <tr>
 * <td>&lt;</td>
 * <td>&amp;lt;</td>
 * </tr>
 * <tr>
 * <td>&gt;</td>
 * <td>&amp;gt;</td>
 * </tr>
 * <tr>
 * <td>&quot;</td>
 * <td>&amp;quot;</td>
 * </tr>
 * <tr>
 * <td>&#039;</td>
 * <td>&amp;#039;</td>
 * </tr>
 * <tr>
 * <td>All Others</td>
 * <td>No Translation</td>
 * </tr>
 * </table>
 * <p>
 * {@link #decode(String)} reverses these translations. It additionally understands the <code>&amp;apos;</code> entity and
 * any decimal (<code>&amp;#NNN;</code>) or hexadecimal (<code>&amp;#xHHH;</code>) character reference. Anything that looks
 * like an entity but cannot be interpreted is passed through unchanged.
 * </p>
 */
@Immutable
public class XmlValueEncoder implements TextEncoder, TextDecoder {

    /** The named entities recognized by {@link #decode(String)}, keyed by the name without the '&amp;' and ';' delimiters. */
    private static final Map<String, Character> SPECIAL_ENTITIES;

    static {
        SPECIAL_ENTITIES = new HashMap<String, Character>();

        SPECIAL_ENTITIES.put("quot", '"');
        SPECIAL_ENTITIES.put("gt", '>');
        SPECIAL_ENTITIES.put("lt", '<');
        SPECIAL_ENTITIES.put("amp", '&');
        SPECIAL_ENTITIES.put("apos", '\'');
    }

    /**
     * Replace each of the five characters that are significant in XML attribute values with its entity. All other characters
     * are copied as-is.
     *
     * @param text the text to encode; may be null
     * @return the encoded text, or null if <code>text</code> is null
     * @see org.modeshape.common.text.TextEncoder#encode(java.lang.String)
     */
    @Override
    public String encode( String text ) {
        if (text == null) return null;
        StringBuilder sb = new StringBuilder(text.length() + 16);
        // Walk the string by index. A CharacterIterator signals the end with the sentinel '\uFFFF', which is also a
        // character the text may legitimately contain; looping on that sentinel would silently truncate such text.
        for (int i = 0, length = text.length(); i < length; i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    sb.append("&amp;");
                    break;
                case '"':
                    sb.append("&quot;");
                    break;
                case '<':
                    sb.append("&lt;");
                    break;
                case '>':
                    sb.append("&gt;");
                    break;
                case '\'':
                    sb.append("&#039;");
                    break;
                default:
                    sb.append(c);
            }
        }
        return sb.toString();
    }

    /**
     * Replace every recognized named entity and every numeric character reference with the character it denotes. Sequences
     * that start with '&amp;' but are not a recognized entity or a well-formed character reference are treated as malformed
     * and are copied to the result unchanged.
     *
     * @param encodedText the text to decode; may be null
     * @return the decoded text, or null if <code>encodedText</code> is null
     * @see org.modeshape.common.text.TextDecoder#decode(java.lang.String)
     */
    @Override
    public String decode( String encodedText ) {
        if (encodedText == null) return null;
        int length = encodedText.length();
        StringBuilder sb = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            char c = encodedText.charAt(i);
            if (c == '&') {
                int end = encodedText.indexOf(';', i + 1);
                if (end != -1) {
                    String name = encodedText.substring(i + 1, end);

                    Character special = SPECIAL_ENTITIES.get(name);
                    if (special != null) {
                        sb.append(special.charValue());
                        i = end + 1;
                        continue;
                    }

                    int codePoint = parseCharacterReference(name);
                    if (codePoint >= 0) {
                        // appendCodePoint emits a surrogate pair for supplementary characters
                        sb.appendCodePoint(codePoint);
                        i = end + 1;
                        continue;
                    }
                }
                // Malformed encoding: fall through and pass the '&' back as literal text. The characters after it are
                // then examined normally, so a real entity later in the text is still decoded.
            }
            sb.append(c);
            i++;
        }
        return sb.toString();
    }

    /**
     * Parse the body of a numeric character reference, i.e. the text between '&amp;' and ';'.
     *
     * @param reference the candidate reference body, such as <code>#039</code> or <code>#x27</code>; may not be null
     * @return the referenced code point, or -1 if the body is not '#' followed by one or more ASCII decimal digits, or '#x'
     *         followed by one or more ASCII hexadecimal digits, denoting a value no greater than
     *         {@link Character#MAX_CODE_POINT}
     */
    private static int parseCharacterReference( String reference ) {
        int length = reference.length();
        if (length < 2 || reference.charAt(0) != '#') return -1;

        int radix = 10;
        int start = 1;
        if (reference.charAt(1) == 'x') {
            radix = 16;
            start = 2;
        }
        if (start >= length) return -1; // "#x" with no digits

        int codePoint = 0;
        for (int i = start; i < length; i++) {
            char ch = reference.charAt(i);
            // Only ASCII digits are accepted; signs, whitespace and non-ASCII digits make the reference malformed
            int digit = ch < 128 ? Character.digit(ch, radix) : -1;
            if (digit < 0) return -1;
            codePoint = codePoint * radix + digit;
            // Checking on every step keeps the accumulator far below Integer.MAX_VALUE, so it cannot overflow
            if (codePoint > Character.MAX_CODE_POINT) return -1;
        }
        return codePoint;
    }
}