001/* 002 * ModeShape (http://www.modeshape.org) 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.modeshape.sequencer.xml; 017 018import java.util.HashMap; 019import java.util.LinkedList; 020import java.util.Map; 021import javax.jcr.NamespaceException; 022import javax.jcr.Node; 023import javax.jcr.RepositoryException; 024import javax.jcr.Session; 025import org.modeshape.common.logging.Logger; 026import org.modeshape.common.text.TextDecoder; 027import org.modeshape.common.text.XmlNameEncoder; 028import org.modeshape.common.util.CheckArg; 029import org.modeshape.common.util.StringUtil; 030import org.modeshape.jcr.api.JcrConstants; 031import org.modeshape.jcr.api.NamespaceRegistry; 032import org.xml.sax.Attributes; 033import org.xml.sax.SAXException; 034import org.xml.sax.SAXParseException; 035import org.xml.sax.ext.DefaultHandler2; 036 037/** 038 * A {@link org.xml.sax.ext.DefaultHandler2} implementation that is used by the sequencer. 039 */ 040public class XmlSequencerHandler extends DefaultHandler2 { 041 042 private static final Logger LOGGER = Logger.getLogger(XmlSequencerHandler.class); 043 044 /** 045 * Decoder for XML names, to turn '_xHHHH_' sequences in the XML element and attribute names into the corresponding UTF-16 046 * characters. 047 */ 048 public static TextDecoder DEFAULT_DECODER = new XmlNameEncoder(); 049 050 /** 051 * The default {@link XmlSequencer.AttributeScoping}. 052 */ 053 public static XmlSequencer.AttributeScoping DEFAULT_ATTRIBUTE_SCOPING = XmlSequencer.AttributeScoping.USE_DEFAULT_NAMESPACE; 054 055 /** 056 * The TextDecoder that is used to decode the names. 057 */ 058 protected final TextDecoder decoder; 059 060 /** 061 * The stack of prefixes for each namespace, which is used to keep the {@link NamespaceRegistry namespace registry} in sync 062 * with the namespaces in the XML document. 063 */ 064 private final Map<String, LinkedList<String>> prefixStackByUri = new HashMap<String, LinkedList<String>>(); 065 066 private final XmlSequencer.AttributeScoping attributeScoping; 067 068 private Node currentNode; 069 private Session session; 070 071 private String currentEntityName; 072 private StringBuilder cDataContent; 073 private StringBuilder contentBuilder; 074 075 private final Map<String, String> entityValues = new HashMap<String, String>(); 076 077 XmlSequencerHandler( Node rootNode, 078 XmlSequencer.AttributeScoping scoping ) throws RepositoryException { 079 CheckArg.isNotNull(rootNode, "outputNode"); 080 this.currentNode = rootNode; 081 082 this.session = currentNode.getSession(); 083 this.decoder = DEFAULT_DECODER; 084 this.attributeScoping = scoping != null ? scoping : DEFAULT_ATTRIBUTE_SCOPING; 085 } 086 087 private void startNode( String name, 088 String primaryType ) throws RepositoryException { 089 // Check if content still needs to be output 090 if (contentBuilder != null) endContent(); 091 currentNode = currentNode.addNode(name, primaryType); 092 } 093 094 private void endNode() throws RepositoryException { 095 // Recover parent's path, namespace, and indexedName map, clearing the ended element's map to free memory 096 currentNode = currentNode.getParent(); 097 } 098 099 /** 100 * See if there is any element content that needs to be completed. 101 * 102 * @throws RepositoryException if there is a problem writing the content to the repository session 103 */ 104 protected void endContent() throws RepositoryException { 105 // Process the content of the element ... 106 String content = StringUtil.normalize(contentBuilder.toString()); 107 // Null-out builder to setup for subsequent content. 108 // Must be done before call to startElement below to prevent infinite loop. 109 contentBuilder = null; 110 // Skip if nothing in content but whitespace 111 if (content.length() > 0) { 112 // Create separate node for each content entry since entries can be interspersed amongst child elements 113 startNode(XmlLexicon.ELEMENT_CONTENT, XmlLexicon.ELEMENT_CONTENT); 114 currentNode.setProperty(XmlLexicon.ELEMENT_CONTENT, content); 115 endNode(); 116 } 117 } 118 119 @Override 120 public void startDocument() throws SAXException { 121 try { 122 currentNode.setPrimaryType(XmlLexicon.DOCUMENT); 123 } catch (RepositoryException e) { 124 throw new SAXException(e); 125 } 126 } 127 128 @Override 129 public void startDTD( String name, 130 String publicId, 131 String systemId ) throws SAXException { 132 try { 133 currentNode.setProperty(DtdLexicon.NAME, name); 134 currentNode.setProperty(DtdLexicon.PUBLIC_ID, publicId); 135 currentNode.setProperty(DtdLexicon.SYSTEM_ID, systemId); 136 } catch (RepositoryException e) { 137 throw new SAXException(e); 138 } 139 } 140 141 @Override 142 public void externalEntityDecl( String name, 143 String publicId, 144 String systemId ) throws SAXException { 145 // Add "synthetic" entity container to path to help prevent name collisions with XML elements 146 try { 147 startNode(DtdLexicon.ENTITY, DtdLexicon.ENTITY); 148 currentNode.setProperty(DtdLexicon.NAME, name); 149 if (publicId != null) { 150 currentNode.setProperty(DtdLexicon.PUBLIC_ID, publicId); 151 } 152 if (systemId != null) { 153 currentNode.setProperty(DtdLexicon.SYSTEM_ID, systemId); 154 } 155 endNode(); 156 } catch (RepositoryException e) { 157 throw new SAXException(e); 158 } 159 } 160 161 @Override 162 public void internalEntityDecl( String name, 163 String value ) throws SAXException { 164 // Add "synthetic" entity container to path to help prevent name collisions with XML elements 165 try { 166 startNode(DtdLexicon.ENTITY, DtdLexicon.ENTITY); 167 currentNode.setProperty(DtdLexicon.NAME, name); 168 currentNode.setProperty(DtdLexicon.VALUE, value); 169 // Record the name/value pair ... 170 entityValues.put(name, value); 171 endNode(); 172 } catch (RepositoryException e) { 173 throw new SAXException(e); 174 } 175 } 176 177 @Override 178 public void processingInstruction( String target, 179 String data ) throws SAXException { 180 // Output separate nodes for each instruction since multiple are allowed 181 try { 182 startNode(XmlLexicon.PROCESSING_INSTRUCTION, XmlLexicon.PROCESSING_INSTRUCTION); 183 currentNode.setProperty(XmlLexicon.TARGET, target.trim()); 184 if (data != null) { 185 currentNode.setProperty(XmlLexicon.PROCESSING_INSTRUCTION_CONTENT, data.trim()); 186 } 187 endNode(); 188 } catch (RepositoryException e) { 189 throw new SAXException(e); 190 } 191 } 192 193 /** 194 * <p> 195 * This method ensures that the namespace is registered with the {@link NamespaceRegistry registry}, using the supplied prefix 196 * to register the namespace if required. Note that because this class does not really use the namespace prefixes to create 197 * names, no attempt is made to match the XML namespace prefixes. 198 * </p> 199 * 200 * @see org.xml.sax.helpers.DefaultHandler#startPrefixMapping(String, String) 201 */ 202 @Override 203 public void startPrefixMapping( String prefix, 204 String uri ) throws SAXException { 205 try { 206 if (isUriRegistered(uri) && !StringUtil.isBlank(prefix)) { 207 // It is already registered, but re-register it locally using the supplied prefix ... 208 session.setNamespacePrefix(prefix, uri); 209 } else { 210 // The namespace is not already registered so we have to register it with the ws namespace registry. 211 // This should also make the prefix available to the current session 212 NamespaceRegistry namespaceRegistry = (NamespaceRegistry)session.getWorkspace().getNamespaceRegistry(); 213 if (StringUtil.isBlank(prefix)) { 214 prefix = namespaceRegistry.registerNamespace(uri); 215 } else { 216 namespaceRegistry.registerNamespace(prefix, uri); 217 } 218 } 219 } catch (RepositoryException e) { 220 throw new SAXException(e); 221 } 222 223 // Add the prefix to the stack ... 224 LinkedList<String> prefixStack = this.prefixStackByUri.get(uri); 225 if (prefixStack == null) { 226 prefixStack = new LinkedList<String>(); 227 this.prefixStackByUri.put(uri, prefixStack); 228 } 229 prefixStack.addFirst(prefix); 230 } 231 232 private boolean isUriRegistered( String uri ) throws RepositoryException { 233 try { 234 session.getNamespacePrefix(uri); 235 return true; 236 } catch (NamespaceException e) { 237 return false; 238 } 239 } 240 241 @Override 242 public void endPrefixMapping( String prefix ) throws SAXException { 243 CheckArg.isNotNull(prefix, "prefix"); 244 if (StringUtil.isBlank(prefix)) { 245 return; 246 } 247 try { 248 // Get the current URI for this prefix ... 249 String uri = session.getNamespaceURI(prefix); 250 251 // Get the previous prefix from the stack ... 252 LinkedList<String> prefixStack = this.prefixStackByUri.get(uri); 253 assert prefixStack != null; 254 assert !prefixStack.isEmpty(); 255 String existingPrefix = prefixStack.removeFirst(); 256 assert prefix.equals(existingPrefix); 257 258 // If there are no previous prefixes, then remove the mapping ... 259 if (prefixStack.isEmpty()) { 260 prefixStackByUri.remove(uri); 261 } else { 262 String previous = prefixStack.getFirst(); 263 session.setNamespacePrefix(previous, uri); 264 } 265 } catch (RepositoryException e) { 266 throw new SAXException(e); 267 } 268 } 269 270 @Override 271 public void startEntity( String name ) { 272 // Record that we've started an entity by capturing the name of the entity ... 273 currentEntityName = name; 274 } 275 276 @Override 277 public void endEntity( String name ) { 278 // currentEntityName is nulled in 'characters(...)', not here. 279 // See ModeShape-231 for an issue related to this 280 } 281 282 @Override 283 public void startCDATA() throws SAXException { 284 // CDATA sections can start in the middle of element content, so there may already be some 285 // element content already processed ... 286 try { 287 if (contentBuilder != null) endContent(); 288 } catch (RepositoryException e) { 289 throw new SAXException(e); 290 } 291 292 // Prepare builder for concatenating consecutive lines of CDATA 293 cDataContent = new StringBuilder(); 294 } 295 296 @Override 297 public void endCDATA() throws SAXException { 298 // Output CDATA built in characters() method 299 try { 300 startNode(XmlLexicon.CDATA, XmlLexicon.CDATA); 301 currentNode.setProperty(XmlLexicon.CDATA_CONTENT, cDataContent.toString()); 302 endNode(); 303 } catch (RepositoryException e) { 304 throw new SAXException(e); 305 } 306 // Null-out builder to free memory 307 cDataContent = null; 308 } 309 310 @Override 311 public void characters( char[] ch, 312 int start, 313 int length ) { 314 String content = String.valueOf(ch, start, length); 315 if (cDataContent != null) { 316 // Processing the characters in the CDATA, so add to the builder 317 cDataContent.append(ch, start, length); 318 // Text within builder will be output at the end of CDATA 319 } else { 320 if (contentBuilder == null) { 321 // This is the first line of content, so we have to create the StringBuilder ... 322 contentBuilder = new StringBuilder(); 323 } 324 if (currentEntityName != null) { 325 // This is an entity reference, so rather than use the entity value characters (the content passed 326 // into this method), we want to keep the entity reference ... 327 contentBuilder.append('&').append(currentEntityName).append(';'); 328 329 // Normally, 'characters' is called with just the entity replacement characters, 330 // and is called between 'startEntity' and 'endEntity'. However, per ModeShape-231, some JVMs 331 // use an incorrect ordering: 'startEntity', 'endEntity' and then 'characters', and the 332 // content passed to the 'characters' call not only includes the entity replacement characters 333 // followed by other content. Look for this condition ... 334 String entityValue = entityValues.get(currentEntityName); 335 if (!content.equals(entityValue) && entityValue != null && entityValue.length() < content.length()) { 336 // Per ModeShape-231, there's extra content after the entity value. So replace the entity value in the 337 // content with the entity reference (not the replacement characters), and add the extra content ... 338 String extraContent = content.substring(entityValue.length()); 339 contentBuilder.append(extraContent); 340 } 341 // We're done reading the entity characters, so null it out 342 currentEntityName = null; 343 } else { 344 // Just append the content normally ... 345 contentBuilder.append(content); 346 } 347 // Text within builder will be output when another element or CDATA is encountered 348 } 349 } 350 351 @Override 352 public void comment( char[] ch, 353 int start, 354 int length ) throws SAXException { 355 // Output separate nodes for each comment since multiple are allowed 356 try { 357 startNode(XmlLexicon.COMMENT, XmlLexicon.COMMENT); 358 currentNode.setProperty(XmlLexicon.COMMENT_CONTENT, String.valueOf(ch, start, length).trim()); 359 endNode(); 360 } catch (RepositoryException e) { 361 throw new SAXException(e); 362 } 363 } 364 365 @Override 366 public void startElement( String uri, 367 String localName, 368 String name, 369 Attributes attributes ) throws SAXException { 370 assert localName != null; 371 372 try { 373 374 // Create the node with the name built from the element's name ... 375 String nodeName = createAttributeName(uri, localName); 376 startNode(nodeName, XmlLexicon.ELEMENT); 377 378 // Now, set each attribute as a property ... 379 for (int i = 0, len = attributes.getLength(); i != len; ++i) { 380 String attributeLocalName = attributes.getLocalName(i); 381 String attributeUri = attributes.getURI(i); 382 String attributeName = null; 383 if ((attributeUri == null || attributeUri.length() == 0) && attributes.getQName(i).indexOf(':') == -1) { 384 switch (this.attributeScoping) { 385 case INHERIT_ELEMENT_NAMESPACE: 386 attributeName = createAttributeName(uri, attributeLocalName); 387 break; 388 case USE_DEFAULT_NAMESPACE: 389 attributeName = createAttributeName(null, attributeLocalName); 390 break; 391 } 392 } else { 393 attributeName = createAttributeName(attributeUri, attributeLocalName); 394 } 395 assert attributeName != null; 396 if (JcrConstants.JCR_NAME.equals(attributeName)) { 397 // We don't want to record the "jcr:name" attribute since it won't match the node name ... 398 continue; 399 } 400 currentNode.setProperty(attributeName, attributes.getValue(i)); 401 } 402 } catch (RepositoryException e) { 403 throw new SAXException(e); 404 } 405 } 406 407 private String createAttributeName( String uri, 408 String localName ) throws RepositoryException { 409 if (StringUtil.isBlank(uri)) { 410 return decoder.decode(localName.trim()); 411 } 412 String prefix = session.getNamespacePrefix(uri); 413 assert prefix != null; 414 return prefix + ":" + decoder.decode(localName.trim()); 415 } 416 417 @Override 418 public void endElement( String uri, 419 String localName, 420 String name ) throws SAXException { 421 try { 422 // Check if content still needs to be output 423 if (contentBuilder != null) endContent(); 424 // End the current node ... 425 endNode(); 426 } catch (RepositoryException e) { 427 throw new SAXException(e); 428 } 429 } 430 431 @Override 432 public void warning( SAXParseException warning ) { 433 LOGGER.debug(warning, "SAX warning:"); 434 } 435 436 @Override 437 public void error( SAXParseException error ) { 438 LOGGER.debug(error, "SAX error:"); 439 } 440}