001/*
002 * ModeShape (http://www.modeshape.org)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *       http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.modeshape.sequencer.xml;
017
018import java.util.HashMap;
019import java.util.LinkedList;
020import java.util.Map;
021import javax.jcr.NamespaceException;
022import javax.jcr.Node;
023import javax.jcr.RepositoryException;
024import javax.jcr.Session;
025import org.modeshape.common.logging.Logger;
026import org.modeshape.common.text.TextDecoder;
027import org.modeshape.common.text.XmlNameEncoder;
028import org.modeshape.common.util.CheckArg;
029import org.modeshape.common.util.StringUtil;
030import org.modeshape.jcr.api.JcrConstants;
031import org.modeshape.jcr.api.NamespaceRegistry;
032import org.xml.sax.Attributes;
033import org.xml.sax.SAXException;
034import org.xml.sax.SAXParseException;
035import org.xml.sax.ext.DefaultHandler2;
036
037/**
038 * A {@link org.xml.sax.ext.DefaultHandler2} implementation that is used by the sequencer.
039 */
040public class XmlSequencerHandler extends DefaultHandler2 {
041
042    private static final Logger LOGGER = Logger.getLogger(XmlSequencerHandler.class);
043
044    /**
045     * Decoder for XML names, to turn '_xHHHH_' sequences in the XML element and attribute names into the corresponding UTF-16
046     * characters.
047     */
048    public static TextDecoder DEFAULT_DECODER = new XmlNameEncoder();
049
050    /**
051     * The default {@link XmlSequencer.AttributeScoping}.
052     */
053    public static XmlSequencer.AttributeScoping DEFAULT_ATTRIBUTE_SCOPING = XmlSequencer.AttributeScoping.USE_DEFAULT_NAMESPACE;
054
055    /**
056     * The TextDecoder that is used to decode the names.
057     */
058    protected final TextDecoder decoder;
059
060    /**
061     * The stack of prefixes for each namespace, which is used to keep the {@link NamespaceRegistry namespace registry} in sync
062     * with the namespaces in the XML document.
063     */
064    private final Map<String, LinkedList<String>> prefixStackByUri = new HashMap<String, LinkedList<String>>();
065
066    private final XmlSequencer.AttributeScoping attributeScoping;
067
068    private Node currentNode;
069    private Session session;
070
071    private String currentEntityName;
072    private StringBuilder cDataContent;
073    private StringBuilder contentBuilder;
074
075    private final Map<String, String> entityValues = new HashMap<String, String>();
076
077    XmlSequencerHandler( Node rootNode,
078                         XmlSequencer.AttributeScoping scoping ) throws RepositoryException {
079        CheckArg.isNotNull(rootNode, "outputNode");
080        this.currentNode = rootNode;
081
082        this.session = currentNode.getSession();
083        this.decoder = DEFAULT_DECODER;
084        this.attributeScoping = scoping != null ? scoping : DEFAULT_ATTRIBUTE_SCOPING;
085    }
086
087    private void startNode( String name,
088                            String primaryType ) throws RepositoryException {
089        // Check if content still needs to be output
090        if (contentBuilder != null) endContent();
091        currentNode = currentNode.addNode(name, primaryType);
092    }
093
094    private void endNode() throws RepositoryException {
095        // Recover parent's path, namespace, and indexedName map, clearing the ended element's map to free memory
096        currentNode = currentNode.getParent();
097    }
098
099    /**
100     * See if there is any element content that needs to be completed.
101     * 
102     * @throws RepositoryException if there is a problem writing the content to the repository session
103     */
104    protected void endContent() throws RepositoryException {
105        // Process the content of the element ...
106        String content = StringUtil.normalize(contentBuilder.toString());
107        // Null-out builder to setup for subsequent content.
108        // Must be done before call to startElement below to prevent infinite loop.
109        contentBuilder = null;
110        // Skip if nothing in content but whitespace
111        if (content.length() > 0) {
112            // Create separate node for each content entry since entries can be interspersed amongst child elements
113            startNode(XmlLexicon.ELEMENT_CONTENT, XmlLexicon.ELEMENT_CONTENT);
114            currentNode.setProperty(XmlLexicon.ELEMENT_CONTENT, content);
115            endNode();
116        }
117    }
118
119    @Override
120    public void startDocument() throws SAXException {
121        try {
122            currentNode.setPrimaryType(XmlLexicon.DOCUMENT);
123        } catch (RepositoryException e) {
124            throw new SAXException(e);
125        }
126    }
127
128    @Override
129    public void startDTD( String name,
130                          String publicId,
131                          String systemId ) throws SAXException {
132        try {
133            currentNode.setProperty(DtdLexicon.NAME, name);
134            currentNode.setProperty(DtdLexicon.PUBLIC_ID, publicId);
135            currentNode.setProperty(DtdLexicon.SYSTEM_ID, systemId);
136        } catch (RepositoryException e) {
137            throw new SAXException(e);
138        }
139    }
140
141    @Override
142    public void externalEntityDecl( String name,
143                                    String publicId,
144                                    String systemId ) throws SAXException {
145        // Add "synthetic" entity container to path to help prevent name collisions with XML elements
146        try {
147            startNode(DtdLexicon.ENTITY, DtdLexicon.ENTITY);
148            currentNode.setProperty(DtdLexicon.NAME, name);
149            if (publicId != null) {
150                currentNode.setProperty(DtdLexicon.PUBLIC_ID, publicId);
151            }
152            if (systemId != null) {
153                currentNode.setProperty(DtdLexicon.SYSTEM_ID, systemId);
154            }
155            endNode();
156        } catch (RepositoryException e) {
157            throw new SAXException(e);
158        }
159    }
160
161    @Override
162    public void internalEntityDecl( String name,
163                                    String value ) throws SAXException {
164        // Add "synthetic" entity container to path to help prevent name collisions with XML elements
165        try {
166            startNode(DtdLexicon.ENTITY, DtdLexicon.ENTITY);
167            currentNode.setProperty(DtdLexicon.NAME, name);
168            currentNode.setProperty(DtdLexicon.VALUE, value);
169            // Record the name/value pair ...
170            entityValues.put(name, value);
171            endNode();
172        } catch (RepositoryException e) {
173            throw new SAXException(e);
174        }
175    }
176
177    @Override
178    public void processingInstruction( String target,
179                                       String data ) throws SAXException {
180        // Output separate nodes for each instruction since multiple are allowed
181        try {
182            startNode(XmlLexicon.PROCESSING_INSTRUCTION, XmlLexicon.PROCESSING_INSTRUCTION);
183            currentNode.setProperty(XmlLexicon.TARGET, target.trim());
184            if (data != null) {
185                currentNode.setProperty(XmlLexicon.PROCESSING_INSTRUCTION_CONTENT, data.trim());
186            }
187            endNode();
188        } catch (RepositoryException e) {
189            throw new SAXException(e);
190        }
191    }
192
193    /**
194     * <p>
195     * This method ensures that the namespace is registered with the {@link NamespaceRegistry registry}, using the supplied prefix
196     * to register the namespace if required. Note that because this class does not really use the namespace prefixes to create
197     * names, no attempt is made to match the XML namespace prefixes.
198     * </p>
199     * 
200     * @see org.xml.sax.helpers.DefaultHandler#startPrefixMapping(String, String)
201     */
202    @Override
203    public void startPrefixMapping( String prefix,
204                                    String uri ) throws SAXException {
205        try {
206            if (isUriRegistered(uri) && !StringUtil.isBlank(prefix)) {
207                // It is already registered, but re-register it locally using the supplied prefix ...
208                session.setNamespacePrefix(prefix, uri);
209            } else {
210                // The namespace is not already registered so we have to register it with the ws namespace registry.
211                // This should also make the prefix available to the current session
212                NamespaceRegistry namespaceRegistry = (NamespaceRegistry)session.getWorkspace().getNamespaceRegistry();
213                if (StringUtil.isBlank(prefix)) {
214                    prefix = namespaceRegistry.registerNamespace(uri);
215                } else {
216                    namespaceRegistry.registerNamespace(prefix, uri);
217                }
218            }
219        } catch (RepositoryException e) {
220            throw new SAXException(e);
221        }
222
223        // Add the prefix to the stack ...
224        LinkedList<String> prefixStack = this.prefixStackByUri.get(uri);
225        if (prefixStack == null) {
226            prefixStack = new LinkedList<String>();
227            this.prefixStackByUri.put(uri, prefixStack);
228        }
229        prefixStack.addFirst(prefix);
230    }
231
232    private boolean isUriRegistered( String uri ) throws RepositoryException {
233        try {
234            session.getNamespacePrefix(uri);
235            return true;
236        } catch (NamespaceException e) {
237            return false;
238        }
239    }
240
241    @Override
242    public void endPrefixMapping( String prefix ) throws SAXException {
243        CheckArg.isNotNull(prefix, "prefix");
244        if (StringUtil.isBlank(prefix)) {
245            return;
246        }
247        try {
248            // Get the current URI for this prefix ...
249            String uri = session.getNamespaceURI(prefix);
250
251            // Get the previous prefix from the stack ...
252            LinkedList<String> prefixStack = this.prefixStackByUri.get(uri);
253            assert prefixStack != null;
254            assert !prefixStack.isEmpty();
255            String existingPrefix = prefixStack.removeFirst();
256            assert prefix.equals(existingPrefix);
257
258            // If there are no previous prefixes, then remove the mapping ...
259            if (prefixStack.isEmpty()) {
260                prefixStackByUri.remove(uri);
261            } else {
262                String previous = prefixStack.getFirst();
263                session.setNamespacePrefix(previous, uri);
264            }
265        } catch (RepositoryException e) {
266            throw new SAXException(e);
267        }
268    }
269
270    @Override
271    public void startEntity( String name ) {
272        // Record that we've started an entity by capturing the name of the entity ...
273        currentEntityName = name;
274    }
275
276    @Override
277    public void endEntity( String name ) {
278        // currentEntityName is nulled in 'characters(...)', not here.
279        // See ModeShape-231 for an issue related to this
280    }
281
282    @Override
283    public void startCDATA() throws SAXException {
284        // CDATA sections can start in the middle of element content, so there may already be some
285        // element content already processed ...
286        try {
287            if (contentBuilder != null) endContent();
288        } catch (RepositoryException e) {
289            throw new SAXException(e);
290        }
291
292        // Prepare builder for concatenating consecutive lines of CDATA
293        cDataContent = new StringBuilder();
294    }
295
296    @Override
297    public void endCDATA() throws SAXException {
298        // Output CDATA built in characters() method
299        try {
300            startNode(XmlLexicon.CDATA, XmlLexicon.CDATA);
301            currentNode.setProperty(XmlLexicon.CDATA_CONTENT, cDataContent.toString());
302            endNode();
303        } catch (RepositoryException e) {
304            throw new SAXException(e);
305        }
306        // Null-out builder to free memory
307        cDataContent = null;
308    }
309
310    @Override
311    public void characters( char[] ch,
312                            int start,
313                            int length ) {
314        String content = String.valueOf(ch, start, length);
315        if (cDataContent != null) {
316            // Processing the characters in the CDATA, so add to the builder
317            cDataContent.append(ch, start, length);
318            // Text within builder will be output at the end of CDATA
319        } else {
320            if (contentBuilder == null) {
321                // This is the first line of content, so we have to create the StringBuilder ...
322                contentBuilder = new StringBuilder();
323            }
324            if (currentEntityName != null) {
325                // This is an entity reference, so rather than use the entity value characters (the content passed
326                // into this method), we want to keep the entity reference ...
327                contentBuilder.append('&').append(currentEntityName).append(';');
328
329                // Normally, 'characters' is called with just the entity replacement characters,
330                // and is called between 'startEntity' and 'endEntity'. However, per ModeShape-231, some JVMs
331                // use an incorrect ordering: 'startEntity', 'endEntity' and then 'characters', and the
332                // content passed to the 'characters' call not only includes the entity replacement characters
333                // followed by other content. Look for this condition ...
334                String entityValue = entityValues.get(currentEntityName);
335                if (!content.equals(entityValue) && entityValue != null && entityValue.length() < content.length()) {
336                    // Per ModeShape-231, there's extra content after the entity value. So replace the entity value in the
337                    // content with the entity reference (not the replacement characters), and add the extra content ...
338                    String extraContent = content.substring(entityValue.length());
339                    contentBuilder.append(extraContent);
340                }
341                // We're done reading the entity characters, so null it out
342                currentEntityName = null;
343            } else {
344                // Just append the content normally ...
345                contentBuilder.append(content);
346            }
347            // Text within builder will be output when another element or CDATA is encountered
348        }
349    }
350
351    @Override
352    public void comment( char[] ch,
353                         int start,
354                         int length ) throws SAXException {
355        // Output separate nodes for each comment since multiple are allowed
356        try {
357            startNode(XmlLexicon.COMMENT, XmlLexicon.COMMENT);
358            currentNode.setProperty(XmlLexicon.COMMENT_CONTENT, String.valueOf(ch, start, length).trim());
359            endNode();
360        } catch (RepositoryException e) {
361            throw new SAXException(e);
362        }
363    }
364
365    @Override
366    public void startElement( String uri,
367                              String localName,
368                              String name,
369                              Attributes attributes ) throws SAXException {
370        assert localName != null;
371
372        try {
373
374            // Create the node with the name built from the element's name ...
375            String nodeName = createAttributeName(uri, localName);
376            startNode(nodeName, XmlLexicon.ELEMENT);
377
378            // Now, set each attribute as a property ...
379            for (int i = 0, len = attributes.getLength(); i != len; ++i) {
380                String attributeLocalName = attributes.getLocalName(i);
381                String attributeUri = attributes.getURI(i);
382                String attributeName = null;
383                if ((attributeUri == null || attributeUri.length() == 0) && attributes.getQName(i).indexOf(':') == -1) {
384                    switch (this.attributeScoping) {
385                        case INHERIT_ELEMENT_NAMESPACE:
386                            attributeName = createAttributeName(uri, attributeLocalName);
387                            break;
388                        case USE_DEFAULT_NAMESPACE:
389                            attributeName = createAttributeName(null, attributeLocalName);
390                            break;
391                    }
392                } else {
393                    attributeName = createAttributeName(attributeUri, attributeLocalName);
394                }
395                assert attributeName != null;
396                if (JcrConstants.JCR_NAME.equals(attributeName)) {
397                    // We don't want to record the "jcr:name" attribute since it won't match the node name ...
398                    continue;
399                }
400                currentNode.setProperty(attributeName, attributes.getValue(i));
401            }
402        } catch (RepositoryException e) {
403            throw new SAXException(e);
404        }
405    }
406
407    private String createAttributeName( String uri,
408                                        String localName ) throws RepositoryException {
409        if (StringUtil.isBlank(uri)) {
410            return decoder.decode(localName.trim());
411        }
412        String prefix = session.getNamespacePrefix(uri);
413        assert prefix != null;
414        return prefix + ":" + decoder.decode(localName.trim());
415    }
416
417    @Override
418    public void endElement( String uri,
419                            String localName,
420                            String name ) throws SAXException {
421        try {
422            // Check if content still needs to be output
423            if (contentBuilder != null) endContent();
424            // End the current node ...
425            endNode();
426        } catch (RepositoryException e) {
427            throw new SAXException(e);
428        }
429    }
430
431    @Override
432    public void warning( SAXParseException warning ) {
433        LOGGER.debug(warning, "SAX warning:");
434    }
435
436    @Override
437    public void error( SAXParseException error ) {
438        LOGGER.debug(error, "SAX error:");
439    }
440}