001/*
002 * ModeShape (http://www.modeshape.org)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *       http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.modeshape.common.text;
017
018import java.util.ArrayList;
019import java.util.Iterator;
020import java.util.List;
021import java.util.ListIterator;
022import java.util.Locale;
023import java.util.NoSuchElementException;
024import org.modeshape.common.CommonI18n;
025import org.modeshape.common.annotation.Immutable;
026import org.modeshape.common.annotation.NotThreadSafe;
027import org.modeshape.common.util.CheckArg;
028import org.modeshape.common.xml.XmlCharacters;
029
030/**
031 * A foundation for basic parsers that tokenizes input content and allows parsers to easily access and use those tokens. A
032 * {@link TokenStream} object literally represents the stream of {@link Token} objects that each represent a word, symbol, comment
033 * or other lexically-relevant piece of information. This simple framework makes it very easy to create a parser that walks
034 * through (or "consumes") the tokens in the order they appear and do something useful with that content (usually creating another
035 * representation of the content, such as some domain-specific Abstract Syntax Tree or object model).
036 * <p>
037 * </p>
038 * <h3>The parts</h3>
039 * <p>
040 * This simple framework consists of a couple of pieces that fit together to do the whole job of parsing input content.
041 * </p>
042 * <p>
043 * The {@link Tokenizer} is responsible for consuming the character-level input content and constructing {@link Token} objects for
044 * the different words, symbols, or other meaningful elements contained in the content. Each Token object is a simple object that
045 * records the character(s) that make up the token's value, but it does this in a very lightweight and efficient way by pointing
046 * to the original character stream. Each token can be assigned a parser-specific integral <i>token type</i> that may make it
 * easier to quickly figure out later in the process what kind of information each token represents. The general idea is to
048 * keep the Tokenizer logic very simple, and very often Tokenizers will merely look for the different kinds of characters (e.g.,
049 * symbols, letters, digits, etc.) as well as things like quoted strings and comments. However, Tokenizers are never called by the
050 * parser, but instead are always given to the TokenStream that then calls the Tokenizer at the appropriate time.
051 * </p>
052 * <p>
053 * The {@link TokenStream} is supplied the input content, a Tokenizer implementation, and a few options. Its job is to prepare the
054 * content for processing, call the Tokenizer implementation to create the series of Token objects, and then provide an interface
055 * for walking through and consuming the tokens. This interface makes it possible to discover the value and type of the current
056 * token, and consume the current token and move to the next token. Plus, the interface has been designed to make the code that
057 * works with the tokens to be as readable as possible.
058 * </p>
059 * <p>
060 * The final component in this framework is the <b>Parser</b>. The parser is really any class that takes as input the content to
061 * be parsed and that outputs some meaningful information. The parser will do this by defining the Tokenizer, constructing a
062 * TokenStream object, and then using the TokenStream to walk through the sequence of Tokens and produce some meaningful
063 * representation of the content. Parsers can create instances of some object model, or they can create a domain-specific Abstract
064 * Syntax Tree representation.
065 * </p>
066 * <p>
067 * The benefit of breaking the responsibility along these lines is that the TokenStream implementation is able to encapsulate
068 * quite a bit of very tedious and very useful functionality, while still allowing a lot of flexibility as to what makes up the
069 * different tokens. It also makes the parser very easy to write and read (and thus maintain), without placing very many
070 * restrictions on how that logic is to be defined. Plus, because the TokenStream takes responsibility for tracking the positions
071 * of every token (including line and column numbers), it can automatically produce meaningful errors.
072 * </p>
073 * <h3>Consuming tokens</h3>
074 * <p>
075 * A parser works with the tokens on the TokenStream using a variety of methods:
076 * <ul>
077 * <li>The {@link #start()} method must be called before any of the other methods. It performs initialization and tokenizing, and
078 * prepares the internal state by finding the first token and setting an internal <i>current token</i> reference.</li>
079 * <li>The {@link #hasNext()} method can be called repeatedly to determine if there is another token after the <i>current
080 * token</i>. This is often useful when an unknown number of tokens is to be processed, and behaves very similarly to the
081 * {@link Iterator#hasNext()} method.</li>
082 * <li>The {@link #consume()} method returns the {@link Token#value() value} of the <i>current token</i> and moves the <i>current
083 * token</i> pointer to the next available token.</li>
084 * <li>The {@link #consume(String)} and {@link #consume(char)} methods look at the <i>current token</i> and ensure the token's
085 * {@link Token#value() value} matches the value supplied as a method parameter, or they throw a {@link ParsingException} if the
086 * values don't match. The {@link #consume(int)} method works similarly, except that it attempts to match the token's
087 * {@link Token#type() type}. And, the {@link #consume(String, String...)} is a convenience method that is equivalent to calling
088 * {@link #consume(String)} for each of the arguments.</li>
089 * <li>The {@link #canConsume(String)} and {@link #canConsume(char)} methods look at the <i>current token</i> and check whether
090 * the token's {@link Token#value() value} matches the value supplied as a method parameter. If there is a match, the method
091 * advances the <i>current token</i> reference and returns true. Otherwise, the <i>current token</i> does not match and the method
092 * returns false without advancing the <i>current token</i> reference or throwing a ParsingException. Similarly, the
093 * {@link #canConsume(int)} method checks the token's {@link Token#type() type} rather than the value, consuming the token and
094 * returning true if there is a match, or just returning false if there is no match. The {@link #canConsume(String, String...)}
095 * method determines whether all of the supplied values can be consumed in the given order.</li>
096 * <li>The {@link #matches(String)} and {@link #matches(char)} methods look at the <i>current token</i> and check whether the
097 * token's {@link Token#value() value} matches the value supplied as a method parameter. The method then returns whether there was
098 * a match, but does <i>not</i> advance the <i>current token</i> pointer. Similarly, the {@link #matches(int)} method checks the
099 * token's {@link Token#type() type} rather than the value. The {@link #matches(String, String...)} method is a convenience method
100 * that is equivalent to calling {@link #matches(String)} for each of the arguments, and the {@link #matches(int, int...)} method
101 * is a convenience method that is equivalent to calling {@link #matches(int)} for each of the arguments.</li>
 * <li>The {@link #matchesAnyOf(String, String...)} methods look at the <i>current token</i> and check whether the token's
 * {@link Token#value() value} matches at least one of the values supplied as method parameters. The method then returns whether
 * there was a match, but does <i>not</i> advance the <i>current token</i> pointer. Similarly, the
 * {@link #matchesAnyOf(int, int...)} method checks the token's {@link Token#type() type} rather than the value.</li>
 * </ul>
107 * </p>
108 * <p>
109 * With these methods, it's very easy to create a parser that looks at the current token to decide what to do, and then consume
110 * that token, and repeat this process.
111 * </p>
112 * <h3>Example parser</h3>
113 * <p>
114 * Here is an example of a very simple parser that parses very simple and limited SQL <code>SELECT</code> and <code>DELETE</code>
115 * statements, such as <code>SELECT * FROM Customers</code> or
116 * <code>SELECT Name, StreetAddress AS Address, City, Zip FROM Customers</code> or
117 * <code>DELETE FROM Customers WHERE Zip=12345</code>:
118 * 
119 * <pre>
120 * public class SampleSqlSelectParser {
121 *     public List&lt;Statement&gt; parse( String ddl ) {
122 *         TokenStream tokens = new TokenStream(ddl, new SqlTokenizer(), false);
123 *         List&lt;Statement&gt; statements = new LinkedList&lt;Statement&gt;();
 *         tokens.start();
125 *         while (tokens.hasNext()) {
126 *             if (tokens.matches(&quot;SELECT&quot;)) {
127 *                 statements.add(parseSelect(tokens));
128 *             } else {
129 *                 statements.add(parseDelete(tokens));
130 *             }
131 *         }
132 *         return statements;
133 *     }
134 * 
135 *     protected Select parseSelect( TokenStream tokens ) throws ParsingException {
136 *         tokens.consume(&quot;SELECT&quot;);
137 *         List&lt;Column&gt; columns = parseColumns(tokens);
138 *         tokens.consume(&quot;FROM&quot;);
139 *         String tableName = tokens.consume();
140 *         return new Select(tableName, columns);
141 *     }
142 * 
143 *     protected List&lt;Column&gt; parseColumns( TokenStream tokens ) throws ParsingException {
144 *         List&lt;Column&gt; columns = new LinkedList&lt;Column&gt;();
145 *         if (tokens.matches('*')) {
146 *             tokens.consume(); // leave the columns empty to signal wildcard
147 *         } else {
148 *             // Read names until we see a ','
149 *             do {
150 *                 String columnName = tokens.consume();
151 *                 if (tokens.canConsume(&quot;AS&quot;)) {
152 *                     String columnAlias = tokens.consume();
153 *                     columns.add(new Column(columnName, columnAlias));
154 *                 } else {
155 *                     columns.add(new Column(columnName, null));
156 *                 }
157 *             } while (tokens.canConsume(','));
158 *         }
159 *         return columns;
160 *     }
161 * 
162 *     protected Delete parseDelete( TokenStream tokens ) throws ParsingException {
163 *         tokens.consume(&quot;DELETE&quot;, &quot;FROM&quot;);
164 *         String tableName = tokens.consume();
165 *         tokens.consume(&quot;WHERE&quot;);
166 *         String lhs = tokens.consume();
167 *         tokens.consume('=');
168 *         String rhs = tokens.consume();
169 *         return new Delete(tableName, new Criteria(lhs, rhs));
170 *     }
171 *  }
172 *  public abstract class Statement { ... }
 *  public class Select extends Statement { ... }
174 *  public class Delete extends Statement { ... }
175 *  public class Column { ... }
176 * </pre>
177 * 
178 * This example shows an idiomatic way of writing a parser that is stateless and thread-safe. The <code>parse(...)</code> method
179 * takes the input as a parameter, and returns the domain-specific representation that resulted from the parsing. All other
180 * methods are utility methods that simply encapsulate common logic or make the code more readable.
181 * </p>
182 * <p>
183 * In the example, the <code>parse(...)</code> first creates a TokenStream object (using a Tokenizer implementation that is not
184 * shown), and then loops as long as there are more tokens to read. As it loops, if the next token is "SELECT", the parser calls
185 * the <code>parseSelect(...)</code> method which immediately consumes a "SELECT" token, the names of the columns separated by
 * commas (or a '*' if all columns are to be selected), a "FROM" token, and the name of the table being queried. The
 * <code>parseSelect(...)</code> method returns a <code>Select</code> object, which is then added to the list of statements in the
188 * <code>parse(...)</code> method. The parser handles the "DELETE" statements in a similar manner.
189 * </p>
190 * <h3>Case sensitivity</h3>
191 * <p>
 * Very often grammars do not require the case of keywords to match. This can make parsing a challenge, because all combinations
193 * of case need to be used. The TokenStream framework provides a very simple solution that requires no more effort than providing
194 * a boolean parameter to the constructor.
195 * </p>
196 * <p>
 * When a <code>false</code> value is provided for the <code>caseSensitive</code> parameter, the TokenStream performs all
 * matching operations as if each token's value were in uppercase only. This means that the arguments supplied to the
 * <code>matches(...)</code>, <code>canConsume(...)</code>, and <code>consume(...)</code> methods should be upper-cased. Note that
200 * the <i>actual value</i> of each token remains the <i>actual</i> case as it appears in the input.
201 * </p>
202 * <p>
203 * Of course, when the TokenStream is created with a <code>true</code> value for the <code>caseSensitive</code> parameter, the
 * matching is performed using the <i>actual</i> value as it appears in the input content.
205 * </p>
206 * <h3>Whitespace</h3>
207 * <p>
208 * Many grammars are independent of lines breaks or whitespace, allowing a lot of flexibility when writing the content. The
209 * TokenStream framework makes it very easy to ignore line breaks and whitespace. To do so, the Tokenizer implementation must
210 * simply not include the line break character sequences and whitespace in the token ranges. Since none of the tokens contain
211 * whitespace, the parser never has to deal with them.
212 * </p>
213 * <p>
214 * Of course, many parsers will require that some whitespace be included. For example, whitespace within a quoted string may be
215 * needed by the parser. In this case, the Tokenizer should simply include the whitespace characters in the tokens.
216 * </p>
217 * <h3>Writing a Tokenizer</h3>
218 * <p>
219 * Each parser will likely have its own {@link Tokenizer} implementation that contains the parser-specific logic about how to
220 * break the content into token objects. Generally, the easiest way to do this is to simply iterate through the character sequence
221 * passed into the {@link Tokenizer#tokenize(TokenStream.CharacterStream, TokenStream.Tokens) tokenize(...)} method, and use a switch statement to decide
222 * what to do. 
223 * </p>
224 * <p>
225 * Here is the code for a very basic Tokenizer implementation that ignores whitespace, line breaks and Java-style (multi-line and
226 * end-of-line) comments, while constructing single tokens for each quoted string.
227 * 
228 * <pre>
229 *  public class BasicTokenizer implements Tokenizer {
230 *      public void tokenize( CharacterStream input,
231 *                            Tokens tokens ) throws ParsingException {
232 *          while (input.hasNext()) {
233 *              char c = input.next();
234 *              switch (c) {
235 *                  case ' ':
236 *                  case '\t':
237 *                  case '\n':
238 *                  case '\r':
239 *                      // Just skip these whitespace characters ...
240 *                      break;
241 *                  case '-':
242 *                  case '(':
243 *                  case ')':
244 *                  case '{':
245 *                  case '}':
246 *                  case '*':
247 *                  case ',':
248 *                  case ';':
249 *                  case '+':
250 *                  case '%':
251 *                  case '?':
252 *                  case '$':
253 *                  case '[':
254 *                  case ']':
255 *                  case '!':
256 *                  case '<':
257 *                  case '>':
258 *                  case '|':
259 *                  case '=':
260 *                  case ':':
261 *                      tokens.addToken(input.index(), input.index() + 1, SYMBOL);
262 *                      break;
263 *                  case '.':
264 *                      tokens.addToken(input.index(), input.index() + 1, DECIMAL);
265 *                      break;
 *                  case '\"':
268 *                      int startIndex = input.index();
269 *                      Position startingPosition = input.position();
270 *                      boolean foundClosingQuote = false;
271 *                      while (input.hasNext()) {
272 *                          c = input.next();
273 *                          if (c == '\\' && input.isNext('"')) {
 *                              c = input.next(); // consume the " character since it is escaped
275 *                          } else if (c == '"') {
276 *                              foundClosingQuote = true;
277 *                              break;
278 *                          }
279 *                      }
280 *                      if (!foundClosingQuote) {
281 *                          throw new ParsingException(startingPosition, "No matching closing double quote found");
282 *                      }
283 *                      int endIndex = input.index() + 1; // beyond last character read
284 *                      tokens.addToken(startIndex, endIndex, DOUBLE_QUOTED_STRING);
285 *                      break;
286 *                  case '\'':
287 *                      startIndex = input.index();
288 *                      startingPosition = input.position();
289 *                      foundClosingQuote = false;
290 *                      while (input.hasNext()) {
291 *                          c = input.next();
292 *                          if (c == '\\' && input.isNext('\'')) {
293 *                              c = input.next(); // consume the ' character since it is escaped
294 *                          } else if (c == '\'') {
295 *                              foundClosingQuote = true;
296 *                              break;
297 *                          }
298 *                      }
299 *                      if (!foundClosingQuote) {
300 *                          throw new ParsingException(startingPosition, "No matching closing single quote found");
301 *                      }
302 *                      endIndex = input.index() + 1; // beyond last character read
303 *                      tokens.addToken(startIndex, endIndex, SINGLE_QUOTED_STRING);
304 *                      break;
305 *                  case '/':
306 *                      startIndex = input.index();
307 *                      if (input.isNext('/')) {
308 *                          // End-of-line comment ...
309 *                          boolean foundLineTerminator = false;
310 *                          while (input.hasNext()) {
311 *                              c = input.next();
312 *                              if (c == '\n' || c == '\r') {
313 *                                  foundLineTerminator = true;
314 *                                  break;
315 *                              }
316 *                          }
317 *                          endIndex = input.index(); // the token won't include the '\n' or '\r' character(s)
318 *                          if (!foundLineTerminator) ++endIndex; // must point beyond last char
319 *                          if (c == '\r' && input.isNext('\n')) input.next();
320 *                          if (useComments) {
321 *                              tokens.addToken(startIndex, endIndex, COMMENT);
322 *                          }
323 *                      } else if (input.isNext('*')) {
324 *                          // Multi-line comment ...
325 *                          while (input.hasNext() && !input.isNext('*', '/')) {
326 *                              c = input.next();
327 *                          }
328 *                          if (input.hasNext()) input.next(); // consume the '*'
329 *                          if (input.hasNext()) input.next(); // consume the '/'
330 *                          if (useComments) {
331 *                              endIndex = input.index() + 1; // the token will include the '/' and '*' characters
332 *                              tokens.addToken(startIndex, endIndex, COMMENT);
333 *                          }
334 *                      } else {
335 *                          // just a regular slash ...
336 *                          tokens.addToken(startIndex, startIndex + 1, SYMBOL);
337 *                      }
338 *                      break;
339 *                  default:
340 *                      startIndex = input.index();
341 *                      // Read until another whitespace/symbol/decimal/slash is found
342 *                      while (input.hasNext() && !(input.isNextWhitespace() || input.isNextAnyOf("/.-(){}*,;+%?$[]!<>|=:"))) {
343 *                          c = input.next();
344 *                      }
345 *                      endIndex = input.index() + 1; // beyond last character that was included
346 *                      tokens.addToken(startIndex, endIndex, WORD);
347 *              }
348 *          }
349 *      }
350 *  }
351 * </pre>
352 * Tokenizers with exactly this behavior can actually be created using the {@link #basicTokenizer(boolean)} method.  So while this very
353 * basic implementation is not meant to be used in all situations, it may be useful in some situations.
354 * </p>
355 */
356@NotThreadSafe
357public class TokenStream {
358
359    /**
360     * A constant that can be used with the {@link #matches(String)}, {@link #matches(String, String...)},
361     * {@link #consume(String)}, {@link #consume(String, String...)}, {@link #canConsume(String)} and
362     * {@link #canConsume(String, String...)} methods to signal that any value is allowed to be matched.
363     * <p>
364     * Note that this exact instance must be used; an equivalent string will not work.
365     * </p>
366     */
367    public static final String ANY_VALUE = "any value";
368    /**
369     * A constant that can be used with the {@link #matches(int)}, {@link #matches(int, int...)}, {@link #consume(int)}, and
370     * {@link #canConsume(int)} methods to signal that any token type is allowed to be matched.
371     */
372    public static final int ANY_TYPE = Integer.MIN_VALUE;
373
374    protected final String inputString;
375    private final char[] inputContent;
376    private final boolean caseSensitive;
377    private final Tokenizer tokenizer;
378    private List<Token> tokens;
379    /**
380     * This class navigates the Token objects using this iterator. However, because it very often needs to access the
381     * "current token" in the "consume(...)" and "canConsume(...)" and "matches(...)" methods, the class caches a "current token"
382     * and makes this iterator point to the 2nd token.
383     * 
384     * <pre>
385     *     T1     T2    T3    T4    T5
386     *         &circ;   &circ;  &circ;
387     *         |   |  |
388     *         |   |  +- The position of the tokenIterator, where tokenIterator.hasNext() will return T3
389     *         |   +---- The token referenced by currentToken
390     *         +-------- The logical position of the TokenStream object, where the &quot;consume()&quot; would return T2
391     * </pre>
392     */
393    private ListIterator<Token> tokenIterator;
394    private Token currentToken;
395    private boolean completed;
396
397    public TokenStream( String content,
398                        Tokenizer tokenizer,
399                        boolean caseSensitive ) {
400        CheckArg.isNotNull(content, "content");
401        CheckArg.isNotNull(tokenizer, "tokenizer");
402        this.inputString = content;
403        this.inputContent = content.toCharArray();
404        this.caseSensitive = caseSensitive;
405        this.tokenizer = tokenizer;
406    }
407
408    /**
409     * Begin the token stream, including (if required) the tokenization of the input content.
410     * 
411     * @return this object for easy method chaining; never null
412     * @throws ParsingException if an error occurs during tokenization of the content
413     */
414    public TokenStream start() throws ParsingException {
415        // Create the tokens ...
416        if (tokens == null) {
417            TokenFactory tokenFactory = caseSensitive ? new CaseSensitiveTokenFactory() : new CaseInsensitiveTokenFactory();
418            CharacterStream characterStream = new CharacterArrayStream(inputContent);
419            tokenizer.tokenize(characterStream, tokenFactory);
420            this.tokens = initializeTokens(tokenFactory.getTokens());
421        }
422
423        // Create the iterator ...
424        tokenIterator = this.tokens.listIterator();
425        moveToNextToken();
426        return this;
427    }
428
429    /**
430     * Method to allow subclasses to preprocess the set of tokens and return the correct tokens to use. The default behavior is to
431     * simply return the supplied tokens.
432     * 
433     * @param tokens
434     * @return list of tokens.
435     */
436    protected List<Token> initializeTokens( List<Token> tokens ) {
437        return tokens;
438    }
439
440    /**
441     * Method to allow tokens to be re-used from the start without re-tokenizing content.
442     */
443    public void rewind() {
444        // recreate the iterator ...
445        tokenIterator = this.tokens.listIterator();
446        completed = false;
447        currentToken = null;
448        moveToNextToken();
449    }
450
451    /**
452     * Get the position of the previous token.
453     * 
454     * @return the previous token's position; never null
455     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
456     * @throws NoSuchElementException if there is no previous token
457     */
458    public Position previousPosition() {
459        return previousToken().position();
460    }
461
462    /**
463     * Get the position of the next (or current) token.
464     * 
465     * @return the current token's position; never null
466     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
467     * @throws NoSuchElementException if there is no previous token
468     */
469    public Position nextPosition() {
470        return currentToken().position();
471    }
472
473    /**
474     * Convert the value of this token to an integer, return it, and move to the next token.
475     * 
476     * @return the current token's value, converted to an integer
477     * @throws ParsingException if there is no such token to consume, or if the token cannot be converted to an integer
478     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
479     */
480    public int consumeInteger() throws ParsingException, IllegalStateException {
481        if (completed) throwNoMoreContent();
482        // Get the value from the current token ...
483        String value = currentToken().value();
484        try {
485            int result = Integer.parseInt(value);
486            moveToNextToken();
487            return result;
488        } catch (NumberFormatException e) {
489            Position position = currentToken().position();
490            String msg = CommonI18n.expectingValidIntegerAtLineAndColumn.text(value, position.getLine(), position.getColumn());
491            throw new ParsingException(position, msg);
492        }
493    }
494
495    /**
496     * Convert the value of this token to a long, return it, and move to the next token.
497     * 
498     * @return the current token's value, converted to an integer
499     * @throws ParsingException if there is no such token to consume, or if the token cannot be converted to a long
500     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
501     */
502    public long consumeLong() throws ParsingException, IllegalStateException {
503        if (completed) throwNoMoreContent();
504        // Get the value from the current token ...
505        String value = currentToken().value();
506        try {
507            long result = Long.parseLong(value);
508            moveToNextToken();
509            return result;
510        } catch (NumberFormatException e) {
511            Position position = currentToken().position();
512            String msg = CommonI18n.expectingValidLongAtLineAndColumn.text(value, position.getLine(), position.getColumn());
513            throw new ParsingException(position, msg);
514        }
515    }
516
517    /**
518     * Convert the value of this token to an integer, return it, and move to the next token.
519     * 
520     * @return the current token's value, converted to an integer
521     * @throws ParsingException if there is no such token to consume, or if the token cannot be converted to an integer
522     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
523     */
524    public boolean consumeBoolean() throws ParsingException, IllegalStateException {
525        if (completed) throwNoMoreContent();
526        // Get the value from the current token ...
527        String value = currentToken().value();
528        try {
529            boolean result = Boolean.parseBoolean(value);
530            moveToNextToken();
531            return result;
532        } catch (NumberFormatException e) {
533            Position position = currentToken().position();
534            String msg = CommonI18n.expectingValidBooleanAtLineAndColumn.text(value, position.getLine(), position.getColumn());
535            throw new ParsingException(position, msg);
536        }
537    }
538
539    /**
540     * Return the value of this token and move to the next token.
541     * 
542     * @return the value of the current token
543     * @throws ParsingException if there is no such token to consume
544     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
545     */
546    public String consume() throws ParsingException, IllegalStateException {
547        if (completed) throwNoMoreContent();
548        // Get the value from the current token ...
549        String result = currentToken().value();
550        moveToNextToken();
551        return result;
552    }
553
554    protected void throwNoMoreContent() throws ParsingException {
555        String msg = CommonI18n.noMoreContent.text();
556        Position pos = tokens.isEmpty() ? new Position(-1, 1, 0) : tokens.get(tokens.size() - 1).position();
557        throw new ParsingException(pos, msg);
558    }
559
560    /**
561     * Attempt to consume this current token as long as it matches the expected value, or throw an exception if the token does not
562     * match.
563     * <p>
564     * The {@link #ANY_VALUE ANY_VALUE} constant can be used in the expected values as a wildcard.
565     * </p>
566     * 
567     * @param expected the expected value of the current token
568     * @throws ParsingException if the current token doesn't match the supplied value
569     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
570     */
571    public void consume( String expected ) throws ParsingException, IllegalStateException {
572        if (completed) {
573            String msg = CommonI18n.noMoreContentButWasExpectingToken.text(expected);
574            throw new ParsingException(tokens.get(tokens.size() - 1).position(), msg);
575        }
576        // Get the value from the current token ...
577        if (expected != ANY_VALUE && !currentToken().matches(expected)) {
578            String found = currentToken().value();
579            Position pos = currentToken().position();
580            String fragment = generateFragment();
581            String msg = CommonI18n.unexpectedToken.text(expected, found, pos.getLine(), pos.getColumn(), fragment);
582            throw new ParsingException(pos, msg);
583        }
584        moveToNextToken();
585    }
586
587    /**
588     * Attempt to consume this current token as long as it matches the expected character, or throw an exception if the token does
589     * not match.
590     * 
591     * @param expected the expected character of the current token
592     * @throws ParsingException if the current token doesn't match the supplied value
593     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
594     */
595    public void consume( char expected ) throws ParsingException, IllegalStateException {
596        if (completed) {
597            String msg = CommonI18n.noMoreContentButWasExpectingCharacter.text(expected);
598            throw new ParsingException(tokens.get(tokens.size() - 1).position(), msg);
599        }
600        // Get the value from the current token ...
601        if (!currentToken().matches(expected)) {
602            String found = currentToken().value();
603            Position pos = currentToken().position();
604            String fragment = generateFragment();
605            String msg = CommonI18n.unexpectedCharacter.text(expected, found, pos.getLine(), pos.getColumn(), fragment);
606            throw new ParsingException(pos, msg);
607        }
608        moveToNextToken();
609    }
610
611    /**
612     * Attempt to consume this current token as long as it matches the expected character, or throw an exception if the token does
613     * not match.
614     * <p>
615     * The {@link #ANY_TYPE ANY_TYPE} constant can be used in the expected values as a wildcard.
616     * </p>
617     * 
618     * @param expectedType the expected token type of the current token
619     * @throws ParsingException if the current token doesn't match the supplied value
620     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
621     */
622    public void consume( int expectedType ) throws ParsingException, IllegalStateException {
623        if (completed) {
624            String msg = CommonI18n.noMoreContentButWasExpectingTokenType.text(expectedType);
625            throw new ParsingException(tokens.get(tokens.size() - 1).position(), msg);
626        }
627        // Get the value from the current token ...
628        if (expectedType != ANY_TYPE && currentToken().type() != expectedType) {
629            String found = currentToken().value();
630            Position pos = currentToken().position();
631            String fragment = generateFragment();
632            String msg = CommonI18n.unexpectedTokenType.text(expectedType, found, pos.getLine(), pos.getColumn(), fragment);
633            throw new ParsingException(pos, msg);
634        }
635        moveToNextToken();
636    }
637
638    /**
639     * Attempt to consume this current token as the next tokens as long as they match the expected values, or throw an exception
640     * if the token does not match.
641     * <p>
642     * The {@link #ANY_VALUE ANY_VALUE} constant can be used in the expected values as a wildcard.
643     * </p>
644     * 
645     * @param expected the expected value of the current token
646     * @param expectedForNextTokens the expected values fo the following tokens
647     * @throws ParsingException if the current token doesn't match the supplied value
648     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
649     */
650    public void consume( String expected,
651                         String... expectedForNextTokens ) throws ParsingException, IllegalStateException {
652        consume(expected);
653        for (String nextExpected : expectedForNextTokens) {
654            consume(nextExpected);
655        }
656    }
657
658    /**
659     * Attempt to consume this current token as the next tokens as long as they match the expected values, or throw an exception
660     * if the token does not match.
661     * <p>
662     * The {@link #ANY_VALUE ANY_VALUE} constant can be used in the expected values as a wildcard.
663     * </p>
664     * 
665     * @param nextTokens the expected values for the next tokens
666     * @throws ParsingException if the current token doesn't match the supplied value
667     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
668     */
669    public void consume( String[] nextTokens ) throws ParsingException, IllegalStateException {
670        for (String nextExpected : nextTokens) {
671            consume(nextExpected);
672        }
673    }
674
675    /**
676     * Attempt to consume this current token as the next tokens as long as they match the expected values, or throw an exception
677     * if the token does not match.
678     * <p>
679     * The {@link #ANY_VALUE ANY_VALUE} constant can be used in the expected values as a wildcard.
680     * </p>
681     * 
682     * @param nextTokens the expected values for the next tokens
683     * @throws ParsingException if the current token doesn't match the supplied value
684     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
685     */
686    public void consume( Iterable<String> nextTokens ) throws ParsingException, IllegalStateException {
687        for (String nextExpected : nextTokens) {
688            consume(nextExpected);
689        }
690    }
691
692    /**
693     * Attempt to consume this current token if it matches the expected value, and return whether this method was indeed able to
694     * consume the token.
695     * <p>
696     * The {@link #ANY_VALUE ANY_VALUE} constant can be used in the expected value as a wildcard.
697     * </p>
698     * 
699     * @param expected the expected value of the current token token
700     * @return true if the current token did match and was consumed, or false if the current token did not match and therefore was
701     *         not consumed
702     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
703     */
704    public boolean canConsume( String expected ) throws IllegalStateException {
705        if (!matches(expected)) return false;
706        moveToNextToken();
707        return true;
708    }
709
710    /**
711     * Attempt to consume this current token if it matches the expected value, and return whether this method was indeed able to
712     * consume the token.
713     * 
714     * @param expected the expected value of the current token token
715     * @return true if the current token did match and was consumed, or false if the current token did not match and therefore was
716     *         not consumed
717     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
718     */
719    public boolean canConsume( char expected ) throws IllegalStateException {
720        if (!matches(expected)) return false;
721        moveToNextToken();
722        return true;
723    }
724
725    /**
726     * Attempt to consume this current token if it matches the expected token type, and return whether this method was indeed able
727     * to consume the token.
728     * <p>
729     * The {@link #ANY_TYPE ANY_TYPE} constant can be used in the expected type as a wildcard.
730     * </p>
731     * 
732     * @param expectedType the expected token type of the current token
733     * @return true if the current token did match and was consumed, or false if the current token did not match and therefore was
734     *         not consumed
735     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
736     */
737    public boolean canConsume( int expectedType ) throws IllegalStateException {
738        if (!matches(expectedType)) return false;
739        moveToNextToken();
740        return true;
741    }
742
743    /**
744     * Attempt to consume this current token and the next tokens if and only if they match the expected values, and return whether
745     * this method was indeed able to consume all of the supplied tokens.
746     * <p>
747     * This is <i>not</i> the same as calling {@link #canConsume(String)} for each of the supplied arguments, since this method
748     * ensures that <i>all</i> of the supplied values can be consumed.
749     * </p>
750     * <p>
751     * This method <i>is</i> equivalent to calling the following:
752     * 
753     * <pre>
754     * 
755     * if (tokens.matches(currentExpected, expectedForNextTokens)) {
756     *     tokens.consume(currentExpected, expectedForNextTokens);
757     * }
758     * 
759     * </pre>
760     * 
761     * </p>
762     * <p>
763     * The {@link #ANY_VALUE ANY_VALUE} constant can be used in the expected values as a wildcard.
764     * </p>
765     * 
766     * @param currentExpected the expected value of the current token
767     * @param expectedForNextTokens the expected values fo the following tokens
768     * @return true if the current token did match and was consumed, or false if the current token did not match and therefore was
769     *         not consumed
770     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
771     */
772    public boolean canConsume( String currentExpected,
773                               String... expectedForNextTokens ) throws IllegalStateException {
774        if (completed) return false;
775        ListIterator<Token> iter = tokens.listIterator(tokenIterator.previousIndex());
776        if (!iter.hasNext()) return false;
777        Token token = iter.next();
778        if (currentExpected != ANY_VALUE && !token.matches(currentExpected)) return false;
779        for (String nextExpected : expectedForNextTokens) {
780            if (!iter.hasNext()) return false;
781            token = iter.next();
782            if (nextExpected == ANY_VALUE) continue;
783            if (!token.matches(nextExpected)) return false;
784        }
785        this.tokenIterator = iter;
786        this.currentToken = tokenIterator.hasNext() ? tokenIterator.next() : null;
787        this.completed = this.currentToken == null;
788        return true;
789    }
790
791    /**
792     * Attempt to consume this current token and the next tokens if and only if they match the expected values, and return whether
793     * this method was indeed able to consume all of the supplied tokens.
794     * <p>
795     * This is <i>not</i> the same as calling {@link #canConsume(String)} for each of the supplied arguments, since this method
796     * ensures that <i>all</i> of the supplied values can be consumed.
797     * </p>
798     * <p>
799     * This method <i>is</i> equivalent to calling the following:
800     * 
801     * <pre>
802     * 
803     * if (tokens.matches(currentExpected, expectedForNextTokens)) {
804     *     tokens.consume(currentExpected, expectedForNextTokens);
805     * }
806     * 
807     * </pre>
808     * 
809     * </p>
810     * <p>
811     * The {@link #ANY_VALUE ANY_VALUE} constant can be used in the expected values as a wildcard.
812     * </p>
813     * 
814     * @param nextTokens the expected values of the next tokens
815     * @return true if the current token did match and was consumed, or false if the current token did not match and therefore was
816     *         not consumed
817     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
818     */
819    public boolean canConsume( String[] nextTokens ) throws IllegalStateException {
820        if (completed) return false;
821        ListIterator<Token> iter = tokens.listIterator(tokenIterator.previousIndex());
822        Token token = null;
823        for (String nextExpected : nextTokens) {
824            if (!iter.hasNext()) return false;
825            token = iter.next();
826            if (nextExpected == ANY_VALUE) continue;
827            if (!token.matches(nextExpected)) return false;
828        }
829        this.tokenIterator = iter;
830        this.currentToken = tokenIterator.hasNext() ? tokenIterator.next() : null;
831        this.completed = this.currentToken == null;
832        return true;
833    }
834
835    /**
836     * Attempt to consume this current token and the next tokens if and only if they match the expected values, and return whether
837     * this method was indeed able to consume all of the supplied tokens.
838     * <p>
839     * This is <i>not</i> the same as calling {@link #canConsume(String)} for each of the supplied arguments, since this method
840     * ensures that <i>all</i> of the supplied values can be consumed.
841     * </p>
842     * <p>
843     * This method <i>is</i> equivalent to calling the following:
844     * 
845     * <pre>
846     * 
847     * if (tokens.matches(currentExpected, expectedForNextTokens)) {
848     *     tokens.consume(currentExpected, expectedForNextTokens);
849     * }
850     * 
851     * </pre>
852     * 
853     * </p>
854     * <p>
855     * The {@link #ANY_VALUE ANY_VALUE} constant can be used in the expected values as a wildcard.
856     * </p>
857     * 
858     * @param nextTokens the expected values of the next tokens
859     * @return true if the current token did match and was consumed, or false if the current token did not match and therefore was
860     *         not consumed
861     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
862     */
863    public boolean canConsume( Iterable<String> nextTokens ) throws IllegalStateException {
864        if (completed) return false;
865        ListIterator<Token> iter = tokens.listIterator(tokenIterator.previousIndex());
866        Token token = null;
867        for (String nextExpected : nextTokens) {
868            if (!iter.hasNext()) return false;
869            token = iter.next();
870            if (nextExpected == ANY_VALUE) continue;
871            if (!token.matches(nextExpected)) return false;
872        }
873        this.tokenIterator = iter;
874        this.currentToken = tokenIterator.hasNext() ? tokenIterator.next() : null;
875        this.completed = this.currentToken == null;
876        return true;
877    }
878
879    /**
880     * Attempt to consume the next token if it matches one of the supplied values.
881     * 
882     * @param firstOption the first option for the value of the current token
883     * @param additionalOptions the additional options for the value of the current token
884     * @return true if the current token's value did match one of the suplied options, or false otherwise
885     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
886     */
887    public boolean canConsumeAnyOf( String firstOption,
888                                    String... additionalOptions ) throws IllegalStateException {
889        if (completed) return false;
890        if (canConsume(firstOption)) return true;
891        for (String nextOption : additionalOptions) {
892            if (canConsume(nextOption)) return true;
893        }
894        return false;
895    }
896
897    /**
898     * Attempt to consume the next token if it matches one of the supplied values.
899     * 
900     * @param options the options for the value of the current token
901     * @return true if the current token's value did match one of the suplied options, or false otherwise
902     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
903     */
904    public boolean canConsumeAnyOf( String[] options ) throws IllegalStateException {
905        if (completed) return false;
906        for (String option : options) {
907            if (canConsume(option)) return true;
908        }
909        return false;
910    }
911
912    /**
913     * Attempt to consume the next token if it matches one of the supplied values.
914     * 
915     * @param options the options for the value of the current token
916     * @return true if the current token's value did match one of the suplied options, or false otherwise
917     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
918     */
919    public boolean canConsumeAnyOf( Iterable<String> options ) throws IllegalStateException {
920        if (completed) return false;
921        for (String option : options) {
922            if (canConsume(option)) return true;
923        }
924        return false;
925    }
926
927    /**
928     * Attempt to consume the next token if it matches one of the supplied types.
929     * 
930     * @param firstTypeOption the first option for the type of the current token
931     * @param additionalTypeOptions the additional options for the type of the current token
932     * @return true if the current token's type matched one of the supplied options, or false otherwise
933     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
934     */
935    public boolean canConsumeAnyOf( int firstTypeOption,
936                                    int... additionalTypeOptions ) throws IllegalStateException {
937        if (completed) return false;
938        if (canConsume(firstTypeOption)) return true;
939        for (int nextTypeOption : additionalTypeOptions) {
940            if (canConsume(nextTypeOption)) return true;
941        }
942        return false;
943    }
944
945    /**
946     * Attempt to consume the next token if it matches one of the supplied types.
947     * 
948     * @param typeOptions the options for the type of the current token
949     * @return true if the current token's type matched one of the supplied options, or false otherwise
950     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
951     */
952    public boolean canConsumeAnyOf( int[] typeOptions ) throws IllegalStateException {
953        if (completed) return false;
954        for (int nextTypeOption : typeOptions) {
955            if (canConsume(nextTypeOption)) return true;
956        }
957        return false;
958    }
959
960    /**
961     * Determine if the current token matches the expected value.
962     * <p>
963     * The {@link #ANY_VALUE ANY_VALUE} constant can be used as a wildcard.
964     * </p>
965     * 
966     * @param expected the expected value of the current token token
967     * @return true if the current token did match, or false if the current token did not match
968     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
969     */
970    public boolean matches( String expected ) throws IllegalStateException {
971        return !completed && (expected == ANY_VALUE || currentToken().matches(expected));
972    }
973
974    /**
975     * Determine if the current token matches the expected value.
976     * 
977     * @param expected the expected value of the current token token
978     * @return true if the current token did match, or false if the current token did not match
979     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
980     */
981    public boolean matches( char expected ) throws IllegalStateException {
982        return !completed && currentToken().matches(expected);
983    }
984
985    /**
986     * Determine if the current token matches the expected token type.
987     * 
988     * @param expectedType the expected token type of the current token
989     * @return true if the current token did match, or false if the current token did not match
990     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
991     */
992    public boolean matches( int expectedType ) throws IllegalStateException {
993        return !completed && currentToken().matches(expectedType);
994    }
995
996    /**
997     * Determine if the next few tokens match the expected values.
998     * <p>
999     * The {@link #ANY_VALUE ANY_VALUE} constant can be used in the expected values as a wildcard.
1000     * </p>
1001     * 
1002     * @param currentExpected the expected value of the current token
1003     * @param expectedForNextTokens the expected values for the following tokens
1004     * @return true if the tokens did match, or false otherwise
1005     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
1006     */
1007    public boolean matches( String currentExpected,
1008                            String... expectedForNextTokens ) throws IllegalStateException {
1009        if (completed) return false;
1010        ListIterator<Token> iter = tokens.listIterator(tokenIterator.previousIndex());
1011        if (!iter.hasNext()) return false;
1012        Token token = iter.next();
1013        if (currentExpected != ANY_VALUE && !token.matches(currentExpected)) return false;
1014        for (String nextExpected : expectedForNextTokens) {
1015            if (!iter.hasNext()) return false;
1016            token = iter.next();
1017            if (nextExpected == ANY_VALUE) continue;
1018            if (!token.matches(nextExpected)) return false;
1019        }
1020        return true;
1021    }
1022
1023    /**
1024     * Determine if the next few tokens match the expected values.
1025     * <p>
1026     * The {@link #ANY_VALUE ANY_VALUE} constant can be used in the expected values as a wildcard.
1027     * </p>
1028     * 
1029     * @param nextTokens the expected value of the next tokens
1030     * @return true if the tokens did match, or false otherwise
1031     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
1032     */
1033    public boolean matches( String[] nextTokens ) throws IllegalStateException {
1034        if (completed) return false;
1035        ListIterator<Token> iter = tokens.listIterator(tokenIterator.previousIndex());
1036        Token token = null;
1037        for (String nextExpected : nextTokens) {
1038            if (!iter.hasNext()) return false;
1039            token = iter.next();
1040            if (nextExpected == ANY_VALUE) continue;
1041            if (!token.matches(nextExpected)) return false;
1042        }
1043        return true;
1044    }
1045
1046    /**
1047     * Determine if the next few tokens match the expected values.
1048     * <p>
1049     * The {@link #ANY_VALUE ANY_VALUE} constant can be used in the expected values as a wildcard.
1050     * </p>
1051     * 
1052     * @param nextTokens the expected value of the next tokens
1053     * @return true if the tokens did match, or false otherwise
1054     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
1055     */
1056    public boolean matches( Iterable<String> nextTokens ) throws IllegalStateException {
1057        if (completed) return false;
1058        ListIterator<Token> iter = tokens.listIterator(tokenIterator.previousIndex());
1059        Token token = null;
1060        for (String nextExpected : nextTokens) {
1061            if (!iter.hasNext()) return false;
1062            token = iter.next();
1063            if (nextExpected == ANY_VALUE) continue;
1064            if (!token.matches(nextExpected)) return false;
1065        }
1066        return true;
1067    }
1068
1069    /**
1070     * Determine if the next few tokens have the supplied types.
1071     * <p>
1072     * The {@link #ANY_TYPE ANY_TYPE} constant can be used in the expected values as a wildcard.
1073     * </p>
1074     * 
1075     * @param currentExpectedType the expected type of the current token
1076     * @param expectedTypeForNextTokens the expected type for the following tokens
1077     * @return true if the tokens did match, or false otherwise
1078     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
1079     */
1080    public boolean matches( int currentExpectedType,
1081                            int... expectedTypeForNextTokens ) throws IllegalStateException {
1082        if (completed) return false;
1083        ListIterator<Token> iter = tokens.listIterator(tokenIterator.previousIndex());
1084        if (!iter.hasNext()) return false;
1085        Token token = iter.next();
1086        if (currentExpectedType != ANY_TYPE && currentToken().type() != currentExpectedType) return false;
1087        for (int nextExpectedType : expectedTypeForNextTokens) {
1088            if (!iter.hasNext()) return false;
1089            token = iter.next();
1090            if (nextExpectedType == ANY_TYPE) continue;
1091            if (token.type() != nextExpectedType) return false;
1092        }
1093        return true;
1094    }
1095
1096    /**
1097     * Determine if the next few tokens have the supplied types.
1098     * <p>
1099     * The {@link #ANY_TYPE ANY_TYPE} constant can be used in the expected values as a wildcard.
1100     * </p>
1101     * 
1102     * @param typesForNextTokens the expected type for each of the next tokens
1103     * @return true if the tokens did match, or false otherwise
1104     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
1105     */
1106    public boolean matches( int[] typesForNextTokens ) throws IllegalStateException {
1107        if (completed) return false;
1108        ListIterator<Token> iter = tokens.listIterator(tokenIterator.previousIndex());
1109        Token token = null;
1110        for (int nextExpectedType : typesForNextTokens) {
1111            if (!iter.hasNext()) return false;
1112            token = iter.next();
1113            if (nextExpectedType == ANY_TYPE) continue;
1114            if (!token.matches(nextExpectedType)) return false;
1115        }
1116        return true;
1117    }
1118
1119    /**
1120     * Determine if the next token matches one of the supplied values.
1121     * 
1122     * @param firstOption the first option for the value of the current token
1123     * @param additionalOptions the additional options for the value of the current token
1124     * @return true if the current token's value did match one of the suplied options, or false otherwise
1125     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
1126     */
1127    public boolean matchesAnyOf( String firstOption,
1128                                 String... additionalOptions ) throws IllegalStateException {
1129        if (completed) return false;
1130        Token current = currentToken();
1131        if (current.matches(firstOption)) return true;
1132        for (String nextOption : additionalOptions) {
1133            if (current.matches(nextOption)) return true;
1134        }
1135        return false;
1136    }
1137
1138    /**
1139     * Determine if the next token matches one of the supplied values.
1140     * 
1141     * @param options the options for the value of the current token
1142     * @return true if the current token's value did match one of the suplied options, or false otherwise
1143     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
1144     */
1145    public boolean matchesAnyOf( String[] options ) throws IllegalStateException {
1146        if (completed) return false;
1147        Token current = currentToken();
1148        for (String option : options) {
1149            if (current.matches(option)) return true;
1150        }
1151        return false;
1152    }
1153
1154    /**
1155     * Determine if the next token matches one of the supplied values.
1156     * 
1157     * @param options the options for the value of the current token
1158     * @return true if the current token's value did match one of the suplied options, or false otherwise
1159     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
1160     */
1161    public boolean matchesAnyOf( Iterable<String> options ) throws IllegalStateException {
1162        if (completed) return false;
1163        Token current = currentToken();
1164        for (String option : options) {
1165            if (current.matches(option)) return true;
1166        }
1167        return false;
1168    }
1169
1170    /**
1171     * Determine if the next token have one of the supplied types.
1172     * 
1173     * @param firstTypeOption the first option for the type of the current token
1174     * @param additionalTypeOptions the additional options for the type of the current token
1175     * @return true if the current token's type matched one of the supplied options, or false otherwise
1176     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
1177     */
1178    public boolean matchesAnyOf( int firstTypeOption,
1179                                 int... additionalTypeOptions ) throws IllegalStateException {
1180        if (completed) return false;
1181        int currentType = currentToken().type();
1182        if (currentType == firstTypeOption) return true;
1183        for (int nextTypeOption : additionalTypeOptions) {
1184            if (currentType == nextTypeOption) return true;
1185        }
1186        return false;
1187    }
1188
1189    /**
1190     * Determine if the next token have one of the supplied types.
1191     * 
1192     * @param typeOptions the options for the type of the current token
1193     * @return true if the current token's type matched one of the supplied options, or false otherwise
1194     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
1195     */
1196    public boolean matchesAnyOf( int[] typeOptions ) throws IllegalStateException {
1197        if (completed) return false;
1198        int currentType = currentToken().type();
1199        for (int nextTypeOption : typeOptions) {
1200            if (currentType == nextTypeOption) return true;
1201        }
1202        return false;
1203    }
1204
1205    /**
1206     * Determine if this stream has another token to be consumed.
1207     * 
1208     * @return true if there is another token ready for consumption, or false otherwise
1209     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
1210     */
1211    public boolean hasNext() {
1212        if (tokenIterator == null) {
1213            throw new IllegalStateException(CommonI18n.startMethodMustBeCalledBeforeNext.text());
1214        }
1215        return !completed;
1216    }
1217
1218    @Override
1219    public String toString() {
1220        ListIterator<Token> iter = tokens.listIterator(tokenIterator.previousIndex());
1221        StringBuilder sb = new StringBuilder();
1222        if (iter.hasNext()) {
1223            sb.append(iter.next());
1224            int count = 1;
1225            while (iter.hasNext()) {
1226                if (count > 20) {
1227                    sb.append(" ...");
1228                    break;
1229                }
1230                sb.append("  ");
1231                ++count;
1232                sb.append(iter.next());
1233            }
1234        }
1235        return sb.toString();
1236    }
1237
1238    private void moveToNextToken() {
1239        // And move the currentToken to the next token ...
1240        if (!tokenIterator.hasNext()) {
1241            completed = true;
1242            currentToken = null;
1243        } else {
1244            currentToken = tokenIterator.next();
1245        }
1246    }
1247
1248    /**
1249     * Get the current token.
1250     * 
1251     * @return the current token; never null
1252     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
1253     * @throws NoSuchElementException if there are no more tokens
1254     */
1255    final Token currentToken() throws IllegalStateException, NoSuchElementException {
1256        if (currentToken == null) {
1257            if (completed) {
1258                throw new NoSuchElementException(CommonI18n.noMoreContent.text());
1259            }
1260            throw new IllegalStateException(CommonI18n.startMethodMustBeCalledBeforeConsumingOrMatching.text());
1261        }
1262        assert currentToken != null;
1263        return currentToken;
1264    }
1265
1266    /**
1267     * Gets the content string starting at the first position (inclusive) and continuing up to the end position (exclusive).
1268     * 
1269     * @param starting the position marking the beginning of the desired content string.
1270     * @param end the position located directly after the returned content string; can be null, which means end of content
1271     * @return the content string; never null
1272     */
1273    public String getContentBetween( Position starting,
1274                                     Position end ) {
1275        CheckArg.isNotNull(starting, "starting");
1276
1277        int startIndex = starting.getIndexInContent();
1278        int endIndex = inputString.length();
1279        if (end != null) {
1280            endIndex = end.getIndexInContent();
1281        }
1282
1283        if (startIndex >= endIndex) {
1284            throw new IllegalArgumentException(CommonI18n.endPositionMustBeGreaterThanStartingPosition.text(startIndex, endIndex));
1285        }
1286
1287        return inputString.substring(startIndex, endIndex);
1288    }
1289
1290    /**
1291     * Get the previous token. This does not modify the state.
1292     * 
1293     * @return the previous token; never null
1294     * @throws IllegalStateException if this method was called before the stream was {@link #start() started}
1295     * @throws NoSuchElementException if there is no previous token
1296     */
1297    final Token previousToken() throws IllegalStateException, NoSuchElementException {
1298        if (currentToken == null) {
1299            if (completed) {
1300                if (tokens.isEmpty()) {
1301                    throw new NoSuchElementException(CommonI18n.noMoreContent.text());
1302                }
1303                return tokens.get(tokens.size() - 1);
1304            }
1305            throw new IllegalStateException(CommonI18n.startMethodMustBeCalledBeforeConsumingOrMatching.text());
1306        }
1307        if (tokenIterator.previousIndex() == 0) {
1308            throw new NoSuchElementException(CommonI18n.noMoreContent.text());
1309        }
1310        return tokens.get(tokenIterator.previousIndex() - 1);
1311    }
1312
1313    String generateFragment() {
1314        // Find the current position ...
1315        assert currentToken != null;
1316        int startIndex = currentToken.startIndex();
1317        return generateFragment(inputString, startIndex, 20, " ===>> ");
1318    }
1319
1320    /**
1321     * Utility method to generate a highlighted fragment of a particular point in the stream.
1322     * 
1323     * @param content the content from which the fragment should be taken; may not be null
1324     * @param indexOfProblem the index of the problem point that should be highlighted; must be a valid index in the content
1325     * @param charactersToIncludeBeforeAndAfter the maximum number of characters before and after the problem point to include in
1326     *        the fragment
1327     * @param highlightText the text that should be included in the fragment at the problem point to highlight the location, or an
1328     *        empty string if there should be no highlighting
1329     * @return the highlighted fragment; never null
1330     */
1331    static String generateFragment( String content,
1332                                    int indexOfProblem,
1333                                    int charactersToIncludeBeforeAndAfter,
1334                                    String highlightText ) {
1335        assert content != null;
1336        assert indexOfProblem < content.length();
1337        // Find the substring that immediately precedes the current position ...
1338        int beforeStart = Math.max(0, indexOfProblem - charactersToIncludeBeforeAndAfter);
1339        String before = content.substring(beforeStart, indexOfProblem);
1340
1341        // Find the substring that immediately follows the current position ...
1342        int afterEnd = Math.min(indexOfProblem + charactersToIncludeBeforeAndAfter, content.length());
1343        String after = content.substring(indexOfProblem, afterEnd);
1344
1345        return before + (highlightText != null ? highlightText : "") + after;
1346    }
1347
1348    /**
1349     * Interface for a Tokenizer component responsible for processing the characters in a {@link CharacterStream} and constructing
1350     * the appropriate {@link Token} objects.
1351     */
1352    public static interface Tokenizer {
1353        /**
1354         * Process the supplied characters and construct the appropriate {@link Token} objects.
1355         * 
1356         * @param input the character input stream; never null
1357         * @param tokens the factory for {@link Token} objects, which records the order in which the tokens are created
1358         * @throws ParsingException if there is an error while processing the character stream (e.g., a quote is not closed, etc.)
1359         */
1360        void tokenize( CharacterStream input,
1361                       Tokens tokens ) throws ParsingException;
1362    }
1363
1364    /**
1365     * Interface used by a {@link Tokenizer} to iterate through the characters in the content input to the {@link TokenStream}.
1366     */
1367    public static interface CharacterStream {
1368
1369        /**
1370         * Determine if there is another character available in this stream.
1371         * 
1372         * @return true if there is another character (and {@link #next()} can be called), or false otherwise
1373         */
1374        boolean hasNext();
1375
1376        /**
1377         * Obtain the next character value, and advance the stream.
1378         * 
1379         * @return the next character
1380         * @throws NoSuchElementException if there is no {@link #hasNext() next character}
1381         */
1382        char next();
1383
1384        /**
1385         * Get the index for the last character returned from {@link #next()}.
1386         * 
1387         * @return the index of the last character returned
1388         */
1389        int index();
1390
1391        /**
1392         * Get the position for the last character returned from {@link #next()}.
1393         * 
1394         * @param startIndex
1395         * @return the position of the last character returned; never null
1396         */
1397        Position position( int startIndex );
1398
1399        /**
1400         * Determine if the next character on the sream is a {@link Character#isWhitespace(char) whitespace character}. This
1401         * method does <i>not</i> advance the stream.
1402         * 
1403         * @return true if there is a {@link #next() next} character and it is a whitespace character, or false otherwise
1404         */
1405        boolean isNextWhitespace();
1406
1407        /**
1408         * Determine if the next character on the sream is a {@link Character#isLetterOrDigit(char) letter or digit}. This method
1409         * does <i>not</i> advance the stream.
1410         * 
1411         * @return true if there is a {@link #next() next} character and it is a letter or digit, or false otherwise
1412         */
1413        boolean isNextLetterOrDigit();
1414
1415        /**
1416         * Determine if the next character on the sream is a {@link XmlCharacters#isValid(int) valid XML character}. This method
1417         * does <i>not</i> advance the stream.
1418         * 
1419         * @return true if there is a {@link #next() next} character and it is a valid XML character, or false otherwise
1420         */
1421        boolean isNextValidXmlCharacter();
1422
1423        /**
1424         * Determine if the next character on the sream is a {@link XmlCharacters#isValidName(int) valid XML NCName character}.
1425         * This method does <i>not</i> advance the stream.
1426         * 
1427         * @return true if there is a {@link #next() next} character and it is a valid XML Name character, or false otherwise
1428         */
1429        boolean isNextValidXmlNameCharacter();
1430
1431        /**
1432         * Determine if the next character on the sream is a {@link XmlCharacters#isValidNcName(int) valid XML NCName character}.
1433         * This method does <i>not</i> advance the stream.
1434         * 
1435         * @return true if there is a {@link #next() next} character and it is a valid XML NCName character, or false otherwise
1436         */
1437        boolean isNextValidXmlNcNameCharacter();
1438
1439        /**
1440         * Determine if the next character on the sream is the supplied value. This method does <i>not</i> advance the stream.
1441         * 
1442         * @param c the character value to compare to the next character on the stream
1443         * @return true if there is a {@link #next() next} character and it is the supplied character, or false otherwise
1444         */
1445        boolean isNext( char c );
1446
1447        /**
1448         * Determine if the next two characters on the stream match the supplied values. This method does <i>not</i> advance the
1449         * stream.
1450         * 
1451         * @param nextChar the character value to compare to the next character on the stream
1452         * @param followingChar the character value to compare to the character immediately after the next character on the stream
1453         * @return true if there are at least two characters left on the stream and the first matches <code>nextChar</code> and
1454         *         the second matches <code>followingChar</code>
1455         */
1456        boolean isNext( char nextChar,
1457                        char followingChar );
1458
1459        /**
1460         * Determine if the next three characters on the sream match the supplied values. This method does <i>not</i> advance the
1461         * stream.
1462         * 
1463         * @param nextChar the character value to compare to the next character on the stream
1464         * @param nextChar2 the character value to compare to the second character on the stream
1465         * @param nextChar3 the character value to compare to the second character on the stream
1466         * @return true if there are at least two characters left on the stream and the first matches <code>nextChar</code> and
1467         *         the second matches <code>followingChar</code>
1468         */
1469        boolean isNext( char nextChar,
1470                        char nextChar2,
1471                        char nextChar3 );
1472
1473        /**
1474         * Determine if the next character on the stream matches one of the supplied characters. This method does <i>not</i>
1475         * advance the stream.
1476         * 
1477         * @param characters the characters to match
1478         * @return true if there is a {@link #next() next} character and it does match one of the supplied characters, or false
1479         *         otherwise
1480         */
1481        boolean isNextAnyOf( char[] characters );
1482
1483        /**
1484         * Determine if the next character on the stream matches one of the supplied characters. This method does <i>not</i>
1485         * advance the stream.
1486         * 
1487         * @param characters the characters to match
1488         * @return true if there is a {@link #next() next} character and it does match one of the supplied characters, or false
1489         *         otherwise
1490         */
1491        boolean isNextAnyOf( String characters );
1492
1493    }
1494
1495    /**
1496     * A factory for Token objects, used by a {@link Tokenizer} to create tokens in the correct order.
1497     */
1498    public static interface Tokens {
1499        /**
1500         * Create a single-character token at the supplied index in the character stream. The token type is set to 0, meaning this
1501         * is equivalent to calling <code>addToken(index,index+1)</code> or <code>addToken(index,index+1,0)</code>.
1502         * 
1503         * @param position the position (line and column numbers) of this new token; may not be null
1504         * @param index the index of the character to appear in the token; must be a valid index in the stream
1505         */
1506        void addToken( Position position,
1507                       int index );
1508
1509        /**
1510         * Create a single- or multi-character token with the characters in the range given by the starting and ending index in
1511         * the character stream. The character at the ending index is <i>not</i> included in the token (as this is standard
1512         * practice when using 0-based indexes). The token type is set to 0, meaning this is equivalent to calling <code>
1513         * addToken(startIndex,endIndex,0)</code> .
1514         * 
1515         * @param position the position (line and column numbers) of this new token; may not be null
1516         * @param startIndex the index of the first character to appear in the token; must be a valid index in the stream
1517         * @param endIndex the index just past the last character to appear in the token; must be a valid index in the stream
1518         */
1519        void addToken( Position position,
1520                       int startIndex,
1521                       int endIndex );
1522
1523        /**
1524         * Create a single- or multi-character token with the supplied type and with the characters in the range given by the
1525         * starting and ending index in the character stream. The character at the ending index is <i>not</i> included in the
1526         * token (as this is standard practice when using 0-based indexes).
1527         * 
1528         * @param position the position (line and column numbers) of this new token; may not be null
1529         * @param startIndex the index of the first character to appear in the token; must be a valid index in the stream
1530         * @param endIndex the index just past the last character to appear in the token; must be a valid index in the stream
1531         * @param type the type of the token
1532         */
1533        void addToken( Position position,
1534                       int startIndex,
1535                       int endIndex,
1536                       int type );
1537    }
1538
1539    /**
1540     * The interface defining a token, which references the characters in the actual input character stream.
1541     * 
1542     * @see CaseSensitiveTokenFactory
1543     * @see CaseInsensitiveTokenFactory
1544     */
1545    @Immutable
1546    public interface Token {
1547        /**
1548         * Get the value of the token, in actual case.
1549         * 
1550         * @return the value
1551         */
1552        String value();
1553
1554        /**
1555         * Determine if the token matches the supplied string.
1556         * 
1557         * @param expected the expected value
1558         * @return true if the token's value matches the supplied value, or false otherwise
1559         */
1560        boolean matches( String expected );
1561
1562        /**
1563         * Determine if the token matches the supplied character.
1564         * 
1565         * @param expected the expected character value
1566         * @return true if the token's value matches the supplied character value, or false otherwise
1567         */
1568        boolean matches( char expected );
1569
1570        /**
1571         * Determine if the token matches the supplied type.
1572         * 
1573         * @param expectedType the expected integer type
1574         * @return true if the token's value matches the supplied integer type, or false otherwise
1575         */
1576        boolean matches( int expectedType );
1577
1578        /**
1579         * Get the type of the token.
1580         * 
1581         * @return the token's type
1582         */
1583        int type();
1584
1585        /**
1586         * Get the index in the raw stream for the first character in the token.
1587         * 
1588         * @return the starting index of the token
1589         */
1590        int startIndex();
1591
1592        /**
1593         * Get the index in the raw stream past the last character in the token.
1594         * 
1595         * @return the ending index of the token, which is past the last character
1596         */
1597        int endIndex();
1598
1599        /**
1600         * Get the length of the token, which is equivalent to <code>endIndex() - startIndex()</code>.
1601         * 
1602         * @return the length
1603         */
1604        int length();
1605
1606        /**
1607         * Get the position of this token, which includes the line number and column number of the first character in the token.
1608         * 
1609         * @return the position; never null
1610         */
1611        Position position();
1612
1613        /**
1614         * Bitmask ORed with existing type value.
1615         * 
1616         * @param typeMask
1617         * @return copy of Token with new type
1618         */
1619        Token withType( int typeMask );
1620    }
1621
1622    /**
1623     * An immutable {@link Token} that implements matching using case-sensitive logic.
1624     */
1625    @Immutable
1626    protected class CaseSensitiveToken implements Token {
1627        private final int startIndex;
1628        private final int endIndex;
1629        private final int type;
1630        private final Position position;
1631
1632        public CaseSensitiveToken( int startIndex,
1633                                   int endIndex,
1634                                   int type,
1635                                   Position position ) {
1636            this.startIndex = startIndex;
1637            this.endIndex = endIndex;
1638            this.type = type;
1639            this.position = position;
1640        }
1641
1642        @Override
1643        public Token withType( int typeMask ) {
1644            int type = this.type | typeMask;
1645            return new CaseSensitiveToken(startIndex, endIndex, type, position);
1646        }
1647
1648        @Override
1649        public final int type() {
1650            return type;
1651        }
1652
1653        @Override
1654        public final int startIndex() {
1655            return startIndex;
1656        }
1657
1658        @Override
1659        public final int endIndex() {
1660            return endIndex;
1661        }
1662
1663        @Override
1664        public final int length() {
1665            return endIndex - startIndex;
1666        }
1667
1668        @Override
1669        public final boolean matches( char expected ) {
1670            return length() == 1 && matchString().charAt(startIndex) == expected;
1671        }
1672
1673        @Override
1674        public boolean matches(String expected) {
1675            return matchString().substring(startIndex, endIndex).equals(expected);
1676        }
1677
1678        @Override
1679        public final boolean matches( int expectedType ) {
1680            return expectedType == ANY_TYPE || (currentToken().type() & expectedType) == expectedType;
1681        }
1682
1683        @Override
1684        public final String value() {
1685            return inputString.substring(startIndex, endIndex);
1686        }
1687
1688        @Override
1689        public Position position() {
1690            return position;
1691        }
1692
1693        protected String matchString() {
1694            return inputString;
1695        }
1696
1697        @Override
1698        public String toString() {
1699            return value();
1700        }
1701    }
1702
1703    @Immutable
1704    protected class CaseInsensitiveToken extends CaseSensitiveToken {
1705        public CaseInsensitiveToken( int startIndex,
1706                                     int endIndex,
1707                                     int type,
1708                                     Position position ) {
1709            super(startIndex, endIndex, type, position);
1710        }
1711
1712        @Override
1713        public boolean matches( String expected ) {
1714            return matchString().substring(startIndex(), endIndex()).toUpperCase(Locale.ROOT).equals(expected);
1715        }
1716
1717        @Override
1718        public Token withType( int typeMask ) {
1719            int type = this.type() | typeMask;
1720            return new CaseInsensitiveToken(startIndex(), endIndex(), type, position());
1721        }
1722    }
1723
1724    protected abstract class TokenFactory implements Tokens {
1725        protected final List<Token> tokens = new ArrayList<Token>();
1726
1727        @Override
1728        public void addToken( Position position,
1729                              int index ) {
1730            addToken(position, index, index + 1, 0);
1731        }
1732
1733        @Override
1734        public final void addToken( Position position,
1735                                    int startIndex,
1736                                    int endIndex ) {
1737            addToken(position, startIndex, endIndex, 0);
1738        }
1739
1740        public List<Token> getTokens() {
1741            return tokens;
1742        }
1743    }
1744
1745    public class CaseSensitiveTokenFactory extends TokenFactory {
1746        @Override
1747        public void addToken( Position position,
1748                              int startIndex,
1749                              int endIndex,
1750                              int type ) {
1751            tokens.add(new CaseSensitiveToken(startIndex, endIndex, type, position));
1752        }
1753    }
1754
1755    public class CaseInsensitiveTokenFactory extends TokenFactory {
1756        @Override
1757        public void addToken( Position position,
1758                              int startIndex,
1759                              int endIndex,
1760                              int type ) {
1761            tokens.add(new CaseInsensitiveToken(startIndex, endIndex, type, position));
1762        }
1763    }
1764
1765    /**
1766     * An implementation of {@link CharacterStream} that works with a single character array.
1767     */
1768    public static final class CharacterArrayStream implements CharacterStream {
1769        private final char[] content;
1770        private int lastIndex = -1;
1771        private final int maxIndex;
1772        private int lineNumber = 1;
1773        private int columnNumber = 0;
1774        private boolean nextCharMayBeLineFeed;
1775
1776        public CharacterArrayStream( char[] content ) {
1777            this.content = content;
1778            this.maxIndex = content.length - 1;
1779        }
1780
1781        @Override
1782        public boolean hasNext() {
1783            return lastIndex < maxIndex;
1784        }
1785
1786        @Override
1787        public int index() {
1788            return lastIndex;
1789        }
1790
1791        @Override
1792        public Position position( int startIndex ) {
1793            return new Position(startIndex, lineNumber, columnNumber);
1794        }
1795
1796        @Override
1797        public char next() {
1798            if (lastIndex >= maxIndex) {
1799                throw new NoSuchElementException();
1800            }
1801            char result = content[++lastIndex];
1802            ++columnNumber;
1803            if (result == '\r') {
1804                nextCharMayBeLineFeed = true;
1805                ++lineNumber;
1806                columnNumber = 0;
1807            } else if (result == '\n') {
1808                if (!nextCharMayBeLineFeed) ++lineNumber;
1809                columnNumber = 0;
1810            } else if (nextCharMayBeLineFeed) {
1811                nextCharMayBeLineFeed = false;
1812            }
1813            return result;
1814        }
1815
1816        @Override
1817        public boolean isNext( char c ) {
1818            int nextIndex = lastIndex + 1;
1819            return nextIndex <= maxIndex && content[nextIndex] == c;
1820        }
1821
1822        @Override
1823        public boolean isNext( char nextChar1,
1824                               char nextChar2 ) {
1825            int nextIndex1 = lastIndex + 1;
1826            int nextIndex2 = lastIndex + 2;
1827            return nextIndex2 <= maxIndex && content[nextIndex1] == nextChar1 && content[nextIndex2] == nextChar2;
1828        }
1829
1830        @Override
1831        public boolean isNext( char nextChar1,
1832                               char nextChar2,
1833                               char nextChar3 ) {
1834            int nextIndex1 = lastIndex + 1;
1835            int nextIndex2 = lastIndex + 2;
1836            int nextIndex3 = lastIndex + 3;
1837            return nextIndex3 <= maxIndex && content[nextIndex1] == nextChar1 && content[nextIndex2] == nextChar2
1838                   && content[nextIndex3] == nextChar3;
1839        }
1840
1841        @Override
1842        public boolean isNextAnyOf( char[] characters ) {
1843            int nextIndex = lastIndex + 1;
1844            if (nextIndex <= maxIndex) {
1845                char nextChar = content[lastIndex + 1];
1846                for (char c : characters) {
1847                    if (c == nextChar) return true;
1848                }
1849            }
1850            return false;
1851        }
1852
1853        @Override
1854        public boolean isNextAnyOf( String characters ) {
1855            int nextIndex = lastIndex + 1;
1856            if (nextIndex <= maxIndex) {
1857                char nextChar = content[lastIndex + 1];
1858                if (characters.indexOf(nextChar) != -1) return true;
1859            }
1860            return false;
1861        }
1862
1863        @Override
1864        public boolean isNextWhitespace() {
1865            int nextIndex = lastIndex + 1;
1866            return nextIndex <= maxIndex && Character.isWhitespace(content[nextIndex]);
1867        }
1868
1869        @Override
1870        public boolean isNextLetterOrDigit() {
1871            int nextIndex = lastIndex + 1;
1872            return nextIndex <= maxIndex && Character.isLetterOrDigit(content[nextIndex]);
1873        }
1874
1875        @Override
1876        public boolean isNextValidXmlCharacter() {
1877            int nextIndex = lastIndex + 1;
1878            return nextIndex <= maxIndex && XmlCharacters.isValid(content[nextIndex]);
1879        }
1880
1881        @Override
1882        public boolean isNextValidXmlNameCharacter() {
1883            int nextIndex = lastIndex + 1;
1884            return nextIndex <= maxIndex && XmlCharacters.isValidName(content[nextIndex]);
1885        }
1886
1887        @Override
1888        public boolean isNextValidXmlNcNameCharacter() {
1889            int nextIndex = lastIndex + 1;
1890            return nextIndex <= maxIndex && XmlCharacters.isValidNcName(content[nextIndex]);
1891        }
1892    }
1893
1894    /**
1895     * Obtain a basic {@link Tokenizer} implementation that ignores whitespace but includes tokens for individual symbols, the
1896     * period ('.'), single-quoted strings, double-quoted strings, whitespace-delimited words, and optionally comments.
1897     * <p>
1898     * Note that the resulting Tokenizer may not be appropriate in many situations, but is provided merely as a convenience for
1899     * those situations that happen to be able to use it.
1900     * </p>
1901     * 
1902     * @param includeComments true if the comments should be retained and be included in the token stream, or false if comments
1903     *        should be stripped and not included in the token stream
1904     * @return the tokenizer; never null
1905     */
1906    public static BasicTokenizer basicTokenizer( boolean includeComments ) {
1907        return new BasicTokenizer(includeComments);
1908    }
1909
1910    /**
1911     * A basic {@link Tokenizer} implementation that ignores whitespace but includes tokens for individual symbols, the period
1912     * ('.'), single-quoted strings, double-quoted strings, whitespace-delimited words, and optionally comments.
1913     * <p>
1914     * Note this Tokenizer may not be appropriate in many situations, but is provided merely as a convenience for those situations
1915     * that happen to be able to use it.
1916     * </p>
1917     */
1918    public static class BasicTokenizer implements Tokenizer {
1919        /**
1920         * The {@link Token#type() token type} for tokens that represent an unquoted string containing a character sequence made
1921         * up of non-whitespace and non-symbol characters.
1922         */
1923        public static final int WORD = 1;
1924        /**
1925         * The {@link Token#type() token type} for tokens that consist of an individual "symbol" character. The set of characters
1926         * includes: <code>-(){}*,;+%?$[]!<>|=:</code>
1927         */
1928        public static final int SYMBOL = 2;
1929        /**
1930         * The {@link Token#type() token type} for tokens that consist of an individual '.' character.
1931         */
1932        public static final int DECIMAL = 4;
1933        /**
1934         * The {@link Token#type() token type} for tokens that consist of all the characters within single-quotes. Single quote
1935         * characters are included if they are preceded (escaped) by a '\' character.
1936         */
1937        public static final int SINGLE_QUOTED_STRING = 8;
1938        /**
1939         * The {@link Token#type() token type} for tokens that consist of all the characters within double-quotes. Double quote
1940         * characters are included if they are preceded (escaped) by a '\' character.
1941         */
1942        public static final int DOUBLE_QUOTED_STRING = 16;
1943        /**
1944         * The {@link Token#type() token type} for tokens that consist of all the characters between "/*" and "&#42;/" or between
1945         * "//" and the next line terminator (e.g., '\n', '\r' or "\r\n").
1946         */
1947        public static final int COMMENT = 32;
1948
1949        private final boolean useComments;
1950
1951        protected BasicTokenizer( boolean useComments ) {
1952            this.useComments = useComments;
1953        }
1954
1955        @Override
1956        public void tokenize( CharacterStream input,
1957                              Tokens tokens ) throws ParsingException {
1958            while (input.hasNext()) {
1959                char c = input.next();
1960                switch (c) {
1961                    case ' ':
1962                    case '\t':
1963                    case '\n':
1964                    case '\r':
1965                        // Just skip these whitespace characters ...
1966                        break;
1967                    case '-':
1968                    case '(':
1969                    case ')':
1970                    case '{':
1971                    case '}':
1972                    case '*':
1973                    case ',':
1974                    case ';':
1975                    case '+':
1976                    case '%':
1977                    case '?':
1978                    case '$':
1979                    case '[':
1980                    case ']':
1981                    case '!':
1982                    case '<':
1983                    case '>':
1984                    case '|':
1985                    case '=':
1986                    case ':':
1987                        tokens.addToken(input.position(input.index()), input.index(), input.index() + 1, SYMBOL);
1988                        break;
1989                    case '.':
1990                        tokens.addToken(input.position(input.index()), input.index(), input.index() + 1, DECIMAL);
1991                        break;
1992                    case '\"':
1993                        int startIndex = input.index();
1994                        Position startingPosition = input.position(startIndex);
1995                        boolean foundClosingQuote = false;
1996                        while (input.hasNext()) {
1997                            c = input.next();
1998                            if (c == '\\' && input.isNext('"')) {
1999                                c = input.next(); // consume the ' character since it is escaped
2000                            } else if (c == '"') {
2001                                foundClosingQuote = true;
2002                                break;
2003                            }
2004                        }
2005                        if (!foundClosingQuote) {
2006                            String msg = CommonI18n.noMatchingDoubleQuoteFound.text(startingPosition.getLine(),
2007                                                                                    startingPosition.getColumn());
2008                            throw new ParsingException(startingPosition, msg);
2009                        }
2010                        int endIndex = input.index() + 1; // beyond last character read
2011                        tokens.addToken(startingPosition, startIndex, endIndex, DOUBLE_QUOTED_STRING);
2012                        break;
2013                    case '\'':
2014                        startIndex = input.index();
2015                        startingPosition = input.position(startIndex);
2016                        foundClosingQuote = false;
2017                        while (input.hasNext()) {
2018                            c = input.next();
2019                            if (c == '\\' && input.isNext('\'')) {
2020                                c = input.next(); // consume the ' character since it is escaped
2021                            } else if (c == '\'') {
2022                                foundClosingQuote = true;
2023                                break;
2024                            }
2025                        }
2026                        if (!foundClosingQuote) {
2027                            String msg = CommonI18n.noMatchingSingleQuoteFound.text(startingPosition.getLine(),
2028                                                                                    startingPosition.getColumn());
2029                            throw new ParsingException(startingPosition, msg);
2030                        }
2031                        endIndex = input.index() + 1; // beyond last character read
2032                        tokens.addToken(startingPosition, startIndex, endIndex, SINGLE_QUOTED_STRING);
2033                        break;
2034                    case '/':
2035                        startIndex = input.index();
2036                        startingPosition = input.position(startIndex);
2037                        if (input.isNext('/')) {
2038                            // End-of-line comment ...
2039                            boolean foundLineTerminator = false;
2040                            while (input.hasNext()) {
2041                                c = input.next();
2042                                if (c == '\n' || c == '\r') {
2043                                    foundLineTerminator = true;
2044                                    break;
2045                                }
2046                            }
2047                            endIndex = input.index(); // the token won't include the '\n' or '\r' character(s)
2048                            if (!foundLineTerminator) ++endIndex; // must point beyond last char
2049                            if (c == '\r' && input.isNext('\n')) input.next();
2050                            if (useComments) {
2051                                tokens.addToken(startingPosition, startIndex, endIndex, COMMENT);
2052                            }
2053                        } else if (input.isNext('*')) {
2054                            // Multi-line comment ...
2055                            while (input.hasNext() && !input.isNext('*', '/')) {
2056                                c = input.next();
2057                            }
2058                            if (input.hasNext()) input.next(); // consume the '*'
2059                            if (input.hasNext()) input.next(); // consume the '/'
2060                            if (useComments) {
2061                                endIndex = input.index() + 1; // the token will include the '/' and '*' characters
2062                                tokens.addToken(startingPosition, startIndex, endIndex, COMMENT);
2063                            }
2064                        } else {
2065                            // just a regular slash ...
2066                            tokens.addToken(startingPosition, startIndex, startIndex + 1, SYMBOL);
2067                        }
2068                        break;
2069                    default:
2070                        startIndex = input.index();
2071                        startingPosition = input.position(startIndex);
2072                        // Read until another whitespace/symbol/decimal/slash is found
2073                        while (input.hasNext() && !(input.isNextWhitespace() || input.isNextAnyOf("/.-(){}*,;+%?$[]!<>|=:"))) {
2074                            c = input.next();
2075                        }
2076                        endIndex = input.index() + 1; // beyond last character that was included
2077                        tokens.addToken(startingPosition, startIndex, endIndex, WORD);
2078                }
2079            }
2080        }
2081    }
2082}