1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.geometry.io.core.internal;
18
import java.io.Reader;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.function.IntConsumer;
import java.util.function.IntPredicate;
24
25 /** Class providing basic text parsing capabilities. The goals of this class are to
26 * (1) provide a simple, flexible API for performing common text parsing operations and
27 * (2) provide a mechanism for creating consistent and informative parsing errors.
28 * This class is not intended as a replacement for grammar-based parsers and/or lexers.
29 */
30 public class SimpleTextParser {
31
32 /** Constant indicating that the end of the input has been reached. */
33 private static final int EOF = -1;
34
35 /** Carriage return character. */
36 private static final char CR = '\r';
37
38 /** Line feed character. */
39 private static final char LF = '\n';
40
41 /** Default value for the max string length property. */
42 private static final int DEFAULT_MAX_STRING_LENGTH = 1024;
43
44 /** Error message used when a string exceeds the configured maximum length. */
45 private static final String STRING_LENGTH_ERR_MSG = "string length exceeds maximum value of ";
46
47 /** Initial token position number. */
48 private static final int INITIAL_TOKEN_POS = -1;
49
50 /** Int consumer that does nothing. */
51 private static final IntConsumer NOOP_CONSUMER = ch -> { };
52
53 /** Current line number; line numbers start counting at 1. */
54 private int lineNumber = 1;
55
56 /** Current character column on the current line; column numbers start at 1.*/
57 private int columnNumber = 1;
58
59 /** Maximum length for strings returned by this instance. */
60 private int maxStringLength = DEFAULT_MAX_STRING_LENGTH;
61
62 /** The current token. */
63 private String currentToken;
64
65 /** The line number that the current token started on. */
66 private int currentTokenLineNumber = INITIAL_TOKEN_POS;
67
68 /** The character number that the current token started on. */
69 private int currentTokenColumnNumber = INITIAL_TOKEN_POS;
70
71 /** Flag used to indicate that at least one token has been read from the stream. */
72 private boolean hasSetToken;
73
74 /** Character read buffer used to access the character stream. */
75 private final CharReadBuffer buffer;
76
/** Create a parser that takes its characters from {@code reader}. The reader is
 * wrapped in a new {@link CharReadBuffer} and will not be closed.
 * @param reader source of the characters to parse
 */
public SimpleTextParser(final Reader reader) {
    this(new CharReadBuffer(reader));
}
84
/** Construct a new instance that reads characters from the given character buffer.
 * @param buffer read buffer to read characters from; must not be null
 * @throws NullPointerException if {@code buffer} is null
 */
public SimpleTextParser(final CharReadBuffer buffer) {
    // Fail fast: a null buffer would otherwise only surface as an NPE on the first
    // read or peek, far away from the call that supplied it.
    this.buffer = Objects.requireNonNull(buffer, "buffer");
}
91
92 /** Get the current line number. Line numbers start at 1.
93 * @return the current line number
94 */
95 public int getLineNumber() {
96 return lineNumber;
97 }
98
99 /** Set the current line number. This does not affect the character stream position,
100 * only the value returned by {@link #getLineNumber()}.
101 * @param lineNumber line number to set; line numbers start at 1
102 */
103 public void setLineNumber(final int lineNumber) {
104 this.lineNumber = lineNumber;
105 }
106
107 /** Get the current column number. This indicates the column position of the
108 * character that will returned by the next call to {@link #readChar()}. The first
109 * character of each line has a column number of 1.
110 * @return the current column number; column numbers start at 1
111 */
112 public int getColumnNumber() {
113 return columnNumber;
114 }
115
116 /** Set the current column number. This does not affect the character stream position,
117 * only the value returned by {@link #getColumnNumber()}.
118 * @param column the column number to set; column numbers start at 1
119 */
120 public void setColumnNumber(final int column) {
121 this.columnNumber = column;
122 }
123
124 /** Get the maximum length for strings returned by this instance. Operations
125 * that produce strings longer than this length will throw an exception.
126 * @return maximum length for strings returned by this instance
127 */
128 public int getMaxStringLength() {
129 return maxStringLength;
130 }
131
132 /** Set the maximum length for strings returned by this instance. Operations
133 * that produce strings longer than this length will throw an exception.
134 * @param maxStringLength maximum length for strings returned by this instance
135 * @throws IllegalArgumentException if the argument is less than zero
136 */
137 public void setMaxStringLength(final int maxStringLength) {
138 if (maxStringLength < 0) {
139 throw new IllegalArgumentException("Maximum string length cannot be less than zero; was " +
140 maxStringLength);
141 }
142 this.maxStringLength = maxStringLength;
143 }
144
145 /** Get the current token. This is the most recent string read by one of the {@code nextXXX()}
146 * methods. This value will be null if no token has yet been read or if the end of content has
147 * been reached.
148 * @return the current token
149 * @see #next(int)
150 * @see #next(IntPredicate)
151 * @see #nextLine()
152 * @see #nextAlphanumeric()
153 */
154 public String getCurrentToken() {
155 return currentToken;
156 }
157
158 /** Return true if the current token is not null or empty.
159 * @return true if the current token is not null or empty
160 * @see #getCurrentToken()
161 */
162 public boolean hasNonEmptyToken() {
163 return currentToken != null && !currentToken.isEmpty();
164 }
165
166 /** Get the line number that the current token started on. This value will
167 * be -1 if no token has been read yet.
168 * @return current token starting line number or -1 if no token has been
169 * read yet
170 * @see #getCurrentToken()
171 */
172 public int getCurrentTokenLineNumber() {
173 return currentTokenLineNumber;
174 }
175
176 /** Get the column position that the current token started on. This value will
177 * be -1 if no token has been read yet.
178 * @return current token column number or -1 if no oken has been read yet
179 * @see #getCurrentToken()
180 */
181 public int getCurrentTokenColumnNumber() {
182 return currentTokenColumnNumber;
183 }
184
185 /** Get the current token parsed as an integer.
186 * @return the current token parsed as an integer
187 * @throws IllegalStateException if no token has been read or the
188 * current token cannot be parsed as an integer
189 */
190 public int getCurrentTokenAsInt() {
191 ensureHasSetToken();
192
193 Throwable cause = null;
194
195 if (currentToken != null) {
196 try {
197 return Integer.parseInt(currentToken);
198 } catch (NumberFormatException exc) {
199 cause = exc;
200 }
201 }
202
203 throw unexpectedToken("integer", cause);
204 }
205
206 /** Get the current token parsed as a double.
207 * @return the current token parsed as a double
208 * @throws IllegalStateException if no token has been read or the
209 * current token cannot be parsed as a double
210 */
211 public double getCurrentTokenAsDouble() {
212 ensureHasSetToken();
213
214 Throwable cause = null;
215
216 if (currentToken != null) {
217 try {
218 return Double.parseDouble(currentToken);
219 } catch (NumberFormatException exc) {
220 cause = exc;
221 }
222 }
223
224 throw unexpectedToken("double", cause);
225 }
226
227 /** Return true if there are more characters to read from this instance.
228 * @return true if there are more characters to read from this instance
229 * @throws java.io.UncheckedIOException if an I/O error occurs
230 */
231 public boolean hasMoreCharacters() {
232 return buffer.hasMoreCharacters();
233 }
234
235 /** Return true if there are more characters to read on the current line.
236 * @return true if there are more characters to read on the current line
237 * @throws java.io.UncheckedIOException if an I/O error occurs
238 */
239 public boolean hasMoreCharactersOnLine() {
240 return hasMoreCharacters() && isNotNewLinePart(peekChar());
241 }
242
243 /** Read and return the next character in the stream and advance the parser position.
244 * This method updates the current line number and column number but does <strong>not</strong>
245 * set the {@link #getCurrentToken() current token}.
246 * @return the next character in the stream or -1 if the end of the stream has been
247 * reached
248 * @throws java.io.UncheckedIOException if an I/O error occurs
249 * @see #peekChar()
250 */
251 public int readChar() {
252 final int value = buffer.read();
253 if (value == LF ||
254 (value == CR && peekChar() != LF)) {
255 ++lineNumber;
256 columnNumber = 1;
257 } else if (value != EOF) {
258 ++columnNumber;
259 }
260
261 return value;
262 }
263
264 /** Read a string containing at most {@code len} characters from the stream and
265 * set it as the current token. Characters are added to the string until the string
266 * has the specified length or the end of the stream is reached. The characters are
267 * consumed from the stream. The token is set to null if no more characters are available
268 * from the character stream when this method is called.
269 * @param len the maximum length of the extracted string
270 * @return this instance
271 * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the
272 * configured {@link #getMaxStringLength() maximum string length}
273 * @throws java.io.UncheckedIOException if an I/O error occurs
274 * @see #getCurrentToken()
275 * @see #consume(int, IntConsumer)
276 */
277 public SimpleTextParser next(final int len) {
278 validateRequestedStringLength(len);
279
280 final int line = getLineNumber();
281 final int col = getColumnNumber();
282
283 String token = null;
284 if (hasMoreCharacters()) {
285 final StringBuilder sb = new StringBuilder(len);
286
287 consume(len, ch -> sb.append((char) ch));
288
289 token = sb.toString();
290 }
291
292 setToken(line, col, token);
293
294 return this;
295 }
296
297 /** Read a string containing at most {@code len} characters from the stream and
298 * set it as the current token. This is similar to {@link #next(int)} but with the exception
299 * that new line sequences beginning with {@code lineContinuationChar} are skipped.
300 * @param lineContinuationChar character used to indicate skipped new line sequences
301 * @param len the maximum length of the extracted string
302 * @return this instance
303 * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the
304 * configured {@link #getMaxStringLength() maximum string length}
305 * @throws java.io.UncheckedIOException if an I/O error occurs
306 * @see #getCurrentToken()
307 * @see #consumeWithLineContinuation(char, int, IntConsumer)
308 */
309 public SimpleTextParser nextWithLineContinuation(final char lineContinuationChar, final int len) {
310 validateRequestedStringLength(len);
311
312 final int line = getLineNumber();
313 final int col = getColumnNumber();
314
315 String token = null;
316 if (hasMoreCharacters()) {
317 final StringBuilder sb = new StringBuilder(len);
318
319 consumeWithLineContinuation(lineContinuationChar, len,
320 ch -> sb.append((char) ch));
321
322 token = sb.toString();
323 }
324
325 setToken(line, col, token);
326
327 return this;
328 }
329
330 /** Read characters from the stream while the given predicate returns true and set the result
331 * as the current token. The next call to {@link #readChar()} will return either a character
332 * that fails the predicate test or -1 if the end of the stream has been reached.
333 * The token will be null if the end of the stream has been reached prior to the method call.
334 * @param pred predicate function passed characters read from the input; reading continues
335 * until the predicate returns false
336 * @return this instance
337 * @throws IllegalStateException if the length of the produced string exceeds the configured
338 * {@link #getMaxStringLength() maximum string length}
339 * @throws java.io.UncheckedIOException if an I/O error occurs
340 * @see #getCurrentToken()
341 * @see #consume(IntPredicate, IntConsumer)
342 */
343 public SimpleTextParser next(final IntPredicate pred) {
344 final int line = getLineNumber();
345 final int col = getColumnNumber();
346
347 String token = null;
348 if (hasMoreCharacters()) {
349 final StringCollector collector = new StringCollector(line, col, pred);
350
351 consume(collector, collector);
352
353 token = collector.getString();
354 }
355
356 setToken(line, col, token);
357
358 return this;
359 }
360
361 /** Read characters from the stream while the given predicate returns true and set the result
362 * as the current token. This is similar to {@link #next(IntPredicate)} but with the exception
363 * that new line sequences prefixed with {@code lineContinuationChar} are skipped.
364 * @param lineContinuationChar character used to indicate skipped new line sequences
365 * @param pred predicate function passed characters read from the input; reading continues
366 * until the predicate returns false
367 * @return this instance
368 * @throws IllegalStateException if the length of the produced string exceeds the configured
369 * {@link #getMaxStringLength() maximum string length}
370 * @throws java.io.UncheckedIOException if an I/O error occurs
371 * @see #getCurrentToken()
372 * @see #consume(IntPredicate, IntConsumer)
373 */
374 public SimpleTextParser nextWithLineContinuation(final char lineContinuationChar, final IntPredicate pred) {
375 final int line = getLineNumber();
376 final int col = getColumnNumber();
377
378 String token = null;
379 if (hasMoreCharacters()) {
380 final StringCollector collector = new StringCollector(line, col, pred);
381
382 consumeWithLineContinuation(lineContinuationChar, collector, collector);
383
384 token = collector.getString();
385 }
386
387 setToken(line, col, token);
388
389 return this;
390 }
391
392 /** Read characters from the current parser position to the next new line sequence and
393 * set the result as the current token . The newline character sequence
394 * ('\r', '\n', or '\r\n') at the end of the line is consumed but is not included in the token.
395 * The token will be null if the end of the stream has been reached prior to the method call.
396 * @return this instance
397 * @throws IllegalStateException if the length of the produced string exceeds the configured
398 * {@link #getMaxStringLength() maximum string length}
399 * @throws java.io.UncheckedIOException if an I/O error occurs
400 * @see #getCurrentToken()
401 */
402 public SimpleTextParser nextLine() {
403 next(SimpleTextParser::isNotNewLinePart);
404
405 discardNewLineSequence();
406
407 return this;
408 }
409
410 /** Read a sequence of alphanumeric characters starting from the current parser position
411 * and set the result as the current token. The token will be the empty string if the next
412 * character in the stream is not alphanumeric and will be null if the end of the stream has
413 * been reached prior to the method call.
414 * @return this instance
415 * @throws IllegalStateException if the length of the produced string exceeds the configured
416 * {@link #getMaxStringLength() maximum string length}
417 * @throws java.io.UncheckedIOException if an I/O error occurs
418 * @see #getCurrentToken()
419 */
420 public SimpleTextParser nextAlphanumeric() {
421 return next(SimpleTextParser::isAlphanumeric);
422 }
423
424 /** Discard {@code len} number of characters from the character stream. The
425 * parser position is updated but the current token is not changed.
426 * @param len number of characters to discard
427 * @return this instance
428 * @throws java.io.UncheckedIOException if an I/O error occurs
429 */
430 public SimpleTextParser discard(final int len) {
431 return consume(len, NOOP_CONSUMER);
432 }
433
434 /** Discard {@code len} number of characters from the character stream. The
435 * parser position is updated but the current token is not changed. Lines beginning
436 * with {@code lineContinuationChar} are skipped.
437 * @param lineContinuationChar character used to indicate skipped new line sequences
438 * @param len number of characters to discard
439 * @return this instance
440 * @throws java.io.UncheckedIOException if an I/O error occurs
441 */
442 public SimpleTextParser discardWithLineContinuation(final char lineContinuationChar,
443 final int len) {
444 return consumeWithLineContinuation(lineContinuationChar, len, NOOP_CONSUMER);
445 }
446
447 /** Discard characters from the stream while the given predicate returns true. The next call
448 * to {@link #readChar()} will return either a character that fails the predicate test or -1
449 * if the end of the stream has been reached. The parser position is updated but the current
450 * token is not changed.
451 * @param pred predicate test for characters to discard
452 * @return this instance
453 * @throws java.io.UncheckedIOException if an I/O error occurs
454 */
455 public SimpleTextParser discard(final IntPredicate pred) {
456 return consume(pred, NOOP_CONSUMER);
457 }
458
459 /** Discard characters from the stream while the given predicate returns true. New line sequences
460 * beginning with {@code lineContinuationChar} are skipped. The next call o {@link #readChar()}
461 * will return either a character that fails the predicate test or -1 if the end of the stream
462 * has been reached. The parser position is updated but the current token is not changed.
463 * @param lineContinuationChar character used to indicate skipped new line sequences
464 * @param pred predicate test for characters to discard
465 * @return this instance
466 * @throws java.io.UncheckedIOException if an I/O error occurs
467 */
468 public SimpleTextParser discardWithLineContinuation(final char lineContinuationChar,
469 final IntPredicate pred) {
470 return consumeWithLineContinuation(lineContinuationChar, pred, NOOP_CONSUMER);
471 }
472
473 /** Discard a sequence of whitespace characters from the character stream starting from the
474 * current parser position. The next call to {@link #readChar()} will return either a non-whitespace
475 * character or -1 if the end of the stream has been reached. The parser position is updated
476 * but the current token is not changed.
477 * @return this instance
478 * @throws java.io.UncheckedIOException if an I/O error occurs
479 */
480 public SimpleTextParser discardWhitespace() {
481 return discard(SimpleTextParser::isWhitespace);
482 }
483
484 /** Discard the next whitespace characters on the current line. The next call to
485 * {@link #readChar()} will return either a non-whitespace character on the current line,
486 * the newline character sequence (indicating the end of the line), or -1 (indicating the
487 * end of the stream). The parser position is updated but the current token is not changed.
488 * @return this instance
489 * @throws java.io.UncheckedIOException if an I/O error occurs
490 */
491 public SimpleTextParser discardLineWhitespace() {
492 return discard(SimpleTextParser::isLineWhitespace);
493 }
494
495 /** Discard the newline character sequence at the current reader position. The sequence
496 * is defined as one of "\r", "\n", or "\r\n". Does nothing if the reader is not positioned
497 * at a newline sequence. The parser position is updated but the current token is not changed.
498 * @return this instance
499 * @throws java.io.UncheckedIOException if an I/O error occurs
500 */
501 public SimpleTextParser discardNewLineSequence() {
502 final int value = peekChar();
503 if (value == LF) {
504 readChar();
505 } else if (value == CR) {
506 readChar();
507
508 if (peekChar() == LF) {
509 readChar();
510 }
511 }
512
513 return this;
514 }
515
516 /** Discard all remaining characters on the current line, including the terminating
517 * newline character sequence. The next call to {@link #readChar()} will return either the
518 * first character on the next line or -1 if the end of the stream has been reached.
519 * The parser position is updated but the current token is not changed.
520 * @return this instance
521 * @throws java.io.UncheckedIOException if an I/O error occurs
522 */
523 public SimpleTextParser discardLine() {
524 discard(SimpleTextParser::isNotNewLinePart);
525
526 discardNewLineSequence();
527
528 return this;
529 }
530
531 /** Consume characters from the stream and pass them to {@code consumer} while the given predicate
532 * returns true. The operation ends when the predicate returns false or the end of the stream is
533 * reached.
534 * @param pred predicate test for characters to consume
535 * @param consumer object to be passed each consumed character
536 * @return this instance
537 * @throws java.io.UncheckedIOException if an I/O error occurs
538 */
539 public SimpleTextParser consume(final IntPredicate pred, final IntConsumer consumer) {
540 int ch;
541 while ((ch = peekChar()) != EOF && pred.test(ch)) {
542 consumer.accept(readChar());
543 }
544
545 return this;
546 }
547
548 /** Consume at most {@code len} characters from the stream, passing each to the given consumer.
549 * This method is similar to {@link #consume(int, IntConsumer)} with the exception that new line
550 * sequences prefixed with {@code lineContinuationChar} are skipped.
551 * @param lineContinuationChar character used to indicate skipped new line sequences
552 * @param len number of characters to consume
553 * @param consumer function to be passed each consumed character
554 * @return this instance
555 * @throws java.io.UncheckedIOException if an I/O error occurs
556 */
557 public SimpleTextParser consumeWithLineContinuation(final char lineContinuationChar,
558 final int len, final IntConsumer consumer) {
559 int i = -1;
560 int ch;
561 while (++i < len && (ch = readChar()) != EOF) {
562 if (ch == lineContinuationChar && isNewLinePart(peekChar())) {
563 --i; // don't count the continuation char toward the total length
564 discardNewLineSequence();
565 } else {
566 consumer.accept(ch);
567 }
568 }
569
570 return this;
571 }
572
573 /** Consume at most {@code len} characters from the stream, passing each to the given consumer.
574 * The operation continues until {@code len} number of characters have been read or the end of
575 * the stream has been reached.
576 * @param len number of characters to consume
577 * @param consumer object to be passed each consumed character
578 * @return this instance
579 * @throws java.io.UncheckedIOException if an I/O error occurs
580 */
581 public SimpleTextParser consume(final int len, final IntConsumer consumer) {
582 int ch;
583 for (int i = 0; i < len; ++i) {
584 ch = readChar();
585 if (ch != EOF) {
586 consumer.accept(ch);
587 } else {
588 break;
589 }
590 }
591
592 return this;
593 }
594
595 /** Consume characters from the stream and pass them to {@code consumer} while the given predicate
596 * returns true. This method is similar to {@link #consume(IntPredicate, IntConsumer)} with the
597 * exception that new lines sequences beginning with {@code lineContinuationChar} are skipped.
598 * @param lineContinuationChar character used to indicate skipped new line sequences
599 * @param pred predicate test for characters to consume
600 * @param consumer object to be passed each consumed character
601 * @return this instance
602 * @throws java.io.UncheckedIOException if an I/O error occurs
603 */
604 public SimpleTextParser consumeWithLineContinuation(final char lineContinuationChar,
605 final IntPredicate pred, final IntConsumer consumer) {
606 int ch;
607 while ((ch = peekChar()) != EOF) {
608 if (ch == lineContinuationChar && isNewLinePart(buffer.charAt(1))) {
609 readChar();
610 discardNewLineSequence();
611 } else if (pred.test(ch)) {
612 consumer.accept(readChar());
613 } else {
614 break;
615 }
616 }
617
618 return this;
619 }
620
621 /** Return the next character in the stream but do not advance the parser position.
622 * @return the next character in the stream or -1 if the end of the stream has been
623 * reached
624 * @throws java.io.UncheckedIOException if an I/O error occurs
625 * @see #readChar()
626 */
627 public int peekChar() {
628 return buffer.peek();
629 }
630
631 /** Return a string containing containing at most {@code len} characters from the stream but
632 * without changing the parser position. Characters are added to the string until the
633 * string has the specified length or the end of the stream is reached.
634 * @param len the maximum length of the returned string
635 * @return a string containing containing at most {@code len} characters from the stream
636 * or null if the parser has already reached the end of the stream
637 * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the
638 * configured {@link #getMaxStringLength() maximum string length}
639 * @throws java.io.UncheckedIOException if an I/O error occurs
640 * @see #next(int)
641 */
642 public String peek(final int len) {
643 validateRequestedStringLength(len);
644
645 return buffer.peekString(len);
646 }
647
648 /** Read characters from the stream while the given predicate returns true but do not
649 * change the current token or advance the parser position.
650 * @param pred predicate function passed characters read from the input; reading continues
651 * until the predicate returns false
652 * @return string containing characters matching {@code pred} or null if the parser has already
653 * reached the end of the stream
654 * @throws IllegalStateException if the length of the produced string exceeds the configured
655 * {@link #getMaxStringLength() maximum string length}
656 * @throws java.io.UncheckedIOException if an I/O error occurs
657 * @see #getCurrentToken()
658 */
659 public String peek(final IntPredicate pred) {
660 String token = null;
661
662 if (hasMoreCharacters()) {
663 final StringCollector collector = new StringCollector(lineNumber, columnNumber, pred);
664
665 int i = -1;
666 int ch = buffer.charAt(++i);
667 while (ch != EOF && collector.test(ch)) {
668 collector.accept(ch);
669
670 ch = buffer.charAt(++i);
671 }
672
673 token = collector.getString();
674 }
675
676 return token;
677 }
678
679 /** Compare the {@link #getCurrentToken() current token} with the argument and throw an
680 * exception if they are not equal. The comparison is case-sensitive.
681 * @param expected expected token
682 * @return this instance
683 * @throws IllegalStateException if no token has been read or {@code expected} does not exactly
684 * equal the current token
685 */
686 public SimpleTextParser match(final String expected) {
687 matchInternal(expected, true, true);
688 return this;
689 }
690
691 /** Compare the {@link #getCurrentToken() current token} with the argument and throw an
692 * exception if they are not equal. The comparison is <em>not</em> case-sensitive.
693 * @param expected expected token
694 * @return this instance
695 * @throws IllegalStateException if no token has been read or {@code expected} does not equal
696 * the current token (ignoring case)
697 */
698 public SimpleTextParser matchIgnoreCase(final String expected) {
699 matchInternal(expected, false, true);
700 return this;
701 }
702
703 /** Return true if the {@link #getCurrentToken() current token} is equal to the argument.
704 * The comparison is case-sensitive.
705 * @param expected expected token
706 * @return true if the argument exactly equals the current token
707 * @throws IllegalStateException if no token has been read
708 * @throws java.io.UncheckedIOException if an I/O error occurs
709 */
710 public boolean tryMatch(final String expected) {
711 return matchInternal(expected, true, false);
712 }
713
714 /** Return true if the {@link #getCurrentToken() current token} is equal to the argument.
715 * The comparison is <em>not</em> case-sensitive.
716 * @param expected expected token
717 * @return true if the argument equals the current token (ignoring case)
718 * @throws IllegalStateException if no token has been read
719 */
720 public boolean tryMatchIgnoreCase(final String expected) {
721 return matchInternal(expected, false, false);
722 }
723
724 /** Internal method to compare the current token with the argument.
725 * @param expected expected token
726 * @param caseSensitive if the comparison should be case-sensitive
727 * @param throwOnFailure if an exception should be thrown if the argument is not
728 * equal to the current token
729 * @return true if the argument is equal to the current token
730 * @throws IllegalStateException if no token has been read or {@code expected} does not match the
731 * current token and {@code throwOnFailure} is true
732 */
733 private boolean matchInternal(final String expected, final boolean caseSensitive,
734 final boolean throwOnFailure) {
735 ensureHasSetToken();
736
737 if (!stringsEqual(expected, currentToken, caseSensitive)) {
738 if (throwOnFailure) {
739 throw unexpectedToken("[" + expected + "]");
740 }
741
742 return false;
743 }
744
745 return true;
746 }
747
748 /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}.
749 * An exception is thrown if no match is found. String comparisons are case-sensitive.
750 * @param expected strings to compare with the current token
751 * @return index of the argument that exactly matches the current token
752 * @throws IllegalStateException if no token has been read or no match is found among the arguments
753 */
754 public int choose(final String... expected) {
755 return choose(Arrays.asList(expected));
756 }
757
758 /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}.
759 * An exception is thrown if no match is found. String comparisons are case-sensitive.
760 * @param expected strings to compare with the current token
761 * @return index of the argument that exactly matches the current token
762 * @throws IllegalStateException if no token has been read or no match is found among the arguments
763 */
764 public int choose(final List<String> expected) {
765 return chooseInternal(expected, true, true);
766 }
767
768 /** Return the index of the argument that matches the {@link #getCurrentToken() current token},
769 * ignoring case. An exception is thrown if no match is found. String comparisons are <em>not</em>
770 * case-sensitive.
771 * @param expected strings to compare with the current token
772 * @return index of the argument that matches the current token (ignoring case)
773 * @throws IllegalStateException if no token has been read or no match is found among the arguments
774 */
775 public int chooseIgnoreCase(final String... expected) {
776 return chooseIgnoreCase(Arrays.asList(expected));
777 }
778
779 /** Return the index of the argument that matches the {@link #getCurrentToken() current token},
780 * ignoring case. An exception is thrown if no match is found. String comparisons are <em>not</em>
781 * case-sensitive.
782 * @param expected strings to compare with the current token
783 * @return index of the argument that matches the current token (ignoring case)
784 * @throws IllegalStateException if no token has been read or no match is found among the arguments
785 */
786 public int chooseIgnoreCase(final List<String> expected) {
787 return chooseInternal(expected, false, true);
788 }
789
790 /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}
791 * or -1 if no match is found. String comparisons are case-sensitive.
792 * @param expected strings to compare with the current token
793 * @return index of the argument that exactly matches the current token or -1 if
794 * no match is found
795 * @throws IllegalStateException if no token has been read
796 */
797 public int tryChoose(final String... expected) {
798 return tryChoose(Arrays.asList(expected));
799 }
800
801 /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}
802 * or -1 if no match is found. String comparisons are case-sensitive.
803 * @param expected strings to compare with the current token
804 * @return index of the argument that exactly matches the current token or -1 if
805 * no match is found
806 * @throws IllegalStateException if no token has been read
807 */
808 public int tryChoose(final List<String> expected) {
809 return chooseInternal(expected, true, false);
810 }
811
812 /** Return the index of the argument that matches the {@link #getCurrentToken() current token}
813 * or -1 if no match is found. String comparisons are <em>not</em> case-sensitive.
814 * @param expected strings to compare with the current token
815 * @return index of the argument that matches the current token (ignoring case) or -1 if
816 * no match is found
817 * @throws IllegalStateException if no token has been read
818 */
819 public int tryChooseIgnoreCase(final String... expected) {
820 return tryChooseIgnoreCase(Arrays.asList(expected));
821 }
822
823 /** Return the index of the argument that matches the {@link #getCurrentToken() current token}
824 * or -1 if no match is found. String comparisons are <em>not</em> case-sensitive.
825 * @param expected strings to compare with the current token
826 * @return index of the argument that matches the current token (ignoring case) or -1 if
827 * no match is found
828 * @throws IllegalStateException if no token has been read
829 */
830 public int tryChooseIgnoreCase(final List<String> expected) {
831 return chooseInternal(expected, false, false);
832 }
833
834 /** Internal method to compare the current token with a list of possible strings. The index of
835 * the matching argument is returned.
836 * @param expected strings to compare with the current token
837 * @param caseSensitive if the comparisons should be case-sensitive
838 * @param throwOnFailure if an exception should be thrown if no match is found
839 * @return the index of the matching argument or -1 if no match is found
840 * @throws IllegalStateException if no token has been read or no match is found and
841 * {@code throwOnFailure} is true
842 */
843 private int chooseInternal(final List<String> expected, final boolean caseSensitive,
844 final boolean throwOnFailure) {
845 ensureHasSetToken();
846
847 int i = 0;
848 for (final String str : expected) {
849 if (stringsEqual(str, currentToken, caseSensitive)) {
850 return i;
851 }
852
853 ++i;
854 }
855
856 if (throwOnFailure) {
857 throw unexpectedToken("one of " + expected);
858 }
859
860 return -1;
861 }
862
863 /** Get an exception indicating that the current token was unexpected. The returned
864 * exception contains a message with the line number and column of the current token and
865 * a description of its value.
866 * @param expected string describing what was expected
867 * @return exception indicating that the current token was unexpected
868 */
869 public IllegalStateException unexpectedToken(final String expected) {
870 return unexpectedToken(expected, null);
871 }
872
873 /** Get an exception indicating that the current token was unexpected. The returned
874 * exception contains a message with the line number and column of the current token and
875 * a description of its value.
876 * @param expected string describing what was expected
877 * @param cause cause of the error
878 * @return exception indicating that the current token was unexpected
879 */
880 public IllegalStateException unexpectedToken(final String expected, final Throwable cause) {
881
882 StringBuilder msg = new StringBuilder();
883 msg.append("expected ")
884 .append(expected)
885 .append(" but found ")
886 .append(getCurrentTokenDescription());
887
888 final int line = hasSetToken ? currentTokenLineNumber : lineNumber;
889 final int col = hasSetToken ? currentTokenColumnNumber : columnNumber;
890
891 return parseError(line, col, msg.toString(), cause);
892 }
893
894 /** Get an exception indicating an error during parsing at the current token position.
895 * @param msg error message
896 * @return an exception indicating an error during parsing at the current token position
897 */
898 public IllegalStateException tokenError(final String msg) {
899 return tokenError(msg, null);
900 }
901
902 /** Get an exception indicating an error during parsing at the current token position.
903 * @param msg error message
904 * @param cause the cause of the error; may be null
905 * @return an exception indicating an error during parsing at the current token position
906 */
907 public IllegalStateException tokenError(final String msg, final Throwable cause) {
908 final int line = hasSetToken ? currentTokenLineNumber : lineNumber;
909 final int col = hasSetToken ? currentTokenColumnNumber : columnNumber;
910
911 return parseError(line, col, msg, cause);
912 }
913
914 /** Return an exception indicating an error occurring at the current parser position.
915 * @param msg error message
916 * @return an exception indicating an error during parsing
917 */
918 public IllegalStateException parseError(final String msg) {
919 return parseError(msg, null);
920 }
921
922 /** Return an exception indicating an error occurring at the current parser position.
923 * @param msg error message
924 * @param cause the cause of the error; may be null
925 * @return an exception indicating an error during parsing
926 */
927 public IllegalStateException parseError(final String msg, final Throwable cause) {
928 return parseError(lineNumber, columnNumber, msg, cause);
929 }
930
931 /** Return an exception indicating an error during parsing.
932 * @param line line number of the error
933 * @param col column number of the error
934 * @param msg error message
935 * @return an exception indicating an error during parsing
936 */
937 public IllegalStateException parseError(final int line, final int col, final String msg) {
938 return parseError(line, col, msg, null);
939 }
940
941 /** Return an exception indicating an error during parsing.
942 * @param line line number of the error
943 * @param col column number of the error
944 * @param msg error message
945 * @param cause the cause of the error
946 * @return an exception indicating an error during parsing
947 */
948 public IllegalStateException parseError(final int line, final int col, final String msg,
949 final Throwable cause) {
950 final String fullMsg = String.format("Parsing failed at line %d, column %d: %s",
951 line, col, msg);
952 return GeometryIOUtils.parseError(fullMsg, cause);
953 }
954
955 /** Set the current token string and position.
956 * @param line line number for the start of the token
957 * @param col column number for the start of the token
958 * @param token token to set
959 */
960 private void setToken(final int line, final int col, final String token) {
961 currentTokenLineNumber = line;
962 currentTokenColumnNumber = col;
963 currentToken = token;
964
965 hasSetToken = true;
966 }
967
968 /** Get a user-friendly description of the current token.
969 * @return a user-friendly description of the current token.
970 */
971 private String getCurrentTokenDescription() {
972 if (currentToken == null || currentToken.isEmpty()) {
973 // attempt to return a more helpful message about the location
974 // of empty tokens by checking the buffer content; if this fails
975 // we'll ignore the error and continue with a more generic message
976 try {
977 if (!hasMoreCharacters()) {
978 return "end of content";
979 } else if (currentToken != null) {
980 if (!hasMoreCharactersOnLine()) {
981 return "end of line";
982 }
983 return "empty token followed by [" + peek(1) + "]";
984 }
985 } catch (IllegalStateException exc) {
986 // ignore
987 }
988 }
989
990 if (currentToken == null) {
991 return "no current token";
992 } else if (currentToken.isEmpty()) {
993 return "empty token";
994 }
995
996 return "[" + currentToken + "]";
997 }
998
999 /** Validate the requested string length.
1000 * @param len requested string length
1001 * @throws IllegalArgumentException if {@code len} is less than 0 or greater than {@code maxStringLength}
1002 */
1003 private void validateRequestedStringLength(final int len) {
1004 if (len < 0) {
1005 throw new IllegalArgumentException("Requested string length cannot be negative; was " + len);
1006 } else if (len > maxStringLength) {
1007 throw new IllegalArgumentException("Requested string length of " + len + " exceeds maximum value of " +
1008 maxStringLength);
1009 }
1010 }
1011
1012 /** Ensure that a token read operation has been performed, throwing an exception if not.
1013 * @throws IllegalStateException if no token read operation has been performed
1014 */
1015 private void ensureHasSetToken() {
1016 if (!hasSetToken) {
1017 throw new IllegalStateException("No token has been read from the character stream");
1018 }
1019 }
1020
1021 /** Return true if the given character (Unicode code point) is whitespace.
1022 * @param ch character (Unicode code point) to test
1023 * @return true if the given character is whitespace
1024 * @see Character#isWhitespace(int)
1025 */
1026 public static boolean isWhitespace(final int ch) {
1027 return Character.isWhitespace(ch);
1028 }
1029
1030 /** Return true if the given character (Unicode code point) is not whitespace.
1031 * @param ch character (Unicode code point) to test
1032 * @return true if the given character is not whitespace
1033 * @see #isWhitespace(int)
1034 */
1035 public static boolean isNotWhitespace(final int ch) {
1036 return !isWhitespace(ch);
1037 }
1038
1039 /** Return true if the given character (Unicode code point) is whitespace
1040 * that is not used in newline sequences (ie, not '\r' or '\n').
1041 * @param ch character (Unicode code point) to test
1042 * @return true if the given character is a whitespace character not used in newline
1043 * sequences
1044 */
1045 public static boolean isLineWhitespace(final int ch) {
1046 return isWhitespace(ch) && isNotNewLinePart(ch);
1047 }
1048
1049 /** Return true if the given character (Unicode code point) is used
1050 * as part of newline sequences (ie, is either '\r' or '\n').
1051 * @param ch character (Unicode code point) to test
1052 * @return true if the given character is used as part of newline sequences
1053 */
1054 public static boolean isNewLinePart(final int ch) {
1055 return ch == CR || ch == LF;
1056 }
1057
1058 /** Return true if the given character (Unicode code point) is not used as
1059 * part of newline sequences (ie, not '\r' or '\n').
1060 * @param ch character (Unicode code point) to test
1061 * @return true if the given character is not used as part of newline sequences
1062 * @see #isNewLinePart(int)
1063 */
1064 public static boolean isNotNewLinePart(final int ch) {
1065 return !isNewLinePart(ch);
1066 }
1067
1068 /** Return true if the given character (Unicode code point) is alphanumeric.
1069 * @param ch character (Unicode code point) to test
1070 * @return true if the argument is alphanumeric
1071 * @see Character#isAlphabetic(int)
1072 * @see Character#isDigit(int)
1073 */
1074 public static boolean isAlphanumeric(final int ch) {
1075 return Character.isAlphabetic(ch) ||
1076 Character.isDigit(ch);
1077 }
1078
1079 /** Return true if the given character (Unicode code point) is not alphanumeric.
1080 * @param ch character (Unicode code point) to test
1081 * @return true if the argument is not alphanumeric
1082 * @see #isAlphanumeric(int)
1083 */
1084 public static boolean isNotAlphanumeric(final int ch) {
1085 return !isAlphanumeric(ch);
1086 }
1087
1088 /** Return true if the given character (Unicode code point) can be used as part of
1089 * the string representation of an integer. This will be true for the following types
1090 * of characters:
1091 * <ul>
1092 * <li>{@link Character#isDigit(int) digits}</li>
1093 * <li>the '-' (minus) character</li>
1094 * <li>the '+' (plus) character</li>
1095 * </ul>
1096 * @param ch character (Unicode code point) to test
1097 * @return true if the given character can be used as part of an integer string
1098 */
1099 public static boolean isIntegerPart(final int ch) {
1100 return Character.isDigit(ch) ||
1101 ch == '-' ||
1102 ch == '+';
1103 }
1104
1105 /** Return true if the given character (Unicode code point) can be used as part of
1106 * the string representation of a decimal number. This will be true for the following types
1107 * of characters:
1108 * <ul>
1109 * <li>{@link Character#isDigit(int) digits}</li>
1110 * <li>the '-' (minus) character</li>
1111 * <li>the '+' (plus) character</li>
1112 * <li>the '.' (period) character</li>
1113 * <li>the 'e' character</li>
1114 * <li>the 'E' character</li>
1115 * </ul>
1116 * @param ch character (Unicode code point) to test
1117 * @return true if the given character can be used as part of a decimal number string
1118 */
1119 public static boolean isDecimalPart(final int ch) {
1120 return Character.isDigit(ch) ||
1121 ch == '-' ||
1122 ch == '+' ||
1123 ch == '.' ||
1124 ch == 'e' ||
1125 ch == 'E';
1126 }
1127
1128 /** Test two strings for equality. One or both arguments may be null.
1129 * @param a first string
1130 * @param b second string
1131 * @param caseSensitive comparison is case-sensitive if set to true
1132 * @return true if the string arguments are considered equal
1133 */
1134 private static boolean stringsEqual(final String a, final String b, final boolean caseSensitive) {
1135 if (a == null) {
1136 return b == null;
1137 }
1138
1139 return caseSensitive ?
1140 a.equals(b) :
1141 a.equalsIgnoreCase(b);
1142 }
1143
1144 /** Internal class used to collect strings from the character stream while ensuring that the
1145 * collected strings do not exceed the maximum configured string length.
1146 */
1147 private final class StringCollector implements IntPredicate, IntConsumer {
1148
1149 /** String builder instance. */
1150 private final StringBuilder sb = new StringBuilder();
1151
1152 /** Start position line. */
1153 private final int line;
1154
1155 /** Start position column. */
1156 private final int col;
1157
1158 /** Character predicate. */
1159 private final IntPredicate pred;
1160
1161 /** Construct a new instance with the given start position and character predicate.
1162 * @param line start position line
1163 * @param col start position col
1164 * @param pred character predicate
1165 */
1166 StringCollector(final int line, final int col, final IntPredicate pred) {
1167 this.line = line;
1168 this.col = col;
1169 this.pred = pred;
1170 }
1171
1172 /** {@inheritDoc} */
1173 @Override
1174 public boolean test(final int value) {
1175 return pred.test(value) && !hasExceededMaxStringLength();
1176 }
1177
1178 /** {@inheritDoc} */
1179 @Override
1180 public void accept(final int value) {
1181 sb.append((char) value);
1182 }
1183
1184 /** Get the string collected by this instance.
1185 * @return the string collected by this instance
1186 * @throws IllegalStateException if the string exceeds the maximum configured length
1187 */
1188 public String getString() {
1189 if (hasExceededMaxStringLength()) {
1190 throw parseError(line, col, STRING_LENGTH_ERR_MSG + maxStringLength);
1191 }
1192
1193 return sb.toString();
1194 }
1195
1196 /** Return true if this collector has exceeded the maximum configured string length.
1197 * @return true if this collector has exceeded the maximum string length
1198 */
1199 private boolean hasExceededMaxStringLength() {
1200 return sb.length() > maxStringLength;
1201 }
1202 }
1203 }