001/*
002 * Copyright (C) 2008 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.base;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndex;
020
021import com.google.common.annotations.GwtCompatible;
022import com.google.common.annotations.GwtIncompatible;
023import com.google.common.annotations.VisibleForTesting;
024import java.util.Arrays;
025import java.util.BitSet;
026
027/**
028 * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
029 * for any {@link Object}. Also offers basic text processing methods based on this function.
030 * Implementations are strongly encouraged to be side-effect-free and immutable.
031 *
032 * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
033 * "any {@code char} value {@code c} for which {@code this.matches(c)} returns {@code true}".
034 *
035 * <p><b>Warning:</b> This class deals only with {@code char} values; it does not understand
036 * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical
037 * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher}
038 * treats these just as two separate characters.
039 *
040 * <p>Example usages:
041 *
042 * <pre>
043 *   String trimmed = {@link #whitespace() whitespace()}.{@link #trimFrom trimFrom}(userInput);
044 *   if ({@link #ascii() ascii()}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre>
045 *
046 * <p>See the Guava User Guide article on <a
047 * href="https://github.com/google/guava/wiki/StringsExplained#charmatcher">{@code CharMatcher}
048 * </a>.
049 *
050 * @author Kevin Bourrillion
051 * @since 1.0
052 */
053@GwtCompatible(emulated = true)
054public abstract class CharMatcher implements Predicate<Character> {
055  /*
056   *           N777777777NO
057   *         N7777777777777N
058   *        M777777777777777N
059   *        $N877777777D77777M
060   *       N M77777777ONND777M
061   *       MN777777777NN  D777
062   *     N7ZN777777777NN ~M7778
063   *    N777777777777MMNN88777N
064   *    N777777777777MNZZZ7777O
065   *    DZN7777O77777777777777
066   *     N7OONND7777777D77777N
067   *      8$M++++?N???$77777$
068   *       M7++++N+M77777777N
069   *        N77O777777777777$                              M
070   *          DNNM$$$$777777N                              D
071   *         N$N:=N$777N7777M                             NZ
072   *        77Z::::N777777777                          ODZZZ
073   *       77N::::::N77777777M                         NNZZZ$
074   *     $777:::::::77777777MN                        ZM8ZZZZZ
075   *     777M::::::Z7777777Z77                        N++ZZZZNN
076   *    7777M:::::M7777777$777M                       $++IZZZZM
077   *   M777$:::::N777777$M7777M                       +++++ZZZDN
078   *     NN$::::::7777$$M777777N                      N+++ZZZZNZ
079   *       N::::::N:7$O:77777777                      N++++ZZZZN
080   *       M::::::::::::N77777777+                   +?+++++ZZZM
081   *       8::::::::::::D77777777M                    O+++++ZZ
082   *        ::::::::::::M777777777N                      O+?D
083   *        M:::::::::::M77777777778                     77=
084   *        D=::::::::::N7777777777N                    777
085   *       INN===::::::=77777777777N                  I777N
086   *      ?777N========N7777777777787M               N7777
087   *      77777$D======N77777777777N777N?         N777777
088   *     I77777$$$N7===M$$77777777$77777777$MMZ77777777N
089   *      $$$$$$$$$$$NIZN$$$$$$$$$M$$7777777777777777ON
090   *       M$$$$$$$$M    M$$$$$$$$N=N$$$$7777777$$$ND
091   *      O77Z$$$$$$$     M$$$$$$$$MNI==$DNNNNM=~N
092   *   7 :N MNN$$$$M$      $$$777$8      8D8I
093   *     NMM.:7O           777777778
094   *                       7777777MN
095   *                       M NO .7:
096   *                       M   :   M
097   *                            8
098   */
099
100  // Constant matcher factory methods
101
102  /**
103   * Matches any character.
104   *
105   * @since 19.0 (since 1.0 as constant {@code ANY})
106   */
107  public static CharMatcher any() {
108    return Any.INSTANCE;
109  }
110
111  /**
112   * Matches no characters.
113   *
114   * @since 19.0 (since 1.0 as constant {@code NONE})
115   */
116  public static CharMatcher none() {
117    return None.INSTANCE;
118  }
119
120  /**
121   * Determines whether a character is whitespace according to the latest Unicode standard, as
122   * illustrated
123   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
124   * This is not the same definition used by other Java APIs. (See a
125   * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
126   * definitions of "whitespace"</a>.)
127   *
128   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this matcher to keep it up to
129   * date.
130   *
131   * @since 19.0 (since 1.0 as constant {@code WHITESPACE})
132   */
133  public static CharMatcher whitespace() {
134    return Whitespace.INSTANCE;
135  }
136
137  /**
138   * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
139   * interpreted as a break between words for formatting purposes). See {@link #whitespace()} for a
140   * discussion of that term.
141   *
142   * @since 19.0 (since 2.0 as constant {@code BREAKING_WHITESPACE})
143   */
144  public static CharMatcher breakingWhitespace() {
145    return BreakingWhitespace.INSTANCE;
146  }
147
148  /**
149   * Determines whether a character is ASCII, meaning that its code point is less than 128.
150   *
151   * @since 19.0 (since 1.0 as constant {@code ASCII})
152   */
153  public static CharMatcher ascii() {
154    return Ascii.INSTANCE;
155  }
156
157  /**
158   * Determines whether a character is a digit according to
159   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>. If
160   * you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
161   *
162   * @since 19.0 (since 1.0 as constant {@code DIGIT})
163   */
164  public static CharMatcher digit() {
165    return Digit.INSTANCE;
166  }
167
168  /**
169   * Determines whether a character is a digit according to {@linkplain Character#isDigit(char)
170   * Java's definition}. If you only care to match ASCII digits, you can use {@code inRange('0',
171   * '9')}.
172   *
173   * @since 19.0 (since 1.0 as constant {@code JAVA_DIGIT})
174   */
175  public static CharMatcher javaDigit() {
176    return JavaDigit.INSTANCE;
177  }
178
179  /**
180   * Determines whether a character is a letter according to {@linkplain Character#isLetter(char)
181   * Java's definition}. If you only care to match letters of the Latin alphabet, you can use {@code
182   * inRange('a', 'z').or(inRange('A', 'Z'))}.
183   *
184   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER})
185   */
186  public static CharMatcher javaLetter() {
187    return JavaLetter.INSTANCE;
188  }
189
190  /**
191   * Determines whether a character is a letter or digit according to
192   * {@linkplain Character#isLetterOrDigit(char) Java's definition}.
193   *
194   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER_OR_DIGIT}).
195   */
196  public static CharMatcher javaLetterOrDigit() {
197    return JavaLetterOrDigit.INSTANCE;
198  }
199
200  /**
201   * Determines whether a character is upper case according to
202   * {@linkplain Character#isUpperCase(char) Java's definition}.
203   *
204   * @since 19.0 (since 1.0 as constant {@code JAVA_UPPER_CASE})
205   */
206  public static CharMatcher javaUpperCase() {
207    return JavaUpperCase.INSTANCE;
208  }
209
210  /**
211   * Determines whether a character is lower case according to
212   * {@linkplain Character#isLowerCase(char) Java's definition}.
213   *
214   * @since 19.0 (since 1.0 as constant {@code JAVA_LOWER_CASE})
215   */
216  public static CharMatcher javaLowerCase() {
217    return JavaLowerCase.INSTANCE;
218  }
219
220  /**
221   * Determines whether a character is an ISO control character as specified by
222   * {@link Character#isISOControl(char)}.
223   *
224   * @since 19.0 (since 1.0 as constant {@code JAVA_ISO_CONTROL})
225   */
226  public static CharMatcher javaIsoControl() {
227    return JavaIsoControl.INSTANCE;
228  }
229
230  /**
231   * Determines whether a character is invisible; that is, if its Unicode category is any of
232   * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
233   * PRIVATE_USE according to ICU4J.
234   *
235   * @since 19.0 (since 1.0 as constant {@code INVISIBLE})
236   */
237  public static CharMatcher invisible() {
238    return Invisible.INSTANCE;
239  }
240
241  /**
242   * Determines whether a character is single-width (not double-width). When in doubt, this matcher
243   * errs on the side of returning {@code false} (that is, it tends to assume a character is
244   * double-width).
245   *
246   * <p><b>Note:</b> as the reference file evolves, we will modify this matcher to keep it up to
247   * date.
248   *
249   * @since 19.0 (since 1.0 as constant {@code SINGLE_WIDTH})
250   */
251  public static CharMatcher singleWidth() {
252    return SingleWidth.INSTANCE;
253  }
254
255  // Legacy constants
256
257  /**
258   * Determines whether a character is whitespace according to the latest Unicode
259   * standard, as illustrated
260   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
261   * This is not the same definition used by other Java APIs. (See a
262   * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">
263   * comparison of several definitions of "whitespace"</a>.)
264   *
265   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant
266   * to keep it up to date.
267   *
268   * @deprecated Use {@link #whitespace()} instead. This constant is scheduled to be
269   *     removed in June 2018.
270   */
271  @Deprecated
272  public static final CharMatcher WHITESPACE = whitespace();
273
274  /**
275   * Determines whether a character is a breaking whitespace (that is, a whitespace
276   * which can be interpreted as a break between words for formatting purposes). See
277   * {@link #whitespace} for a discussion of that term.
278   *
279   * @since 2.0
280   * @deprecated Use {@link #breakingWhitespace()} instead. This constant is scheduled
281   *     to be removed in June 2018.
282   */
283  @Deprecated
284  public static final CharMatcher BREAKING_WHITESPACE = breakingWhitespace();
285
286  /**
287   * Determines whether a character is ASCII, meaning that its code point is less than
288   * 128.
289   *
290   * @deprecated Use {@link #ascii()} instead. This constant is scheduled to be
291   *     removed in June 2018.
292   */
293  @Deprecated
294  public static final CharMatcher ASCII = ascii();
295
296  /**
297   * Determines whether a character is a digit according to
298   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">
299   * Unicode</a>. If you only care to match ASCII digits, you can use
300   * {@code inRange('0', '9')}.
301   *
302   * @deprecated Use {@link #digit()} instead. This constant is scheduled to be
303   *     removed in June 2018.
304   */
305  @Deprecated
306  public static final CharMatcher DIGIT = digit();
307
308  /**
309   * Determines whether a character is a digit according to
310   * {@linkplain Character#isDigit(char) Java's definition}. If you only care to match
311   * ASCII digits, you can use {@code inRange('0', '9')}.
312   *
313   * @deprecated Use {@link #javaDigit()} instead. This constant is scheduled to be
314   *     removed in June 2018.
315   */
316  @Deprecated
317  public static final CharMatcher JAVA_DIGIT = javaDigit();
318
319  /**
320   * Determines whether a character is a letter according to
321   * {@linkplain Character#isLetter(char) Java's definition}. If you only care to
322   * match letters of the Latin alphabet, you can use
323   * {@code inRange('a', 'z').or(inRange('A', 'Z'))}.
324   *
325   * @deprecated Use {@link #javaLetter()} instead. This constant is scheduled to be
326   *     removed in June 2018.
327   */
328  @Deprecated
329  public static final CharMatcher JAVA_LETTER = javaLetter();
330
331  /**
332   * Determines whether a character is a letter or digit according to
333   * {@linkplain Character#isLetterOrDigit(char) Java's definition}.
334   *
335   * @deprecated Use {@link #javaLetterOrDigit()} instead. This constant is scheduled
336   *     to be removed in June 2018.
337   */
338  @Deprecated
339  public static final CharMatcher JAVA_LETTER_OR_DIGIT = javaLetterOrDigit();
340
341  /**
342   * Determines whether a character is upper case according to
343   * {@linkplain Character#isUpperCase(char) Java's definition}.
344   *
345   * @deprecated Use {@link #javaUpperCase()} instead. This constant is scheduled to
346   *     be removed in June 2018.
347   */
348  @Deprecated
349  public static final CharMatcher JAVA_UPPER_CASE = javaUpperCase();
350
351  /**
352   * Determines whether a character is lower case according to
353   * {@linkplain Character#isLowerCase(char) Java's definition}.
354   *
355   * @deprecated Use {@link #javaLowerCase()} instead. This constant is scheduled to
356   *     be removed in June 2018.
357   */
358  @Deprecated
359  public static final CharMatcher JAVA_LOWER_CASE = javaLowerCase();
360
361  /**
362   * Determines whether a character is an ISO control character as specified by
363   * {@link Character#isISOControl(char)}.
364   *
365   * @deprecated Use {@link #javaIsoControl()} instead. This constant is scheduled to
366   *     be removed in June 2018.
367   */
368  @Deprecated
369  public static final CharMatcher JAVA_ISO_CONTROL = javaIsoControl();
370
371  /**
372   * Determines whether a character is invisible; that is, if its Unicode category is
373   * any of SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT,
374   * SURROGATE, and PRIVATE_USE according to ICU4J.
375   *
376   * @deprecated Use {@link #invisible()} instead. This constant is scheduled to be
377   *     removed in June 2018.
378   */
379  @Deprecated
380  public static final CharMatcher INVISIBLE = invisible();
381
382  /**
383   * Determines whether a character is single-width (not double-width). When in doubt,
384   * this matcher errs on the side of returning {@code false} (that is, it tends to
385   * assume a character is double-width).
386   *
387   * <p><b>Note:</b> as the reference file evolves, we will modify this constant to
388   * keep it up to date.
389   *
390   * @deprecated Use {@link #singleWidth()} instead. This constant is scheduled to be
391   *     removed in June 2018.
392   */
393  @Deprecated
394  public static final CharMatcher SINGLE_WIDTH = singleWidth();
395
396  /**
397   * Matches any character.
398   *
399   * @deprecated Use {@link #any()} instead. This constant is scheduled to be
400   *     removed in June 2018.
401   */
402  @Deprecated
403  public static final CharMatcher ANY = any();
404
405  /**
406   * Matches no characters.
407   *
408   * @deprecated Use {@link #none()} instead. This constant is scheduled to be
409   *     removed in June 2018.
410   */
411  @Deprecated
412  public static final CharMatcher NONE = none();
413
414  // Static factories
415
416  /**
417   * Returns a {@code char} matcher that matches only one specified character.
418   */
419  public static CharMatcher is(final char match) {
420    return new Is(match);
421  }
422
423  /**
424   * Returns a {@code char} matcher that matches any character except the one specified.
425   *
426   * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
427   */
428  public static CharMatcher isNot(final char match) {
429    return new IsNot(match);
430  }
431
432  /**
433   * Returns a {@code char} matcher that matches any character present in the given character
434   * sequence.
435   */
436  public static CharMatcher anyOf(final CharSequence sequence) {
437    switch (sequence.length()) {
438      case 0:
439        return none();
440      case 1:
441        return is(sequence.charAt(0));
442      case 2:
443        return isEither(sequence.charAt(0), sequence.charAt(1));
444      default:
445        // TODO(lowasser): is it potentially worth just going ahead and building a precomputed
446        // matcher?
447        return new AnyOf(sequence);
448    }
449  }
450
451  /**
452   * Returns a {@code char} matcher that matches any character not present in the given character
453   * sequence.
454   */
455  public static CharMatcher noneOf(CharSequence sequence) {
456    return anyOf(sequence).negate();
457  }
458
459  /**
460   * Returns a {@code char} matcher that matches any character in a given range (both endpoints are
461   * inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
462   * CharMatcher.inRange('a', 'z')}.
463   *
464   * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
465   */
466  public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
467    return new InRange(startInclusive, endInclusive);
468  }
469
470  /**
471   * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
472   * which operates on primitive {@code char} instances instead.
473   */
474  public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
475    return predicate instanceof CharMatcher ? (CharMatcher) predicate : new ForPredicate(predicate);
476  }
477
478  // Constructors
479
480  /**
481   * Constructor for use by subclasses. When subclassing, you may want to override
482   * {@code toString()} to provide a useful description.
483   */
484  protected CharMatcher() {}
485
486  // Abstract methods
487
488  /** Determines a true or false value for the given character. */
489  public abstract boolean matches(char c);
490
491  // Non-static factories
492
493  /**
494   * Returns a matcher that matches any character not matched by this matcher.
495   */
496  public CharMatcher negate() {
497    return new Negated(this);
498  }
499
500  /**
501   * Returns a matcher that matches any character matched by both this matcher and {@code other}.
502   */
503  public CharMatcher and(CharMatcher other) {
504    return new And(this, other);
505  }
506
507  /**
508   * Returns a matcher that matches any character matched by either this matcher or {@code other}.
509   */
510  public CharMatcher or(CharMatcher other) {
511    return new Or(this, other);
512  }
513
514  /**
515   * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
516   * query than the original; your mileage may vary. Precomputation takes time and is likely to be
517   * worthwhile only if the precomputed matcher is queried many thousands of times.
518   *
519   * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
520   * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
521   * worthwhile tradeoff in a browser.
522   */
523  public CharMatcher precomputed() {
524    return Platform.precomputeCharMatcher(this);
525  }
526
527  private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1;
528
529  /**
530   * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
531   * on {@link Platform} so that we can have different behavior in GWT.
532   *
533   * <p>This implementation tries to be smart in a number of ways. It recognizes cases where the
534   * negation is cheaper to precompute than the matcher itself; it tries to build small hash tables
535   * for matchers that only match a few characters, and so on. In the worst-case scenario, it
536   * constructs an eight-kilobyte bit array and queries that. In many situations this produces a
537   * matcher which is faster to query than the original.
538   */
539  @GwtIncompatible // SmallCharMatcher
540  CharMatcher precomputedInternal() {
541    final BitSet table = new BitSet();
542    setBits(table);
543    int totalCharacters = table.cardinality();
544    if (totalCharacters * 2 <= DISTINCT_CHARS) {
545      return precomputedPositive(totalCharacters, table, toString());
546    } else {
547      // TODO(lowasser): is it worth it to worry about the last character of large matchers?
548      table.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
549      int negatedCharacters = DISTINCT_CHARS - totalCharacters;
550      String suffix = ".negate()";
551      final String description = toString();
552      String negatedDescription =
553          description.endsWith(suffix)
554              ? description.substring(0, description.length() - suffix.length())
555              : description + suffix;
556      return new NegatedFastMatcher(
557          precomputedPositive(negatedCharacters, table, negatedDescription)) {
558        @Override
559        public String toString() {
560          return description;
561        }
562      };
563    }
564  }
565
566  /**
567   * Helper method for {@link #precomputedInternal} that doesn't test if the negation is cheaper.
568   */
569  @GwtIncompatible // SmallCharMatcher
570  private static CharMatcher precomputedPositive(
571      int totalCharacters, BitSet table, String description) {
572    switch (totalCharacters) {
573      case 0:
574        return none();
575      case 1:
576        return is((char) table.nextSetBit(0));
577      case 2:
578        char c1 = (char) table.nextSetBit(0);
579        char c2 = (char) table.nextSetBit(c1 + 1);
580        return isEither(c1, c2);
581      default:
582        return isSmall(totalCharacters, table.length())
583            ? SmallCharMatcher.from(table, description)
584            : new BitSetMatcher(table, description);
585    }
586  }
587
588  @GwtIncompatible // SmallCharMatcher
589  private static boolean isSmall(int totalCharacters, int tableLength) {
590    return totalCharacters <= SmallCharMatcher.MAX_SIZE
591        && tableLength > (totalCharacters * 4 * Character.SIZE);
592    // err on the side of BitSetMatcher
593  }
594
595  /**
596   * Sets bits in {@code table} matched by this matcher.
597   */
598  @GwtIncompatible // used only from other GwtIncompatible code
599  void setBits(BitSet table) {
600    for (int c = Character.MAX_VALUE; c >= Character.MIN_VALUE; c--) {
601      if (matches((char) c)) {
602        table.set(c);
603      }
604    }
605  }
606
607  // Text processing routines
608
609  /**
610   * Returns {@code true} if a character sequence contains at least one matching character.
611   * Equivalent to {@code !matchesNoneOf(sequence)}.
612   *
613   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
614   * character, until this returns {@code true} or the end is reached.
615   *
616   * @param sequence the character sequence to examine, possibly empty
617   * @return {@code true} if this matcher matches at least one character in the sequence
618   * @since 8.0
619   */
620  public boolean matchesAnyOf(CharSequence sequence) {
621    return !matchesNoneOf(sequence);
622  }
623
624  /**
625   * Returns {@code true} if a character sequence contains only matching characters.
626   *
627   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
628   * character, until this returns {@code false} or the end is reached.
629   *
630   * @param sequence the character sequence to examine, possibly empty
631   * @return {@code true} if this matcher matches every character in the sequence, including when
632   *     the sequence is empty
633   */
634  public boolean matchesAllOf(CharSequence sequence) {
635    for (int i = sequence.length() - 1; i >= 0; i--) {
636      if (!matches(sequence.charAt(i))) {
637        return false;
638      }
639    }
640    return true;
641  }
642
643  /**
644   * Returns {@code true} if a character sequence contains no matching characters. Equivalent to
645   * {@code !matchesAnyOf(sequence)}.
646   *
647   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
648   * character, until this returns {@code true} or the end is reached.
649   *
650   * @param sequence the character sequence to examine, possibly empty
651   * @return {@code true} if this matcher matches no characters in the sequence, including when
652   *     the sequence is empty
653   */
654  public boolean matchesNoneOf(CharSequence sequence) {
655    return indexIn(sequence) == -1;
656  }
657
658  /**
659   * Returns the index of the first matching character in a character sequence, or {@code -1} if no
660   * matching character is present.
661   *
662   * <p>The default implementation iterates over the sequence in forward order calling
663   * {@link #matches} for each character.
664   *
665   * @param sequence the character sequence to examine from the beginning
666   * @return an index, or {@code -1} if no character matches
667   */
668  public int indexIn(CharSequence sequence) {
669    return indexIn(sequence, 0);
670  }
671
672  /**
673   * Returns the index of the first matching character in a character sequence, starting from a
674   * given position, or {@code -1} if no character matches after that position.
675   *
676   * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
677   * start}, calling {@link #matches} for each character.
678   *
679   * @param sequence the character sequence to examine
680   * @param start the first index to examine; must be nonnegative and no greater than {@code
681   *        sequence.length()}
682   * @return the index of the first matching character, guaranteed to be no less than {@code start},
683   *     or {@code -1} if no character matches
684   * @throws IndexOutOfBoundsException if start is negative or greater than {@code
685   *         sequence.length()}
686   */
687  public int indexIn(CharSequence sequence, int start) {
688    int length = sequence.length();
689    checkPositionIndex(start, length);
690    for (int i = start; i < length; i++) {
691      if (matches(sequence.charAt(i))) {
692        return i;
693      }
694    }
695    return -1;
696  }
697
698  /**
699   * Returns the index of the last matching character in a character sequence, or {@code -1} if no
700   * matching character is present.
701   *
702   * <p>The default implementation iterates over the sequence in reverse order calling
703   * {@link #matches} for each character.
704   *
705   * @param sequence the character sequence to examine from the end
706   * @return an index, or {@code -1} if no character matches
707   */
708  public int lastIndexIn(CharSequence sequence) {
709    for (int i = sequence.length() - 1; i >= 0; i--) {
710      if (matches(sequence.charAt(i))) {
711        return i;
712      }
713    }
714    return -1;
715  }
716
717  /**
718   * Returns the number of matching characters found in a character sequence.
719   */
720  public int countIn(CharSequence sequence) {
721    int count = 0;
722    for (int i = 0; i < sequence.length(); i++) {
723      if (matches(sequence.charAt(i))) {
724        count++;
725      }
726    }
727    return count;
728  }
729
730  /**
731   * Returns a string containing all non-matching characters of a character sequence, in order. For
732   * example: <pre>   {@code
733   *
734   *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
735   *
736   * ... returns {@code "bzr"}.
737   */
738  public String removeFrom(CharSequence sequence) {
739    String string = sequence.toString();
740    int pos = indexIn(string);
741    if (pos == -1) {
742      return string;
743    }
744
745    char[] chars = string.toCharArray();
746    int spread = 1;
747
748    // This unusual loop comes from extensive benchmarking
749    OUT:
750    while (true) {
751      pos++;
752      while (true) {
753        if (pos == chars.length) {
754          break OUT;
755        }
756        if (matches(chars[pos])) {
757          break;
758        }
759        chars[pos - spread] = chars[pos];
760        pos++;
761      }
762      spread++;
763    }
764    return new String(chars, 0, pos - spread);
765  }
766
767  /**
768   * Returns a string containing all matching characters of a character sequence, in order. For
769   * example: <pre>   {@code
770   *
771   *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
772   *
773   * ... returns {@code "aaa"}.
774   */
775  public String retainFrom(CharSequence sequence) {
776    return negate().removeFrom(sequence);
777  }
778
779  /**
780   * Returns a string copy of the input character sequence, with each character that matches this
781   * matcher replaced by a given replacement character. For example: <pre>   {@code
782   *
783   *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
784   *
785   * ... returns {@code "rodor"}.
786   *
787   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
788   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
789   * character.
790   *
791   * @param sequence the character sequence to replace matching characters in
792   * @param replacement the character to append to the result string in place of each matching
793   *     character in {@code sequence}
794   * @return the new string
795   */
796  public String replaceFrom(CharSequence sequence, char replacement) {
797    String string = sequence.toString();
798    int pos = indexIn(string);
799    if (pos == -1) {
800      return string;
801    }
802    char[] chars = string.toCharArray();
803    chars[pos] = replacement;
804    for (int i = pos + 1; i < chars.length; i++) {
805      if (matches(chars[i])) {
806        chars[i] = replacement;
807      }
808    }
809    return new String(chars);
810  }
811
812  /**
813   * Returns a string copy of the input character sequence, with each character that matches this
814   * matcher replaced by a given replacement sequence. For example: <pre>   {@code
815   *
816   *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
817   *
818   * ... returns {@code "yoohoo"}.
819   *
820   * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
821   * off calling {@link #replaceFrom(CharSequence, char)} directly.
822   *
823   * @param sequence the character sequence to replace matching characters in
824   * @param replacement the characters to append to the result string in place of each matching
825   *     character in {@code sequence}
826   * @return the new string
827   */
828  public String replaceFrom(CharSequence sequence, CharSequence replacement) {
829    int replacementLen = replacement.length();
830    if (replacementLen == 0) {
831      return removeFrom(sequence);
832    }
833    if (replacementLen == 1) {
834      return replaceFrom(sequence, replacement.charAt(0));
835    }
836
837    String string = sequence.toString();
838    int pos = indexIn(string);
839    if (pos == -1) {
840      return string;
841    }
842
843    int len = string.length();
844    StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
845
846    int oldpos = 0;
847    do {
848      buf.append(string, oldpos, pos);
849      buf.append(replacement);
850      oldpos = pos + 1;
851      pos = indexIn(string, oldpos);
852    } while (pos != -1);
853
854    buf.append(string, oldpos, len);
855    return buf.toString();
856  }
857
858  /**
859   * Returns a substring of the input character sequence that omits all characters this matcher
860   * matches from the beginning and from the end of the string. For example: <pre>   {@code
861   *
862   *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
863   *
864   * ... returns {@code "cat"}.
865   *
866   * <p>Note that: <pre>   {@code
867   *
868   *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
869   *
870   * ... is equivalent to {@link String#trim()}.
871   */
872  public String trimFrom(CharSequence sequence) {
873    int len = sequence.length();
874    int first;
875    int last;
876
877    for (first = 0; first < len; first++) {
878      if (!matches(sequence.charAt(first))) {
879        break;
880      }
881    }
882    for (last = len - 1; last > first; last--) {
883      if (!matches(sequence.charAt(last))) {
884        break;
885      }
886    }
887
888    return sequence.subSequence(first, last + 1).toString();
889  }
890
891  /**
892   * Returns a substring of the input character sequence that omits all characters this matcher
893   * matches from the beginning of the string. For example: <pre> {@code
894   *
895   *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
896   *
897   * ... returns {@code "catbab"}.
898   */
899  public String trimLeadingFrom(CharSequence sequence) {
900    int len = sequence.length();
901    for (int first = 0; first < len; first++) {
902      if (!matches(sequence.charAt(first))) {
903        return sequence.subSequence(first, len).toString();
904      }
905    }
906    return "";
907  }
908
909  /**
910   * Returns a substring of the input character sequence that omits all characters this matcher
911   * matches from the end of the string. For example: <pre> {@code
912   *
913   *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
914   *
915   * ... returns {@code "abacat"}.
916   */
917  public String trimTrailingFrom(CharSequence sequence) {
918    int len = sequence.length();
919    for (int last = len - 1; last >= 0; last--) {
920      if (!matches(sequence.charAt(last))) {
921        return sequence.subSequence(0, last + 1).toString();
922      }
923    }
924    return "";
925  }
926
927  /**
928   * Returns a string copy of the input character sequence, with each group of consecutive
929   * characters that match this matcher replaced by a single replacement character. For example:
930   * <pre>   {@code
931   *
932   *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
933   *
934   * ... returns {@code "b-p-r"}.
935   *
936   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
937   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
938   * character.
939   *
940   * @param sequence the character sequence to replace matching groups of characters in
941   * @param replacement the character to append to the result string in place of each group of
942   *     matching characters in {@code sequence}
943   * @return the new string
944   */
945  public String collapseFrom(CharSequence sequence, char replacement) {
946    // This implementation avoids unnecessary allocation.
947    int len = sequence.length();
948    for (int i = 0; i < len; i++) {
949      char c = sequence.charAt(i);
950      if (matches(c)) {
951        if (c == replacement && (i == len - 1 || !matches(sequence.charAt(i + 1)))) {
952          // a no-op replacement
953          i++;
954        } else {
955          StringBuilder builder = new StringBuilder(len).append(sequence, 0, i).append(replacement);
956          return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true);
957        }
958      }
959    }
960    // no replacement needed
961    return sequence.toString();
962  }
963
964  /**
965   * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
966   * groups of matching characters at the start or end of the sequence are removed without
967   * replacement.
968   */
969  public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
970    // This implementation avoids unnecessary allocation.
971    int len = sequence.length();
972    int first = 0;
973    int last = len - 1;
974
975    while (first < len && matches(sequence.charAt(first))) {
976      first++;
977    }
978
979    while (last > first && matches(sequence.charAt(last))) {
980      last--;
981    }
982
983    return (first == 0 && last == len - 1)
984        ? collapseFrom(sequence, replacement)
985        : finishCollapseFrom(
986            sequence, first, last + 1, replacement, new StringBuilder(last + 1 - first), false);
987  }
988
989  private String finishCollapseFrom(
990      CharSequence sequence,
991      int start,
992      int end,
993      char replacement,
994      StringBuilder builder,
995      boolean inMatchingGroup) {
996    for (int i = start; i < end; i++) {
997      char c = sequence.charAt(i);
998      if (matches(c)) {
999        if (!inMatchingGroup) {
1000          builder.append(replacement);
1001          inMatchingGroup = true;
1002        }
1003      } else {
1004        builder.append(c);
1005        inMatchingGroup = false;
1006      }
1007    }
1008    return builder.toString();
1009  }
1010
1011  /**
1012   * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #matches}
1013   *     instead.
1014   */
1015  @Deprecated
1016  @Override
1017  public boolean apply(Character character) {
1018    return matches(character);
1019  }
1020
1021  /**
1022   * Returns a string representation of this {@code CharMatcher}, such as
1023   * {@code CharMatcher.or(WHITESPACE, JAVA_DIGIT)}.
1024   */
1025  @Override
1026  public String toString() {
1027    return super.toString();
1028  }
1029
1030  /**
1031   * Returns the Java Unicode escape sequence for the given character, in the form "\u12AB" where
1032   * "12AB" is the four hexadecimal digits representing the 16 bits of the UTF-16 character.
1033   */
1034  private static String showCharacter(char c) {
1035    String hex = "0123456789ABCDEF";
1036    char[] tmp = {'\\', 'u', '\0', '\0', '\0', '\0'};
1037    for (int i = 0; i < 4; i++) {
1038      tmp[5 - i] = hex.charAt(c & 0xF);
1039      c = (char) (c >> 4);
1040    }
1041    return String.copyValueOf(tmp);
1042  }
1043
1044  // Fast matchers
1045
1046  /** A matcher for which precomputation will not yield any significant benefit. */
1047  abstract static class FastMatcher extends CharMatcher {
1048
1049    @Override
1050    public final CharMatcher precomputed() {
1051      return this;
1052    }
1053
1054    @Override
1055    public CharMatcher negate() {
1056      return new NegatedFastMatcher(this);
1057    }
1058  }
1059
1060  /** {@link FastMatcher} which overrides {@code toString()} with a custom name. */
1061  abstract static class NamedFastMatcher extends FastMatcher {
1062
1063    private final String description;
1064
1065    NamedFastMatcher(String description) {
1066      this.description = checkNotNull(description);
1067    }
1068
1069    @Override
1070    public final String toString() {
1071      return description;
1072    }
1073  }
1074
1075  /** Negation of a {@link FastMatcher}. */
1076  static class NegatedFastMatcher extends Negated {
1077
1078    NegatedFastMatcher(CharMatcher original) {
1079      super(original);
1080    }
1081
1082    @Override
1083    public final CharMatcher precomputed() {
1084      return this;
1085    }
1086  }
1087
1088  /** Fast matcher using a {@link BitSet} table of matching characters. */
1089  @GwtIncompatible // used only from other GwtIncompatible code
1090  private static final class BitSetMatcher extends NamedFastMatcher {
1091
1092    private final BitSet table;
1093
1094    private BitSetMatcher(BitSet table, String description) {
1095      super(description);
1096      if (table.length() + Long.SIZE < table.size()) {
1097        table = (BitSet) table.clone();
1098        // If only we could actually call BitSet.trimToSize() ourselves...
1099      }
1100      this.table = table;
1101    }
1102
1103    @Override
1104    public boolean matches(char c) {
1105      return table.get(c);
1106    }
1107
1108    @Override
1109    void setBits(BitSet bitSet) {
1110      bitSet.or(table);
1111    }
1112  }
1113
1114  // Static constant implementation classes
1115
1116  /** Implementation of {@link #any()}. */
1117  private static final class Any extends NamedFastMatcher {
1118
1119    static final Any INSTANCE = new Any();
1120
1121    private Any() {
1122      super("CharMatcher.any()");
1123    }
1124
1125    @Override
1126    public boolean matches(char c) {
1127      return true;
1128    }
1129
1130    @Override
1131    public int indexIn(CharSequence sequence) {
1132      return (sequence.length() == 0) ? -1 : 0;
1133    }
1134
1135    @Override
1136    public int indexIn(CharSequence sequence, int start) {
1137      int length = sequence.length();
1138      checkPositionIndex(start, length);
1139      return (start == length) ? -1 : start;
1140    }
1141
1142    @Override
1143    public int lastIndexIn(CharSequence sequence) {
1144      return sequence.length() - 1;
1145    }
1146
1147    @Override
1148    public boolean matchesAllOf(CharSequence sequence) {
1149      checkNotNull(sequence);
1150      return true;
1151    }
1152
1153    @Override
1154    public boolean matchesNoneOf(CharSequence sequence) {
1155      return sequence.length() == 0;
1156    }
1157
1158    @Override
1159    public String removeFrom(CharSequence sequence) {
1160      checkNotNull(sequence);
1161      return "";
1162    }
1163
1164    @Override
1165    public String replaceFrom(CharSequence sequence, char replacement) {
1166      char[] array = new char[sequence.length()];
1167      Arrays.fill(array, replacement);
1168      return new String(array);
1169    }
1170
1171    @Override
1172    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1173      StringBuilder result = new StringBuilder(sequence.length() * replacement.length());
1174      for (int i = 0; i < sequence.length(); i++) {
1175        result.append(replacement);
1176      }
1177      return result.toString();
1178    }
1179
1180    @Override
1181    public String collapseFrom(CharSequence sequence, char replacement) {
1182      return (sequence.length() == 0) ? "" : String.valueOf(replacement);
1183    }
1184
1185    @Override
1186    public String trimFrom(CharSequence sequence) {
1187      checkNotNull(sequence);
1188      return "";
1189    }
1190
1191    @Override
1192    public int countIn(CharSequence sequence) {
1193      return sequence.length();
1194    }
1195
1196    @Override
1197    public CharMatcher and(CharMatcher other) {
1198      return checkNotNull(other);
1199    }
1200
1201    @Override
1202    public CharMatcher or(CharMatcher other) {
1203      checkNotNull(other);
1204      return this;
1205    }
1206
1207    @Override
1208    public CharMatcher negate() {
1209      return none();
1210    }
1211  }
1212
1213  /** Implementation of {@link #none()}. */
1214  private static final class None extends NamedFastMatcher {
1215
1216    static final None INSTANCE = new None();
1217
1218    private None() {
1219      super("CharMatcher.none()");
1220    }
1221
1222    @Override
1223    public boolean matches(char c) {
1224      return false;
1225    }
1226
1227    @Override
1228    public int indexIn(CharSequence sequence) {
1229      checkNotNull(sequence);
1230      return -1;
1231    }
1232
1233    @Override
1234    public int indexIn(CharSequence sequence, int start) {
1235      int length = sequence.length();
1236      checkPositionIndex(start, length);
1237      return -1;
1238    }
1239
1240    @Override
1241    public int lastIndexIn(CharSequence sequence) {
1242      checkNotNull(sequence);
1243      return -1;
1244    }
1245
1246    @Override
1247    public boolean matchesAllOf(CharSequence sequence) {
1248      return sequence.length() == 0;
1249    }
1250
1251    @Override
1252    public boolean matchesNoneOf(CharSequence sequence) {
1253      checkNotNull(sequence);
1254      return true;
1255    }
1256
1257    @Override
1258    public String removeFrom(CharSequence sequence) {
1259      return sequence.toString();
1260    }
1261
1262    @Override
1263    public String replaceFrom(CharSequence sequence, char replacement) {
1264      return sequence.toString();
1265    }
1266
1267    @Override
1268    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1269      checkNotNull(replacement);
1270      return sequence.toString();
1271    }
1272
1273    @Override
1274    public String collapseFrom(CharSequence sequence, char replacement) {
1275      return sequence.toString();
1276    }
1277
1278    @Override
1279    public String trimFrom(CharSequence sequence) {
1280      return sequence.toString();
1281    }
1282
1283    @Override
1284    public String trimLeadingFrom(CharSequence sequence) {
1285      return sequence.toString();
1286    }
1287
1288    @Override
1289    public String trimTrailingFrom(CharSequence sequence) {
1290      return sequence.toString();
1291    }
1292
1293    @Override
1294    public int countIn(CharSequence sequence) {
1295      checkNotNull(sequence);
1296      return 0;
1297    }
1298
1299    @Override
1300    public CharMatcher and(CharMatcher other) {
1301      checkNotNull(other);
1302      return this;
1303    }
1304
1305    @Override
1306    public CharMatcher or(CharMatcher other) {
1307      return checkNotNull(other);
1308    }
1309
1310    @Override
1311    public CharMatcher negate() {
1312      return any();
1313    }
1314  }
1315
1316  /** Implementation of {@link #whitespace()}. */
1317  @VisibleForTesting
1318  static final class Whitespace extends NamedFastMatcher {
1319
1320    static final String TABLE =
1321        "\u2002\u3000\r\u0085\u200A\u2005\u2000\u3000"
1322            + "\u2029\u000B\u3000\u2008\u2003\u205F\u3000\u1680"
1323            + "\u0009\u0020\u2006\u2001\u202F\u00A0\u000C\u2009"
1324            + "\u3000\u2004\u3000\u3000\u2028\n\u2007\u3000";
1325    static final int MULTIPLIER = 1682554634;
1326    static final int SHIFT = Integer.numberOfLeadingZeros(TABLE.length() - 1);
1327
1328    static final Whitespace INSTANCE = new Whitespace();
1329
1330    Whitespace() {
1331      super("CharMatcher.whitespace()");
1332    }
1333
1334    @Override
1335    public boolean matches(char c) {
1336      return TABLE.charAt((MULTIPLIER * c) >>> SHIFT) == c;
1337    }
1338
1339    @GwtIncompatible // used only from other GwtIncompatible code
1340    @Override
1341    void setBits(BitSet table) {
1342      for (int i = 0; i < TABLE.length(); i++) {
1343        table.set(TABLE.charAt(i));
1344      }
1345    }
1346  }
1347
1348  /** Implementation of {@link #breakingWhitespace()}. */
1349  private static final class BreakingWhitespace extends CharMatcher {
1350
1351    static final CharMatcher INSTANCE = new BreakingWhitespace();
1352
1353    @Override
1354    public boolean matches(char c) {
1355      switch (c) {
1356        case '\t':
1357        case '\n':
1358        case '\013':
1359        case '\f':
1360        case '\r':
1361        case ' ':
1362        case '\u0085':
1363        case '\u1680':
1364        case '\u2028':
1365        case '\u2029':
1366        case '\u205f':
1367        case '\u3000':
1368          return true;
1369        case '\u2007':
1370          return false;
1371        default:
1372          return c >= '\u2000' && c <= '\u200a';
1373      }
1374    }
1375
1376    @Override
1377    public String toString() {
1378      return "CharMatcher.breakingWhitespace()";
1379    }
1380  }
1381
1382  /** Implementation of {@link #ascii()}. */
1383  private static final class Ascii extends NamedFastMatcher {
1384
1385    static final Ascii INSTANCE = new Ascii();
1386
1387    Ascii() {
1388      super("CharMatcher.ascii()");
1389    }
1390
1391    @Override
1392    public boolean matches(char c) {
1393      return c <= '\u007f';
1394    }
1395  }
1396
1397  /** Implementation that matches characters that fall within multiple ranges. */
1398  private static class RangesMatcher extends CharMatcher {
1399
1400    private final String description;
1401    private final char[] rangeStarts;
1402    private final char[] rangeEnds;
1403
1404    RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) {
1405      this.description = description;
1406      this.rangeStarts = rangeStarts;
1407      this.rangeEnds = rangeEnds;
1408      checkArgument(rangeStarts.length == rangeEnds.length);
1409      for (int i = 0; i < rangeStarts.length; i++) {
1410        checkArgument(rangeStarts[i] <= rangeEnds[i]);
1411        if (i + 1 < rangeStarts.length) {
1412          checkArgument(rangeEnds[i] < rangeStarts[i + 1]);
1413        }
1414      }
1415    }
1416
1417    @Override
1418    public boolean matches(char c) {
1419      int index = Arrays.binarySearch(rangeStarts, c);
1420      if (index >= 0) {
1421        return true;
1422      } else {
1423        index = ~index - 1;
1424        return index >= 0 && c <= rangeEnds[index];
1425      }
1426    }
1427
1428    @Override
1429    public String toString() {
1430      return description;
1431    }
1432  }
1433
1434  /** Implementation of {@link #digit()}. */
1435  private static final class Digit extends RangesMatcher {
1436
1437    // Must be in ascending order.
1438    private static final String ZEROES =
1439        "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66"
1440            + "\u0be6\u0c66\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810"
1441            + "\u1946\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
1442
1443    private static char[] zeroes() {
1444      return ZEROES.toCharArray();
1445    }
1446
1447    private static char[] nines() {
1448      char[] nines = new char[ZEROES.length()];
1449      for (int i = 0; i < ZEROES.length(); i++) {
1450        nines[i] = (char) (ZEROES.charAt(i) + 9);
1451      }
1452      return nines;
1453    }
1454
1455    static final Digit INSTANCE = new Digit();
1456
1457    private Digit() {
1458      super("CharMatcher.digit()", zeroes(), nines());
1459    }
1460  }
1461
1462  /** Implementation of {@link #javaDigit()}. */
1463  private static final class JavaDigit extends CharMatcher {
1464
1465    static final JavaDigit INSTANCE = new JavaDigit();
1466
1467    @Override
1468    public boolean matches(char c) {
1469      return Character.isDigit(c);
1470    }
1471
1472    @Override
1473    public String toString() {
1474      return "CharMatcher.javaDigit()";
1475    }
1476  }
1477
1478  /** Implementation of {@link #javaLetter()}. */
1479  private static final class JavaLetter extends CharMatcher {
1480
1481    static final JavaLetter INSTANCE = new JavaLetter();
1482
1483    @Override
1484    public boolean matches(char c) {
1485      return Character.isLetter(c);
1486    }
1487
1488    @Override
1489    public String toString() {
1490      return "CharMatcher.javaLetter()";
1491    }
1492  }
1493
1494  /** Implementation of {@link #javaLetterOrDigit()}. */
1495  private static final class JavaLetterOrDigit extends CharMatcher {
1496
1497    static final JavaLetterOrDigit INSTANCE = new JavaLetterOrDigit();
1498
1499    @Override
1500    public boolean matches(char c) {
1501      return Character.isLetterOrDigit(c);
1502    }
1503
1504    @Override
1505    public String toString() {
1506      return "CharMatcher.javaLetterOrDigit()";
1507    }
1508  }
1509
1510  /** Implementation of {@link #javaUpperCase()}. */
1511  private static final class JavaUpperCase extends CharMatcher {
1512
1513    static final JavaUpperCase INSTANCE = new JavaUpperCase();
1514
1515    @Override
1516    public boolean matches(char c) {
1517      return Character.isUpperCase(c);
1518    }
1519
1520    @Override
1521    public String toString() {
1522      return "CharMatcher.javaUpperCase()";
1523    }
1524  }
1525
1526  /** Implementation of {@link #javaLowerCase()}. */
1527  private static final class JavaLowerCase extends CharMatcher {
1528
1529    static final JavaLowerCase INSTANCE = new JavaLowerCase();
1530
1531    @Override
1532    public boolean matches(char c) {
1533      return Character.isLowerCase(c);
1534    }
1535
1536    @Override
1537    public String toString() {
1538      return "CharMatcher.javaLowerCase()";
1539    }
1540  }
1541
1542  /** Implementation of {@link #javaIsoControl()}. */
1543  private static final class JavaIsoControl extends NamedFastMatcher {
1544
1545    static final JavaIsoControl INSTANCE = new JavaIsoControl();
1546
1547    private JavaIsoControl() {
1548      super("CharMatcher.javaIsoControl()");
1549    }
1550
1551    @Override
1552    public boolean matches(char c) {
1553      return c <= '\u001f' || (c >= '\u007f' && c <= '\u009f');
1554    }
1555  }
1556
1557  /** Implementation of {@link #invisible()}. */
1558  private static final class Invisible extends RangesMatcher {
1559
1560    private static final String RANGE_STARTS =
1561        "\u0000\u007f\u00ad\u0600\u061c\u06dd\u070f\u1680\u180e\u2000\u2028\u205f\u2066\u2067"
1562            + "\u2068\u2069\u206a\u3000\ud800\ufeff\ufff9\ufffa";
1563    private static final String RANGE_ENDS =
1564        "\u0020\u00a0\u00ad\u0604\u061c\u06dd\u070f\u1680\u180e\u200f\u202f\u2064\u2066\u2067"
1565            + "\u2068\u2069\u206f\u3000\uf8ff\ufeff\ufff9\ufffb";
1566
1567    static final Invisible INSTANCE = new Invisible();
1568
1569    private Invisible() {
1570      super("CharMatcher.invisible()", RANGE_STARTS.toCharArray(), RANGE_ENDS.toCharArray());
1571    }
1572  }
1573
1574  /** Implementation of {@link #singleWidth()}. */
1575  private static final class SingleWidth extends RangesMatcher {
1576
1577    static final SingleWidth INSTANCE = new SingleWidth();
1578
1579    private SingleWidth() {
1580      super(
1581          "CharMatcher.singleWidth()",
1582          "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(),
1583          "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray());
1584    }
1585  }
1586
1587  // Non-static factory implementation classes
1588
1589  /** Implementation of {@link #negate()}. */
1590  private static class Negated extends CharMatcher {
1591
1592    final CharMatcher original;
1593
1594    Negated(CharMatcher original) {
1595      this.original = checkNotNull(original);
1596    }
1597
1598    @Override
1599    public boolean matches(char c) {
1600      return !original.matches(c);
1601    }
1602
1603    @Override
1604    public boolean matchesAllOf(CharSequence sequence) {
1605      return original.matchesNoneOf(sequence);
1606    }
1607
1608    @Override
1609    public boolean matchesNoneOf(CharSequence sequence) {
1610      return original.matchesAllOf(sequence);
1611    }
1612
1613    @Override
1614    public int countIn(CharSequence sequence) {
1615      return sequence.length() - original.countIn(sequence);
1616    }
1617
1618    @GwtIncompatible // used only from other GwtIncompatible code
1619    @Override
1620    void setBits(BitSet table) {
1621      BitSet tmp = new BitSet();
1622      original.setBits(tmp);
1623      tmp.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
1624      table.or(tmp);
1625    }
1626
1627    @Override
1628    public CharMatcher negate() {
1629      return original;
1630    }
1631
1632    @Override
1633    public String toString() {
1634      return original + ".negate()";
1635    }
1636  }
1637
1638  /** Implementation of {@link #and(CharMatcher)}. */
1639  private static final class And extends CharMatcher {
1640
1641    final CharMatcher first;
1642    final CharMatcher second;
1643
1644    And(CharMatcher a, CharMatcher b) {
1645      first = checkNotNull(a);
1646      second = checkNotNull(b);
1647    }
1648
1649    @Override
1650    public boolean matches(char c) {
1651      return first.matches(c) && second.matches(c);
1652    }
1653
1654    @GwtIncompatible // used only from other GwtIncompatible code
1655    @Override
1656    void setBits(BitSet table) {
1657      BitSet tmp1 = new BitSet();
1658      first.setBits(tmp1);
1659      BitSet tmp2 = new BitSet();
1660      second.setBits(tmp2);
1661      tmp1.and(tmp2);
1662      table.or(tmp1);
1663    }
1664
1665    @Override
1666    public String toString() {
1667      return "CharMatcher.and(" + first + ", " + second + ")";
1668    }
1669  }
1670
1671  /** Implementation of {@link #or(CharMatcher)}. */
1672  private static final class Or extends CharMatcher {
1673
1674    final CharMatcher first;
1675    final CharMatcher second;
1676
1677    Or(CharMatcher a, CharMatcher b) {
1678      first = checkNotNull(a);
1679      second = checkNotNull(b);
1680    }
1681
1682    @GwtIncompatible // used only from other GwtIncompatible code
1683    @Override
1684    void setBits(BitSet table) {
1685      first.setBits(table);
1686      second.setBits(table);
1687    }
1688
1689    @Override
1690    public boolean matches(char c) {
1691      return first.matches(c) || second.matches(c);
1692    }
1693
1694    @Override
1695    public String toString() {
1696      return "CharMatcher.or(" + first + ", " + second + ")";
1697    }
1698  }
1699
1700  // Static factory implementations
1701
1702  /** Implementation of {@link #is(char)}. */
1703  private static final class Is extends FastMatcher {
1704
1705    private final char match;
1706
1707    Is(char match) {
1708      this.match = match;
1709    }
1710
1711    @Override
1712    public boolean matches(char c) {
1713      return c == match;
1714    }
1715
1716    @Override
1717    public String replaceFrom(CharSequence sequence, char replacement) {
1718      return sequence.toString().replace(match, replacement);
1719    }
1720
1721    @Override
1722    public CharMatcher and(CharMatcher other) {
1723      return other.matches(match) ? this : none();
1724    }
1725
1726    @Override
1727    public CharMatcher or(CharMatcher other) {
1728      return other.matches(match) ? other : super.or(other);
1729    }
1730
1731    @Override
1732    public CharMatcher negate() {
1733      return isNot(match);
1734    }
1735
1736    @GwtIncompatible // used only from other GwtIncompatible code
1737    @Override
1738    void setBits(BitSet table) {
1739      table.set(match);
1740    }
1741
1742    @Override
1743    public String toString() {
1744      return "CharMatcher.is('" + showCharacter(match) + "')";
1745    }
1746  }
1747
1748  /** Implementation of {@link #isNot(char)}. */
1749  private static final class IsNot extends FastMatcher {
1750
1751    private final char match;
1752
1753    IsNot(char match) {
1754      this.match = match;
1755    }
1756
1757    @Override
1758    public boolean matches(char c) {
1759      return c != match;
1760    }
1761
1762    @Override
1763    public CharMatcher and(CharMatcher other) {
1764      return other.matches(match) ? super.and(other) : other;
1765    }
1766
1767    @Override
1768    public CharMatcher or(CharMatcher other) {
1769      return other.matches(match) ? any() : this;
1770    }
1771
1772    @GwtIncompatible // used only from other GwtIncompatible code
1773    @Override
1774    void setBits(BitSet table) {
1775      table.set(0, match);
1776      table.set(match + 1, Character.MAX_VALUE + 1);
1777    }
1778
1779    @Override
1780    public CharMatcher negate() {
1781      return is(match);
1782    }
1783
1784    @Override
1785    public String toString() {
1786      return "CharMatcher.isNot('" + showCharacter(match) + "')";
1787    }
1788  }
1789
1790  private static CharMatcher.IsEither isEither(char c1, char c2) {
1791    return new CharMatcher.IsEither(c1, c2);
1792  }
1793
1794  /** Implementation of {@link #anyOf(CharSequence)} for exactly two characters. */
1795  private static final class IsEither extends FastMatcher {
1796
1797    private final char match1;
1798    private final char match2;
1799
1800    IsEither(char match1, char match2) {
1801      this.match1 = match1;
1802      this.match2 = match2;
1803    }
1804
1805    @Override
1806    public boolean matches(char c) {
1807      return c == match1 || c == match2;
1808    }
1809
1810    @GwtIncompatible // used only from other GwtIncompatible code
1811    @Override
1812    void setBits(BitSet table) {
1813      table.set(match1);
1814      table.set(match2);
1815    }
1816
1817    @Override
1818    public String toString() {
1819      return "CharMatcher.anyOf(\"" + showCharacter(match1) + showCharacter(match2) + "\")";
1820    }
1821  }
1822
1823  /** Implementation of {@link #anyOf(CharSequence)} for three or more characters. */
1824  private static final class AnyOf extends CharMatcher {
1825
1826    private final char[] chars;
1827
1828    public AnyOf(CharSequence chars) {
1829      this.chars = chars.toString().toCharArray();
1830      Arrays.sort(this.chars);
1831    }
1832
1833    @Override
1834    public boolean matches(char c) {
1835      return Arrays.binarySearch(chars, c) >= 0;
1836    }
1837
1838    @Override
1839    @GwtIncompatible // used only from other GwtIncompatible code
1840    void setBits(BitSet table) {
1841      for (char c : chars) {
1842        table.set(c);
1843      }
1844    }
1845
1846    @Override
1847    public String toString() {
1848      StringBuilder description = new StringBuilder("CharMatcher.anyOf(\"");
1849      for (char c : chars) {
1850        description.append(showCharacter(c));
1851      }
1852      description.append("\")");
1853      return description.toString();
1854    }
1855  }
1856
1857  /** Implementation of {@link #inRange(char, char)}. */
1858  private static final class InRange extends FastMatcher {
1859
1860    private final char startInclusive;
1861    private final char endInclusive;
1862
1863    InRange(char startInclusive, char endInclusive) {
1864      checkArgument(endInclusive >= startInclusive);
1865      this.startInclusive = startInclusive;
1866      this.endInclusive = endInclusive;
1867    }
1868
1869    @Override
1870    public boolean matches(char c) {
1871      return startInclusive <= c && c <= endInclusive;
1872    }
1873
1874    @GwtIncompatible // used only from other GwtIncompatible code
1875    @Override
1876    void setBits(BitSet table) {
1877      table.set(startInclusive, endInclusive + 1);
1878    }
1879
1880    @Override
1881    public String toString() {
1882      return "CharMatcher.inRange('"
1883          + showCharacter(startInclusive)
1884          + "', '"
1885          + showCharacter(endInclusive)
1886          + "')";
1887    }
1888  }
1889
1890  /** Implementation of {@link #forPredicate(Predicate)}. */
1891  private static final class ForPredicate extends CharMatcher {
1892
1893    private final Predicate<? super Character> predicate;
1894
1895    ForPredicate(Predicate<? super Character> predicate) {
1896      this.predicate = checkNotNull(predicate);
1897    }
1898
1899    @Override
1900    public boolean matches(char c) {
1901      return predicate.apply(c);
1902    }
1903
1904    @SuppressWarnings("deprecation") // intentional; deprecation is for callers primarily
1905    @Override
1906    public boolean apply(Character character) {
1907      return predicate.apply(checkNotNull(character));
1908    }
1909
1910    @Override
1911    public String toString() {
1912      return "CharMatcher.forPredicate(" + predicate + ")";
1913    }
1914  }
1915}