001/*
002 * Copyright (C) 2008 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package com.google.common.base;
018
019import static com.google.common.base.Preconditions.checkArgument;
020import static com.google.common.base.Preconditions.checkNotNull;
021import static com.google.common.base.Preconditions.checkPositionIndex;
022
023import com.google.common.annotations.Beta;
024import com.google.common.annotations.GwtCompatible;
025import com.google.common.annotations.GwtIncompatible;
026import com.google.common.annotations.VisibleForTesting;
027
028import java.util.Arrays;
029import java.util.BitSet;
030
031import javax.annotation.CheckReturnValue;
032
033/**
034 * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
035 * for any {@link Object}. Also offers basic text processing methods based on this function.
036 * Implementations are strongly encouraged to be side-effect-free and immutable.
037 *
038 * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
039 * "any character {@code c} for which {@code this.matches(c)} returns {@code true}".
040 *
041 * <p><b>Note:</b> This class deals only with {@code char} values; it does not understand
042 * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical
043 * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher}
044 * treats these just as two separate characters.
045 *
046 * <p>Example usages: <pre>
047 *   String trimmed = {@link #whitespace() whitespace()}.{@link #trimFrom trimFrom}(userInput);
048 *   if ({@link #ascii() ascii()}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre>
049 *
050 * <p>See the Guava User Guide article on <a href=
051 * "https://github.com/google/guava/wiki/StringsExplained#charmatcher">
052 * {@code CharMatcher}</a>.
053 *
054 * @author Kevin Bourrillion
055 * @since 1.0
056 */
057@Beta // Possibly change from chars to code points; decide constants vs. methods
058@GwtCompatible(emulated = true)
059public abstract class CharMatcher implements Predicate<Character> {
060
061  // Constant matcher factory methods
062
063  /**
064   * Matches any character.
065   *
066   * @since 19.0 (since 1.0 as constant {@code ANY})
067   */
068  public static CharMatcher any() {
069    return Any.INSTANCE;
070  }
071
072  /**
073   * Matches no characters.
074   *
075   * @since 19.0 (since 1.0 as constant {@code NONE})
076   */
077  public static CharMatcher none() {
078    return None.INSTANCE;
079  }
080
081  /**
082   * Determines whether a character is whitespace according to the latest Unicode standard, as
083   * illustrated <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
084   * This is not the same definition used by other Java APIs. (See a <a
085   * href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
086   * definitions of "whitespace"</a>.)
087   *
088   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this matcher to keep it up
089   * to date.
090   *
091   * @since 19.0 (since 1.0 as constant {@code WHITESPACE})
092   */
093  public static CharMatcher whitespace() {
094    return Whitespace.INSTANCE;
095  }
096
097  /**
098   * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
099   * interpreted as a break between words for formatting purposes). See {@link #whitespace()} for a
100   * discussion of that term.
101   *
102   * @since 19.0 (since 2.0 as constant {@code BREAKING_WHITESPACE})
103   */
104  public static CharMatcher breakingWhitespace() {
105    return BreakingWhitespace.INSTANCE;
106  }
107
108  /**
109   * Determines whether a character is ASCII, meaning that its code point is less than 128.
110   *
111   * @since 19.0 (since 1.0 as constant {@code ASCII})
112   */
113  public static CharMatcher ascii() {
114    return Ascii.INSTANCE;
115  }
116
117  /**
118   * Determines whether a character is a digit according to
119   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
120   * If you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
121   *
122   * @since 19.0 (since 1.0 as constant {@code DIGIT})
123   */
124  public static CharMatcher digit() {
125    return Digit.INSTANCE;
126  }
127
128  /**
129   * Determines whether a character is a digit according to {@linkplain Character#isDigit(char)
130   * Java's definition}. If you only care to match ASCII digits, you can use {@code inRange('0',
131   * '9')}.
132   *
133   * @since 19.0 (since 1.0 as constant {@code JAVA_DIGIT})
134   */
135  public static CharMatcher javaDigit() {
136    return JavaDigit.INSTANCE;
137  }
138
139  /**
140   * Determines whether a character is a letter according to {@linkplain Character#isLetter(char)
141   * Java's definition}. If you only care to match letters of the Latin alphabet, you can use {@code
142   * inRange('a', 'z').or(inRange('A', 'Z'))}.
143   *
144   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER})
145   */
146  public static CharMatcher javaLetter() {
147    return JavaLetter.INSTANCE;
148  }
149
150  /**
151   * Determines whether a character is a letter or digit according to {@linkplain
152   * Character#isLetterOrDigit(char) Java's definition}.
153   *
154   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER_OR_DIGIT}).
155   */
156  public static CharMatcher javaLetterOrDigit() {
157    return JavaLetterOrDigit.INSTANCE;
158  }
159
160  /**
161   * Determines whether a character is upper case according to {@linkplain
162   * Character#isUpperCase(char) Java's definition}.
163   *
164   * @since 19.0 (since 1.0 as constant {@code JAVA_UPPER_CASE})
165   */
166  public static CharMatcher javaUpperCase() {
167    return JavaUpperCase.INSTANCE;
168  }
169
170  /**
171   * Determines whether a character is lower case according to {@linkplain
172   * Character#isLowerCase(char) Java's definition}.
173   *
174   * @since 19.0 (since 1.0 as constant {@code JAVA_LOWER_CASE})
175   */
176  public static CharMatcher javaLowerCase() {
177    return JavaLowerCase.INSTANCE;
178  }
179
180  /**
181   * Determines whether a character is an ISO control character as specified by {@link
182   * Character#isISOControl(char)}.
183   *
184   * @since 19.0 (since 1.0 as constant {@code JAVA_ISO_CONTROL})
185   */
186  public static CharMatcher javaIsoControl() {
187    return JavaIsoControl.INSTANCE;
188  }
189
190  /**
191   * Determines whether a character is invisible; that is, if its Unicode category is any of
192   * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
193   * PRIVATE_USE according to ICU4J.
194   *
195   * @since 19.0 (since 1.0 as constant {@code INVISIBLE})
196   */
197  public static CharMatcher invisible() {
198    return Invisible.INSTANCE;
199  }
200
201  /**
202   * Determines whether a character is single-width (not double-width). When in doubt, this matcher
203   * errs on the side of returning {@code false} (that is, it tends to assume a character is
204   * double-width).
205   *
206   * <p><b>Note:</b> as the reference file evolves, we will modify this matcher to keep it up to
207   * date.
208   *
209   * @since 19.0 (since 1.0 as constant {@code SINGLE_WIDTH})
210   */
211  public static CharMatcher singleWidth() {
212    return SingleWidth.INSTANCE;
213  }
214
215  // Legacy constants
216  // TODO(cgdecker): Deprecate these so they can be removed eventually
217
218  /**
219   * Determines whether a character is whitespace according to the latest Unicode standard, as
220   * illustrated <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
221   * This is not the same definition used by other Java APIs. (See a <a
222   * href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
223   * definitions of "whitespace"</a>.)
224   *
225   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant to keep it up
226   * to date.
227   */
228  public static final CharMatcher WHITESPACE = whitespace();
229
230  /**
231   * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
232   * interpreted as a break between words for formatting purposes). See {@link #WHITESPACE} for a
233   * discussion of that term.
234   *
235   * @since 2.0
236   */
237  public static final CharMatcher BREAKING_WHITESPACE = breakingWhitespace();
238
239  /**
240   * Determines whether a character is ASCII, meaning that its code point is less than 128.
241   */
242  public static final CharMatcher ASCII = ascii();
243
244  /**
245   * Determines whether a character is a digit according to
246   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
247   * If you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
248   */
249  public static final CharMatcher DIGIT = digit();
250
251  /**
252   * Determines whether a character is a digit according to {@linkplain Character#isDigit(char)
253   * Java's definition}. If you only care to match ASCII digits, you can use {@code
254   * inRange('0', '9')}.
255   */
256  public static final CharMatcher JAVA_DIGIT = javaDigit();
257
258  /**
259   * Determines whether a character is a letter according to {@linkplain Character#isLetter(char)
260   * Java's definition}. If you only care to match letters of the Latin alphabet, you can use {@code
261   * inRange('a', 'z').or(inRange('A', 'Z'))}.
262   */
263  public static final CharMatcher JAVA_LETTER = javaLetter();
264
265  /**
266   * Determines whether a character is a letter or digit according to {@linkplain
267   * Character#isLetterOrDigit(char) Java's definition}.
268   */
269  public static final CharMatcher JAVA_LETTER_OR_DIGIT = javaLetterOrDigit();
270
271  /**
272   * Determines whether a character is upper case according to {@linkplain
273   * Character#isUpperCase(char) Java's definition}.
274   */
275  public static final CharMatcher JAVA_UPPER_CASE = javaUpperCase();
276
277  /**
278   * Determines whether a character is lower case according to {@linkplain
279   * Character#isLowerCase(char) Java's definition}.
280   */
281  public static final CharMatcher JAVA_LOWER_CASE = javaLowerCase();
282
283  /**
284   * Determines whether a character is an ISO control character as specified by {@link
285   * Character#isISOControl(char)}.
286   */
287  public static final CharMatcher JAVA_ISO_CONTROL = javaIsoControl();
288
289  /**
290   * Determines whether a character is invisible; that is, if its Unicode category is any of
291   * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
292   * PRIVATE_USE according to ICU4J.
293   */
294  public static final CharMatcher INVISIBLE = invisible();
295
296  /**
297   * Determines whether a character is single-width (not double-width). When in doubt, this matcher
298   * errs on the side of returning {@code false} (that is, it tends to assume a character is
299   * double-width).
300   *
301   * <p><b>Note:</b> as the reference file evolves, we will modify this constant to keep it up to
302   * date.
303   */
304  public static final CharMatcher SINGLE_WIDTH = singleWidth();
305
306  /** Matches any character. */
307  public static final CharMatcher ANY = any();
308
309  /** Matches no characters. */
310  public static final CharMatcher NONE = none();
311
312  // Static factories
313
314  /**
315   * Returns a {@code char} matcher that matches only one specified character.
316   */
317  public static CharMatcher is(final char match) {
318    return new Is(match);
319  }
320
321  /**
322   * Returns a {@code char} matcher that matches any character except the one specified.
323   *
324   * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
325   */
326  public static CharMatcher isNot(final char match) {
327    return new IsNot(match);
328  }
329
330  /**
331   * Returns a {@code char} matcher that matches any character present in the given character
332   * sequence.
333   */
334  public static CharMatcher anyOf(final CharSequence sequence) {
335    switch (sequence.length()) {
336      case 0:
337        return none();
338      case 1:
339        return is(sequence.charAt(0));
340      case 2:
341        return isEither(sequence.charAt(0), sequence.charAt(1));
342      default:
343        // TODO(lowasser): is it potentially worth just going ahead and building a precomputed
344        // matcher?
345        return new AnyOf(sequence);
346    }
347  }
348
349  /**
350   * Returns a {@code char} matcher that matches any character not present in the given character
351   * sequence.
352   */
353  public static CharMatcher noneOf(CharSequence sequence) {
354    return anyOf(sequence).negate();
355  }
356
357  /**
358   * Returns a {@code char} matcher that matches any character in a given range (both endpoints are
359   * inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
360   * CharMatcher.inRange('a', 'z')}.
361   *
362   * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
363   */
364  public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
365    return new InRange(startInclusive, endInclusive);
366  }
367
368  /**
369   * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
370   * which operates on primitive {@code char} instances instead.
371   */
372  public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
373    return predicate instanceof CharMatcher ? (CharMatcher) predicate : new ForPredicate(predicate);
374  }
375
376  // Constructors
377
378  /**
379   * Constructor for use by subclasses. When subclassing, you may want to override
380   * {@code toString()} to provide a useful description.
381   */
382  protected CharMatcher() {}
383
384  // Abstract methods
385
386  /** Determines a true or false value for the given character. */
387  public abstract boolean matches(char c);
388
389  // Non-static factories
390
391  /**
392   * Returns a matcher that matches any character not matched by this matcher.
393   */
394  public CharMatcher negate() {
395    return new Negated(this);
396  }
397
398  /**
399   * Returns a matcher that matches any character matched by both this matcher and {@code other}.
400   */
401  public CharMatcher and(CharMatcher other) {
402    return new And(this, other);
403  }
404
405  /**
406   * Returns a matcher that matches any character matched by either this matcher or {@code other}.
407   */
408  public CharMatcher or(CharMatcher other) {
409    return new Or(this, other);
410  }
411
412  /**
413   * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
414   * query than the original; your mileage may vary. Precomputation takes time and is likely to be
415   * worthwhile only if the precomputed matcher is queried many thousands of times.
416   *
417   * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
418   * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
419   * worthwhile tradeoff in a browser.
420   */
421  public CharMatcher precomputed() {
422    return Platform.precomputeCharMatcher(this);
423  }
424
425  private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1;
426
427  /**
428   * This is the actual implementation of {@link #precomputed}, but we bounce calls through a
429   * method on {@link Platform} so that we can have different behavior in GWT.
430   *
431   * <p>This implementation tries to be smart in a number of ways.  It recognizes cases where
432   * the negation is cheaper to precompute than the matcher itself; it tries to build small
433   * hash tables for matchers that only match a few characters, and so on.  In the worst-case
434   * scenario, it constructs an eight-kilobyte bit array and queries that.
435   * In many situations this produces a matcher which is faster to query than the original.
436   */
437  @GwtIncompatible("java.util.BitSet")
438  CharMatcher precomputedInternal() {
439    final BitSet table = new BitSet();
440    setBits(table);
441    int totalCharacters = table.cardinality();
442    if (totalCharacters * 2 <= DISTINCT_CHARS) {
443      return precomputedPositive(totalCharacters, table, toString());
444    } else {
445      // TODO(lowasser): is it worth it to worry about the last character of large matchers?
446      table.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
447      int negatedCharacters = DISTINCT_CHARS - totalCharacters;
448      String suffix = ".negate()";
449      final String description = toString();
450      String negatedDescription =
451          description.endsWith(suffix)
452              ? description.substring(0, description.length() - suffix.length())
453              : description + suffix;
454      return new NegatedFastMatcher(
455          precomputedPositive(negatedCharacters, table, negatedDescription)) {
456        @Override
457        public String toString() {
458          return description;
459        }
460      };
461    }
462  }
463
464  /**
465   * Helper method for {@link #precomputedInternal} that doesn't test if the negation is cheaper.
466   */
467  @GwtIncompatible("java.util.BitSet")
468  private static CharMatcher precomputedPositive(
469      int totalCharacters, BitSet table, String description) {
470    switch (totalCharacters) {
471      case 0:
472        return none();
473      case 1:
474        return is((char) table.nextSetBit(0));
475      case 2:
476        char c1 = (char) table.nextSetBit(0);
477        char c2 = (char) table.nextSetBit(c1 + 1);
478        return isEither(c1, c2);
479      default:
480        return isSmall(totalCharacters, table.length())
481            ? SmallCharMatcher.from(table, description)
482            : new BitSetMatcher(table, description);
483    }
484  }
485
486  @GwtIncompatible("SmallCharMatcher")
487  private static boolean isSmall(int totalCharacters, int tableLength) {
488    return totalCharacters <= SmallCharMatcher.MAX_SIZE
489        && tableLength > (totalCharacters * 4 * Character.SIZE);
490    // err on the side of BitSetMatcher
491  }
492
493  /**
494   * Sets bits in {@code table} matched by this matcher.
495   */
496  @GwtIncompatible("java.util.BitSet")
497  void setBits(BitSet table) {
498    for (int c = Character.MAX_VALUE; c >= Character.MIN_VALUE; c--) {
499      if (matches((char) c)) {
500        table.set(c);
501      }
502    }
503  }
504
505  // Text processing routines
506
507  /**
508   * Returns {@code true} if a character sequence contains at least one matching character.
509   * Equivalent to {@code !matchesNoneOf(sequence)}.
510   *
511   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
512   * character, until this returns {@code true} or the end is reached.
513   *
514   * @param sequence the character sequence to examine, possibly empty
515   * @return {@code true} if this matcher matches at least one character in the sequence
516   * @since 8.0
517   */
518  public boolean matchesAnyOf(CharSequence sequence) {
519    return !matchesNoneOf(sequence);
520  }
521
522  /**
523   * Returns {@code true} if a character sequence contains only matching characters.
524   *
525   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
526   * character, until this returns {@code false} or the end is reached.
527   *
528   * @param sequence the character sequence to examine, possibly empty
529   * @return {@code true} if this matcher matches every character in the sequence, including when
530   *         the sequence is empty
531   */
532  public boolean matchesAllOf(CharSequence sequence) {
533    for (int i = sequence.length() - 1; i >= 0; i--) {
534      if (!matches(sequence.charAt(i))) {
535        return false;
536      }
537    }
538    return true;
539  }
540
541  /**
542   * Returns {@code true} if a character sequence contains no matching characters. Equivalent to
543   * {@code !matchesAnyOf(sequence)}.
544   *
545   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
546   * character, until this returns {@code false} or the end is reached.
547   *
548   * @param sequence the character sequence to examine, possibly empty
549   * @return {@code true} if this matcher matches every character in the sequence, including when
550   *         the sequence is empty
551   */
552  public boolean matchesNoneOf(CharSequence sequence) {
553    return indexIn(sequence) == -1;
554  }
555
556  /**
557   * Returns the index of the first matching character in a character sequence, or {@code -1} if no
558   * matching character is present.
559   *
560   * <p>The default implementation iterates over the sequence in forward order calling {@link
561   * #matches} for each character.
562   *
563   * @param sequence the character sequence to examine from the beginning
564   * @return an index, or {@code -1} if no character matches
565   */
566  public int indexIn(CharSequence sequence) {
567    return indexIn(sequence, 0);
568  }
569
570  /**
571   * Returns the index of the first matching character in a character sequence, starting from a
572   * given position, or {@code -1} if no character matches after that position.
573   *
574   * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
575   * start}, calling {@link #matches} for each character.
576   *
577   * @param sequence the character sequence to examine
578   * @param start the first index to examine; must be nonnegative and no greater than {@code
579   *        sequence.length()}
580   * @return the index of the first matching character, guaranteed to be no less than {@code start},
581   *         or {@code -1} if no character matches
582   * @throws IndexOutOfBoundsException if start is negative or greater than {@code
583   *         sequence.length()}
584   */
585  public int indexIn(CharSequence sequence, int start) {
586    int length = sequence.length();
587    checkPositionIndex(start, length);
588    for (int i = start; i < length; i++) {
589      if (matches(sequence.charAt(i))) {
590        return i;
591      }
592    }
593    return -1;
594  }
595
596  /**
597   * Returns the index of the last matching character in a character sequence, or {@code -1} if no
598   * matching character is present.
599   *
600   * <p>The default implementation iterates over the sequence in reverse order calling {@link
601   * #matches} for each character.
602   *
603   * @param sequence the character sequence to examine from the end
604   * @return an index, or {@code -1} if no character matches
605   */
606  public int lastIndexIn(CharSequence sequence) {
607    for (int i = sequence.length() - 1; i >= 0; i--) {
608      if (matches(sequence.charAt(i))) {
609        return i;
610      }
611    }
612    return -1;
613  }
614
615  /**
616   * Returns the number of matching characters found in a character sequence.
617   */
618  public int countIn(CharSequence sequence) {
619    int count = 0;
620    for (int i = 0; i < sequence.length(); i++) {
621      if (matches(sequence.charAt(i))) {
622        count++;
623      }
624    }
625    return count;
626  }
627
628  /**
629   * Returns a string containing all non-matching characters of a character sequence, in order. For
630   * example: <pre>   {@code
631   *
632   *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
633   *
634   * ... returns {@code "bzr"}.
635   */
636  @CheckReturnValue
637  public String removeFrom(CharSequence sequence) {
638    String string = sequence.toString();
639    int pos = indexIn(string);
640    if (pos == -1) {
641      return string;
642    }
643
644    char[] chars = string.toCharArray();
645    int spread = 1;
646
647    // This unusual loop comes from extensive benchmarking
648    OUT:
649    while (true) {
650      pos++;
651      while (true) {
652        if (pos == chars.length) {
653          break OUT;
654        }
655        if (matches(chars[pos])) {
656          break;
657        }
658        chars[pos - spread] = chars[pos];
659        pos++;
660      }
661      spread++;
662    }
663    return new String(chars, 0, pos - spread);
664  }
665
666  /**
667   * Returns a string containing all matching characters of a character sequence, in order. For
668   * example: <pre>   {@code
669   *
670   *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
671   *
672   * ... returns {@code "aaa"}.
673   */
674  @CheckReturnValue
675  public String retainFrom(CharSequence sequence) {
676    return negate().removeFrom(sequence);
677  }
678
679  /**
680   * Returns a string copy of the input character sequence, with each character that matches this
681   * matcher replaced by a given replacement character. For example: <pre>   {@code
682   *
683   *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
684   *
685   * ... returns {@code "rodor"}.
686   *
687   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
688   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
689   * character.
690   *
691   * @param sequence the character sequence to replace matching characters in
692   * @param replacement the character to append to the result string in place of each matching
693   *        character in {@code sequence}
694   * @return the new string
695   */
696  @CheckReturnValue
697  public String replaceFrom(CharSequence sequence, char replacement) {
698    String string = sequence.toString();
699    int pos = indexIn(string);
700    if (pos == -1) {
701      return string;
702    }
703    char[] chars = string.toCharArray();
704    chars[pos] = replacement;
705    for (int i = pos + 1; i < chars.length; i++) {
706      if (matches(chars[i])) {
707        chars[i] = replacement;
708      }
709    }
710    return new String(chars);
711  }
712
713  /**
714   * Returns a string copy of the input character sequence, with each character that matches this
715   * matcher replaced by a given replacement sequence. For example: <pre>   {@code
716   *
717   *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
718   *
719   * ... returns {@code "yoohoo"}.
720   *
721   * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
722   * off calling {@link #replaceFrom(CharSequence, char)} directly.
723   *
724   * @param sequence the character sequence to replace matching characters in
725   * @param replacement the characters to append to the result string in place of each matching
726   *        character in {@code sequence}
727   * @return the new string
728   */
729  @CheckReturnValue
730  public String replaceFrom(CharSequence sequence, CharSequence replacement) {
731    int replacementLen = replacement.length();
732    if (replacementLen == 0) {
733      return removeFrom(sequence);
734    }
735    if (replacementLen == 1) {
736      return replaceFrom(sequence, replacement.charAt(0));
737    }
738
739    String string = sequence.toString();
740    int pos = indexIn(string);
741    if (pos == -1) {
742      return string;
743    }
744
745    int len = string.length();
746    StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
747
748    int oldpos = 0;
749    do {
750      buf.append(string, oldpos, pos);
751      buf.append(replacement);
752      oldpos = pos + 1;
753      pos = indexIn(string, oldpos);
754    } while (pos != -1);
755
756    buf.append(string, oldpos, len);
757    return buf.toString();
758  }
759
760  /**
761   * Returns a substring of the input character sequence that omits all characters this matcher
762   * matches from the beginning and from the end of the string. For example: <pre>   {@code
763   *
764   *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
765   *
766   * ... returns {@code "cat"}.
767   *
768   * <p>Note that: <pre>   {@code
769   *
770   *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
771   *
772   * ... is equivalent to {@link String#trim()}.
773   */
774  @CheckReturnValue
775  public String trimFrom(CharSequence sequence) {
776    int len = sequence.length();
777    int first;
778    int last;
779
780    for (first = 0; first < len; first++) {
781      if (!matches(sequence.charAt(first))) {
782        break;
783      }
784    }
785    for (last = len - 1; last > first; last--) {
786      if (!matches(sequence.charAt(last))) {
787        break;
788      }
789    }
790
791    return sequence.subSequence(first, last + 1).toString();
792  }
793
794  /**
795   * Returns a substring of the input character sequence that omits all characters this matcher
796   * matches from the beginning of the string. For example: <pre> {@code
797   *
798   *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
799   *
800   * ... returns {@code "catbab"}.
801   */
802  @CheckReturnValue
803  public String trimLeadingFrom(CharSequence sequence) {
804    int len = sequence.length();
805    for (int first = 0; first < len; first++) {
806      if (!matches(sequence.charAt(first))) {
807        return sequence.subSequence(first, len).toString();
808      }
809    }
810    return "";
811  }
812
813  /**
814   * Returns a substring of the input character sequence that omits all characters this matcher
815   * matches from the end of the string. For example: <pre> {@code
816   *
817   *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
818   *
819   * ... returns {@code "abacat"}.
820   */
821  @CheckReturnValue
822  public String trimTrailingFrom(CharSequence sequence) {
823    int len = sequence.length();
824    for (int last = len - 1; last >= 0; last--) {
825      if (!matches(sequence.charAt(last))) {
826        return sequence.subSequence(0, last + 1).toString();
827      }
828    }
829    return "";
830  }
831
832  /**
833   * Returns a string copy of the input character sequence, with each group of consecutive
834   * characters that match this matcher replaced by a single replacement character. For example:
835   * <pre>   {@code
836   *
837   *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
838   *
839   * ... returns {@code "b-p-r"}.
840   *
841   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
842   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
843   * character.
844   *
845   * @param sequence the character sequence to replace matching groups of characters in
846   * @param replacement the character to append to the result string in place of each group of
847   *        matching characters in {@code sequence}
848   * @return the new string
849   */
850  @CheckReturnValue
851  public String collapseFrom(CharSequence sequence, char replacement) {
852    // This implementation avoids unnecessary allocation.
853    int len = sequence.length();
854    for (int i = 0; i < len; i++) {
855      char c = sequence.charAt(i);
856      if (matches(c)) {
857        if (c == replacement && (i == len - 1 || !matches(sequence.charAt(i + 1)))) {
858          // a no-op replacement
859          i++;
860        } else {
861          StringBuilder builder =
862              new StringBuilder(len).append(sequence.subSequence(0, i)).append(replacement);
863          return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true);
864        }
865      }
866    }
867    // no replacement needed
868    return sequence.toString();
869  }
870
871  /**
872   * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
873   * groups of matching characters at the start or end of the sequence are removed without
874   * replacement.
875   */
876  @CheckReturnValue
877  public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
878    // This implementation avoids unnecessary allocation.
879    int len = sequence.length();
880    int first = 0;
881    int last = len - 1;
882
883    while (first < len && matches(sequence.charAt(first))) {
884      first++;
885    }
886
887    while (last > first && matches(sequence.charAt(last))) {
888      last--;
889    }
890
891    return (first == 0 && last == len - 1)
892        ? collapseFrom(sequence, replacement)
893        : finishCollapseFrom(
894            sequence, first, last + 1, replacement, new StringBuilder(last + 1 - first), false);
895  }
896
897  private String finishCollapseFrom(
898      CharSequence sequence,
899      int start,
900      int end,
901      char replacement,
902      StringBuilder builder,
903      boolean inMatchingGroup) {
904    for (int i = start; i < end; i++) {
905      char c = sequence.charAt(i);
906      if (matches(c)) {
907        if (!inMatchingGroup) {
908          builder.append(replacement);
909          inMatchingGroup = true;
910        }
911      } else {
912        builder.append(c);
913        inMatchingGroup = false;
914      }
915    }
916    return builder.toString();
917  }
918
919  /**
920   * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #matches}
921   *     instead.
922   */
923  @Deprecated
924  @Override
925  public boolean apply(Character character) {
926    return matches(character);
927  }
928
929  /**
930   * Returns a string representation of this {@code CharMatcher}, such as
931   * {@code CharMatcher.or(WHITESPACE, JAVA_DIGIT)}.
932   */
933  @Override
934  public String toString() {
935    return super.toString();
936  }
937
938  /**
939   * Returns the Java Unicode escape sequence for the given character, in the form "\u12AB" where
940   * "12AB" is the four hexadecimal digits representing the 16 bits of the UTF-16 character.
941   */
942  private static String showCharacter(char c) {
943    String hex = "0123456789ABCDEF";
944    char[] tmp = {'\\', 'u', '\0', '\0', '\0', '\0'};
945    for (int i = 0; i < 4; i++) {
946      tmp[5 - i] = hex.charAt(c & 0xF);
947      c = (char) (c >> 4);
948    }
949    return String.copyValueOf(tmp);
950  }
951
952  // Fast matchers
953
954  /** A matcher for which precomputation will not yield any significant benefit. */
955  abstract static class FastMatcher extends CharMatcher {
956
957    @Override
958    public final CharMatcher precomputed() {
959      return this;
960    }
961
962    @Override
963    public CharMatcher negate() {
964      return new NegatedFastMatcher(this);
965    }
966  }
967
968  /** {@link FastMatcher} which overrides {@code toString()} with a custom name. */
969  abstract static class NamedFastMatcher extends FastMatcher {
970
971    private final String description;
972
973    NamedFastMatcher(String description) {
974      this.description = checkNotNull(description);
975    }
976
977    @Override
978    public final String toString() {
979      return description;
980    }
981  }
982
983  /** Negation of a {@link FastMatcher}. */
984  static class NegatedFastMatcher extends Negated {
985
986    NegatedFastMatcher(CharMatcher original) {
987      super(original);
988    }
989
990    @Override
991    public final CharMatcher precomputed() {
992      return this;
993    }
994  }
995
996  /** Fast matcher using a {@link BitSet} table of matching characters. */
997  @GwtIncompatible("java.util.BitSet")
998  private static final class BitSetMatcher extends NamedFastMatcher {
999
1000    private final BitSet table;
1001
1002    private BitSetMatcher(BitSet table, String description) {
1003      super(description);
1004      if (table.length() + Long.SIZE < table.size()) {
1005        table = (BitSet) table.clone();
1006        // If only we could actually call BitSet.trimToSize() ourselves...
1007      }
1008      this.table = table;
1009    }
1010
1011    @Override
1012    public boolean matches(char c) {
1013      return table.get(c);
1014    }
1015
1016    @Override
1017    void setBits(BitSet bitSet) {
1018      bitSet.or(table);
1019    }
1020  }
1021
1022  // Static constant implementation classes
1023
1024  /** Implementation of {@link #any()}. */
1025  private static final class Any extends NamedFastMatcher {
1026
1027    static final Any INSTANCE = new Any();
1028
1029    private Any() {
1030      super("CharMatcher.any()");
1031    }
1032
1033    @Override
1034    public boolean matches(char c) {
1035      return true;
1036    }
1037
1038    @Override
1039    public int indexIn(CharSequence sequence) {
1040      return (sequence.length() == 0) ? -1 : 0;
1041    }
1042
1043    @Override
1044    public int indexIn(CharSequence sequence, int start) {
1045      int length = sequence.length();
1046      checkPositionIndex(start, length);
1047      return (start == length) ? -1 : start;
1048    }
1049
1050    @Override
1051    public int lastIndexIn(CharSequence sequence) {
1052      return sequence.length() - 1;
1053    }
1054
1055    @Override
1056    public boolean matchesAllOf(CharSequence sequence) {
1057      checkNotNull(sequence);
1058      return true;
1059    }
1060
1061    @Override
1062    public boolean matchesNoneOf(CharSequence sequence) {
1063      return sequence.length() == 0;
1064    }
1065
1066    @Override
1067    public String removeFrom(CharSequence sequence) {
1068      checkNotNull(sequence);
1069      return "";
1070    }
1071
1072    @Override
1073    public String replaceFrom(CharSequence sequence, char replacement) {
1074      char[] array = new char[sequence.length()];
1075      Arrays.fill(array, replacement);
1076      return new String(array);
1077    }
1078
1079    @Override
1080    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1081      StringBuilder result = new StringBuilder(sequence.length() * replacement.length());
1082      for (int i = 0; i < sequence.length(); i++) {
1083        result.append(replacement);
1084      }
1085      return result.toString();
1086    }
1087
1088    @Override
1089    public String collapseFrom(CharSequence sequence, char replacement) {
1090      return (sequence.length() == 0) ? "" : String.valueOf(replacement);
1091    }
1092
1093    @Override
1094    public String trimFrom(CharSequence sequence) {
1095      checkNotNull(sequence);
1096      return "";
1097    }
1098
1099    @Override
1100    public int countIn(CharSequence sequence) {
1101      return sequence.length();
1102    }
1103
1104    @Override
1105    public CharMatcher and(CharMatcher other) {
1106      return checkNotNull(other);
1107    }
1108
1109    @Override
1110    public CharMatcher or(CharMatcher other) {
1111      checkNotNull(other);
1112      return this;
1113    }
1114
1115    @Override
1116    public CharMatcher negate() {
1117      return none();
1118    }
1119  }
1120
1121  /** Implementation of {@link #none()}. */
1122  private static final class None extends NamedFastMatcher {
1123
1124    static final None INSTANCE = new None();
1125
1126    private None() {
1127      super("CharMatcher.none()");
1128    }
1129
1130    @Override
1131    public boolean matches(char c) {
1132      return false;
1133    }
1134
1135    @Override
1136    public int indexIn(CharSequence sequence) {
1137      checkNotNull(sequence);
1138      return -1;
1139    }
1140
1141    @Override
1142    public int indexIn(CharSequence sequence, int start) {
1143      int length = sequence.length();
1144      checkPositionIndex(start, length);
1145      return -1;
1146    }
1147
1148    @Override
1149    public int lastIndexIn(CharSequence sequence) {
1150      checkNotNull(sequence);
1151      return -1;
1152    }
1153
1154    @Override
1155    public boolean matchesAllOf(CharSequence sequence) {
1156      return sequence.length() == 0;
1157    }
1158
1159    @Override
1160    public boolean matchesNoneOf(CharSequence sequence) {
1161      checkNotNull(sequence);
1162      return true;
1163    }
1164
1165    @Override
1166    public String removeFrom(CharSequence sequence) {
1167      return sequence.toString();
1168    }
1169
1170    @Override
1171    public String replaceFrom(CharSequence sequence, char replacement) {
1172      return sequence.toString();
1173    }
1174
1175    @Override
1176    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1177      checkNotNull(replacement);
1178      return sequence.toString();
1179    }
1180
1181    @Override
1182    public String collapseFrom(CharSequence sequence, char replacement) {
1183      return sequence.toString();
1184    }
1185
1186    @Override
1187    public String trimFrom(CharSequence sequence) {
1188      return sequence.toString();
1189    }
1190
1191    @Override
1192    public String trimLeadingFrom(CharSequence sequence) {
1193      return sequence.toString();
1194    }
1195
1196    @Override
1197    public String trimTrailingFrom(CharSequence sequence) {
1198      return sequence.toString();
1199    }
1200
1201    @Override
1202    public int countIn(CharSequence sequence) {
1203      checkNotNull(sequence);
1204      return 0;
1205    }
1206
1207    @Override
1208    public CharMatcher and(CharMatcher other) {
1209      checkNotNull(other);
1210      return this;
1211    }
1212
1213    @Override
1214    public CharMatcher or(CharMatcher other) {
1215      return checkNotNull(other);
1216    }
1217
1218    @Override
1219    public CharMatcher negate() {
1220      return any();
1221    }
1222  }
1223
1224  /** Implementation of {@link #whitespace()}. */
1225  @VisibleForTesting
1226  static final class Whitespace extends NamedFastMatcher {
1227
1228    static final String TABLE =
1229        "\u2002\u3000\r\u0085\u200A\u2005\u2000\u3000"
1230            + "\u2029\u000B\u3000\u2008\u2003\u205F\u3000\u1680"
1231            + "\u0009\u0020\u2006\u2001\u202F\u00A0\u000C\u2009"
1232            + "\u3000\u2004\u3000\u3000\u2028\n\u2007\u3000";
1233    static final int MULTIPLIER = 1682554634;
1234    static final int SHIFT = Integer.numberOfLeadingZeros(TABLE.length() - 1);
1235
1236    static final Whitespace INSTANCE = new Whitespace();
1237
1238    Whitespace() {
1239      super("CharMatcher.whitespace()");
1240    }
1241
1242    @Override
1243    public boolean matches(char c) {
1244      return TABLE.charAt((MULTIPLIER * c) >>> SHIFT) == c;
1245    }
1246
1247    @GwtIncompatible("java.util.BitSet")
1248    @Override
1249    void setBits(BitSet table) {
1250      for (int i = 0; i < TABLE.length(); i++) {
1251        table.set(TABLE.charAt(i));
1252      }
1253    }
1254  }
1255
1256  /** Implementation of {@link #breakingWhitespace()}. */
1257  private static final class BreakingWhitespace extends CharMatcher {
1258
1259    static final CharMatcher INSTANCE = new BreakingWhitespace();
1260
1261    @Override
1262    public boolean matches(char c) {
1263      switch (c) {
1264        case '\t':
1265        case '\n':
1266        case '\013':
1267        case '\f':
1268        case '\r':
1269        case ' ':
1270        case '\u0085':
1271        case '\u1680':
1272        case '\u2028':
1273        case '\u2029':
1274        case '\u205f':
1275        case '\u3000':
1276          return true;
1277        case '\u2007':
1278          return false;
1279        default:
1280          return c >= '\u2000' && c <= '\u200a';
1281      }
1282    }
1283
1284    @Override
1285    public String toString() {
1286      return "CharMatcher.breakingWhitespace()";
1287    }
1288  }
1289
1290  /** Implementation of {@link #ascii()}. */
1291  private static final class Ascii extends NamedFastMatcher {
1292
1293    static final Ascii INSTANCE = new Ascii();
1294
1295    Ascii() {
1296      super("CharMatcher.ascii()");
1297    }
1298
1299    @Override
1300    public boolean matches(char c) {
1301      return c <= '\u007f';
1302    }
1303  }
1304
1305  /** Implementation that matches characters that fall within multiple ranges. */
1306  private static class RangesMatcher extends CharMatcher {
1307
1308    private final String description;
1309    private final char[] rangeStarts;
1310    private final char[] rangeEnds;
1311
1312    RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) {
1313      this.description = description;
1314      this.rangeStarts = rangeStarts;
1315      this.rangeEnds = rangeEnds;
1316      checkArgument(rangeStarts.length == rangeEnds.length);
1317      for (int i = 0; i < rangeStarts.length; i++) {
1318        checkArgument(rangeStarts[i] <= rangeEnds[i]);
1319        if (i + 1 < rangeStarts.length) {
1320          checkArgument(rangeEnds[i] < rangeStarts[i + 1]);
1321        }
1322      }
1323    }
1324
1325    @Override
1326    public boolean matches(char c) {
1327      int index = Arrays.binarySearch(rangeStarts, c);
1328      if (index >= 0) {
1329        return true;
1330      } else {
1331        index = ~index - 1;
1332        return index >= 0 && c <= rangeEnds[index];
1333      }
1334    }
1335
1336    @Override
1337    public String toString() {
1338      return description;
1339    }
1340  }
1341
1342  /** Implementation of {@link #digit()}. */
1343  private static final class Digit extends RangesMatcher {
1344
1345    // Must be in ascending order.
1346    private static final String ZEROES =
1347        "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66"
1348            + "\u0be6\u0c66\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810"
1349            + "\u1946\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
1350
1351    private static char[] zeroes() {
1352      return ZEROES.toCharArray();
1353    }
1354
1355    private static char[] nines() {
1356      char[] nines = new char[ZEROES.length()];
1357      for (int i = 0; i < ZEROES.length(); i++) {
1358        nines[i] = (char) (ZEROES.charAt(i) + 9);
1359      }
1360      return nines;
1361    }
1362
1363    static final Digit INSTANCE = new Digit();
1364
1365    private Digit() {
1366      super("CharMatcher.digit()", zeroes(), nines());
1367    }
1368  }
1369
1370  /** Implementation of {@link #javaDigit()}. */
1371  private static final class JavaDigit extends CharMatcher {
1372
1373    static final JavaDigit INSTANCE = new JavaDigit();
1374
1375    @Override
1376    public boolean matches(char c) {
1377      return Character.isDigit(c);
1378    }
1379
1380    @Override
1381    public String toString() {
1382      return "CharMatcher.javaDigit()";
1383    }
1384  }
1385
1386  /** Implementation of {@link #javaLetter()}. */
1387  private static final class JavaLetter extends CharMatcher {
1388
1389    static final JavaLetter INSTANCE = new JavaLetter();
1390
1391    @Override
1392    public boolean matches(char c) {
1393      return Character.isLetter(c);
1394    }
1395
1396    @Override
1397    public String toString() {
1398      return "CharMatcher.javaLetter()";
1399    }
1400  }
1401
1402  /** Implementation of {@link #javaLetterOrDigit()}. */
1403  private static final class JavaLetterOrDigit extends CharMatcher {
1404
1405    static final JavaLetterOrDigit INSTANCE = new JavaLetterOrDigit();
1406
1407    @Override
1408    public boolean matches(char c) {
1409      return Character.isLetterOrDigit(c);
1410    }
1411
1412    @Override
1413    public String toString() {
1414      return "CharMatcher.javaLetterOrDigit()";
1415    }
1416  }
1417
1418  /** Implementation of {@link #javaUpperCase()}. */
1419  private static final class JavaUpperCase extends CharMatcher {
1420
1421    static final JavaUpperCase INSTANCE = new JavaUpperCase();
1422
1423    @Override
1424    public boolean matches(char c) {
1425      return Character.isUpperCase(c);
1426    }
1427
1428    @Override
1429    public String toString() {
1430      return "CharMatcher.javaUpperCase()";
1431    }
1432  }
1433
1434  /** Implementation of {@link #javaLowerCase()}. */
1435  private static final class JavaLowerCase extends CharMatcher {
1436
1437    static final JavaLowerCase INSTANCE = new JavaLowerCase();
1438
1439    @Override
1440    public boolean matches(char c) {
1441      return Character.isLowerCase(c);
1442    }
1443
1444    @Override
1445    public String toString() {
1446      return "CharMatcher.javaLowerCase()";
1447    }
1448  }
1449
1450  /** Implementation of {@link #javaIsoControl()}. */
1451  private static final class JavaIsoControl extends NamedFastMatcher {
1452
1453    static final JavaIsoControl INSTANCE = new JavaIsoControl();
1454
1455    private JavaIsoControl() {
1456      super("CharMatcher.javaIsoControl()");
1457    }
1458
1459    @Override
1460    public boolean matches(char c) {
1461      return c <= '\u001f' || (c >= '\u007f' && c <= '\u009f');
1462    }
1463  }
1464
1465  /** Implementation of {@link #invisible()}. */
1466  private static final class Invisible extends RangesMatcher {
1467
1468    private static final String RANGE_STARTS =
1469      "\u0000\u007f\u00ad\u0600\u061c\u06dd\u070f\u1680\u180e\u2000\u2028\u205f\u2066\u2067"
1470          + "\u2068\u2069\u206a\u3000\ud800\ufeff\ufff9\ufffa";
1471    private static final String RANGE_ENDS =
1472      "\u0020\u00a0\u00ad\u0604\u061c\u06dd\u070f\u1680\u180e\u200f\u202f\u2064\u2066\u2067"
1473          + "\u2068\u2069\u206f\u3000\uf8ff\ufeff\ufff9\ufffb";
1474
1475    static final Invisible INSTANCE = new Invisible();
1476
1477    private Invisible() {
1478      super(
1479          "CharMatcher.invisible()",
1480          RANGE_STARTS.toCharArray(),
1481          RANGE_ENDS.toCharArray());
1482    }
1483  }
1484
1485  /** Implementation of {@link #singleWidth()}. */
1486  private static final class SingleWidth extends RangesMatcher {
1487
1488    static final SingleWidth INSTANCE = new SingleWidth();
1489
1490    private SingleWidth() {
1491      super(
1492          "CharMatcher.singleWidth()",
1493          "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(),
1494          "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray());
1495    }
1496  }
1497
1498  // Non-static factory implementation classes
1499
1500  /** Implementation of {@link #negate()}. */
1501  private static class Negated extends CharMatcher {
1502
1503    final CharMatcher original;
1504
1505    Negated(CharMatcher original) {
1506      this.original = checkNotNull(original);
1507    }
1508
1509    @Override
1510    public boolean matches(char c) {
1511      return !original.matches(c);
1512    }
1513
1514    @Override
1515    public boolean matchesAllOf(CharSequence sequence) {
1516      return original.matchesNoneOf(sequence);
1517    }
1518
1519    @Override
1520    public boolean matchesNoneOf(CharSequence sequence) {
1521      return original.matchesAllOf(sequence);
1522    }
1523
1524    @Override
1525    public int countIn(CharSequence sequence) {
1526      return sequence.length() - original.countIn(sequence);
1527    }
1528
1529    @GwtIncompatible("java.util.BitSet")
1530    @Override
1531    void setBits(BitSet table) {
1532      BitSet tmp = new BitSet();
1533      original.setBits(tmp);
1534      tmp.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
1535      table.or(tmp);
1536    }
1537
1538    @Override
1539    public CharMatcher negate() {
1540      return original;
1541    }
1542
1543    @Override
1544    public String toString() {
1545      return original + ".negate()";
1546    }
1547  }
1548
1549  /** Implementation of {@link #and(CharMatcher)}. */
1550  private static final class And extends CharMatcher {
1551
1552    final CharMatcher first;
1553    final CharMatcher second;
1554
1555    And(CharMatcher a, CharMatcher b) {
1556      first = checkNotNull(a);
1557      second = checkNotNull(b);
1558    }
1559
1560    @Override
1561    public boolean matches(char c) {
1562      return first.matches(c) && second.matches(c);
1563    }
1564
1565    @GwtIncompatible("java.util.BitSet")
1566    @Override
1567    void setBits(BitSet table) {
1568      BitSet tmp1 = new BitSet();
1569      first.setBits(tmp1);
1570      BitSet tmp2 = new BitSet();
1571      second.setBits(tmp2);
1572      tmp1.and(tmp2);
1573      table.or(tmp1);
1574    }
1575
1576    @Override
1577    public String toString() {
1578      return "CharMatcher.and(" + first + ", " + second + ")";
1579    }
1580  }
1581
1582  /** Implementation of {@link #or(CharMatcher)}. */
1583  private static final class Or extends CharMatcher {
1584
1585    final CharMatcher first;
1586    final CharMatcher second;
1587
1588    Or(CharMatcher a, CharMatcher b) {
1589      first = checkNotNull(a);
1590      second = checkNotNull(b);
1591    }
1592
1593    @GwtIncompatible("java.util.BitSet")
1594    @Override
1595    void setBits(BitSet table) {
1596      first.setBits(table);
1597      second.setBits(table);
1598    }
1599
1600    @Override
1601    public boolean matches(char c) {
1602      return first.matches(c) || second.matches(c);
1603    }
1604
1605    @Override
1606    public String toString() {
1607      return "CharMatcher.or(" + first + ", " + second + ")";
1608    }
1609  }
1610
1611  // Static factory implementations
1612
1613  /** Implementation of {@link #is(char)}. */
1614  private static final class Is extends FastMatcher {
1615
1616    private final char match;
1617
1618    Is(char match) {
1619      this.match = match;
1620    }
1621
1622    @Override
1623    public boolean matches(char c) {
1624      return c == match;
1625    }
1626
1627    @Override
1628    public String replaceFrom(CharSequence sequence, char replacement) {
1629      return sequence.toString().replace(match, replacement);
1630    }
1631
1632    @Override
1633    public CharMatcher and(CharMatcher other) {
1634      return other.matches(match) ? this : none();
1635    }
1636
1637    @Override
1638    public CharMatcher or(CharMatcher other) {
1639      return other.matches(match) ? other : super.or(other);
1640    }
1641
1642    @Override
1643    public CharMatcher negate() {
1644      return isNot(match);
1645    }
1646
1647    @GwtIncompatible("java.util.BitSet")
1648    @Override
1649    void setBits(BitSet table) {
1650      table.set(match);
1651    }
1652
1653    @Override
1654    public String toString() {
1655      return "CharMatcher.is('" + showCharacter(match) + "')";
1656    }
1657  }
1658
1659  /** Implementation of {@link #isNot(char)}. */
1660  private static final class IsNot extends FastMatcher {
1661
1662    private final char match;
1663
1664    IsNot(char match) {
1665      this.match = match;
1666    }
1667
1668    @Override
1669    public boolean matches(char c) {
1670      return c != match;
1671    }
1672
1673    @Override
1674    public CharMatcher and(CharMatcher other) {
1675      return other.matches(match) ? super.and(other) : other;
1676    }
1677
1678    @Override
1679    public CharMatcher or(CharMatcher other) {
1680      return other.matches(match) ? any() : this;
1681    }
1682
1683    @GwtIncompatible("java.util.BitSet")
1684    @Override
1685    void setBits(BitSet table) {
1686      table.set(0, match);
1687      table.set(match + 1, Character.MAX_VALUE + 1);
1688    }
1689
1690    @Override
1691    public CharMatcher negate() {
1692      return is(match);
1693    }
1694
1695    @Override
1696    public String toString() {
1697      return "CharMatcher.isNot('" + showCharacter(match) + "')";
1698    }
1699  }
1700
1701  private static CharMatcher.IsEither isEither(char c1, char c2) {
1702    return new CharMatcher.IsEither(c1, c2);
1703  }
1704
1705  /** Implementation of {@link #anyOf(CharSequence)} for exactly two characters. */
1706  private static final class IsEither extends FastMatcher {
1707
1708    private final char match1;
1709    private final char match2;
1710
1711    IsEither(char match1, char match2) {
1712      this.match1 = match1;
1713      this.match2 = match2;
1714    }
1715
1716    @Override
1717    public boolean matches(char c) {
1718      return c == match1 || c == match2;
1719    }
1720
1721    @GwtIncompatible("java.util.BitSet")
1722    @Override
1723    void setBits(BitSet table) {
1724      table.set(match1);
1725      table.set(match2);
1726    }
1727
1728    @Override
1729    public String toString() {
1730      return "CharMatcher.anyOf(\"" + showCharacter(match1) + showCharacter(match2) + "\")";
1731    }
1732  }
1733
1734  /** Implementation of {@link #anyOf(CharSequence)} for three or more characters. */
1735  private static final class AnyOf extends CharMatcher {
1736
1737    private final char[] chars;
1738
1739    public AnyOf(CharSequence chars) {
1740      this.chars = chars.toString().toCharArray();
1741      Arrays.sort(this.chars);
1742    }
1743
1744    @Override
1745    public boolean matches(char c) {
1746      return Arrays.binarySearch(chars, c) >= 0;
1747    }
1748
1749    @Override
1750    @GwtIncompatible("java.util.BitSet")
1751    void setBits(BitSet table) {
1752      for (char c : chars) {
1753        table.set(c);
1754      }
1755    }
1756
1757    @Override
1758    public String toString() {
1759      StringBuilder description = new StringBuilder("CharMatcher.anyOf(\"");
1760      for (char c : chars) {
1761        description.append(showCharacter(c));
1762      }
1763      description.append("\")");
1764      return description.toString();
1765    }
1766  }
1767
1768  /** Implementation of {@link #inRange(char, char)}. */
1769  private static final class InRange extends FastMatcher {
1770
1771    private final char startInclusive;
1772    private final char endInclusive;
1773
1774    InRange(char startInclusive, char endInclusive) {
1775      checkArgument(endInclusive >= startInclusive);
1776      this.startInclusive = startInclusive;
1777      this.endInclusive = endInclusive;
1778    }
1779
1780    @Override
1781    public boolean matches(char c) {
1782      return startInclusive <= c && c <= endInclusive;
1783    }
1784
1785    @GwtIncompatible("java.util.BitSet")
1786    @Override
1787    void setBits(BitSet table) {
1788      table.set(startInclusive, endInclusive + 1);
1789    }
1790
1791    @Override
1792    public String toString() {
1793      return "CharMatcher.inRange('"
1794          + showCharacter(startInclusive)
1795          + "', '"
1796          + showCharacter(endInclusive)
1797          + "')";
1798    }
1799  }
1800
1801  /** Implementation of {@link #forPredicate(Predicate)}. */
1802  private static final class ForPredicate extends CharMatcher {
1803
1804    private final Predicate<? super Character> predicate;
1805
1806    ForPredicate(Predicate<? super Character> predicate) {
1807      this.predicate = checkNotNull(predicate);
1808    }
1809
1810    @Override
1811    public boolean matches(char c) {
1812      return predicate.apply(c);
1813    }
1814
1815    @SuppressWarnings("deprecation") // intentional; deprecation is for callers primarily
1816    @Override
1817    public boolean apply(Character character) {
1818      return predicate.apply(checkNotNull(character));
1819    }
1820
1821    @Override
1822    public String toString() {
1823      return "CharMatcher.forPredicate(" + predicate + ")";
1824    }
1825  }
1826}