001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkNotNull;
018
019import com.google.common.annotations.Beta;
020import com.google.common.annotations.GwtIncompatible;
021import com.google.common.base.Ascii;
022import com.google.common.base.Optional;
023import com.google.common.base.Splitter;
024import com.google.common.collect.AbstractIterator;
025import com.google.common.collect.ImmutableList;
026import com.google.common.collect.Lists;
027import com.google.common.collect.Streams;
028import com.google.errorprone.annotations.CanIgnoreReturnValue;
029import com.google.errorprone.annotations.MustBeClosed;
030import java.io.BufferedReader;
031import java.io.IOException;
032import java.io.InputStream;
033import java.io.Reader;
034import java.io.StringReader;
035import java.io.UncheckedIOException;
036import java.io.Writer;
037import java.nio.charset.Charset;
038import java.util.Iterator;
039import java.util.List;
040import java.util.function.Consumer;
041import java.util.stream.Stream;
042import javax.annotation.Nullable;
043
044/**
045 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a
046 * {@code CharSource} is not an open, stateful stream of characters that can be read and closed.
047 * Instead, it is an immutable <i>supplier</i> of {@code Reader} instances.
048 *
049 * <p>{@code CharSource} provides two kinds of methods:
050 * <ul>
051 * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
052 *     instance each time they are called. The caller is responsible for ensuring that the returned
053 *     reader is closed.
054 * <li><b>Convenience methods:</b> These are implementations of common operations that are typically
055 *     implemented by opening a reader using one of the methods in the first category, doing
056 *     something and finally closing the reader that was opened.
057 * </ul>
058 *
059 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source
060 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n},
061 * {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider
062 * there to be an empty line at the end if the contents are terminated with a line separator.
063 *
064 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
065 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
066 *
067 * @since 14.0
068 * @author Colin Decker
069 */
070@GwtIncompatible
071public abstract class CharSource {
072
073  /**
074   * Constructor for use by subclasses.
075   */
076  protected CharSource() {}
077
078  /**
079   * Returns a {@link ByteSource} view of this char source that encodes chars read from this source
080   * as bytes using the given {@link Charset}.
081   *
082   * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset,
083   * the default implementation of this method will ensure that the original {@code CharSource} is
084   * returned, rather than round-trip encoding. Subclasses that override this method should behave
085   * the same way.
086   *
087   * @since 20.0
088   */
089  @Beta
090  public ByteSource asByteSource(Charset charset) {
091    return new AsByteSource(charset);
092  }
093
  /**
   * Opens a new {@link Reader} for reading from this source. Every invocation returns a new,
   * independent reader.
   *
   * <p>The caller is responsible for ensuring that the returned reader is closed.
   *
   * @return a newly opened reader over the contents of this source
   * @throws IOException if an I/O error occurs while opening the reader
   */
  public abstract Reader openStream() throws IOException;
103
104  /**
105   * Opens a new {@link BufferedReader} for reading from this source. This method returns a new,
106   * independent reader each time it is called.
107   *
108   * <p>The caller is responsible for ensuring that the returned reader is closed.
109   *
110   * @throws IOException if an I/O error occurs while of opening the reader
111   */
112  public BufferedReader openBufferedStream() throws IOException {
113    Reader reader = openStream();
114    return (reader instanceof BufferedReader)
115        ? (BufferedReader) reader
116        : new BufferedReader(reader);
117  }
118
119  /**
120   * Opens a new {@link Stream} for reading text one line at a time from this source. This method
121   * returns a new, independent stream each time it is called.
122   *
123   * <p>The returned stream is lazy and only reads from the source in the terminal operation. If an
124   * I/O error occurs while the stream is reading from the source or when the stream is closed, an
125   * {@link UncheckedIOException} is thrown.
126   *
127   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
128   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
129   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
130   * as if it does.
131   *
132   * <p>The caller is responsible for ensuring that the returned stream is closed. For example:
133   *
134   * <pre>{@code
135   * try (Stream<String> lines = source.lines()) {
136   *   lines.map(...)
137   *      .filter(...)
138   *      .forEach(...);
139   * }
140   * }</pre>
141   *
142   * @throws IOException if an I/O error occurs while opening the stream
143   * @since 22.0
144   */
145  @Beta
146  @MustBeClosed
147  public Stream<String> lines() throws IOException {
148    BufferedReader reader = openBufferedStream();
149    return reader.lines().onClose(() -> {
150      try {
151        reader.close();
152      } catch (IOException e) {
153        throw new UncheckedIOException(e);
154      }
155    });
156  }
157
158  /**
159   * Returns the size of this source in chars, if the size can be easily determined without actually
160   * opening the data stream.
161   *
162   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a
163   * {@code CharSequence}, may return a non-absent value. Note that in such cases, it is
164   * <i>possible</i> that this method will return a different number of chars than would be returned
165   * by reading all of the chars.
166   *
167   * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may
168   * return a different number of chars if the contents are changed.
169   *
170   * @since 19.0
171   */
172  @Beta
173  public Optional<Long> lengthIfKnown() {
174    return Optional.absent();
175  }
176
177  /**
178   * Returns the length of this source in chars, even if doing so requires opening and traversing an
179   * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}.
180   *
181   * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If
182   * absent, it will fall back to a heavyweight operation that will open a stream,
183   * {@link Reader#skip(long) skip} to the end of the stream, and return the total number of chars
184   * that were skipped.
185   *
186   * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient
187   * implementation, it is <i>possible</i> that this method will return a different number of chars
188   * than would be returned by reading all of the chars.
189   *
190   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
191   * number of chars if the contents are changed.
192   *
193   * @throws IOException if an I/O error occurs while reading the length of this source
194   * @since 19.0
195   */
196  @Beta
197  public long length() throws IOException {
198    Optional<Long> lengthIfKnown = lengthIfKnown();
199    if (lengthIfKnown.isPresent()) {
200      return lengthIfKnown.get();
201    }
202
203    Closer closer = Closer.create();
204    try {
205      Reader reader = closer.register(openStream());
206      return countBySkipping(reader);
207    } catch (Throwable e) {
208      throw closer.rethrow(e);
209    } finally {
210      closer.close();
211    }
212  }
213
214  private long countBySkipping(Reader reader) throws IOException {
215    long count = 0;
216    long read;
217    while ((read = reader.skip(Long.MAX_VALUE)) != 0) {
218      count += read;
219    }
220    return count;
221  }
222
223  /**
224   * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
225   * Does not close {@code appendable} if it is {@code Closeable}.
226   *
227   * @return the number of characters copied
228   * @throws IOException if an I/O error occurs while reading from this source or writing to
229   *     {@code appendable}
230   */
231  @CanIgnoreReturnValue
232  public long copyTo(Appendable appendable) throws IOException {
233    checkNotNull(appendable);
234
235    Closer closer = Closer.create();
236    try {
237      Reader reader = closer.register(openStream());
238      return CharStreams.copy(reader, appendable);
239    } catch (Throwable e) {
240      throw closer.rethrow(e);
241    } finally {
242      closer.close();
243    }
244  }
245
246  /**
247   * Copies the contents of this source to the given sink.
248   *
249   * @return the number of characters copied
250   * @throws IOException if an I/O error occurs while reading from this source or writing to
251   *     {@code sink}
252   */
253  @CanIgnoreReturnValue
254  public long copyTo(CharSink sink) throws IOException {
255    checkNotNull(sink);
256
257    Closer closer = Closer.create();
258    try {
259      Reader reader = closer.register(openStream());
260      Writer writer = closer.register(sink.openStream());
261      return CharStreams.copy(reader, writer);
262    } catch (Throwable e) {
263      throw closer.rethrow(e);
264    } finally {
265      closer.close();
266    }
267  }
268
269  /**
270   * Reads the contents of this source as a string.
271   *
272   * @throws IOException if an I/O error occurs while reading from this source
273   */
274  public String read() throws IOException {
275    Closer closer = Closer.create();
276    try {
277      Reader reader = closer.register(openStream());
278      return CharStreams.toString(reader);
279    } catch (Throwable e) {
280      throw closer.rethrow(e);
281    } finally {
282      closer.close();
283    }
284  }
285
286  /**
287   * Reads the first line of this source as a string. Returns {@code null} if this source is empty.
288   *
289   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
290   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
291   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
292   * as if it does.
293   *
294   * @throws IOException if an I/O error occurs while reading from this source
295   */
296  @Nullable
297  public String readFirstLine() throws IOException {
298    Closer closer = Closer.create();
299    try {
300      BufferedReader reader = closer.register(openBufferedStream());
301      return reader.readLine();
302    } catch (Throwable e) {
303      throw closer.rethrow(e);
304    } finally {
305      closer.close();
306    }
307  }
308
309  /**
310   * Reads all the lines of this source as a list of strings. The returned list will be empty if
311   * this source is empty.
312   *
313   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
314   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
315   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
316   * as if it does.
317   *
318   * @throws IOException if an I/O error occurs while reading from this source
319   */
320  public ImmutableList<String> readLines() throws IOException {
321    Closer closer = Closer.create();
322    try {
323      BufferedReader reader = closer.register(openBufferedStream());
324      List<String> result = Lists.newArrayList();
325      String line;
326      while ((line = reader.readLine()) != null) {
327        result.add(line);
328      }
329      return ImmutableList.copyOf(result);
330    } catch (Throwable e) {
331      throw closer.rethrow(e);
332    } finally {
333      closer.close();
334    }
335  }
336
337  /**
338   * Reads lines of text from this source, processing each line as it is read using the given
339   * {@link LineProcessor processor}. Stops when all lines have been processed or the processor
340   * returns {@code false} and returns the result produced by the processor.
341   *
342   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
343   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
344   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
345   * as if it does.
346   *
347   * @throws IOException if an I/O error occurs while reading from this source or if
348   *     {@code processor} throws an {@code IOException}
349   * @since 16.0
350   */
351  @Beta
352  @CanIgnoreReturnValue // some processors won't return a useful result
353  public <T> T readLines(LineProcessor<T> processor) throws IOException {
354    checkNotNull(processor);
355
356    Closer closer = Closer.create();
357    try {
358      Reader reader = closer.register(openStream());
359      return CharStreams.readLines(reader, processor);
360    } catch (Throwable e) {
361      throw closer.rethrow(e);
362    } finally {
363      closer.close();
364    }
365  }
366
367  /**
368   * Reads all lines of text from this source, running the given {@code action} for each line as
369   * it is read.
370   *
371   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
372   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
373   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
374   * as if it does.
375   *
376   * @throws IOException if an I/O error occurs while reading from this source or if
377   *     {@code action} throws an {@code UncheckedIOException}
378   * @since 22.0
379   */
380  @Beta
381  public void forEachLine(Consumer<? super String> action) throws IOException {
382    try (Stream<String> lines = lines()) {
383      // The lines should be ordered regardless in most cases, but use forEachOrdered to be sure
384      lines.forEachOrdered(action);
385    } catch (UncheckedIOException e) {
386      throw e.getCause();
387    }
388  }
389
390  /**
391   * Returns whether the source has zero chars. The default implementation first checks
392   * {@link #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be
393   * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF.
394   *
395   * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that
396   * chars are actually available for reading. This means that a source may return {@code true} from
397   * {@code isEmpty()} despite having readable content.
398   *
399   * @throws IOException if an I/O error occurs
400   * @since 15.0
401   */
402  public boolean isEmpty() throws IOException {
403    Optional<Long> lengthIfKnown = lengthIfKnown();
404    if (lengthIfKnown.isPresent()) {
405      return lengthIfKnown.get() == 0L;
406    }
407    Closer closer = Closer.create();
408    try {
409      Reader reader = closer.register(openStream());
410      return reader.read() == -1;
411    } catch (Throwable e) {
412      throw closer.rethrow(e);
413    } finally {
414      closer.close();
415    }
416  }
417
418  /**
419   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
420   * the source will contain the concatenated data from the streams of the underlying sources.
421   *
422   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
423   * close the open underlying stream.
424   *
425   * @param sources the sources to concatenate
426   * @return a {@code CharSource} containing the concatenated data
427   * @since 15.0
428   */
429  public static CharSource concat(Iterable<? extends CharSource> sources) {
430    return new ConcatenatedCharSource(sources);
431  }
432
433  /**
434   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
435   * the source will contain the concatenated data from the streams of the underlying sources.
436   *
437   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
438   * close the open underlying stream.
439   *
440   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
441   * is called. This will fail if the iterator is infinite and may cause problems if the iterator
442   * eagerly fetches data for each source when iterated (rather than producing sources that only
443   * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
444   * possible.
445   *
446   * @param sources the sources to concatenate
447   * @return a {@code CharSource} containing the concatenated data
448   * @throws NullPointerException if any of {@code sources} is {@code null}
449   * @since 15.0
450   */
451  public static CharSource concat(Iterator<? extends CharSource> sources) {
452    return concat(ImmutableList.copyOf(sources));
453  }
454
455  /**
456   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
457   * the source will contain the concatenated data from the streams of the underlying sources.
458   *
459   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
460   * close the open underlying stream.
461   *
462   * @param sources the sources to concatenate
463   * @return a {@code CharSource} containing the concatenated data
464   * @throws NullPointerException if any of {@code sources} is {@code null}
465   * @since 15.0
466   */
467  public static CharSource concat(CharSource... sources) {
468    return concat(ImmutableList.copyOf(sources));
469  }
470
471  /**
472   * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
473   * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
474   * the {@code charSequence} is mutated while it is being read, so don't do that.
475   *
476   * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
477   */
478  public static CharSource wrap(CharSequence charSequence) {
479    return charSequence instanceof String
480        ? new StringCharSource((String) charSequence)
481        : new CharSequenceCharSource(charSequence);
482  }
483
484  /**
485   * Returns an immutable {@link CharSource} that contains no characters.
486   *
487   * @since 15.0
488   */
489  public static CharSource empty() {
490    return EmptyCharSource.INSTANCE;
491  }
492
493  /**
494   * A byte source that reads chars from this source and encodes them as bytes using a charset.
495   */
496  private final class AsByteSource extends ByteSource {
497
498    final Charset charset;
499
500    AsByteSource(Charset charset) {
501      this.charset = checkNotNull(charset);
502    }
503
504    @Override
505    public CharSource asCharSource(Charset charset) {
506      if (charset.equals(this.charset)) {
507        return CharSource.this;
508      }
509      return super.asCharSource(charset);
510    }
511
512    @Override
513    public InputStream openStream() throws IOException {
514      return new ReaderInputStream(CharSource.this.openStream(), charset, 8192);
515    }
516
517    @Override
518    public String toString() {
519      return CharSource.this.toString() + ".asByteSource(" + charset + ")";
520    }
521  }
522
523  private static class CharSequenceCharSource extends CharSource {
524
525    private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r");
526
527    protected final CharSequence seq;
528
529    protected CharSequenceCharSource(CharSequence seq) {
530      this.seq = checkNotNull(seq);
531    }
532
533    @Override
534    public Reader openStream() {
535      return new CharSequenceReader(seq);
536    }
537
538    @Override
539    public String read() {
540      return seq.toString();
541    }
542
543    @Override
544    public boolean isEmpty() {
545      return seq.length() == 0;
546    }
547
548    @Override
549    public long length() {
550      return seq.length();
551    }
552
553    @Override
554    public Optional<Long> lengthIfKnown() {
555      return Optional.of((long) seq.length());
556    }
557
558    /**
559     * Returns an iterator over the lines in the string. If the string ends in a newline, a final
560     * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine().
561     */
562    private Iterator<String> linesIterator() {
563      return new AbstractIterator<String>() {
564        Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();
565
566        @Override
567        protected String computeNext() {
568          if (lines.hasNext()) {
569            String next = lines.next();
570            // skip last line if it's empty
571            if (lines.hasNext() || !next.isEmpty()) {
572              return next;
573            }
574          }
575          return endOfData();
576        }
577      };
578    }
579
580    @Override
581    public Stream<String> lines() {
582      return Streams.stream(linesIterator());
583    }
584
585    @Override
586    public String readFirstLine() {
587      Iterator<String> lines = linesIterator();
588      return lines.hasNext() ? lines.next() : null;
589    }
590
591    @Override
592    public ImmutableList<String> readLines() {
593      return ImmutableList.copyOf(linesIterator());
594    }
595
596    @Override
597    public <T> T readLines(LineProcessor<T> processor) throws IOException {
598      Iterator<String> lines = linesIterator();
599      while (lines.hasNext()) {
600        if (!processor.processLine(lines.next())) {
601          break;
602        }
603      }
604      return processor.getResult();
605    }
606
607    @Override
608    public String toString() {
609      return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
610    }
611  }
612
613  /**
614   * Subclass specialized for string instances.
615   *
616   * <p>Since Strings are immutable and built into the jdk we can optimize some operations
617   *
618   * <ul>
619   *   <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can
620   *       use {@link String#getChars(int, int, char[], int)} instead of copying characters one by
621   *       one with {@link CharSequence#charAt(int)}.
622   *   <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link
623   *       #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length
624   *       can't change, and it is faster because many writers and appendables are optimized for
625   *       appending string instances.
626   * </ul>
627   */
628  private static class StringCharSource extends CharSequenceCharSource {
629    protected StringCharSource(String seq) {
630      super(seq);
631    }
632
633    @Override
634    public Reader openStream() {
635      return new StringReader((String) seq);
636    }
637
638    @Override
639    public long copyTo(Appendable appendable) throws IOException {
640      appendable.append(seq);
641      return seq.length();
642    }
643
644    @Override
645    public long copyTo(CharSink sink) throws IOException {
646      checkNotNull(sink);
647      Closer closer = Closer.create();
648      try {
649        Writer writer = closer.register(sink.openStream());
650        writer.write((String) seq);
651        return seq.length();
652      } catch (Throwable e) {
653        throw closer.rethrow(e);
654      } finally {
655        closer.close();
656      }
657    }
658  }
659
660  private static final class EmptyCharSource extends StringCharSource {
661
662    private static final EmptyCharSource INSTANCE = new EmptyCharSource();
663
664    private EmptyCharSource() {
665      super("");
666    }
667
668    @Override
669    public String toString() {
670      return "CharSource.empty()";
671    }
672  }
673
674  private static final class ConcatenatedCharSource extends CharSource {
675
676    private final Iterable<? extends CharSource> sources;
677
678    ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
679      this.sources = checkNotNull(sources);
680    }
681
682    @Override
683    public Reader openStream() throws IOException {
684      return new MultiReader(sources.iterator());
685    }
686
687    @Override
688    public boolean isEmpty() throws IOException {
689      for (CharSource source : sources) {
690        if (!source.isEmpty()) {
691          return false;
692        }
693      }
694      return true;
695    }
696
697    @Override
698    public Optional<Long> lengthIfKnown() {
699      long result = 0L;
700      for (CharSource source : sources) {
701        Optional<Long> lengthIfKnown = source.lengthIfKnown();
702        if (!lengthIfKnown.isPresent()) {
703          return Optional.absent();
704        }
705        result += lengthIfKnown.get();
706      }
707      return Optional.of(result);
708    }
709
710    @Override
711    public long length() throws IOException {
712      long result = 0L;
713      for (CharSource source : sources) {
714        result += source.length();
715      }
716      return result;
717    }
718
719    @Override
720    public String toString() {
721      return "CharSource.concat(" + sources + ")";
722    }
723  }
724}