001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkNotNull;
018
019import com.google.common.annotations.Beta;
020import com.google.common.annotations.GwtIncompatible;
021import com.google.common.base.Ascii;
022import com.google.common.base.Optional;
023import com.google.common.base.Splitter;
024import com.google.common.collect.AbstractIterator;
025import com.google.common.collect.ImmutableList;
026import com.google.common.collect.Lists;
027import com.google.common.collect.Streams;
028import com.google.errorprone.annotations.CanIgnoreReturnValue;
029import com.google.errorprone.annotations.MustBeClosed;
030import java.io.BufferedReader;
031import java.io.IOException;
032import java.io.InputStream;
033import java.io.Reader;
034import java.io.UncheckedIOException;
035import java.io.Writer;
036import java.nio.charset.Charset;
037import java.util.Iterator;
038import java.util.List;
039import java.util.function.Consumer;
040import java.util.stream.Stream;
041import javax.annotation.Nullable;
042
043/**
044 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a
045 * {@code CharSource} is not an open, stateful stream of characters that can be read and closed.
046 * Instead, it is an immutable <i>supplier</i> of {@code Reader} instances.
047 *
048 * <p>{@code CharSource} provides two kinds of methods:
049 * <ul>
050 * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
051 *     instance each time they are called. The caller is responsible for ensuring that the returned
052 *     reader is closed.
053 * <li><b>Convenience methods:</b> These are implementations of common operations that are typically
054 *     implemented by opening a reader using one of the methods in the first category, doing
055 *     something and finally closing the reader that was opened.
056 * </ul>
057 *
058 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source
059 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n},
060 * {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider
061 * there to be an empty line at the end if the contents are terminated with a line separator.
062 *
063 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
064 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
065 *
066 * @since 14.0
067 * @author Colin Decker
068 */
069@GwtIncompatible
070public abstract class CharSource {
071
  /**
   * Constructor for use by subclasses. It performs no initialization of its own.
   */
  protected CharSource() {}
076
077  /**
078   * Returns a {@link ByteSource} view of this char source that encodes chars read from this source
079   * as bytes using the given {@link Charset}.
080   *
081   * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset,
082   * the default implementation of this method will ensure that the original {@code CharSource} is
083   * returned, rather than round-trip encoding. Subclasses that override this method should behave
084   * the same way.
085   *
086   * @since 20.0
087   */
088  @Beta
089  public ByteSource asByteSource(Charset charset) {
090    return new AsByteSource(charset);
091  }
092
  /**
   * Opens a new {@link Reader} for reading from this source. Implementations must return a new,
   * independent reader on every call.
   *
   * <p>The caller is responsible for ensuring that the returned reader is closed.
   *
   * @return a newly opened reader over the contents of this source
   * @throws IOException if an I/O error occurs while opening the reader
   */
  public abstract Reader openStream() throws IOException;
102
103  /**
104   * Opens a new {@link BufferedReader} for reading from this source. This method returns a new,
105   * independent reader each time it is called.
106   *
107   * <p>The caller is responsible for ensuring that the returned reader is closed.
108   *
109   * @throws IOException if an I/O error occurs while of opening the reader
110   */
111  public BufferedReader openBufferedStream() throws IOException {
112    Reader reader = openStream();
113    return (reader instanceof BufferedReader)
114        ? (BufferedReader) reader
115        : new BufferedReader(reader);
116  }
117
118  /**
119   * Opens a new {@link Stream} for reading text one line at a time from this source. This method
120   * returns a new, independent stream each time it is called.
121   *
122   * <p>The returned stream is lazy and only reads from the source in the terminal operation. If an
123   * I/O error occurs while the stream is reading from the source or when the stream is closed, an
124   * {@link UncheckedIOException} is thrown.
125   *
126   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
127   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
128   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
129   * as if it does.
130   *
131   * <p>The caller is responsible for ensuring that the returned stream is closed. For example:
132   *
133   * <pre>{@code
134   * try (Stream<String> lines = source.lines()) {
135   *   lines.map(...)
136   *      .filter(...)
137   *      .forEach(...);
138   * }
139   * }</pre>
140   *
141   * @throws IOException if an I/O error occurs while opening the stream
142   * @since 22.0
143   */
144  @Beta
145  @MustBeClosed
146  public Stream<String> lines() throws IOException {
147    BufferedReader reader = openBufferedStream();
148    return reader.lines().onClose(() -> {
149      try {
150        reader.close();
151      } catch (IOException e) {
152        throw new UncheckedIOException(e);
153      }
154    });
155  }
156
157  /**
158   * Returns the size of this source in chars, if the size can be easily determined without actually
159   * opening the data stream.
160   *
161   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a
162   * {@code CharSequence}, may return a non-absent value. Note that in such cases, it is
163   * <i>possible</i> that this method will return a different number of chars than would be returned
164   * by reading all of the chars.
165   *
166   * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may
167   * return a different number of chars if the contents are changed.
168   *
169   * @since 19.0
170   */
171  @Beta
172  public Optional<Long> lengthIfKnown() {
173    return Optional.absent();
174  }
175
176  /**
177   * Returns the length of this source in chars, even if doing so requires opening and traversing an
178   * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}.
179   *
180   * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If
181   * absent, it will fall back to a heavyweight operation that will open a stream,
182   * {@link Reader#skip(long) skip} to the end of the stream, and return the total number of chars
183   * that were skipped.
184   *
185   * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient
186   * implementation, it is <i>possible</i> that this method will return a different number of chars
187   * than would be returned by reading all of the chars.
188   *
189   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
190   * number of chars if the contents are changed.
191   *
192   * @throws IOException if an I/O error occurs while reading the length of this source
193   * @since 19.0
194   */
195  @Beta
196  public long length() throws IOException {
197    Optional<Long> lengthIfKnown = lengthIfKnown();
198    if (lengthIfKnown.isPresent()) {
199      return lengthIfKnown.get();
200    }
201
202    Closer closer = Closer.create();
203    try {
204      Reader reader = closer.register(openStream());
205      return countBySkipping(reader);
206    } catch (Throwable e) {
207      throw closer.rethrow(e);
208    } finally {
209      closer.close();
210    }
211  }
212
213  private long countBySkipping(Reader reader) throws IOException {
214    long count = 0;
215    long read;
216    while ((read = reader.skip(Long.MAX_VALUE)) != 0) {
217      count += read;
218    }
219    return count;
220  }
221
222  /**
223   * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
224   * Does not close {@code appendable} if it is {@code Closeable}.
225   *
226   * @return the number of characters copied
227   * @throws IOException if an I/O error occurs while reading from this source or writing to
228   *     {@code appendable}
229   */
230  @CanIgnoreReturnValue
231  public long copyTo(Appendable appendable) throws IOException {
232    checkNotNull(appendable);
233
234    Closer closer = Closer.create();
235    try {
236      Reader reader = closer.register(openStream());
237      return CharStreams.copy(reader, appendable);
238    } catch (Throwable e) {
239      throw closer.rethrow(e);
240    } finally {
241      closer.close();
242    }
243  }
244
245  /**
246   * Copies the contents of this source to the given sink.
247   *
248   * @return the number of characters copied
249   * @throws IOException if an I/O error occurs while reading from this source or writing to
250   *     {@code sink}
251   */
252  @CanIgnoreReturnValue
253  public long copyTo(CharSink sink) throws IOException {
254    checkNotNull(sink);
255
256    Closer closer = Closer.create();
257    try {
258      Reader reader = closer.register(openStream());
259      Writer writer = closer.register(sink.openStream());
260      return CharStreams.copy(reader, writer);
261    } catch (Throwable e) {
262      throw closer.rethrow(e);
263    } finally {
264      closer.close();
265    }
266  }
267
268  /**
269   * Reads the contents of this source as a string.
270   *
271   * @throws IOException if an I/O error occurs while reading from this source
272   */
273  public String read() throws IOException {
274    Closer closer = Closer.create();
275    try {
276      Reader reader = closer.register(openStream());
277      return CharStreams.toString(reader);
278    } catch (Throwable e) {
279      throw closer.rethrow(e);
280    } finally {
281      closer.close();
282    }
283  }
284
285  /**
286   * Reads the first line of this source as a string. Returns {@code null} if this source is empty.
287   *
288   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
289   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
290   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
291   * as if it does.
292   *
293   * @throws IOException if an I/O error occurs while reading from this source
294   */
295  @Nullable
296  public String readFirstLine() throws IOException {
297    Closer closer = Closer.create();
298    try {
299      BufferedReader reader = closer.register(openBufferedStream());
300      return reader.readLine();
301    } catch (Throwable e) {
302      throw closer.rethrow(e);
303    } finally {
304      closer.close();
305    }
306  }
307
308  /**
309   * Reads all the lines of this source as a list of strings. The returned list will be empty if
310   * this source is empty.
311   *
312   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
313   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
314   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
315   * as if it does.
316   *
317   * @throws IOException if an I/O error occurs while reading from this source
318   */
319  public ImmutableList<String> readLines() throws IOException {
320    Closer closer = Closer.create();
321    try {
322      BufferedReader reader = closer.register(openBufferedStream());
323      List<String> result = Lists.newArrayList();
324      String line;
325      while ((line = reader.readLine()) != null) {
326        result.add(line);
327      }
328      return ImmutableList.copyOf(result);
329    } catch (Throwable e) {
330      throw closer.rethrow(e);
331    } finally {
332      closer.close();
333    }
334  }
335
336  /**
337   * Reads lines of text from this source, processing each line as it is read using the given
338   * {@link LineProcessor processor}. Stops when all lines have been processed or the processor
339   * returns {@code false} and returns the result produced by the processor.
340   *
341   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
342   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
343   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
344   * as if it does.
345   *
346   * @throws IOException if an I/O error occurs while reading from this source or if
347   *     {@code processor} throws an {@code IOException}
348   * @since 16.0
349   */
350  @Beta
351  @CanIgnoreReturnValue // some processors won't return a useful result
352  public <T> T readLines(LineProcessor<T> processor) throws IOException {
353    checkNotNull(processor);
354
355    Closer closer = Closer.create();
356    try {
357      Reader reader = closer.register(openStream());
358      return CharStreams.readLines(reader, processor);
359    } catch (Throwable e) {
360      throw closer.rethrow(e);
361    } finally {
362      closer.close();
363    }
364  }
365
366  /**
367   * Reads all lines of text from this source, running the given {@code action} for each line as
368   * it is read.
369   *
370   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
371   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
372   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
373   * as if it does.
374   *
375   * @throws IOException if an I/O error occurs while reading from this source or if
376   *     {@code action} throws an {@code UncheckedIOException}
377   * @since 22.0
378   */
379  @Beta
380  public void forEachLine(Consumer<? super String> action) throws IOException {
381    try (Stream<String> lines = lines()) {
382      // The lines should be ordered regardless in most cases, but use forEachOrdered to be sure
383      lines.forEachOrdered(action);
384    } catch (UncheckedIOException e) {
385      throw e.getCause();
386    }
387  }
388
389  /**
390   * Returns whether the source has zero chars. The default implementation returns true if
391   * {@link #lengthIfKnown} returns zero, falling back to opening a stream and checking for EOF if
392   * the length is not known.
393   *
394   * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that
395   * chars are actually available for reading. This means that a source may return {@code true} from
396   * {@code isEmpty()} despite having readable content.
397   *
398   * @throws IOException if an I/O error occurs
399   * @since 15.0
400   */
401  public boolean isEmpty() throws IOException {
402    Optional<Long> lengthIfKnown = lengthIfKnown();
403    if (lengthIfKnown.isPresent() && lengthIfKnown.get() == 0L) {
404      return true;
405    }
406    Closer closer = Closer.create();
407    try {
408      Reader reader = closer.register(openStream());
409      return reader.read() == -1;
410    } catch (Throwable e) {
411      throw closer.rethrow(e);
412    } finally {
413      closer.close();
414    }
415  }
416
417  /**
418   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
419   * the source will contain the concatenated data from the streams of the underlying sources.
420   *
421   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
422   * close the open underlying stream.
423   *
424   * @param sources the sources to concatenate
425   * @return a {@code CharSource} containing the concatenated data
426   * @since 15.0
427   */
428  public static CharSource concat(Iterable<? extends CharSource> sources) {
429    return new ConcatenatedCharSource(sources);
430  }
431
432  /**
433   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
434   * the source will contain the concatenated data from the streams of the underlying sources.
435   *
436   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
437   * close the open underlying stream.
438   *
439   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
440   * is called. This will fail if the iterator is infinite and may cause problems if the iterator
441   * eagerly fetches data for each source when iterated (rather than producing sources that only
442   * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
443   * possible.
444   *
445   * @param sources the sources to concatenate
446   * @return a {@code CharSource} containing the concatenated data
447   * @throws NullPointerException if any of {@code sources} is {@code null}
448   * @since 15.0
449   */
450  public static CharSource concat(Iterator<? extends CharSource> sources) {
451    return concat(ImmutableList.copyOf(sources));
452  }
453
454  /**
455   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
456   * the source will contain the concatenated data from the streams of the underlying sources.
457   *
458   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
459   * close the open underlying stream.
460   *
461   * @param sources the sources to concatenate
462   * @return a {@code CharSource} containing the concatenated data
463   * @throws NullPointerException if any of {@code sources} is {@code null}
464   * @since 15.0
465   */
466  public static CharSource concat(CharSource... sources) {
467    return concat(ImmutableList.copyOf(sources));
468  }
469
470  /**
471   * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
472   * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
473   * the {@code charSequence} is mutated while it is being read, so don't do that.
474   *
475   * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
476   */
477  public static CharSource wrap(CharSequence charSequence) {
478    return new CharSequenceCharSource(charSequence);
479  }
480
481  /**
482   * Returns an immutable {@link CharSource} that contains no characters.
483   *
484   * @since 15.0
485   */
486  public static CharSource empty() {
487    return EmptyCharSource.INSTANCE;
488  }
489
490  /**
491   * A byte source that reads chars from this source and encodes them as bytes using a charset.
492   */
493  private final class AsByteSource extends ByteSource {
494
495    final Charset charset;
496
497    AsByteSource(Charset charset) {
498      this.charset = checkNotNull(charset);
499    }
500
501    @Override
502    public CharSource asCharSource(Charset charset) {
503      if (charset.equals(this.charset)) {
504        return CharSource.this;
505      }
506      return super.asCharSource(charset);
507    }
508
509    @Override
510    public InputStream openStream() throws IOException {
511      return new ReaderInputStream(CharSource.this.openStream(), charset, 8192);
512    }
513
514    @Override
515    public String toString() {
516      return CharSource.this.toString() + ".asByteSource(" + charset + ")";
517    }
518  }
519
520  private static class CharSequenceCharSource extends CharSource {
521
522    private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r");
523
524    private final CharSequence seq;
525
526    protected CharSequenceCharSource(CharSequence seq) {
527      this.seq = checkNotNull(seq);
528    }
529
530    @Override
531    public Reader openStream() {
532      return new CharSequenceReader(seq);
533    }
534
535    @Override
536    public String read() {
537      return seq.toString();
538    }
539
540    @Override
541    public boolean isEmpty() {
542      return seq.length() == 0;
543    }
544
545    @Override
546    public long length() {
547      return seq.length();
548    }
549
550    @Override
551    public Optional<Long> lengthIfKnown() {
552      return Optional.of((long) seq.length());
553    }
554
555    /**
556     * Returns an iterator over the lines in the string. If the string ends in a newline, a final
557     * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine().
558     */
559    private Iterator<String> linesIterator() {
560      return new AbstractIterator<String>() {
561        Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();
562
563        @Override
564        protected String computeNext() {
565          if (lines.hasNext()) {
566            String next = lines.next();
567            // skip last line if it's empty
568            if (lines.hasNext() || !next.isEmpty()) {
569              return next;
570            }
571          }
572          return endOfData();
573        }
574      };
575    }
576
577    @Override
578    public Stream<String> lines() {
579      return Streams.stream(linesIterator());
580    }
581
582    @Override
583    public String readFirstLine() {
584      Iterator<String> lines = linesIterator();
585      return lines.hasNext() ? lines.next() : null;
586    }
587
588    @Override
589    public ImmutableList<String> readLines() {
590      return ImmutableList.copyOf(linesIterator());
591    }
592
593    @Override
594    public <T> T readLines(LineProcessor<T> processor) throws IOException {
595      Iterator<String> lines = linesIterator();
596      while (lines.hasNext()) {
597        if (!processor.processLine(lines.next())) {
598          break;
599        }
600      }
601      return processor.getResult();
602    }
603
604    @Override
605    public String toString() {
606      return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
607    }
608  }
609
  /**
   * The {@code CharSource} with no characters: a {@code CharSequenceCharSource} over the empty
   * string, exposed through a single shared instance.
   */
  private static final class EmptyCharSource extends CharSequenceCharSource {

    /** The only instance of this class; the constructor is private. */
    private static final EmptyCharSource INSTANCE = new EmptyCharSource();

    private EmptyCharSource() {
      super("");
    }

    @Override
    public String toString() {
      return "CharSource.empty()";
    }
  }
623
624  private static final class ConcatenatedCharSource extends CharSource {
625
626    private final Iterable<? extends CharSource> sources;
627
628    ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
629      this.sources = checkNotNull(sources);
630    }
631
632    @Override
633    public Reader openStream() throws IOException {
634      return new MultiReader(sources.iterator());
635    }
636
637    @Override
638    public boolean isEmpty() throws IOException {
639      for (CharSource source : sources) {
640        if (!source.isEmpty()) {
641          return false;
642        }
643      }
644      return true;
645    }
646
647    @Override
648    public Optional<Long> lengthIfKnown() {
649      long result = 0L;
650      for (CharSource source : sources) {
651        Optional<Long> lengthIfKnown = source.lengthIfKnown();
652        if (!lengthIfKnown.isPresent()) {
653          return Optional.absent();
654        }
655        result += lengthIfKnown.get();
656      }
657      return Optional.of(result);
658    }
659
660    @Override
661    public long length() throws IOException {
662      long result = 0L;
663      for (CharSource source : sources) {
664        result += source.length();
665      }
666      return result;
667    }
668
669    @Override
670    public String toString() {
671      return "CharSource.concat(" + sources + ")";
672    }
673  }
674}