001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.math;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkState;
020import static com.google.common.math.DoubleUtils.ensureNonNegative;
021import static com.google.common.math.StatsAccumulator.calculateNewMeanNonFinite;
022import static com.google.common.primitives.Doubles.isFinite;
023import static java.lang.Double.NaN;
024import static java.lang.Double.doubleToLongBits;
025import static java.lang.Double.isNaN;
026
027import com.google.common.annotations.Beta;
028import com.google.common.annotations.GwtIncompatible;
029import com.google.common.base.MoreObjects;
030import com.google.common.base.Objects;
031import java.io.Serializable;
032import java.nio.ByteBuffer;
033import java.nio.ByteOrder;
034import java.util.Iterator;
035import javax.annotation.Nullable;
036
037/**
038 * A bundle of statistical summary values -- sum, count, mean/average, min and max, and several
039 * forms of variance -- that were computed from a single set of zero or more floating-point values.
040 *
041 * <p>There are two ways to obtain a {@code Stats} instance:
042 *
043 * <ul>
044 * <li>If all the values you want to summarize are already known, use the appropriate {@code
045 *     Stats.of} factory method below. Primitive arrays, iterables and iterators of any kind of
046 *     {@code Number}, and primitive varargs are supported.
047 * <li>Or, to avoid storing up all the data first, create a {@link StatsAccumulator} instance, feed
048 *     values to it as you get them, then call {@link StatsAccumulator#snapshot}.
049 * </ul>
050 *
051 * <p>Static convenience methods called {@code meanOf} are also provided for users who wish to
052 * calculate <i>only</i> the mean.
053 *
054 * <p><b>Java 8 users:</b> If you are not using any of the variance statistics, you may wish to use
055 * built-in JDK libraries instead of this class.
056 *
057 * @author Pete Gillin
058 * @author Kevin Bourrillion
059 * @since 20.0
060 */
061@Beta
062@GwtIncompatible
063public final class Stats implements Serializable {
064
065  private final long count;
066  private final double mean;
067  private final double sumOfSquaresOfDeltas;
068  private final double min;
069  private final double max;
070
071  /**
072   * Internal constructor. Users should use {@link #of} or {@link StatsAccumulator#snapshot}.
073   *
074   * <p>To ensure that the created instance obeys its contract, the parameters should satisfy the
075   * following constraints. This is the callers responsibility and is not enforced here.
076   * <ul>
077   * <li>If {@code count} is 0, {@code mean} may have any finite value (its only usage will be to
078   * get multiplied by 0 to calculate the sum), and the other parameters may have any values (they
079   * will not be used).
080   * <li>If {@code count} is 1, {@code sumOfSquaresOfDeltas} must be exactly 0.0 or
081   * {@link Double#NaN}.
082   * </ul>
083   */
084  Stats(long count, double mean, double sumOfSquaresOfDeltas, double min, double max) {
085    this.count = count;
086    this.mean = mean;
087    this.sumOfSquaresOfDeltas = sumOfSquaresOfDeltas;
088    this.min = min;
089    this.max = max;
090  }
091
092  /**
093   * Returns statistics over a dataset containing the given values.
094   *
095   * @param values a series of values, which will be converted to {@code double} values (this may
096   *     cause loss of precision)
097   */
098  public static Stats of(Iterable<? extends Number> values) {
099    StatsAccumulator accumulator = new StatsAccumulator();
100    accumulator.addAll(values);
101    return accumulator.snapshot();
102  }
103
104  /**
105   * Returns statistics over a dataset containing the given values.
106   *
107   * @param values a series of values, which will be converted to {@code double} values (this may
108   *     cause loss of precision)
109   */
110  public static Stats of(Iterator<? extends Number> values) {
111    StatsAccumulator accumulator = new StatsAccumulator();
112    accumulator.addAll(values);
113    return accumulator.snapshot();
114  }
115
116  /**
117   * Returns statistics over a dataset containing the given values.
118   *
119   * @param values a series of values
120   */
121  public static Stats of(double... values) {
122    StatsAccumulator acummulator = new StatsAccumulator();
123    acummulator.addAll(values);
124    return acummulator.snapshot();
125  }
126
127  /**
128   * Returns statistics over a dataset containing the given values.
129   *
130   * @param values a series of values
131   */
132  public static Stats of(int... values) {
133    StatsAccumulator acummulator = new StatsAccumulator();
134    acummulator.addAll(values);
135    return acummulator.snapshot();
136  }
137
138  /**
139   * Returns statistics over a dataset containing the given values.
140   *
141   * @param values a series of values, which will be converted to {@code double} values (this may
142   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
143   */
144  public static Stats of(long... values) {
145    StatsAccumulator acummulator = new StatsAccumulator();
146    acummulator.addAll(values);
147    return acummulator.snapshot();
148  }
149
150  /**
151   * Returns the number of values.
152   */
153  public long count() {
154    return count;
155  }
156
157  /**
158   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
159   * values. The count must be non-zero.
160   *
161   * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of
162   * the arithmetic mean of the population.
163   *
164   * <h3>Non-finite values</h3>
165   *
166   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
167   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
168   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
169   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
170   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or
171   * {@link Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
172   *
173   * <p>If you only want to calculate the mean, use {#meanOf} instead of creating a {@link Stats}
174   * instance.
175   *
176   * @throws IllegalStateException if the dataset is empty
177   */
178  public double mean() {
179    checkState(count != 0);
180    return mean;
181  }
182
183  /**
184   * Returns the sum of the values.
185   *
186   * <h3>Non-finite values</h3>
187   *
188   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
189   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
190   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
191   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
192   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or
193   * {@link Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
194   */
195  public double sum() {
196    return mean * count;
197  }
198
199  /**
200   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population
201   * variance</a> of the values. The count must be non-zero.
202   *
203   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value.
204   * It is not guaranteed to return zero when the dataset consists of the same value multiple times,
205   * due to numerical errors. However, it is guaranteed never to return a negative result.
206   *
207   * <h3>Non-finite values</h3>
208   *
209   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
210   * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
211   *
212   * @throws IllegalStateException if the dataset is empty
213   */
214  public double populationVariance() {
215    checkState(count > 0);
216    if (isNaN(sumOfSquaresOfDeltas)) {
217      return NaN;
218    }
219    if (count == 1) {
220      return 0.0;
221    }
222    return ensureNonNegative(sumOfSquaresOfDeltas) / count();
223  }
224
225  /**
226   * Returns the
227   * <a href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values">
228   * population standard deviation</a> of the values. The count must be non-zero.
229   *
230   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value.
231   * It is not guaranteed to return zero when the dataset consists of the same value multiple times,
232   * due to numerical errors. However, it is guaranteed never to return a negative result.
233   *
234   * <h3>Non-finite values</h3>
235   *
236   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
237   * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
238   *
239   * @throws IllegalStateException if the dataset is empty
240   */
241  public double populationStandardDeviation() {
242    return Math.sqrt(populationVariance());
243  }
244
245  /**
246   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbaised sample
247   * variance</a> of the values. If this dataset is a sample drawn from a population, this is an
248   * unbiased estimator of the population variance of the population. The count must be greater than
249   * one.
250   *
251   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
252   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
253   *
254   * <h3>Non-finite values</h3>
255   *
256   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
257   * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
258   *
259   * @throws IllegalStateException if the dataset is empty or contains a single value
260   */
261  public double sampleVariance() {
262    checkState(count > 1);
263    if (isNaN(sumOfSquaresOfDeltas)) {
264      return NaN;
265    }
266    return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1);
267  }
268
269  /**
270   * Returns the
271   * <a href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation">
272   * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a
273   * population, this is an estimator of the population standard deviation of the population which
274   * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on
275   * the distribution). The count must be greater than one.
276   *
277   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
278   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
279   *
280   * <h3>Non-finite values</h3>
281   *
282   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
283   * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
284   *
285   * @throws IllegalStateException if the dataset is empty or contains a single value
286   */
287  public double sampleStandardDeviation() {
288    return Math.sqrt(sampleVariance());
289  }
290
291  /**
292   * Returns the lowest value in the dataset. The count must be non-zero.
293   *
294   * <h3>Non-finite values</h3>
295   *
296   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
297   * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is
298   * {@link Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite
299   * values only then the result is the lowest finite value. If it contains
300   * {@link Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}.
301   *
302   * @throws IllegalStateException if the dataset is empty
303   */
304  public double min() {
305    checkState(count != 0);
306    return min;
307  }
308
309  /**
310   * Returns the highest value in the dataset. The count must be non-zero.
311   *
312   * <h3>Non-finite values</h3>
313   *
314   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
315   * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is
316   * {@link Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite
317   * values only then the result is the highest finite value. If it contains
318   * {@link Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}.
319   *
320   * @throws IllegalStateException if the dataset is empty
321   */
322  public double max() {
323    checkState(count != 0);
324    return max;
325  }
326
327  /**
328   * {@inheritDoc}
329   *
330   * <p><b>Note:</b> This tests exact equality of the calculated statistics, including the floating
331   * point values. It is definitely true for instances constructed from exactly the same values in
332   * the same order. It is also true for an instance round-tripped through java serialization.
333   * However, floating point rounding errors mean that it may be false for some instances where the
334   * statistics are mathematically equal, including the same values in a different order.
335   */
336  @Override
337  public boolean equals(@Nullable Object obj) {
338    if (obj == null) {
339      return false;
340    }
341    if (getClass() != obj.getClass()) {
342      return false;
343    }
344    Stats other = (Stats) obj;
345    return (count == other.count)
346        && (doubleToLongBits(mean) == doubleToLongBits(other.mean))
347        && (doubleToLongBits(sumOfSquaresOfDeltas) == doubleToLongBits(other.sumOfSquaresOfDeltas))
348        && (doubleToLongBits(min) == doubleToLongBits(other.min))
349        && (doubleToLongBits(max) == doubleToLongBits(other.max));
350  }
351
352  /**
353   * {@inheritDoc}
354   *
355   * <p><b>Note:</b> This hash code is consistent with exact equality of the calculated statistics,
356   * including the floating point values. See the note on {@link #equals} for details.
357   */
358  @Override
359  public int hashCode() {
360    return Objects.hashCode(count, mean, sumOfSquaresOfDeltas, min, max);
361  }
362
363  @Override
364  public String toString() {
365    if (count() > 0) {
366      return MoreObjects.toStringHelper(this)
367          .add("count", count)
368          .add("mean", mean)
369          .add("populationStandardDeviation", populationStandardDeviation())
370          .add("min", min)
371          .add("max", max)
372          .toString();
373    } else {
374      return MoreObjects.toStringHelper(this).add("count", count).toString();
375    }
376  }
377
378  double sumOfSquaresOfDeltas() {
379    return sumOfSquaresOfDeltas;
380  }
381
382  /**
383   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
384   * values. The count must be non-zero.
385   *
386   * <p>The definition of the mean is the same as {@link Stats#mean}.
387   *
388   * @param values a series of values, which will be converted to {@code double} values (this may
389   *     cause loss of precision)
390   * @throws IllegalArgumentException if the dataset is empty
391   */
392  public static double meanOf(Iterable<? extends Number> values) {
393    return meanOf(values.iterator());
394  }
395
396  /**
397   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
398   * values. The count must be non-zero.
399   *
400   * <p>The definition of the mean is the same as {@link Stats#mean}.
401   *
402   * @param values a series of values, which will be converted to {@code double} values (this may
403   *     cause loss of precision)
404   * @throws IllegalArgumentException if the dataset is empty
405   */
406  public static double meanOf(Iterator<? extends Number> values) {
407    checkArgument(values.hasNext());
408    long count = 1;
409    double mean = values.next().doubleValue();
410    while (values.hasNext()) {
411      double value = values.next().doubleValue();
412      count++;
413      if (isFinite(value) && isFinite(mean)) {
414        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
415        mean += (value - mean) / count;
416      } else {
417        mean = calculateNewMeanNonFinite(mean, value);
418      }
419    }
420    return mean;
421  }
422
423  /**
424   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
425   * values. The count must be non-zero.
426   *
427   * <p>The definition of the mean is the same as {@link Stats#mean}.
428   *
429   * @param values a series of values
430   * @throws IllegalArgumentException if the dataset is empty
431   */
432  public static double meanOf(double... values) {
433    checkArgument(values.length > 0);
434    double mean = values[0];
435    for (int index = 1; index < values.length; index++) {
436      double value = values[index];
437      if (isFinite(value) && isFinite(mean)) {
438        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
439        mean += (value - mean) / (index + 1);
440      } else {
441        mean = calculateNewMeanNonFinite(mean, value);
442      }
443    }
444    return mean;
445  }
446
447  /**
448   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
449   * values. The count must be non-zero.
450   *
451   * <p>The definition of the mean is the same as {@link Stats#mean}.
452   *
453   * @param values a series of values
454   * @throws IllegalArgumentException if the dataset is empty
455   */
456  public static double meanOf(int... values) {
457    checkArgument(values.length > 0);
458    double mean = values[0];
459    for (int index = 1; index < values.length; index++) {
460      double value = values[index];
461      if (isFinite(value) && isFinite(mean)) {
462        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
463        mean += (value - mean) / (index + 1);
464      } else {
465        mean = calculateNewMeanNonFinite(mean, value);
466      }
467    }
468    return mean;
469  }
470
471  /**
472   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
473   * values. The count must be non-zero.
474   *
475   * <p>The definition of the mean is the same as {@link Stats#mean}.
476   *
477   * @param values a series of values, which will be converted to {@code double} values (this may
478   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
479   * @throws IllegalArgumentException if the dataset is empty
480   */
481  public static double meanOf(long... values) {
482    checkArgument(values.length > 0);
483    double mean = values[0];
484    for (int index = 1; index < values.length; index++) {
485      double value = values[index];
486      if (isFinite(value) && isFinite(mean)) {
487        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
488        mean += (value - mean) / (index + 1);
489      } else {
490        mean = calculateNewMeanNonFinite(mean, value);
491      }
492    }
493    return mean;
494  }
495
496  // Serialization helpers
497
498  /**
499   * The size of byte array representaion in bytes.
500   */
501  static final int BYTES = (Long.SIZE + Double.SIZE * 4) / Byte.SIZE;
502
503  /**
504   * Gets a byte array representation of this instance.
505   *
506   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
507   * versions.
508   */
509  public byte[] toByteArray() {
510    ByteBuffer buff = ByteBuffer.allocate(BYTES).order(ByteOrder.LITTLE_ENDIAN);
511    writeTo(buff);
512    return buff.array();
513  }
514
515  /**
516   * Writes to the given {@link ByteBuffer} a byte representation of this instance.
517   *
518   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
519   * versions.
520   *
521   * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as
522   *     {@link ByteOrder#LITTLE_ENDIAN}, to which a BYTES-long byte representation of this instance
523   *     is written. In the process increases the position of {@link ByteBuffer} by BYTES.
524   */
525  void writeTo(ByteBuffer buffer) {
526    checkNotNull(buffer);
527    checkArgument(
528        buffer.remaining() >= BYTES,
529        "Expected at least Stats.BYTES = %s remaining , got %s",
530        BYTES,
531        buffer.remaining());
532    buffer
533        .putLong(count)
534        .putDouble(mean)
535        .putDouble(sumOfSquaresOfDeltas)
536        .putDouble(min)
537        .putDouble(max);
538  }
539
540  /**
541   * Creates a Stats instance from the given byte representation which was obtained by
542   * {@link #toByteArray}.
543   *
544   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
545   * versions.
546   */
547  public static Stats fromByteArray(byte[] byteArray) {
548    checkNotNull(byteArray);
549    checkArgument(
550        byteArray.length == BYTES,
551        "Expected Stats.BYTES = %s remaining , got %s",
552        BYTES,
553        byteArray.length);
554    return readFrom(ByteBuffer.wrap(byteArray).order(ByteOrder.LITTLE_ENDIAN));
555  }
556
557  /**
558   * Creates a Stats instance from the byte representation read from the given {@link ByteBuffer}.
559   *
560   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
561   * versions.
562   *
563   * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as
564   *     {@link ByteOrder#LITTLE_ENDIAN}, from which a BYTES-long byte representation of this
565   *     instance is read. In the process increases the position of {@link ByteBuffer} by BYTES.
566   */
567  static Stats readFrom(ByteBuffer buffer) {
568    checkNotNull(buffer);
569    checkArgument(
570        buffer.remaining() >= BYTES,
571        "Expected at least Stats.BYTES = %s remaining , got %s",
572        BYTES,
573        buffer.remaining());
574    return new Stats(
575        buffer.getLong(),
576        buffer.getDouble(),
577        buffer.getDouble(),
578        buffer.getDouble(),
579        buffer.getDouble());
580  }
581
582  private static final long serialVersionUID = 0;
583}