001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.math;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkState;
020import static com.google.common.math.DoubleUtils.ensureNonNegative;
021import static com.google.common.math.StatsAccumulator.calculateNewMeanNonFinite;
022import static com.google.common.primitives.Doubles.isFinite;
023import static java.lang.Double.NaN;
024import static java.lang.Double.doubleToLongBits;
025import static java.lang.Double.isNaN;
026
027import com.google.common.annotations.Beta;
028import com.google.common.annotations.GwtIncompatible;
029import com.google.common.base.MoreObjects;
030import com.google.common.base.Objects;
031import java.io.Serializable;
032import java.nio.ByteBuffer;
033import java.nio.ByteOrder;
034import java.util.Iterator;
035import javax.annotation.Nullable;
036
037/**
038 * A bundle of statistical summary values -- sum, count, mean/average, min and max, and several
039 * forms of variance -- that were computed from a single set of zero or more floating-point values.
040 *
041 * <p>There are two ways to obtain a {@code Stats} instance:
042 *
043 * <ul>
044 * <li>If all the values you want to summarize are already known, use the appropriate {@code
045 *     Stats.of} factory method below. Primitive arrays, iterables and iterators of any kind of
046 *     {@code Number}, and primitive varargs are supported.
047 * <li>Or, to avoid storing up all the data first, create a {@link StatsAccumulator} instance, feed
048 *     values to it as you get them, then call {@link StatsAccumulator#snapshot}.
049 * </ul>
050 *
051 * <p>Static convenience methods called {@code meanOf} are also provided for users who wish to
052 * calculate <i>only</i> the mean.
053 *
054 * <p><b>Java 8 users:</b> If you are not using any of the variance statistics, you may wish to use
055 * built-in JDK libraries instead of this class.
056 *
057 * @author Pete Gillin
058 * @author Kevin Bourrillion
059 * @since 20.0
060 */
061@Beta
062@GwtIncompatible
063public final class Stats implements Serializable {
064
065  private final long count;
066  private final double mean;
067  private final double sumOfSquaresOfDeltas;
068  private final double min;
069  private final double max;
070
071  /**
072   * Internal constructor. Users should use {@link #of} or {@link StatsAccumulator#snapshot}.
073   *
074   * <p>To ensure that the created instance obeys its contract, the parameters should satisfy the
075   * following constraints. This is the callers responsibility and is not enforced here.
076   * <ul>
077   * <li>If {@code count} is 0, {@code mean} may have any finite value (its only usage will be to
078   * get multiplied by 0 to calculate the sum), and the other parameters may have any values (they
079   * will not be used).
080   * <li>If {@code count} is 1, {@code sumOfSquaresOfDeltas} must be exactly 0.0 or
081   * {@link Double#NaN}.
082   * </ul>
083   */
084  Stats(long count, double mean, double sumOfSquaresOfDeltas, double min, double max) {
085    this.count = count;
086    this.mean = mean;
087    this.sumOfSquaresOfDeltas = sumOfSquaresOfDeltas;
088    this.min = min;
089    this.max = max;
090  }
091
092  /**
093   * Returns statistics over a dataset containing the given values.
094   *
095   * @param values a series of values, which will be converted to {@code double} values (this may
096   *     cause loss of precision)
097   */
098  public static Stats of(Iterable<? extends Number> values) {
099    StatsAccumulator accumulator = new StatsAccumulator();
100    accumulator.addAll(values);
101    return accumulator.snapshot();
102  }
103
104  /**
105   * Returns statistics over a dataset containing the given values.
106   *
107   * @param values a series of values, which will be converted to {@code double} values (this may
108   *     cause loss of precision)
109   */
110  public static Stats of(Iterator<? extends Number> values) {
111    StatsAccumulator accumulator = new StatsAccumulator();
112    accumulator.addAll(values);
113    return accumulator.snapshot();
114  }
115
116  /**
117   * Returns statistics over a dataset containing the given values.
118   *
119   * @param values a series of values
120   */
121  public static Stats of(double... values) {
122    StatsAccumulator acummulator = new StatsAccumulator();
123    acummulator.addAll(values);
124    return acummulator.snapshot();
125  }
126
127  /**
128   * Returns statistics over a dataset containing the given values.
129   *
130   * @param values a series of values
131   */
132  public static Stats of(int... values) {
133    StatsAccumulator acummulator = new StatsAccumulator();
134    acummulator.addAll(values);
135    return acummulator.snapshot();
136  }
137
138  /**
139   * Returns statistics over a dataset containing the given values.
140   *
141   * @param values a series of values, which will be converted to {@code double} values (this may
142   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
143   */
144  public static Stats of(long... values) {
145    StatsAccumulator acummulator = new StatsAccumulator();
146    acummulator.addAll(values);
147    return acummulator.snapshot();
148  }
149
150  /**
151   * Returns the number of values.
152   */
153  public long count() {
154    return count;
155  }
156
157  /**
158   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
159   * values. The count must be non-zero.
160   *
161   * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of
162   * the arithmetic mean of the population.
163   *
164   * <h3>Non-finite values</h3>
165   *
166   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
167   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
168   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
169   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
170   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or
171   * {@link Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
172   *
173   * <p>If you only want to calculate the mean, use {#meanOf} instead of creating a {@link Stats}
174   * instance.
175   *
176   * @throws IllegalStateException if the dataset is empty
177   */
178  public double mean() {
179    checkState(count != 0);
180    return mean;
181  }
182
183  /**
184   * Returns the sum of the values.
185   *
186   * <h3>Non-finite values</h3>
187   *
188   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
189   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
190   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
191   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
192   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or
193   * {@link Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
194   */
195  public double sum() {
196    return mean * count;
197  }
198
199  /**
200   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population
201   * variance</a> of the values. The count must be non-zero.
202   *
203   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value.
204   * It is not guaranteed to return zero when the dataset consists of the same value multiple times,
205   * due to numerical errors. However, it is guaranteed never to return a negative result.
206   *
207   * <h3>Non-finite values</h3>
208   *
209   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
210   * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
211   *
212   * @throws IllegalStateException if the dataset is empty
213   */
214  public double populationVariance() {
215    checkState(count > 0);
216    if (isNaN(sumOfSquaresOfDeltas)) {
217      return NaN;
218    }
219    if (count == 1) {
220      return 0.0;
221    }
222    return ensureNonNegative(sumOfSquaresOfDeltas) / count();
223  }
224
225  /**
226   * Returns the
227   * <a href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values">
228   * population standard deviation</a> of the values. The count must be non-zero.
229   *
230   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value.
231   * It is not guaranteed to return zero when the dataset consists of the same value multiple times,
232   * due to numerical errors. However, it is guaranteed never to return a negative result.
233   *
234   * <h3>Non-finite values</h3>
235   *
236   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
237   * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
238   *
239   * @throws IllegalStateException if the dataset is empty
240   */
241  public double populationStandardDeviation() {
242    return Math.sqrt(populationVariance());
243  }
244
245  /**
246   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample
247   * variance</a> of the values. If this dataset is a sample drawn from a population, this is an
248   * unbiased estimator of the population variance of the population. The count must be greater than
249   * one.
250   *
251   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
252   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
253   *
254   * <h3>Non-finite values</h3>
255   *
256   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
257   * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
258   *
259   * @throws IllegalStateException if the dataset is empty or contains a single value
260   */
261  public double sampleVariance() {
262    checkState(count > 1);
263    if (isNaN(sumOfSquaresOfDeltas)) {
264      return NaN;
265    }
266    return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1);
267  }
268
269  /**
270   * Returns the
271   * <a href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation">
272   * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a
273   * population, this is an estimator of the population standard deviation of the population which
274   * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on
275   * the distribution). The count must be greater than one.
276   *
277   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
278   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
279   *
280   * <h3>Non-finite values</h3>
281   *
282   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
283   * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
284   *
285   * @throws IllegalStateException if the dataset is empty or contains a single value
286   */
287  public double sampleStandardDeviation() {
288    return Math.sqrt(sampleVariance());
289  }
290
291  /**
292   * Returns the lowest value in the dataset. The count must be non-zero.
293   *
294   * <h3>Non-finite values</h3>
295   *
296   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
297   * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is
298   * {@link Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite
299   * values only then the result is the lowest finite value. If it contains
300   * {@link Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}.
301   *
302   * @throws IllegalStateException if the dataset is empty
303   */
304  public double min() {
305    checkState(count != 0);
306    return min;
307  }
308
309  /**
310   * Returns the highest value in the dataset. The count must be non-zero.
311   *
312   * <h3>Non-finite values</h3>
313   *
314   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
315   * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is
316   * {@link Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite
317   * values only then the result is the highest finite value. If it contains
318   * {@link Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}.
319   *
320   * @throws IllegalStateException if the dataset is empty
321   */
322  public double max() {
323    checkState(count != 0);
324    return max;
325  }
326
327  /**
328   * {@inheritDoc}
329   *
330   * <p><b>Note:</b> This tests exact equality of the calculated statistics, including the floating
331   * point values. Two instances are guaranteed to be considered equal if one is copied from the
332   * other using {@code second = new StatsAccumulator().addAll(first).snapshot()}, if both were
333   * obtained by calling {@code snapshot()} on the same {@link StatsAccumulator} without adding any
334   * values in between the two calls, or if one is obtained from the other after round-tripping
335   * through java serialization. However, floating point rounding errors mean that it may be false
336   * for some instances where the statistics are mathematically equal, including instances
337   * constructed from the same values in a different order... or (in the general case) even in the
338   * same order. (It is guaranteed to return true for instances constructed from the same values in
339   * the same order if {@code strictfp} is in effect, or if the system architecture guarantees
340   * {@code strictfp}-like semantics.)
341   */
342  @Override
343  public boolean equals(@Nullable Object obj) {
344    if (obj == null) {
345      return false;
346    }
347    if (getClass() != obj.getClass()) {
348      return false;
349    }
350    Stats other = (Stats) obj;
351    return (count == other.count)
352        && (doubleToLongBits(mean) == doubleToLongBits(other.mean))
353        && (doubleToLongBits(sumOfSquaresOfDeltas) == doubleToLongBits(other.sumOfSquaresOfDeltas))
354        && (doubleToLongBits(min) == doubleToLongBits(other.min))
355        && (doubleToLongBits(max) == doubleToLongBits(other.max));
356  }
357
358  /**
359   * {@inheritDoc}
360   *
361   * <p><b>Note:</b> This hash code is consistent with exact equality of the calculated statistics,
362   * including the floating point values. See the note on {@link #equals} for details.
363   */
364  @Override
365  public int hashCode() {
366    return Objects.hashCode(count, mean, sumOfSquaresOfDeltas, min, max);
367  }
368
369  @Override
370  public String toString() {
371    if (count() > 0) {
372      return MoreObjects.toStringHelper(this)
373          .add("count", count)
374          .add("mean", mean)
375          .add("populationStandardDeviation", populationStandardDeviation())
376          .add("min", min)
377          .add("max", max)
378          .toString();
379    } else {
380      return MoreObjects.toStringHelper(this).add("count", count).toString();
381    }
382  }
383
384  double sumOfSquaresOfDeltas() {
385    return sumOfSquaresOfDeltas;
386  }
387
388  /**
389   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
390   * values. The count must be non-zero.
391   *
392   * <p>The definition of the mean is the same as {@link Stats#mean}.
393   *
394   * @param values a series of values, which will be converted to {@code double} values (this may
395   *     cause loss of precision)
396   * @throws IllegalArgumentException if the dataset is empty
397   */
398  public static double meanOf(Iterable<? extends Number> values) {
399    return meanOf(values.iterator());
400  }
401
402  /**
403   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
404   * values. The count must be non-zero.
405   *
406   * <p>The definition of the mean is the same as {@link Stats#mean}.
407   *
408   * @param values a series of values, which will be converted to {@code double} values (this may
409   *     cause loss of precision)
410   * @throws IllegalArgumentException if the dataset is empty
411   */
412  public static double meanOf(Iterator<? extends Number> values) {
413    checkArgument(values.hasNext());
414    long count = 1;
415    double mean = values.next().doubleValue();
416    while (values.hasNext()) {
417      double value = values.next().doubleValue();
418      count++;
419      if (isFinite(value) && isFinite(mean)) {
420        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
421        mean += (value - mean) / count;
422      } else {
423        mean = calculateNewMeanNonFinite(mean, value);
424      }
425    }
426    return mean;
427  }
428
429  /**
430   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
431   * values. The count must be non-zero.
432   *
433   * <p>The definition of the mean is the same as {@link Stats#mean}.
434   *
435   * @param values a series of values
436   * @throws IllegalArgumentException if the dataset is empty
437   */
438  public static double meanOf(double... values) {
439    checkArgument(values.length > 0);
440    double mean = values[0];
441    for (int index = 1; index < values.length; index++) {
442      double value = values[index];
443      if (isFinite(value) && isFinite(mean)) {
444        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
445        mean += (value - mean) / (index + 1);
446      } else {
447        mean = calculateNewMeanNonFinite(mean, value);
448      }
449    }
450    return mean;
451  }
452
453  /**
454   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
455   * values. The count must be non-zero.
456   *
457   * <p>The definition of the mean is the same as {@link Stats#mean}.
458   *
459   * @param values a series of values
460   * @throws IllegalArgumentException if the dataset is empty
461   */
462  public static double meanOf(int... values) {
463    checkArgument(values.length > 0);
464    double mean = values[0];
465    for (int index = 1; index < values.length; index++) {
466      double value = values[index];
467      if (isFinite(value) && isFinite(mean)) {
468        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
469        mean += (value - mean) / (index + 1);
470      } else {
471        mean = calculateNewMeanNonFinite(mean, value);
472      }
473    }
474    return mean;
475  }
476
477  /**
478   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
479   * values. The count must be non-zero.
480   *
481   * <p>The definition of the mean is the same as {@link Stats#mean}.
482   *
483   * @param values a series of values, which will be converted to {@code double} values (this may
484   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
485   * @throws IllegalArgumentException if the dataset is empty
486   */
487  public static double meanOf(long... values) {
488    checkArgument(values.length > 0);
489    double mean = values[0];
490    for (int index = 1; index < values.length; index++) {
491      double value = values[index];
492      if (isFinite(value) && isFinite(mean)) {
493        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
494        mean += (value - mean) / (index + 1);
495      } else {
496        mean = calculateNewMeanNonFinite(mean, value);
497      }
498    }
499    return mean;
500  }
501
502  // Serialization helpers
503
504  /**
505   * The size of byte array representation in bytes.
506   */
507  static final int BYTES = (Long.SIZE + Double.SIZE * 4) / Byte.SIZE;
508
509  /**
510   * Gets a byte array representation of this instance.
511   *
512   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
513   * versions.
514   */
515  public byte[] toByteArray() {
516    ByteBuffer buff = ByteBuffer.allocate(BYTES).order(ByteOrder.LITTLE_ENDIAN);
517    writeTo(buff);
518    return buff.array();
519  }
520
521  /**
522   * Writes to the given {@link ByteBuffer} a byte representation of this instance.
523   *
524   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
525   * versions.
526   *
527   * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as
528   *     {@link ByteOrder#LITTLE_ENDIAN}, to which a BYTES-long byte representation of this instance
529   *     is written. In the process increases the position of {@link ByteBuffer} by BYTES.
530   */
531  void writeTo(ByteBuffer buffer) {
532    checkNotNull(buffer);
533    checkArgument(
534        buffer.remaining() >= BYTES,
535        "Expected at least Stats.BYTES = %s remaining , got %s",
536        BYTES,
537        buffer.remaining());
538    buffer
539        .putLong(count)
540        .putDouble(mean)
541        .putDouble(sumOfSquaresOfDeltas)
542        .putDouble(min)
543        .putDouble(max);
544  }
545
546  /**
547   * Creates a Stats instance from the given byte representation which was obtained by
548   * {@link #toByteArray}.
549   *
550   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
551   * versions.
552   */
553  public static Stats fromByteArray(byte[] byteArray) {
554    checkNotNull(byteArray);
555    checkArgument(
556        byteArray.length == BYTES,
557        "Expected Stats.BYTES = %s remaining , got %s",
558        BYTES,
559        byteArray.length);
560    return readFrom(ByteBuffer.wrap(byteArray).order(ByteOrder.LITTLE_ENDIAN));
561  }
562
563  /**
564   * Creates a Stats instance from the byte representation read from the given {@link ByteBuffer}.
565   *
566   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
567   * versions.
568   *
569   * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as
570   *     {@link ByteOrder#LITTLE_ENDIAN}, from which a BYTES-long byte representation of this
571   *     instance is read. In the process increases the position of {@link ByteBuffer} by BYTES.
572   */
573  static Stats readFrom(ByteBuffer buffer) {
574    checkNotNull(buffer);
575    checkArgument(
576        buffer.remaining() >= BYTES,
577        "Expected at least Stats.BYTES = %s remaining , got %s",
578        BYTES,
579        buffer.remaining());
580    return new Stats(
581        buffer.getLong(),
582        buffer.getDouble(),
583        buffer.getDouble(),
584        buffer.getDouble(),
585        buffer.getDouble());
586  }
587
588  private static final long serialVersionUID = 0;
589}