Ion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
discrepancy.h
Go to the documentation of this file.
1 
18 
42 
43 #ifndef ION_ANALYTICS_DISCREPANCY_H_
44 #define ION_ANALYTICS_DISCREPANCY_H_
45 
46 #include <algorithm>
47 #include <vector>
48 
49 #include "ion/base/logging.h"
50 
51 namespace ion {
52 namespace analytics {
53 
69  public:
70  SampleMapping(double time_begin, double time_end, size_t num_samples) {
71  CHECK_LT(1, num_samples);
72  CHECK_GT(time_end, time_begin);
73  time_begin_ = time_begin;
74  normalized_begin_ = 0.5 / static_cast<double>(num_samples);
75  const double normalized_end = (static_cast<double>(num_samples) - 0.5) /
76  static_cast<double>(num_samples);
77  scale_ = (normalized_end - normalized_begin_) / (time_end - time_begin_);
78  inv_scale_ = 1.0 / scale_;
79  }
80 
82  double NormalizedFromTime(double time_sample) const {
83  return normalized_begin_ + scale_ * (time_sample - time_begin_);
84  }
85 
87  double TimeFromNormalized(double normalized_sample) const {
88  return time_begin_ + inv_scale_ * (normalized_sample - normalized_begin_);
89  }
90 
92  double DurationFromLength(double length) const { return length * inv_scale_; }
93 
94  private:
96  double time_begin_;
98  double normalized_begin_;
100  double scale_;
102  double inv_scale_;
103 };
104 
107 template <class InputIterator>
108 std::vector<double> NormalizeSamples(InputIterator first, InputIterator last,
109  const SampleMapping& sample_mapping) {
110  size_t num_samples = last - first;
111  std::vector<double> result(num_samples);
112  std::copy(first, last, result.begin());
113  std::sort(result.begin(), result.end());
114  for (auto& iter : result) iter = sample_mapping.NormalizedFromTime(iter);
115  return result;
116 }
117 
119 template <class ContainerType>
120 std::vector<double> NormalizeSamples(const ContainerType& samples,
121  const SampleMapping& sample_mapping) {
122  return NormalizeSamples(samples.begin(), samples.end(), sample_mapping);
123 }
124 
131  : discrepancy(0.0), begin(0.0), end(0.0), num_samples(0) {}
133  IntervalDiscrepancy(double discrepancy, double begin, double end,
134  size_t num_samples)
135  : discrepancy(discrepancy),
136  begin(begin),
137  end(end),
138  num_samples(num_samples) {}
140  double discrepancy;
142  double begin;
144  double end;
148  size_t num_samples;
149 };
150 
161 template <class InputIterator>
162 IntervalDiscrepancy Discrepancy(InputIterator first, InputIterator last) {
163  IntervalDiscrepancy largest_interval_discrepancy;
164  const size_t num_samples = last - first;
165  if (num_samples == 0) return largest_interval_discrepancy;
166 
167  const double inv_sample_count = 1.0 / static_cast<double>(num_samples);
168  std::vector<double> locations;
170  std::vector<size_t> count_less;
173  std::vector<size_t> count_less_equal;
174 
176  if (*first > 0.0) {
177  locations.push_back(0.0);
178  count_less.push_back(0);
179  count_less_equal.push_back(0);
180  }
181  size_t i = 0;
182  for (auto iter = first; iter != last; ++iter) {
183  locations.push_back(*iter);
184  count_less.push_back(i);
185  count_less_equal.push_back(i + 1);
186  ++i;
187  }
188  if (*(last - 1) < 1.0) {
189  locations.push_back(1.0);
190  count_less.push_back(num_samples);
191  count_less_equal.push_back(num_samples);
192  }
193 
196 
200  double interval_discrepancy = 0.0;
203  size_t interval_begin = 0;
204  size_t interval_end = 0;
205  for (size_t i = 1; i < locations.size(); ++i) {
207  const double length = locations[i] - locations[i - 1];
208 
210  const size_t count_open_increment = count_less[i] - count_less[i - 1];
212  const double extended_interval_discrepancy =
213  interval_discrepancy +
214  (length - static_cast<double>(count_open_increment) * inv_sample_count);
215 
218  const size_t new_count_open = count_less[i] - count_less_equal[i - 1];
220  const double new_interval_discrepancy =
221  length - static_cast<double>(new_count_open) * inv_sample_count;
222 
224  if (extended_interval_discrepancy >= new_interval_discrepancy) {
226  interval_discrepancy = extended_interval_discrepancy;
227  interval_end = i;
228  } else {
230  interval_discrepancy = new_interval_discrepancy;
231  interval_begin = i - 1;
232  interval_end = i;
233  }
234 
236  if (interval_discrepancy > largest_interval_discrepancy.discrepancy) {
237  largest_interval_discrepancy = IntervalDiscrepancy(
238  interval_discrepancy, locations[interval_begin],
239  locations[interval_end],
240  count_less[interval_end] - count_less_equal[interval_begin]);
241  }
242  }
243 
244  return largest_interval_discrepancy;
245 }
246 
248 template <class ContainerType>
249 IntervalDiscrepancy Discrepancy(const ContainerType& samples) {
250  return Discrepancy(samples.begin(), samples.end());
251 }
252 
265 template <class RandomIt>
267  RandomIt last) {
268  const size_t num_samples = last - first;
269  if (num_samples == 0) return IntervalDiscrepancy();
270  if (num_samples == 1) return IntervalDiscrepancy(0.5, *first, *first, 0);
271 
272  SampleMapping sample_mapping(*std::min_element(first, last),
273  *std::max_element(first, last), num_samples);
274  std::vector<double> normalized_timestamps =
275  NormalizeSamples(first, last, sample_mapping);
276  IntervalDiscrepancy largest_interval_discrepancy =
277  Discrepancy(normalized_timestamps.begin(), normalized_timestamps.end());
278 
280  largest_interval_discrepancy.discrepancy = sample_mapping.DurationFromLength(
281  largest_interval_discrepancy.discrepancy);
282  largest_interval_discrepancy.begin =
283  sample_mapping.TimeFromNormalized(largest_interval_discrepancy.begin);
284  largest_interval_discrepancy.end =
285  sample_mapping.TimeFromNormalized(largest_interval_discrepancy.end);
286 
287  return largest_interval_discrepancy;
288 }
289 
291 template <class ContainerType>
293  const ContainerType& timestamps) {
294  return AbsoluteTimestampDiscrepancy(timestamps.begin(), timestamps.end());
295 }
296 
297 } // namespace analytics
298 } // namespace ion
299 
300 #endif // ION_ANALYTICS_DISCREPANCY_H_
Helper class for transforming samples between the time domain and the (unitless) normalized domain us...
Definition: discrepancy.h:68
SampleMapping(double time_begin, double time_end, size_t num_samples)
Definition: discrepancy.h:70
#define CHECK_GT(val1, val2)
Definition: logging.h:329
IntervalDiscrepancy()
Default constructor, initializing all values to zero.
Definition: discrepancy.h:130
IntervalDiscrepancy AbsoluteTimestampDiscrepancy(RandomIt first, RandomIt last)
A discrepancy-based metric for measuring the irregularity of timestamps.
Definition: discrepancy.h:266
uint32 length
#define CHECK_LT(val1, val2)
Definition: logging.h:327
double discrepancy
The discrepancy of the samples in the interval.
Definition: discrepancy.h:140
double DurationFromLength(double length) const
Maps a duration back from normalized (unitless) domain to time domain.
Definition: discrepancy.h:92
std::vector< double > NormalizeSamples(InputIterator first, InputIterator last, const SampleMapping &sample_mapping)
Sorts a sequence of numbers and normalizes it to the range [0, 1] using the given sample_mapping...
Definition: discrepancy.h:108
Copyright 2016 Google Inc.
double NormalizedFromTime(double time_sample) const
Maps a sample from time domain to normalized (unitless) domain.
Definition: discrepancy.h:82
size_t num_samples
The number of samples in the interval from begin to end.
Definition: discrepancy.h:148
IntervalDiscrepancy Discrepancy(InputIterator first, InputIterator last)
Computes the discrepancy of a sequence of numbers in the range [0,1].
Definition: discrepancy.h:162
double end
The end of the interval where the value was measured.
Definition: discrepancy.h:144
IntervalDiscrepancy(double discrepancy, double begin, double end, size_t num_samples)
Constructor.
Definition: discrepancy.h:133
Result of a discrepancy computation, including the value measured and the bounds of the interval wher...
Definition: discrepancy.h:128
double TimeFromNormalized(double normalized_sample) const
Maps a sample back from normalized (unitless) domain to time domain.
Definition: discrepancy.h:87
double begin
The beginning of the interval where the value was measured.
Definition: discrepancy.h:142